In [37]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt

In [38]:
# Mount Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


<h1>DATA PREPARATION</h1>

In [39]:
data = pd.read_csv('/content/drive/My Drive/Colab Notebooks/blackjack_simulator.csv', nrows=100000)
data.head(10)

Unnamed: 0,shoe_id,cards_remaining,dealer_up,initial_hand,dealer_final,dealer_final_value,player_final,player_final_value,actions_taken,run_count,true_count,win
0,0,416,10,"[10, 11]","[10, 4, 10]",24,"[[10, 11]]",['BJ'],[['S']],1,0,1.5
1,0,411,10,"[5, 5]","[10, 8]",18,"[[5, 5, 11]]",[21],"[['H', 'S']]",-2,0,1.0
2,0,406,6,"[3, 10]","[6, 6, 10]",22,"[[3, 10]]",[13],[['S']],-2,0,1.0
3,0,401,10,"[5, 9]","[10, 8]",18,"[[5, 9, 11, 3]]",[18],"[['H', 'H', 'S']]",-1,0,0.0
4,0,395,8,"[6, 10]","[8, 2, 10]",20,"[[6, 10, 10]]",[26],[['H']],-1,0,-1.0
5,0,389,7,"[3, 4]","[7, 2, 10]",19,"[[3, 4, 11]]",[18],"[['H', 'S']]",-2,0,-1.0
6,0,383,6,"[3, 3]","[6, 10, 10]",26,"[[3, 2, 10], [3, 4, 5]]","[15, 12]","[['P', 'H', 'S'], ['H', 'S']]",-1,0,2.0
7,0,374,3,"[10, 8]","[3, 10, 7]",20,"[[10, 8]]",[18],[['S']],2,0,-1.0
8,0,369,5,"[8, 10]","[5, 10, 9]",24,"[[8, 10]]",[18],[['S']],1,0,1.0
9,0,364,9,"[6, 8]","[9, 2, 5, 11]",17,"[[6, 8, 6]]",[20],"[['H', 'S']]",0,0,1.0


In [40]:
# Drop columns not needed for our model
data.drop(columns=["shoe_id", "cards_remaining", "dealer_up", "initial_hand", "run_count", "true_count"], inplace=True)

In [41]:
# Convert strings to lists
data['dealer_final'] = data['dealer_final'].apply(lambda x: ast.literal_eval(x))
data['player_final'] = data['player_final'].apply(lambda x: ast.literal_eval(x))
data['player_final_value'] = data['player_final_value'].apply(lambda x: ast.literal_eval(x))
data['actions_taken'] = data['actions_taken'].apply(lambda x: ast.literal_eval(x))

In [42]:
# Remove hands with splitting (computer-vision is not supporting this feature yet)
data = data[data['player_final'].apply(lambda x: len(x) != 2)]
data.head(10)

Unnamed: 0,dealer_final,dealer_final_value,player_final,player_final_value,actions_taken,win
0,"[10, 4, 10]",24,"[[10, 11]]",[BJ],[[S]],1.5
1,"[10, 8]",18,"[[5, 5, 11]]",[21],"[[H, S]]",1.0
2,"[6, 6, 10]",22,"[[3, 10]]",[13],[[S]],1.0
3,"[10, 8]",18,"[[5, 9, 11, 3]]",[18],"[[H, H, S]]",0.0
4,"[8, 2, 10]",20,"[[6, 10, 10]]",[26],[[H]],-1.0
5,"[7, 2, 10]",19,"[[3, 4, 11]]",[18],"[[H, S]]",-1.0
7,"[3, 10, 7]",20,"[[10, 8]]",[18],[[S]],-1.0
8,"[5, 10, 9]",24,"[[8, 10]]",[18],[[S]],1.0
9,"[9, 2, 5, 11]",17,"[[6, 8, 6]]",[20],"[[H, S]]",1.0
10,"[10, 10]",20,"[[11, 10]]",[BJ],[[S]],1.5


In [43]:
# Extract single element from list
data['player_final'] = data['player_final'].apply(lambda x: x[0])
data['actions_taken'] = data['actions_taken'].apply(lambda x: x[0])
data['player_final_value'] = data['player_final_value'].apply(lambda x: x[0])
data.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['player_final'] = data['player_final'].apply(lambda x: x[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['actions_taken'] = data['actions_taken'].apply(lambda x: x[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['player_final_value'] = data['player_final_value'].apply(lambda x:

Unnamed: 0,dealer_final,dealer_final_value,player_final,player_final_value,actions_taken,win
0,"[10, 4, 10]",24,"[10, 11]",BJ,[S],1.5
1,"[10, 8]",18,"[5, 5, 11]",21,"[H, S]",1.0
2,"[6, 6, 10]",22,"[3, 10]",13,[S],1.0
3,"[10, 8]",18,"[5, 9, 11, 3]",18,"[H, H, S]",0.0
4,"[8, 2, 10]",20,"[6, 10, 10]",26,[H],-1.0
5,"[7, 2, 10]",19,"[3, 4, 11]",18,"[H, S]",-1.0
7,"[3, 10, 7]",20,"[10, 8]",18,[S],-1.0
8,"[5, 10, 9]",24,"[8, 10]",18,[S],1.0
9,"[9, 2, 5, 11]",17,"[6, 8, 6]",20,"[H, S]",1.0
10,"[10, 10]",20,"[11, 10]",BJ,[S],1.5


In [44]:
# Split columns for future calculations
def cards_splitter(data: pd.DataFrame, cards: pd.Series, column_names: str):
    stopsign = cards.apply(lambda x: len(x))
    for i in range(1,max(stopsign)):
        data[f'{column_names}_{i}'] = cards.apply(lambda x: None if len(x)<i else x[i-1])

In [45]:
cards_splitter(data, data['player_final'], 'player_card')
cards_splitter(data, data['dealer_final'], 'dealer_card')
cards_splitter(data, data['actions_taken'], 'action_taken')

In [46]:
data.head(10)

Unnamed: 0,dealer_final,dealer_final_value,player_final,player_final_value,actions_taken,win,player_card_1,player_card_2,player_card_3,player_card_4,...,dealer_card_4,dealer_card_5,dealer_card_6,dealer_card_7,action_taken_1,action_taken_2,action_taken_3,action_taken_4,action_taken_5,action_taken_6
0,"[10, 4, 10]",24,"[10, 11]",BJ,[S],1.5,10,11,,,...,,,,,S,,,,,
1,"[10, 8]",18,"[5, 5, 11]",21,"[H, S]",1.0,5,5,11.0,,...,,,,,H,S,,,,
2,"[6, 6, 10]",22,"[3, 10]",13,[S],1.0,3,10,,,...,,,,,S,,,,,
3,"[10, 8]",18,"[5, 9, 11, 3]",18,"[H, H, S]",0.0,5,9,11.0,3.0,...,,,,,H,H,S,,,
4,"[8, 2, 10]",20,"[6, 10, 10]",26,[H],-1.0,6,10,10.0,,...,,,,,H,,,,,
5,"[7, 2, 10]",19,"[3, 4, 11]",18,"[H, S]",-1.0,3,4,11.0,,...,,,,,H,S,,,,
7,"[3, 10, 7]",20,"[10, 8]",18,[S],-1.0,10,8,,,...,,,,,S,,,,,
8,"[5, 10, 9]",24,"[8, 10]",18,[S],1.0,8,10,,,...,,,,,S,,,,,
9,"[9, 2, 5, 11]",17,"[6, 8, 6]",20,"[H, S]",1.0,6,8,6.0,,...,11.0,,,,H,S,,,,
10,"[10, 10]",20,"[11, 10]",BJ,[S],1.5,11,10,,,...,,,,,S,,,,,


In [47]:
# Drop dealer_final, player_final, and actions_taken once splitted
data.drop(columns=["dealer_final", "player_final", "actions_taken"], inplace=True)

In [49]:
# Replace "BJ" from 21 in dealer and player final value
data['dealer_final_value'] = data['dealer_final_value'].replace('BJ',21)
data['player_final_value'] = data['player_final_value'].replace('BJ',21)

In [50]:
data.head(10)

Unnamed: 0,dealer_final_value,player_final_value,win,player_card_1,player_card_2,player_card_3,player_card_4,player_card_5,player_card_6,player_card_7,...,dealer_card_4,dealer_card_5,dealer_card_6,dealer_card_7,action_taken_1,action_taken_2,action_taken_3,action_taken_4,action_taken_5,action_taken_6
0,24,21,1.5,10,11,,,,,,...,,,,,S,,,,,
1,18,21,1.0,5,5,11.0,,,,,...,,,,,H,S,,,,
2,22,13,1.0,3,10,,,,,,...,,,,,S,,,,,
3,18,18,0.0,5,9,11.0,3.0,,,,...,,,,,H,H,S,,,
4,20,26,-1.0,6,10,10.0,,,,,...,,,,,H,,,,,
5,19,18,-1.0,3,4,11.0,,,,,...,,,,,H,S,,,,
7,20,18,-1.0,10,8,,,,,,...,,,,,S,,,,,
8,24,18,1.0,8,10,,,,,,...,,,,,S,,,,,
9,17,20,1.0,6,8,6.0,,,,,...,11.0,,,,H,S,,,,
10,20,21,1.5,11,10,,,,,,...,,,,,S,,,,,


In [51]:
# Convert values to integers
data['dealer_final_value'] = data['dealer_final_value'].astype('int64')
data['player_final_value'] = data['player_final_value'].astype('int64')

In [52]:
# Normalize win column to be either 1 (player wins) or 0 (push, tie between player and dealer, or player looses)
data.loc[(data['dealer_final_value'] < data['player_final_value']) & (data['player_final_value'] <= 21) , 'win'] = 1 # p wins
data.loc[data['dealer_final_value'] >  21 , 'win'] = 1 # p wins
data.loc[data['dealer_final_value'] == data['player_final_value'], 'win'] = 0 # tie
data.loc[(data['dealer_final_value'] > data['player_final_value']) & (data['dealer_final_value'] <= 21) , 'win'] = 0 # p loses
data.loc[data['player_final_value'] > 21 , 'win'] = 0 # p loses
data['win'] = data['win'].astype('int64')

In [53]:
data['win']

0        1
1        1
2        1
3        0
4        0
        ..
99995    1
99996    1
99997    1
99998    1
99999    1
Name: win, Length: 97789, dtype: int64

<h1>MODELING: XGBoost</h1>

In [55]:
data.head(10)

Unnamed: 0,dealer_final_value,player_final_value,win,player_card_1,player_card_2,player_card_3,player_card_4,player_card_5,player_card_6,player_card_7,...,dealer_card_4,dealer_card_5,dealer_card_6,dealer_card_7,action_taken_1,action_taken_2,action_taken_3,action_taken_4,action_taken_5,action_taken_6
0,24,21,1,10,11,,,,,,...,,,,,S,,,,,
1,18,21,1,5,5,11.0,,,,,...,,,,,H,S,,,,
2,22,13,1,3,10,,,,,,...,,,,,S,,,,,
3,18,18,0,5,9,11.0,3.0,,,,...,,,,,H,H,S,,,
4,20,26,0,6,10,10.0,,,,,...,,,,,H,,,,,
5,19,18,0,3,4,11.0,,,,,...,,,,,H,S,,,,
7,20,18,0,10,8,,,,,,...,,,,,S,,,,,
8,24,18,1,8,10,,,,,,...,,,,,S,,,,,
9,17,20,1,6,8,6.0,,,,,...,11.0,,,,H,S,,,,
10,20,21,1,11,10,,,,,,...,,,,,S,,,,,


In [56]:
# Drop redundant columns (we already have the information contained in every card)
cleaned_data = data.drop(columns=['dealer_final_value', 'player_final_value'])

In [57]:
cleaned_data.head(10)

Unnamed: 0,win,player_card_1,player_card_2,player_card_3,player_card_4,player_card_5,player_card_6,player_card_7,dealer_card_1,dealer_card_2,...,dealer_card_4,dealer_card_5,dealer_card_6,dealer_card_7,action_taken_1,action_taken_2,action_taken_3,action_taken_4,action_taken_5,action_taken_6
0,1,10,11,,,,,,10,4,...,,,,,S,,,,,
1,1,5,5,11.0,,,,,10,8,...,,,,,H,S,,,,
2,1,3,10,,,,,,6,6,...,,,,,S,,,,,
3,0,5,9,11.0,3.0,,,,10,8,...,,,,,H,H,S,,,
4,0,6,10,10.0,,,,,8,2,...,,,,,H,,,,,
5,0,3,4,11.0,,,,,7,2,...,,,,,H,S,,,,
7,0,10,8,,,,,,3,10,...,,,,,S,,,,,
8,1,8,10,,,,,,5,10,...,,,,,S,,,,,
9,1,6,8,6.0,,,,,9,2,...,11.0,,,,H,S,,,,
10,1,11,10,,,,,,10,10,...,,,,,S,,,,,


In [60]:
# Check for null values
cleaned_data.isnull().sum().sort_values(ascending=False)

player_card_7     97764
action_taken_6    97757
dealer_card_7     97744
action_taken_5    97561
player_card_6     97493
dealer_card_6     97331
action_taken_4    95999
player_card_5     95503
dealer_card_5     93701
action_taken_3    88607
player_card_4     85649
dealer_card_4     76273
action_taken_2    67364
player_card_3     53475
dealer_card_3     33167
action_taken_1     2336
player_card_1         0
dealer_card_2         0
dealer_card_1         0
player_card_2         0
win                   0
dtype: int64

In [61]:
# Delete rows with null values for column action_taken_1 (player always has to take action!)
cleaned_data = cleaned_data.dropna(subset=['action_taken_1'])

In [62]:
# One-hot-encoding action columns
from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(sparse = False)
ohe.fit(cleaned_data[['action_taken_1', 'action_taken_2', 'action_taken_3', 'action_taken_4', 'action_taken_5', 'action_taken_6']])

# Display the detected categories
print(f"The categories detected by the OneHotEncoder are {ohe.categories_}")

The categories detected by the OneHotEncoder are [array(['D', 'H', 'N', 'P', 'R', 'S'], dtype=object), array(['D', 'H', 'P', 'R', 'S', None], dtype=object), array(['D', 'H', 'P', 'S', None], dtype=object), array(['D', 'H', 'S', None], dtype=object), array(['H', 'S', None], dtype=object), array(['H', 'S', None], dtype=object)]




In [63]:
# Transform the action columns
cleaned_data[ohe.get_feature_names_out()] = ohe.transform(cleaned_data[['action_taken_1', 'action_taken_2', 'action_taken_3', 'action_taken_4', 'action_taken_5', 'action_taken_6']])
# Drop action columns once they have been encoded
cleaned_data.drop(columns = ['action_taken_1', 'action_taken_2', 'action_taken_3', 'action_taken_4', 'action_taken_5', 'action_taken_6'], inplace = True)

In [64]:
cleaned_data.head(10)

Unnamed: 0,win,player_card_1,player_card_2,player_card_3,player_card_4,player_card_5,player_card_6,player_card_7,dealer_card_1,dealer_card_2,...,action_taken_4_D,action_taken_4_H,action_taken_4_S,action_taken_4_None,action_taken_5_H,action_taken_5_S,action_taken_5_None,action_taken_6_H,action_taken_6_S,action_taken_6_None
0,1,10,11,,,,,,10,4,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
1,1,5,5,11.0,,,,,10,8,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
2,1,3,10,,,,,,6,6,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
3,0,5,9,11.0,3.0,,,,10,8,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
4,0,6,10,10.0,,,,,8,2,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
5,0,3,4,11.0,,,,,7,2,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
7,0,10,8,,,,,,3,10,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
8,1,8,10,,,,,,5,10,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
9,1,6,8,6.0,,,,,9,2,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
10,1,11,10,,,,,,10,10,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0


In [66]:
# Check correlation between features and target
corr = cleaned_data.corr()
corr.style.background_gradient(cmap='coolwarm')

Unnamed: 0,win,player_card_1,player_card_2,player_card_3,player_card_4,player_card_5,player_card_6,player_card_7,dealer_card_1,dealer_card_2,dealer_card_3,dealer_card_4,dealer_card_5,dealer_card_6,dealer_card_7,action_taken_1_D,action_taken_1_H,action_taken_1_N,action_taken_1_P,action_taken_1_R,action_taken_1_S,action_taken_2_D,action_taken_2_H,action_taken_2_P,action_taken_2_R,action_taken_2_S,action_taken_2_None,action_taken_3_D,action_taken_3_H,action_taken_3_P,action_taken_3_S,action_taken_3_None,action_taken_4_D,action_taken_4_H,action_taken_4_S,action_taken_4_None,action_taken_5_H,action_taken_5_S,action_taken_5_None,action_taken_6_H,action_taken_6_S,action_taken_6_None
win,1.0,0.134811,0.125394,-0.025473,-0.190801,-0.31145,-0.471547,-0.508658,-0.138259,-0.077794,0.126664,0.283322,0.401444,0.451003,0.291615,0.076302,-0.15099,-0.109076,-9.4e-05,-0.086403,0.191188,0.005536,-0.118681,-0.00155,-0.053422,0.081527,0.036367,0.005737,-0.071343,0.001115,0.055676,-0.002733,0.003655,-0.031128,0.019545,-0.000658,-0.007051,0.00713,-0.002502,-0.003493,0.006442,-0.004537
player_card_1,0.134811,1.0,-0.031983,-0.001701,0.016408,-0.025398,-0.011591,0.283124,-0.01396,-0.001423,-0.001972,-0.010874,-0.023718,0.07716,0.182648,-0.177475,-0.358828,0.000704,-0.02495,0.034254,0.437121,-0.035416,-0.27592,-0.010709,0.02328,-0.164867,0.336752,-0.006233,-0.146635,-0.001778,-0.187086,0.243293,-0.004792,-0.06985,-0.092517,0.115841,-0.02996,-0.038159,0.048149,-0.012204,-0.022551,0.02554
player_card_2,0.125394,-0.031983,1.0,-0.00521,-0.018308,-0.022063,-0.089975,-0.054495,-0.012907,-0.002107,-0.003513,0.000544,-0.005378,-0.032729,-0.045405,-0.187335,-0.362783,-0.001653,0.002154,0.029934,0.447073,-0.029499,-0.27483,-0.000207,0.02436,-0.16826,0.336905,-0.010518,-0.144368,-0.002553,-0.17832,0.234912,-0.000348,-0.058109,-0.098365,0.114643,-0.023478,-0.032195,0.039699,-0.007727,-0.01313,0.015116
player_card_3,-0.025473,-0.001701,-0.00521,1.0,0.000681,-0.004906,0.020846,-0.121862,-0.002074,-0.005889,0.006372,-0.010036,-0.044756,0.075679,-0.03222,0.003518,-0.005174,0.003093,0.002484,,,-0.002763,-0.228026,0.001921,,-0.027148,0.243915,-0.000793,-0.136437,0.005446,-0.166272,0.2269,0.004349,-0.06678,-0.088139,0.110792,-0.026802,-0.039321,0.047552,-0.003368,-0.022482,0.021983
player_card_4,-0.190801,0.016408,-0.018308,0.000681,1.0,-0.009109,0.006299,0.133606,0.00722,0.003316,0.009886,0.035306,-0.065684,-0.073321,0.679366,,-5e-06,-0.000453,0.003352,,,,-0.003631,0.003631,,,,,-0.164239,,-0.301424,0.466341,,-0.071195,-0.157857,0.177172,-0.048194,-0.027867,0.048948,0.003083,-0.038832,0.034457
player_card_5,-0.31145,-0.025398,-0.022063,-0.004906,-0.009109,1.0,0.072519,-0.308303,-0.027703,-0.01422,-0.063658,0.053971,0.224084,0.356887,1.0,,0.015846,-0.01175,-0.029128,,,,0.038146,-0.038146,,,,,,,,,,-0.100177,-0.483387,0.553896,-0.079892,-0.157471,0.179621,-0.042891,-0.057869,0.07027
player_card_6,-0.471547,-0.011591,-0.089975,0.020846,0.006299,0.072519,1.0,0.0055,0.036169,0.073433,-0.022704,0.027233,0.32062,,,,-0.0332,0.0332,,,,,,,,,,,,,,,,,,,-0.162946,-0.495356,0.589357,0.086162,-0.276234,0.217733
player_card_7,-0.508658,0.283124,-0.054495,-0.121862,0.133606,-0.308303,0.0055,1.0,0.208054,0.425235,0.069197,-0.377219,,,,,-0.149505,0.149505,,,,,,,,,,,,,,,,,,,,,,-0.03469,-0.552317,0.580641
dealer_card_1,-0.138259,-0.01396,-0.012907,-0.002074,0.00722,-0.027703,0.036169,0.208054,1.0,-0.028218,0.000133,0.009374,-0.010588,0.042317,0.006275,-0.155783,0.192819,0.377105,-0.037437,0.190796,-0.367021,0.058777,0.251538,-0.023596,0.135894,0.074004,-0.282766,-0.01139,0.148155,-0.00119,0.112658,-0.182154,-0.002498,0.062691,0.0894,-0.109267,0.026138,0.033121,-0.04186,0.008286,0.016017,-0.017989
dealer_card_2,-0.077794,-0.001423,-0.002107,-0.005889,0.003316,-0.01422,0.073433,0.425235,-0.028218,1.0,-0.004836,-0.004523,0.013104,-0.033407,-0.154472,0.007401,-0.009028,0.00984,-0.001703,-0.010188,0.003004,-0.016183,-0.039147,-0.0009,-0.041361,-0.014056,0.051702,0.00221,-0.025532,0.001272,-0.020829,0.032511,0.003105,-0.012637,-0.015239,0.019576,-0.013768,-0.006512,0.012661,-0.005057,-0.012483,0.013466


In [67]:
# Check if dataset is balanced (looks like it is after adding push and lose under the same 0 category)
cleaned_data['win'].value_counts()

0    53498
1    41955
Name: win, dtype: int64

In [68]:
# Import libraries to implement XGBoost
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBClassifier

from sklearn.model_selection import train_test_split
from sklearn import metrics

from scipy.stats import randint, uniform

In [69]:
# Select features (X) and target (y)
X = cleaned_data.drop('win',axis=1)
y = cleaned_data['win']

In [74]:
# Split dataset between training and testing
x_train,x_test,y_train,y_test= train_test_split(X, y, test_size=0.2)

In [72]:
# Defined range of hyperparameters to random grid search
gbm_param_grid = {
    'n_estimators': [100,150],
    'max_depth': randint(2, 12),
    'colsample_bytree' : uniform(0.05, 0.5),
    'learning_rate':np.linspace(0.01,2,100),
    'alpha' : np.linspace(0.01,100,100),
    'min_child_weight' : randint(1,20),
    'grow_policy':['depthwise', 'lossguide'],
    'refresh_leaf':[0,1],
    'scale_pos_weight':randint(1,100),
    'lambda':np.linspace(0.01,2,100),
    'tree_method':["gpu_hist"],
    'objective':['binary:logistic'],
    'colsample_bylevel':np.linspace(0.01,1,100),
    }

In [75]:
# Initialize the model
xgb = XGBClassifier()

# Define gridsearch
random_search = RandomizedSearchCV(estimator=xgb, param_distributions=gbm_param_grid, cv=5, n_iter=100, n_jobs=-1, scoring="balanced_accuracy")

# Fit the model
random_search.fit(x_train, y_train)

# Get the best parameters and best model
best_params = random_search.best_params_
best_model = random_search.best_estimator_

In [76]:
# Check for perfomance on test set
y_pred = random_search.predict(x_test)
print(metrics.classification_report(y_test, y_pred, digits = 3))

              precision    recall  f1-score   support

           0      0.964     0.815     0.883     10744
           1      0.802     0.961     0.874      8347

    accuracy                          0.879     19091
   macro avg      0.883     0.888     0.879     19091
weighted avg      0.893     0.879     0.879     19091



In [77]:
# Check confusion matrix on test set
pd.DataFrame({'True':y_test, 'Predicted': y_pred}).groupby(['True','Predicted']).size()

True  Predicted
0     0            8760
      1            1984
1     0             327
      1            8020
dtype: int64

In [97]:
# Sample prediction
sample_test_data = pd.DataFrame({
    'player_card_1': [10],  # Player's first card
    'player_card_2': [11],  # Player's second card
    'player_card_3': [0],  # Player's third card
    'player_card_4': [0],  # Player's fourth card
    'player_card_5': [0],  # Player's fifth card
    'player_card_6': [0],  # Player's sixth card
    'player_card_7': [0],  # Player's seventh card
    'dealer_card_1': [2],  # Dealer's first card
    'dealer_card_2': [0],  # Dealer's second card
    'dealer_card_3': [0],  # Dealer's third cad
    'dealer_card_4': [0],  # Dealer's fourth card
    'dealer_card_5': [0],  # Dealer's fifth card
    'dealer_card_6': [0],  # Dealer's sixth card
    'dealer_card_7': [0],  # Dealer's seventh card
    'action_taken_1_D': [0],  # Action taken on first hand (Dealer)
    'action_taken_1_H': [0],  # Action taken on first hand (Hit)
    'action_taken_1_N': [0],  # Action taken on first hand (Double)
    'action_taken_1_P': [0],  # Action taken on first hand (Split)
    'action_taken_1_R': [0],  # Action taken on first hand (Surrender)
    'action_taken_1_S': [1],  # Action taken on first hand (Stand)
    'action_taken_2_D': [0],  # Action taken on second hand (Dealer)
    'action_taken_2_H': [0],  # Action taken on second hand (Hit)
    'action_taken_2_P': [0],  # Action taken on second hand (Split)
    'action_taken_2_R': [0],  # Action taken on second hand (Surrender)
    'action_taken_2_S': [0],  # Action taken on second hand (Stand)
    'action_taken_2_None': [1],  # Action taken on second hand
    'action_taken_3_D': [0],  # Action taken on third hand (Dealer)
    'action_taken_3_H': [0],  # Action taken on third hand (Hit)
    'action_taken_3_P': [0],  # Action taken on third hand (Split)
    'action_taken_3_S': [0],  # Action taken on third hand (Stand)
    'action_taken_3_None': [1],  # Action taken on third hand
    'action_taken_4_D': [0],  # Action taken on fourth hand (Dealer)
    'action_taken_4_H': [0],  # Action taken on fourth hand (Hit)
    'action_taken_4_S': [0],  # Action taken on fourth hand (Stand)
    'action_taken_4_None': [1],  # Action taken on fourth hand
    'action_taken_5_H': [0],  # Action taken on fifth hand (Hit)
    'action_taken_5_S': [0],  # Action taken on fifth hand (Stand)
    'action_taken_5_None': [1],  # Action taken on fifth hand
    'action_taken_6_H': [0],  # Action taken on sixth hand (Hit)
    'action_taken_6_S': [0],  # Action taken on sixth hand (Stand)
    'action_taken_6_None': [1]  # Action taken on sixth hand
})

# Reorder columns to match the specified order
sample_test_data = sample_test_data[['player_card_1', 'player_card_2', 'player_card_3',
                                     'player_card_4', 'player_card_5', 'player_card_6', 'player_card_7',
                                     'dealer_card_1', 'dealer_card_2', 'dealer_card_3', 'dealer_card_4',
                                     'dealer_card_5', 'dealer_card_6', 'dealer_card_7', 'action_taken_1_D',
                                     'action_taken_1_H', 'action_taken_1_N', 'action_taken_1_P',
                                     'action_taken_1_R', 'action_taken_1_S', 'action_taken_2_D',
                                     'action_taken_2_H', 'action_taken_2_P', 'action_taken_2_R',
                                     'action_taken_2_S', 'action_taken_2_None', 'action_taken_3_D',
                                     'action_taken_3_H', 'action_taken_3_P', 'action_taken_3_S',
                                     'action_taken_3_None', 'action_taken_4_D', 'action_taken_4_H',
                                     'action_taken_4_S', 'action_taken_4_None', 'action_taken_5_H',
                                     'action_taken_5_S', 'action_taken_5_None', 'action_taken_6_H',
                                     'action_taken_6_S', 'action_taken_6_None']]

In [98]:
random_search.predict_proba(sample_test_data)

array([[0.05823392, 0.9417661 ]], dtype=float32)

In [82]:
# Saving the model as .sav
import joblib
joblib.dump(best_model, 'xgb_model_blackjack_complete.sav')

['xgb_model_blackjack_complete.sav']

In [99]:
# Saving the model weights as .json
best_model.save_model("xgb_model_blackjack_complete.json")