# Machine Learning Applications to Simulated Blackjack Data

Scott Atkinson

## [0. Contents](#0.)<a id='0.'></a>

## [1. Import libraries](#1.)

## [2. Obtain summary DataFrame](#2.)

## [3. Modeling](#3.)

## 1. Import libraries<a id='1.'></a>

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPRegressor
import time
import json
%matplotlib inline
plt.rcParams['figure.figsize'] = [15, 7]

## 2. Obtain summary DataFrame<a id='2.'></a>

In [11]:
# losses function without plots

def losses(df):
    df.reset_index(drop=True, inplace=True)
    mins = []
    np.random.seed(11)
    start_indexes = np.random.choice(950000, size=100000, replace=False)
    for i in start_indexes:
        sample50k = np.array(df.loc[i:i + 49999, 'accumulated_winnings'])
        if i != 0:
            mins.append(sample50k.min() - df.loc[i-1,'accumulated_winnings'])
        else:
            mins.append(sample50k.min())
    return mins

Produce dataframe with summary stats for non-counter strategies.

In [None]:
strats = ['naive','cc','basic']
decks = [2,4,6]
pens = [0.5, 1, 1.5, 2]
seventeens = ['S17','H17']
BJpayouts = [[3,2],[6,5]]

product = []
for strat in strats:
    for deck in decks:
        for pen in pens:
            for seventeen in seventeens:
                for BJpay in BJpayouts:
                    if pen == deck:
                        continue
                    else:
                        product.append((strat,deck,pen,BJpay,seventeen))
                    

strat_list = []
deck_list = []
pen_list = []
seventeen_list = []
BJ_pay_list = []
prob_win_list = []
EV_list = []
prob_win_pos_list = []
EV_pos_list = []
fifth_loss_list = []

for params in product:
    strat, deck, pen, BJ_pay, seventeen = params
    strat_list.append(strat)
    deck_list.append(deck)
    pen_list.append(pen)
    seventeen_list.append(seventeen)
    BJ_pay_list.append(BJ_pay)
    read_path = '/media/saa/My Passport/Blackjack/sim_dfs/%s/%s_%s_%s_%s_%s.csv'\
    %(strat,deck,pen,seventeen, BJpay, strat)
    df = pd.read_csv(read_path)
    prob_win_list.append(np.sum(df['total_round_winnings'] >=0)/1000000)
    EV_list.append(np.mean(df['total_round_winnings']))
    prob_win_pos_list.append(np.sum((df['opening_count']>0) & (df['total_round_winnings']>= 0))
                      /np.sum(df['opening_count']>0) )  
    EV_pos_list.append(np.mean(df[df['opening_count']>0]['total_round_winnings']))
    fifth_loss_list.append(np.percentile(losses(df),[0,5])[1])
    


dic = {'strategy':strat_list, 'decks':deck_list,\
       'penetration':pen_list, 'seventeen':seventeen_list,\
       'BJ_payout':BJ_pay_list, 'probability_of_winning':prob_win_list,\
       'expected_value':EV_list, 'prob_of_winning_on_pos_count':prob_win_pos_list,\
       'expected_value_on_pos_count':EV_pos_list, 'fifth_percentile_loss':fifth_loss_list}
out = pd.DataFrame(dic)
out.to_csv('summary_dfs/noncounters.csv')

In [12]:
noncounters = pd.read_csv('summary_dfs/noncounters.csv')

Append count bet and spot columns to eventually align with counters summary DataFrame.

In [14]:
for count in range(-1,7):
    noncounters['%s_count_bet'%(count)] = 1
    noncounters['%s_count_spot'%(count)] = 1

In [17]:
noncounters = noncounters.drop('Unnamed: 0', axis = 1)
noncounters.columns

Index(['strategy', 'decks', 'penetration', 'seventeen', 'BJ_payout',
       'probability_of_winning', 'expected_value',
       'prob_of_winning_on_pos_count', 'expected_value_on_pos_count',
       'fifth_percentile_loss', '-1_count_bet', '-1_count_spot', '0_count_bet',
       '0_count_spot', '1_count_bet', '1_count_spot', '2_count_bet',
       '2_count_spot', '3_count_bet', '3_count_spot', '4_count_bet',
       '4_count_spot', '5_count_bet', '5_count_spot', '6_count_bet',
       '6_count_spot'],
      dtype='object')

In [18]:
cols = ['strategy', 'decks', 'penetration', 'seventeen', 'BJ_payout', '-1_count_bet', '-1_count_spot', '0_count_bet',
       '0_count_spot', '1_count_bet', '1_count_spot', '2_count_bet',
       '2_count_spot', '3_count_bet', '3_count_spot', '4_count_bet',
       '4_count_spot', '5_count_bet', '5_count_spot', '6_count_bet',
       '6_count_spot',
       'probability_of_winning', 'expected_value',
       'prob_of_winning_on_pos_count', 'expected_value_on_pos_count',
       'fifth_percentile_loss']

noncounters = noncounters[cols]

Unnamed: 0,strategy,decks,penetration,seventeen,BJ_payout,-1_count_bet,-1_count_spot,0_count_bet,0_count_spot,1_count_bet,...,4_count_spot,5_count_bet,5_count_spot,6_count_bet,6_count_spot,probability_of_winning,expected_value,prob_of_winning_on_pos_count,expected_value_on_pos_count,fifth_percentile_loss
0,naive,2,0.5,S17,"[3, 2]",1,1,1,1,1,...,1,1,1,1,1,0.479071,-0.093314,0.492684,-0.069098,-4935.8
1,naive,2,0.5,S17,"[6, 5]",1,1,1,1,1,...,1,1,1,1,1,0.479071,-0.093314,0.492684,-0.069098,-4935.8
2,naive,2,0.5,H17,"[3, 2]",1,1,1,1,1,...,1,1,1,1,1,0.479322,-0.092072,0.492735,-0.067641,-4929.8
3,naive,2,0.5,H17,"[6, 5]",1,1,1,1,1,...,1,1,1,1,1,0.479322,-0.092072,0.492735,-0.067641,-4929.8
4,naive,2,1.0,S17,"[3, 2]",1,1,1,1,1,...,1,1,1,1,1,0.479497,-0.091914,0.489151,-0.074486,-4948.8


In [21]:
noncounters.to_csv('summary_dfs/noncounters.csv')

Now for the counters:

In [None]:
# generate product list of parameters (again)

decks = [2,4,6]
pens = [0.5, 1, 1.5, 2]
seventeens = ['S17','H17']
BJpayouts = [[3,2],[6,5]]

product = []
for deck in decks:
    for pen in pens:
        for seventeen in seventeens:
            for BJpays in BJpayouts:
                if pen == deck:
                    continue
                else:
                    product.append((deck,pen,seventeen,BJpays))

Form spreads dictionary

In [None]:
# function to replace the second entry in a list to a 1
def to_one(ls):
    new = [i for i in ls]
    new[1] = 1
    return new

def dict_one(dic):
    return {key:to_one(dic[key]) for key in list(dic.keys())}
    

In [None]:
# spreads dictionary

spreads = {}

spreads['twelve'] = {-1:[1,1], 0:[1,1], 1:[1,2], 2:[4,2], 3:[6,2], 4:[8,2], 5:[10,2], 6:[12,2]}
spreads['twelve_1'] = dict_one(spreads['twelve'])

spreads['twenty_a'] = {-1:[1,1], 0:[1,1], 1:[2,2], 2:[6,2], 3:[10,2], 4:[13,2], 5:[17,2], 6:[20,2]}
spreads['twenty_a_1'] = dict_one(spreads['twenty_a'])

spreads['six'] = {-1:[1,1], 0:[1,1], 1:[1,2], 2:[2,1], 3:[3,2], 4:[4,2], 5:[5,2], 6:[6,2]}
spreads['six_1'] = dict_one(spreads['six'])

spreads['fifty'] = {-1:[1,1], 0:[1,1], 1:[5,1], 2:[10,1], 3:[20,2], 4:[30,2], 5:[40,2], 6:[50,2]}
spreads['fifty_1'] = dict_one(spreads['fifty'])

spreads['eight'] = {-1:[1,1], 0:[1,1], 1:[1,1], 2:[2,1], 3:[3,1], 4:[5,2], 5:[6,2], 6:[8,2]}
spreads['eight_1'] = dict_one(spreads['eight'])

spreads['eighteen'] = {-1:[1,1], 0:[1,1], 1:[3,2], 2:[6,2], 3:[9,2], 4:[12,2], 5:[15,2], 6:[18,2]}
spreads['eighteen_1'] = dict_one(spreads['eighteen'])

spreads['twenty-four'] = {-1:[1,1], 0:[1,1], 1:[4,1], 2:[8,1], 3:[14,1], 4:[16,2], 5:[18,2], 6:[24,2]}
spreads['twenty-four_1'] = dict_one(spreads['twenty-four'])

spreads['twenty_b'] = {-1:[1,1], 0:[1,1], 1:[3,2], 2:[6,2], 3:[9,2], 4:[12,2], 5:[15,2], 6:[20,2]}
spreads['twenty_b_1'] = dict_one(spreads['twenty_b'])

spreads['thirty'] = {-1:[1,1], 0:[1,1], 1:[5,1], 2:[10,1], 3:[15,1], 4:[20,2], 5:[25,2], 6:[30,2]}
spreads['thirty_1'] = dict_one(spreads['thirty'])

spreads['sixteen'] = {-1:[1,1], 0:[1,1], 1:[1,1], 2:[4,1], 3:[7,1], 4:[10,1], 5:[12,2], 6:[16,2]}
spreads['sixteen_1'] = dict_one(spreads['sixteen'])

Produce summary DataFrame from counter samples

In [None]:
strat_list = []
deck_list = []
pen_list = []
seventeen_list = []
BJ_pay_list = []
prob_win_list = []
EV_list = []
prob_win_pos_list = []
EV_pos_list = []
fifth_loss_list = []

strat = 'counter'

minus_one_count_bet_list = []
minus_one_count_spot_list = []

zero_count_bet_list = []
zero_count_spot_list = []

one_count_bet_list = []
one_count_spot_list = []

two_count_bet_list = []
two_count_spot_list = []

three_count_bet_list = []
three_count_spot_list = []

four_count_bet_list = []
four_count_spot_list = []

five_count_bet_list = []
five_count_spot_list = []

six_count_bet_list = []
six_count_spot_list = []

for spread_name in list(spreads.keys()):
    for params in product:

        deck, pen, seventeen, BJ_pays = params
        strat_list.append(strat)
        deck_list.append(deck)
        pen_list.append(pen)
        seventeen_list.append(seventeen)
        BJ_pay_list.append(BJ_pays)
        read_path = '/media/saa/My Passport/Blackjack/sim_dfs/%s/%s/%s_%s_%s_%s_%s.csv'\
        %(strat,spread_name,deck,pen,seventeen, BJpays, spread_name)
        df = pd.read_csv(read_path)
        prob_win_list.append(np.sum(df['total_round_winnings'] >=0)/1000000)
        EV_list.append(np.mean(df['total_round_winnings']))
        prob_win_pos_list.append(np.sum((df['opening_count']>0) & (df['total_round_winnings']>= 0))
                          /np.sum(df['opening_count']>0) )  
        EV_pos_list.append(np.mean(df[df['opening_count']>0]['total_round_winnings']))
        fifth_loss_list.append(np.percentile(losses(df),[0,5])[1])
        
        minus_one_count_bet_list.append(spreads[spread_name][-1][0])
        minus_one_count_spot_list.append(spreads[spread_name][-1][1])

        zero_count_bet_list.append(spreads[spread_name][0][0])
        zero_count_spot_list.append(spreads[spread_name][0][1])

        one_count_bet_list.append(spreads[spread_name][1][0])
        one_count_spot_list.append(spreads[spread_name][1][1])

        two_count_bet_list.append(spreads[spread_name][2][0])
        two_count_spot_list.append(spreads[spread_name][2][1])

        three_count_bet_list.append(spreads[spread_name][3][0])
        three_count_spot_list.append(spreads[spread_name][3][1])

        four_count_bet_list.append(spreads[spread_name][4][0])
        four_count_spot_list.append(spreads[spread_name][4][1])

        five_count_bet_list.append(spreads[spread_name][5][0])
        five_count_spot_list.append(spreads[spread_name][5][1])

        six_count_bet_list.append(spreads[spread_name][6][0])
        six_count_spot_list.append(spreads[spread_name][6][1])
    


dic = {'strategy':strat_list,\
       'decks':deck_list,\
       'penetration':pen_list,\
       'seventeen':seventeen_list,\
       'BJ_payout':BJ_pay_list,\
       '-1_count_bet':minus_one_count_bet_list,\
       '-1_count_spot':minus_one_count_spot_list,\
       '0_count_bet':zero_count_bet_list,\
       '0_count_spot':zero_count_spot_list,\
       '1_count_bet':one_count_bet_list,\
       '1_count_spot':one_count_spot_list,\
       '2_count_bet':two_count_bet_list,\
       '2_count_spot':two_count_spot_list,\
       '3_count_bet':three_count_bet_list,\
       '3_count_spot':three_count_spot_list,\
       '4_count_bet':four_count_bet_list,\
       '4_count_spot':four_count_spot_list,\
       '5_count_bet':five_count_bet_list,\
       '5_count_spot':five_count_spot_list,\
       '6_count_bet':six_count_bet_list,\
       '6_count_spot':six_count_spot_list,\
       'probability_of_winning':prob_win_list,\
       'expected_value':EV_list,\
       'prob_of_winning_on_pos_count':prob_win_pos_list,\
       'expected_value_on_pos_count':EV_pos_list,\
       'fifth_percentile_loss':fifth_loss_list}
out = pd.DataFrame(dic)
out.to_csv('summary_dfs/counters.csv')

## 3. Modeling<a id='3.'></a>

Train a model to predict expected value, min losses given a certain game and bet spread. To be completed once all samples are simulated.