In [7]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

import math


import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
%matplotlib inline


from sklearn.linear_model import LogisticRegression

In [13]:
# ボードがmonotone,twotone,rainbowのどれなのかを判別する関数
# boardは 'Ts6s3c' などの形式
def dist_board_type(board):
    suit1 = board[1]
    suit2 = board[3]
    suit3 = board[5]
    
    if suit1 == suit2 and suit1 == suit3:
        return 'monotone'
    elif suit1 != suit2 and suit1 != suit3 and suit2 !=suit3:
        return 'rainbow'
    else:
        return 'twotone'
    
def dist_board_pair(board):
    num1 = board[0]
    num2 = board[2]
    num3 = board[4]
    
    if num1 == num2 and num1 == num3:
        return 'trips'
    elif num1 != num2 and num1 != num3 and num2 !=num3:
        return 'no pair'
    else:
        return 'one pair'    

In [18]:
card_num_list = {'A':14, 'K':13, 'Q':12, 'J':11, 'T':10, '9':9, '8':8, '7':7, '6':6, '5':5, '4':4, '3':3, '2':2}
card_str = '23456789TJQKA'

def get_board_sum(board):
    return card_num_list[board[0]]+card_num_list[board[2]]+card_num_list[board[4]]

def get_high_card(board):
    board_cards = [card_num_list[board[0]], card_num_list[board[2]], card_num_list[board[4]]]
    board_cards.sort(reverse=True)
    return card_str[board_cards[0]-2]   

In [19]:
# GTO+の解析データをgoogle spread sheetに貼り付けたものをクリップボードにコピーしてます
df = pd.read_clipboard()

In [20]:
df

Unnamed: 0,board,equity,EV,bet66%,bet33%,check
0,AsQs9s,55.887,3.757,0.501,11.242,88.257
1,As8s4s,53.762,3.753,0.099,22.494,77.408
2,As6s3s,53.711,3.793,0.978,22.249,76.772
3,KsJs3s,51.558,3.664,5.008,23.642,71.350
4,QsTs5s,52.739,3.712,1.193,27.835,70.972
5,AcKdTs,55.921,3.997,18.282,16.288,65.430
6,Ks9s3d,52.366,3.715,12.556,23.772,63.672
7,Qs4sKd,51.583,3.766,21.489,15.920,62.591
8,4s2sKd,51.998,3.668,0.018,38.070,61.911
9,Js9sJd,53.888,3.734,0.001,38.323,61.676


In [21]:
# これだけだとcheck率を予測する変数が足りないので、feature engineering
df['board_type'] = df['board'].apply(dist_board_type)
df['board_sum'] = df['board'].apply(get_board_sum)
df['board_pair'] = df['board'].apply(dist_board_pair)
df['high_card'] = df['board'].apply(get_high_card)

In [22]:
df

Unnamed: 0,board,equity,EV,bet66%,bet33%,check,board_type,board_sum,board_pair,high_card
0,AsQs9s,55.887,3.757,0.501,11.242,88.257,monotone,35,no pair,A
1,As8s4s,53.762,3.753,0.099,22.494,77.408,monotone,26,no pair,A
2,As6s3s,53.711,3.793,0.978,22.249,76.772,monotone,23,no pair,A
3,KsJs3s,51.558,3.664,5.008,23.642,71.350,monotone,27,no pair,K
4,QsTs5s,52.739,3.712,1.193,27.835,70.972,monotone,27,no pair,Q
5,AcKdTs,55.921,3.997,18.282,16.288,65.430,rainbow,37,no pair,A
6,Ks9s3d,52.366,3.715,12.556,23.772,63.672,twotone,25,no pair,K
7,Qs4sKd,51.583,3.766,21.489,15.920,62.591,twotone,29,no pair,K
8,4s2sKd,51.998,3.668,0.018,38.070,61.911,twotone,19,no pair,K
9,Js9sJd,53.888,3.734,0.001,38.323,61.676,twotone,31,one pair,J


In [13]:
# bet55%以上のボードに1, bet45%以下のボードに0をつける関数
def check_or_bet(x):
    if x >= 55:
        return 0
    elif x <= 45:
        return 1

In [14]:
df['check_or_bet'] = df['check'].apply(check_or_bet)

In [15]:
df

Unnamed: 0,board,equity,EV,bet66%,bet33%,check,board_type,board_sum,check_or_bet
0,AsQs3s,59.446,13.336,0.035,5.471,94.493,monotone,29,0.0
1,4s3s2s,52.193,11.321,0.118,9.239,90.643,monotone,9,0.0
2,6s4s3s,51.017,11.494,0.152,10.345,89.503,monotone,13,0.0
3,As7s5s,55.429,13.209,3.708,13.781,82.511,monotone,26,0.0
4,As3s2d,56.014,11.881,4.105,15.903,79.992,twotone,19,0.0
5,7s6s5d,47.392,9.621,5.476,17.529,76.994,twotone,18,0.0
6,As4s2d,55.805,11.951,4.714,18.527,76.759,twotone,20,0.0
7,5s4s3d,50.288,9.771,2.810,20.982,76.208,twotone,12,0.0
8,KsJsTs,60.200,14.126,0.318,25.732,73.950,monotone,34,0.0
9,8c7d4s,48.859,10.990,26.019,1.899,72.082,rainbow,19,0.0


In [16]:
# bet率が45%より大きく55%より小さいデータは取り除く
df2 = df.dropna()

In [17]:
df2

Unnamed: 0,board,equity,EV,bet66%,bet33%,check,board_type,board_sum,check_or_bet
0,AsQs3s,59.446,13.336,0.035,5.471,94.493,monotone,29,0.0
1,4s3s2s,52.193,11.321,0.118,9.239,90.643,monotone,9,0.0
2,6s4s3s,51.017,11.494,0.152,10.345,89.503,monotone,13,0.0
3,As7s5s,55.429,13.209,3.708,13.781,82.511,monotone,26,0.0
4,As3s2d,56.014,11.881,4.105,15.903,79.992,twotone,19,0.0
5,7s6s5d,47.392,9.621,5.476,17.529,76.994,twotone,18,0.0
6,As4s2d,55.805,11.951,4.714,18.527,76.759,twotone,20,0.0
7,5s4s3d,50.288,9.771,2.810,20.982,76.208,twotone,12,0.0
8,KsJsTs,60.200,14.126,0.318,25.732,73.950,monotone,34,0.0
9,8c7d4s,48.859,10.990,26.019,1.899,72.082,rainbow,19,0.0


In [18]:
df['board_type'].value_counts()

twotone     96
rainbow     73
monotone    15
Name: board_type, dtype: int64

In [19]:
df.groupby('board_type').mean()

Unnamed: 0_level_0,equity,EV,bet66%,bet33%,check,board_sum,check_or_bet
board_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
monotone,56.154867,13.302267,3.206333,31.1568,65.6368,24.066667,0.083333
rainbow,58.07811,15.305699,46.133589,41.093562,12.772808,23.520548,0.956522
twotone,57.107698,14.219552,34.475479,38.35526,27.169323,24.40625,0.883721


In [20]:
df[df['board_sum']<=24].groupby('board_type').mean()

Unnamed: 0_level_0,equity,EV,bet66%,bet33%,check,board_sum,check_or_bet
board_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
monotone,54.794375,12.888125,1.87525,34.719375,63.405375,19.5,0.166667
rainbow,55.873452,14.323024,51.265214,28.796786,19.937952,19.261905,0.921053
twotone,54.003667,12.929463,33.317241,31.071815,35.610981,19.462963,0.772727


In [21]:
df[df['board_sum']>=24].groupby('board_type').mean()

Unnamed: 0_level_0,equity,EV,bet66%,bet33%,check,board_sum,check_or_bet
board_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
monotone,57.3113,13.6844,3.8682,30.6194,65.5124,27.7,0.0
rainbow,60.706806,16.398444,37.861056,57.815889,4.323028,28.555556,1.0
twotone,60.474653,15.629837,36.742755,46.313755,16.943592,29.795918,1.0


In [22]:
# ロジスティック回帰をするためにboard_typeをダミー変数化
board_dummies = pd.get_dummies(df2['board_type'])

In [23]:
board_dummies.head()

Unnamed: 0,monotone,rainbow,twotone
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,0,0,1


In [24]:
X = df2[['equity', 'board_sum']]
X = pd.concat([X,board_dummies],axis=1).drop('monotone',axis=1)

In [25]:
X.head()

Unnamed: 0,equity,board_sum,rainbow,twotone
0,59.446,29,0,0
1,52.193,9,0,0
2,51.017,13,0,0
3,55.429,26,0,0
4,56.014,19,0,1


In [26]:
Y = df2.check_or_bet

In [28]:
Y = Y.values

In [29]:
# 86.8%の精度が得られた
log_model = LogisticRegression() 
log_model.fit(X,Y)
log_model.score(X,Y)

0.86826347305389218

In [31]:
coeff_df = DataFrame([X.columns, log_model.coef_[0]]).T
coeff_df

Unnamed: 0,0,1
0,equity,-0.0136838
1,board_sum,0.0946388
2,rainbow,2.43218
3,twotone,1.68819


In [32]:
# boardがconnectedかseparatedかもbet頻度に影響してそうなので標準偏差でboardの散らばり具合を暫定的に数値化
def get_std(board):
    return np.std([card_num_list[board[0]],card_num_list[board[2]],card_num_list[board[4]]])

In [33]:
df2['board_std'] = df2['board'].apply(get_std)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [34]:
X['std'] = df2['board_std']

In [35]:
# 87%に精度が上がった。 ロジスティック回帰手法の実験のためCVは行なっていない。
log_model = LogisticRegression() 
log_model.fit(X,Y)
log_model.score(X,Y)

0.86826347305389218