In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import os

import chess

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

In [2]:
tqdm.pandas()

  from pandas import Panel


In [3]:
players = ['Konevlad','Crecent','LongLive16Hayastan','may6enexttime','IWANNABEADOORED',
           'Ogrilla','dolar9','MilfSalvatore','cutemouse83','Benefactorr',
           'cjota95','alireza2003','vistagausta','GRUrussia','Drvitman',
           'NIndja64','KnezMihailova','Bestinblitz','AnonLondonKiller','black_knight22',
           'IVK88','Inventing_Invention','temp006','Arka50','Ernst_Gruenfeld',
           'Zaola420','GeorgMeier','SindarovGM','BenjaminBokTwitch','Hungry_Dragon',
           'avantage_ru','Chesstoday','GABUZYAN_CHESSMOOD','Tryhard00','TilChess',
           'muisback','MikeGScarn','Zhalmakhanov_R','L04d1ng','BabaRamdev',
           'opperwezen','Feokl1995','LucaBrazzi','Venost17','RebeccaHarris',
           'gmmoranda','YuQuesada','FakeBruceLee','swimmerchess','GrigorGrigorov']

elen = 8
cur_cols = [f'c{i}' for i in range(elen)]
player_cols = [f'p{i}' for i in range(elen)]
optimal_cols = [f'o{i}' for i in range(elen)]
numeric_cols = cur_cols + player_cols + optimal_cols
diff_cols = ['cur_pos'] + cur_cols + ['player_uci'] + player_cols + ['optimal_uci'] + optimal_cols

In [23]:
def get_player_df(file_path, names):
    df = pd.read_csv(file_path, names=names, index_col=False)
    df['color'] = df['cur_pos'].apply(lambda x: x.split()[1])
#     df = df.drop('cur_pos', axis=1)
#     df.loc[df['color'] == 'b',cols] = df*-1
#     df = df.drop('color', axis=1)
    df['player'] = os.path.basename(file_path).split('.')[0]
    return df

def get_all_df(players):
    ans = [get_player_df(f'diffs/{i}.csv', diff_cols) for i in players]
    ans = pd.concat(ans).reset_index(drop=True)
    return ans

def get_diff_df(all_df, cols1, cols2):
    diff_df = all_df[cols1] - all_df[cols2].values
#     diff_df['player'] = all_df['player']
    return diff_df

def get_dummies(all_df, col1, col2=None):
    if col2:
        return pd.get_dummies(all_df[col1], dtype=int) - pd.get_dummies(all_df[col2], dtype=int)
    return pd.get_dummies(all_df[col1], dtype=int)

In [24]:
def san_to_piece(san):
    if san[0]=='K':
        return 'king'
    if san[0]=='Q':
        return 'queen'
    if san[0]=='B':
        return 'bishop'
    if san[0]=='N':
        return 'knight'
    if san[0]=='R':
        return 'rook'
    return 'pawn'

def get_piece(row):
    board = chess.Board(row['cur_pos'])
    player_san = board.san(chess.Move.from_uci(row['player_uci']))
    optimal_san = board.san(chess.Move.from_uci(row['optimal_uci']))
    row['player_piece'] = san_to_piece(player_san)
    row['optimal_piece'] = san_to_piece(optimal_san)
    return row

In [7]:
all_df = get_all_df(players).dropna().reset_index(drop=True)
all_df = all_df.progress_apply(get_piece, axis=1)

HBox(children=(FloatProgress(value=0.0, max=80981.0), HTML(value='')))




In [67]:
cur_pos = all_df[cur_cols]
y = all_df['player']
player_dummies = get_dummies(all_df,'player_piece')
optimal_dummies = get_dummies(all_df,'optimal_piece')
bias_dummies = get_dummies(all_df,'player_piece','optimal_piece')

player_df1 = get_diff_df(all_df, player_cols, cur_cols)
player_df2 = pd.concat([player_df1, player_dummies], axis=1)
player_df3 = pd.concat([player_df1, cur_pos], axis=1)
player_df4 = pd.concat([player_df1, player_dummies, cur_pos], axis=1)

optimal_df1 = get_diff_df(all_df, optimal_cols, cur_cols)
optimal_df2 = pd.concat([optimal_df1, optimal_dummies], axis=1)
optimal_df3 = pd.concat([optimal_df1, cur_pos], axis=1)
optimal_df4 = pd.concat([optimal_df1, optimal_dummies, cur_pos], axis=1)

bias_df1 = get_diff_df(all_df, player_cols, optimal_cols)
bias_df2 = pd.concat([bias_df1, bias_dummies], axis=1)
bias_df3 = pd.concat([bias_df1, cur_pos], axis=1)
bias_df4 = pd.concat([bias_df1, bias_dummies, cur_pos], axis=1)

In [68]:
X1 = player_df1
X2 = optimal_df1
X3 = bias_df1

scores1_1 = cross_val_score(RandomForestClassifier(), X1, y)
scores1_2 = cross_val_score(RandomForestClassifier(), X2, y)
scores1_3 = cross_val_score(RandomForestClassifier(), X3, y)

In [69]:
X1 = player_df2
X2 = optimal_df2
X3 = bias_df2

scores2_1 = cross_val_score(RandomForestClassifier(), X1, y)
scores2_2 = cross_val_score(RandomForestClassifier(), X2, y)
scores2_3 = cross_val_score(RandomForestClassifier(), X3, y)

In [70]:
X1 = player_df3
X2 = optimal_df3
X3 = bias_df3

scores3_1 = cross_val_score(RandomForestClassifier(), X1, y)
scores3_2 = cross_val_score(RandomForestClassifier(), X2, y)
scores3_3 = cross_val_score(RandomForestClassifier(), X3, y)

In [71]:
X1 = player_df4
X2 = optimal_df4
X3 = bias_df4

scores4_1 = cross_val_score(RandomForestClassifier(), X1, y)
scores4_2 = cross_val_score(RandomForestClassifier(), X2, y)
scores4_3 = cross_val_score(RandomForestClassifier(), X3, y)

In [72]:
print('Baseline Accuracy: 0.020', end='\n\n')

print('Move Vector Only')
print(f'{scores1_1.mean():.3f} - Player Change')
print(f'{scores1_2.mean():.3f} - Optimal Change')
print(f'{scores1_3.mean():.3f} - Player Bias')

print('Move Vector and Piece Moved')
print(f'{scores2_1.mean():.3f} - Player Change')
print(f'{scores2_2.mean():.3f} - Optimal Change')
print(f'{scores2_3.mean():.3f} - Player Bias')

print('Move Vector and Inital Position')
print(f'{scores3_1.mean():.3f} - Player Change')
print(f'{scores3_2.mean():.3f} - Optimal Change')
print(f'{scores3_3.mean():.3f} - Player Bias')

print('Move Vector, Piece Moved, and Initial Position')
print(f'{scores4_1.mean():.3f} - Player Change')
print(f'{scores4_2.mean():.3f} - Optimal Change')
print(f'{scores4_3.mean():.3f} - Player Bias')

Baseline Accuracy: 0.020

Move Vector Only
0.031 - Player Change
0.029 - Optimal Change
0.024 - Player Bias
Move Vector and Piece Moved
0.032 - Player Change
0.031 - Optimal Change
0.023 - Player Bias
Move Vector and Inital Position
0.038 - Player Change
0.036 - Optimal Change
0.033 - Player Bias
Move Vector, Piece Moved, and Initial Position
0.037 - Player Change
0.035 - Optimal Change
0.034 - Player Bias


In [73]:
def get_kmeans_classes(df, y):
    means_df = pd.concat([df,y],axis=1).groupby('player').mean()
    kmeans = KMeans(n_clusters=2).fit(means_df)
    d = dict(zip(means_df.index, kmeans.labels_))
    return y.map(d)

In [74]:
X1 = player_df1
X2 = optimal_df1
X3 = bias_df1

y1 = get_kmeans_classes(player_df1,y)
y2 = get_kmeans_classes(optimal_df1,y)
y3 = get_kmeans_classes(bias_df1,y)

scores5_1 = cross_val_score(RandomForestClassifier(), X1, y1)
scores5_2 = cross_val_score(RandomForestClassifier(), X2, y2)
scores5_3 = cross_val_score(RandomForestClassifier(), X3, y3)

In [75]:
X1 = player_df2
X2 = optimal_df2
X3 = bias_df2

y1 = get_kmeans_classes(player_df2,y)
y2 = get_kmeans_classes(optimal_df2,y)
y3 = get_kmeans_classes(bias_df2,y)

scores6_1 = cross_val_score(RandomForestClassifier(), X1, y1)
scores6_2 = cross_val_score(RandomForestClassifier(), X2, y2)
scores6_3 = cross_val_score(RandomForestClassifier(), X3, y3)

In [76]:
X1 = player_df3
X2 = optimal_df3
X3 = bias_df3

y1 = get_kmeans_classes(player_df3,y)
y2 = get_kmeans_classes(optimal_df3,y)
y3 = get_kmeans_classes(bias_df3,y)

scores7_1 = cross_val_score(RandomForestClassifier(), X1, y1)
scores7_2 = cross_val_score(RandomForestClassifier(), X2, y2)
scores7_3 = cross_val_score(RandomForestClassifier(), X3, y3)

In [77]:
X1 = player_df4
X2 = optimal_df4
X3 = bias_df4

y1 = get_kmeans_classes(player_df4,y)
y2 = get_kmeans_classes(optimal_df4,y)
y3 = get_kmeans_classes(bias_df4,y)

scores8_1 = cross_val_score(RandomForestClassifier(), X1, y1)
scores8_2 = cross_val_score(RandomForestClassifier(), X2, y2)
scores8_3 = cross_val_score(RandomForestClassifier(), X3, y3)

In [78]:
print('Baseline Accuracy: 0.500', end='\n\n')

print('Move Vector Only')
print(f'{scores5_1.mean():.3f} - Player Change')
print(f'{scores5_2.mean():.3f} - Optimal Change')
print(f'{scores5_3.mean():.3f} - Player Bias')

print('Move Vector and Piece Moved')
print(f'{scores6_1.mean():.3f} - Player Change')
print(f'{scores6_2.mean():.3f} - Optimal Change')
print(f'{scores6_3.mean():.3f} - Player Bias')

print('Move Vector and Inital Position')
print(f'{scores7_1.mean():.3f} - Player Change')
print(f'{scores7_2.mean():.3f} - Optimal Change')
print(f'{scores7_3.mean():.3f} - Player Bias')

print('Move Vector, Piece Moved, and Initial Position')
print(f'{scores8_1.mean():.3f} - Player Change')
print(f'{scores8_2.mean():.3f} - Optimal Change')
print(f'{scores8_3.mean():.3f} - Player Bias')

Baseline Accuracy: 0.500

Move Vector Only
0.612 - Player Change
0.604 - Optimal Change
0.540 - Player Bias
Move Vector and Piece Moved
0.612 - Player Change
0.630 - Optimal Change
0.642 - Player Bias
Move Vector and Inital Position
0.600 - Player Change
0.601 - Optimal Change
0.588 - Player Bias
Move Vector, Piece Moved, and Initial Position
0.602 - Player Change
0.601 - Optimal Change
0.583 - Player Bias
