In [None]:
import chess
import chess.uci
import chess.pgn
import os
import csv
from numpy import array
import numpy as np
import pandas as pd
import math
import statistics
import ast
import pandas as pd
import json
import fastai

# Data prep methods

In [None]:
def get_perf_df(event = 'Rated Classical game', csv_path = "/Users/tylerahlstrom/Documents/GitHub/DI_proposal/stockfish_performances_DC.csv"):
    """Load the performance CSV and keep only the rows of one event type.

    Parameters
    ----------
    event : str
        Value the ``event`` column must equal for a row to be kept.
    csv_path : str or path-like
        Location of the CSV file. The default is the original author's local
        path; pass your own path when running the notebook elsewhere.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels.
    """
    perf_df = pd.read_csv(csv_path)
    # Selecting the matching rows is equivalent to dropping the non-matching
    # ones, and does not depend on the index labels being unique.
    return perf_df[perf_df['event'] == event]

In [None]:
def split_perfs(joint_perf_df):
    """Split each game row into two per-player rows (white first, then black).

    Only rows whose ``result`` is a three-character string are used; the
    first character becomes white's result and the third becomes black's.
    Rows with any other ``result`` (longer strings, missing values) are
    skipped.

    Parameters
    ----------
    joint_perf_df : pandas.DataFrame
        Needs the columns ``result``, ``elo_w``/``elo_b``,
        ``chosen_moves_eval_w``/``_b``, ``available_moves_eval_w``/``_b`` and
        ``acc_name_w``/``_b``.

    Returns
    -------
    pandas.DataFrame
        Columns ``elo``, ``chosen_evals``, ``option_evals``, ``opp_elo``,
        ``win``, ``acc_name``, ``result``. ``win`` is never populated; it is
        kept only so the column layout matches what this function has always
        produced.
    """
    out_columns = ['elo', 'chosen_evals', 'option_evals', 'opp_elo', 'win', 'acc_name', 'result']
    records = []
    for _, row in joint_perf_df.iterrows():
        result = row['result']
        # Compare the length by value; also guard against non-string results
        # (e.g. NaN from an empty CSV field), which have no len().
        if not isinstance(result, str) or len(result) != 3:
            continue
        for side, opponent, side_result in (('w', 'b', result[0]), ('b', 'w', result[2])):
            records.append({
                'elo': row['elo_' + side],
                'chosen_evals': row['chosen_moves_eval_' + side],
                'option_evals': row['available_moves_eval_' + side],
                'opp_elo': row['elo_' + opponent],
                'result': side_result,
                'acc_name': row['acc_name_' + side],
            })
    # Build the frame once instead of appending row by row, which is
    # quadratic and relies on the removed DataFrame.append.
    return pd.DataFrame(records, columns=out_columns)

In [None]:
def convert_json_to_list(df):
    """Decode the JSON text in ``chosen_evals`` and ``option_evals`` in place.

    The previous implementation assigned into the row objects yielded by
    ``iterrows()``; those may be copies, in which case the frame was never
    updated. Whole columns are assigned here so the decoded values always
    land in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``chosen_evals`` and ``option_evals`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with both columns decoded.
    """
    for column in ('chosen_evals', 'option_evals'):
        # Values that are not strings (already decoded, or missing) are left
        # untouched so the function can safely be re-run on the same frame.
        df[column] = [
            json.loads(value) if isinstance(value, str) else value
            for value in df[column]
        ]
    return df

In [None]:
def get_list_of_chosen_moves(dict_of_move_dict):
    """Turn a ``{key: move_info}`` dict into ``[key, move_info]`` pairs.

    The pairs are ordered by the integer value of the key. Input looks like
    ``{'11': {'move_rank': 2, 'cp_score': ...}, ...}``.
    """
    return sorted(
        ([move_key, move_info] for move_key, move_info in dict_of_move_dict.items()),
        key=lambda pair: int(pair[0]),
    )

In [None]:
def get_list_of_list_of_available_moves(dict_of_options_dict):
    """Turn the nested options dict into nested, sorted lists.

    Input looks like ``{'24': {'d7e8': {'cp_score': -674, 'rank': ...}, ...}, ...}``.

    Returns
    -------
    list
        ``[[move_key, [[option_name, option_info], ...]], ...]`` where the
        outer list is ordered by ``int(move_key)`` and each inner list by
        ``int(option_info['rank'])``.
    """
    lolol_of_options = []
    for move_key, d_options in dict_of_options_dict.items():
        lol_of_options = sorted(
            ([option_name, option_info] for option_name, option_info in d_options.items()),
            key=lambda option: int(option[1]['rank']),
        )
        lolol_of_options.append([move_key, lol_of_options])
    # Sort on the whole key, not its first character: keying on entry[0][0]
    # put '10' before '2' and broke the index alignment with the chosen-move
    # list, which is ordered by int(key).
    lolol_of_options.sort(key=lambda entry: int(entry[0]))
    return lolol_of_options

In [None]:
def get_list_of_rank_percentiles(list_of_moves):
    """Map each chosen move to ``1 - (rank - 1) / num_options``.

    ``list_of_moves`` holds ``[key, move_info]`` pairs; ``move_info`` must
    provide ``move_rank`` and ``num_move_options``. Rank 1 maps to 1.0.
    """
    def percentile_of(move_info):
        rank = int(move_info['move_rank'])
        num_options = int(move_info['num_move_options'])
        step = float(1) / float(num_options)
        return 1.0 - (float(rank - 1) * step)

    return [percentile_of(move[1]) for move in list_of_moves]

In [None]:
def get_list_of_move_cps(list_of_moves):
    """Collect the ``cp_score`` of every ``[key, move_info]`` pair, in order."""
    return [move[1]['cp_score'] for move in list_of_moves]

In [None]:
def get_list_of_move_mates(list_of_moves):
    """Collect the ``mate_score`` of every ``[key, move_info]`` pair, in order."""
    return [move[1]['mate_score'] for move in list_of_moves]

In [None]:
def get_list_of_option_cps(list_of_av_moves):
    """For each move, list the ``cp_score`` of every available option.

    ``list_of_av_moves`` holds ``[move_key, [[option_name, option_info], ...]]``
    entries; the result keeps the same outer and inner ordering.
    """
    return [
        [option[1]['cp_score'] for option in move[1]]
        for move in list_of_av_moves
    ]

In [None]:
def get_list_of_dist_percentiles(move_cps, option_cps):
    """Score each chosen move against the spread of its options' cp scores.

    For move ``i`` the score is ``max(0, 1 - |move_cp| / |max_cp - min_cp|)``,
    where max/min are taken over the non-None entries of ``option_cps[i]``.
    When no option has a score, max and min fall back to -10 and -200. When
    max equals min the score is fixed at 0.5.

    Side effect: every ``None`` in ``move_cps`` is overwritten in place with
    -2000, so the caller's list contains no ``None`` afterwards.

    Returns a list with one score per entry of ``move_cps``.
    """
    dist_scores = []
    for idx, move_cp in enumerate(move_cps):
        known_cps = [cp for cp in option_cps[idx] if cp is not None]
        if known_cps:
            best_cp, worst_cp = max(known_cps), min(known_cps)
        else:
            best_cp, worst_cp = -10, -200

        if move_cp is None:
            move_cp = -2000
            move_cps[idx] = move_cp  # write the placeholder back for the caller

        if best_cp == worst_cp:
            dist_scores.append(0.5)
        else:
            spread = abs(best_cp - worst_cp)
            dist_scores.append(max(0, 1 - (abs(move_cp) / spread)))

    return dist_scores

In [None]:
def get_raw_data_df(event, random_state=None):
    """Load, de-duplicate, shuffle and split the games of one event type.

    Steps: read the games for ``event``, drop duplicate rows, shuffle, split
    every game into one row per player, decode the JSON columns, then
    shuffle again.

    Parameters
    ----------
    event : str
        Event name passed to ``get_perf_df``.
    random_state : int or None
        Seed forwarded to both shuffles. The default ``None`` keeps the
        previous unseeded behaviour; pass an int to make the row order, and
        therefore any positional train/test split, reproducible.

    Returns
    -------
    pandas.DataFrame
        Per-player rows in shuffled order with a fresh 0..n-1 index.
    """
    games_df = get_perf_df(event).drop_duplicates()
    games_df = games_df.sample(frac=1, random_state=random_state).reset_index(drop=True)
    players_df = convert_json_to_list(split_perfs(games_df))
    return players_df.sample(frac=1, random_state=random_state).reset_index(drop=True)


In [None]:
def get_elo_system_prediction(result, opp_elo, k_factor=40, base_elo=1560):
    """Rating after one game under the standard Elo update formula.

    The player is assumed to start at ``base_elo``. The expected score is
    ``1 / (1 + 10 ** ((opp_elo - base_elo) / 400))`` and the returned rating
    is ``base_elo + k_factor * (result - expected)``.

    Parameters
    ----------
    result : str or number
        Game result for the player; converted with ``float``.
    opp_elo : number
        Opponent's rating.
    k_factor : number
        Update step size. Defaults to the previously hard-coded 40.
    base_elo : number
        Rating assumed before the game. Defaults to the previously
        hard-coded 1560.

    Returns
    -------
    float
    """
    expected_score = 1./(1.+10.**((opp_elo - base_elo)/400.))
    return base_elo + k_factor*(float(result) - expected_score)

In [None]:
def remove_short_games(X, y, min_moves=30):
    """Drop, in place, every game with fewer than ``min_moves`` moves.

    Parameters
    ----------
    X : list of dict
        Per-game feature dicts; the length of ``game['cps']`` is the number
        of moves.
    y : list
        Targets, parallel to ``X``.
    min_moves : int
        Minimum number of moves a game needs to be kept. Defaults to the
        previously hard-coded 30.

    Returns
    -------
    tuple
        ``(X, y)`` — the same two list objects, filtered.
    """
    # A single pass replaces the repeated list.pop(i) calls, which shifted
    # the tail of both lists on every removal.
    kept_indices = [idx for idx, game in enumerate(X) if len(game['cps']) >= min_moves]
    kept_X = [X[idx] for idx in kept_indices]
    kept_y = [y[idx] for idx in kept_indices]
    # Slice-assign so callers holding references to the lists see the result.
    X[:] = kept_X
    y[:] = kept_y
    return X, y
    

In [None]:
def get_desired_data(complete_data_df, to_select = ('rank_percentiles', 'dist_percentiles', 'cps', 'result', 'opp_elo', 'acc_name', 'mates')):
    """Build the per-game feature dicts and the target list.

    Parameters
    ----------
    complete_data_df : pandas.DataFrame
        One row per player-game with the columns ``chosen_evals``,
        ``option_evals`` (both already decoded from JSON), ``elo``,
        ``opp_elo``, ``result`` and ``acc_name``.
    to_select : iterable of str
        Names of the features to put into each game's dict.

    Returns
    -------
    tuple
        ``(X_selected_ldl, y)``. ``X_selected_ldl`` is a list of dictionaries
        (one per row) holding the selected features. ``y`` is a list of
        ``[elo, elo_system_prediction]`` pairs, where the second item is the
        integer-truncated output of ``get_elo_system_prediction``.
    """
    X_selected_ldl = [] # list of dictionaries of lists, one dict per game
    y = [] # elo targets
    for _, row in complete_data_df.iterrows():
        row_dict = {}

        ch_moves = get_list_of_chosen_moves(row['chosen_evals'])

        if 'rank_percentiles' in to_select:
            row_dict['rank_percentiles'] = get_list_of_rank_percentiles(ch_moves)

        # 'dist_percentiles' is derived from the cp scores, so compute them
        # whenever either feature is requested instead of relying on 'cps'
        # also being selected.
        cps = None
        if 'cps' in to_select or 'dist_percentiles' in to_select:
            cps = get_list_of_move_cps(ch_moves)
        if 'cps' in to_select: #TO ADD: cp percentiles (e.g., just how much worse would the worst move have been?)
            row_dict['cps'] = cps
        if 'dist_percentiles' in to_select:
            av_moves = get_list_of_list_of_available_moves(row['option_evals'])
            option_cps = get_list_of_option_cps(av_moves)
            # NOTE: this call replaces None entries of `cps` in place, so
            # row_dict['cps'] (the same list) is updated as well.
            row_dict['dist_percentiles'] = get_list_of_dist_percentiles(cps, option_cps)
        if 'opp_elo' in to_select:
            row_dict['opp_elo'] = row['opp_elo']
        if 'result' in to_select:
            row_dict['result'] = row['result']
        if 'acc_name' in to_select:
            row_dict['acc_name'] = row['acc_name']
        if 'mates' in to_select:
            row_dict['mates'] = get_list_of_move_mates(ch_moves)

        X_selected_ldl.append(row_dict)

        # Read result/opp_elo from the row itself: the target must not depend
        # on whether those two features were selected for X.
        elo_system_prediction = get_elo_system_prediction(row['result'], row['opp_elo'])
        y.append([row['elo'], int(elo_system_prediction)])

    return X_selected_ldl, y

In [None]:
def TabularizeMates(X_raw):
    """Add five 0/1 indicator lists to every game dict, derived from ``mates``.

    For each entry of ``game['mates']`` the first two characters are compared
    with a fixed set of codes; each code feeds one indicator list:

    * ``'AB'`` -> ``found_mate``
    * ``'AC'`` -> ``continued_mate``
    * ``'AL'`` -> ``lost_mate``
    * ``'DB'`` -> ``moved_into_mate``
    * ``'DC'`` -> ``continued_being_mated``

    ``None`` entries and unknown codes produce 0 in all five lists. The
    dicts are modified in place; nothing is returned.
    """
    code_to_feature = (
        ('AB', 'found_mate'),
        ('AC', 'continued_mate'),
        ('AL', 'lost_mate'),
        ('DB', 'moved_into_mate'),
        ('DC', 'continued_being_mated'),
    )
    for game in X_raw:
        codes = [None if mate is None else mate[0:2] for mate in game['mates']]
        indicators = [
            (feature, [1 if move_code == code else 0 for move_code in codes])
            for code, feature in code_to_feature
        ]
        # Assign only once every list has been built, in the fixed feature order.
        for feature, flags in indicators:
            game[feature] = flags
    


In [None]:
def RefineX(X_):
    """Convert each game dict into a list of 8-value per-move feature rows.

    Row ``i`` of a game is, in this order: ``min(10, cps[i])``,
    ``dist_percentiles[i]``, ``rank_percentiles[i]``, ``found_mate[i]``,
    ``continued_mate[i]``, ``lost_mate[i]``, ``moved_into_mate[i]`` and
    ``continued_being_mated[i]``. The number of rows equals ``len(cps)``.

    Returns a list (games) of lists (moves) of lists (features).
    """
    passthrough_keys = (
        'dist_percentiles',
        'rank_percentiles',
        'found_mate',
        'continued_mate',
        'lost_mate',
        'moved_into_mate',
        'continued_being_mated',
    )
    refined_X = []
    for game in X_:
        move_rows = []
        for move_idx in range(len(game['cps'])):
            capped_cp = min(10, game['cps'][move_idx])
            move_rows.append([capped_cp] + [game[key][move_idx] for key in passthrough_keys])
        refined_X.append(move_rows)
    return refined_X
    
    

In [None]:
def CreateLables(X_, Y_):
    """Repeat each game's target once per move.

    For game ``i`` the label is ``Y_[i][0]`` (the second item in ``Y_[i]`` is
    the standard system prediction and is not used here), repeated
    ``len(X_[i]['cps'])`` times.
    """
    return [
        [Y_[game_idx][0] for _ in range(len(game['cps']))]
        for game_idx, game in enumerate(X_)
    ]
        

In [None]:
def get_data(event = 'Rated Classical game'):
    """Run the full data pipeline for one event type.

    Loads the raw per-player frame, extracts features and targets, then
    removes the games that ``remove_short_games`` considers too short.

    Returns the ``(X, y)`` pair produced by ``remove_short_games``.
    """
    features, targets = get_desired_data(get_raw_data_df(event))
    return remove_short_games(features, targets)

# Get the data

In [None]:
# Run the full data-prep pipeline: per-game feature dicts and their targets.
X_raw, y_raw = get_data(event = 'Rated Classical game')

In [None]:
# Inspect the feature dictionary of the first game.
X_raw[0]

In [None]:
# Add the mate indicator lists to every game dict (in place), then flatten
# each game into per-move feature rows.
TabularizeMates(X_raw)
X_ = RefineX(X_raw)
# Games differ in length, so the per-game arrays are ragged. Calling np.array
# on a ragged list raises ValueError on recent NumPy, so fill a 1-D object
# array explicitly: one ndarray per game.
X = np.empty(len(X_), dtype=object)
for game_idx, game_rows in enumerate(X_):
    X[game_idx] = np.array(game_rows)
Y_ = CreateLables(X_raw, y_raw)
Y = np.empty(len(Y_), dtype=object)
for game_idx, game_labels in enumerate(Y_):
    Y[game_idx] = np.array(game_labels)

In [None]:
# Shape of the third game's feature array (RefineX emits 8 values per move).
X[2].shape

In [None]:
# Shape of the third game's label array (CreateLables emits one label per move).
Y[2].shape

In [None]:
# Per-move labels of the first game.
Y[0]

In [None]:
# First game's dict again; TabularizeMates has added the mate indicator lists.
X_raw[0]

In [None]:
# Shape of the first game's label array.
Y[0].shape

In [None]:
# Shape of the first game's feature array.
X[0].shape

# Build the model

In [None]:
from keras import Sequential
from keras.layers import Dense, LSTM
from keras.utils import to_categorical

In [None]:
model = Sequential()
model.add(LSTM(units=50, dropout = 0.3, return_sequences= True, input_shape=(30, 8)))
model.add(LSTM(units=50, dropout = 0.3, return_sequences=True))
model.add(LSTM(units=50))
model.add(Dense(units=1))
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
model.fit(X_trimmed_train, Y_trimmed_train, epochs=800, batch_size=32, validation_data=(X_trimmed_test, Y_trimmed_test))

In [None]:
Xt = X

In [None]:
Xt[0][0:2]

In [None]:
X_trimmed = []
for each in Xt:
    X_trimmed.append(each[0:30])

In [None]:
X_trimmed = np.array(X_trimmed)

In [None]:
X_trimmed.shape

In [None]:
Y_trimmed = []
for each in Y:
    Y_trimmed.append(each[0:1])

In [None]:
Y_trimmed = np.array(Y_trimmed)

In [None]:
Y_trimmed.shape

In [None]:
X_trimmed_train = X_trimmed[:3800]
X_trimmed_test = X_trimmed[3800:]
Y_trimmed_train = Y_trimmed[:3800]
Y_trimmed_test = Y_trimmed[3800:]

model = Sequential()
model.add(LSTM(units=50, dropout = 0.3, return_sequences= True, input_shape=(30, 8)))
model.add(LSTM(units=50, dropout = 0.3, return_sequences=True))
model.add(LSTM(units=50))
model.add(Dense(units=1))
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_trimmed_train, Y_trimmed_train, epochs=800, batch_size=32, validation_data=(X_trimmed_test, Y_trimmed_test))

In [None]:
# Positional split: first 3800 games for training, the rest for validation.
X_trimmed_train, X_trimmed_test = X_trimmed[:3800], X_trimmed[3800:]
Y_trimmed_train, Y_trimmed_test = Y_trimmed[:3800], Y_trimmed[3800:]

# Same three-layer LSTM regressor as before, trained for 500 epochs.
model = Sequential([
    LSTM(units=50, dropout=0.3, return_sequences=True, input_shape=(30, 8)),
    LSTM(units=50, dropout=0.3, return_sequences=True),
    LSTM(units=50),
    Dense(units=1),
])
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(
    X_trimmed_train,
    Y_trimmed_train,
    epochs=500,
    batch_size=32,
    validation_data=(X_trimmed_test, Y_trimmed_test),
)

In [None]:
# Repeat of the previous 500-epoch run with a freshly initialised model.
# Positional split: first 3800 games for training, the rest for validation.
X_trimmed_train, X_trimmed_test = X_trimmed[:3800], X_trimmed[3800:]
Y_trimmed_train, Y_trimmed_test = Y_trimmed[:3800], Y_trimmed[3800:]

lstm_layers = [
    LSTM(units=50, dropout=0.3, return_sequences=True, input_shape=(30, 8)),
    LSTM(units=50, dropout=0.3, return_sequences=True),
    LSTM(units=50),
    Dense(units=1),
]
model = Sequential(lstm_layers)
model.summary()
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(
    X_trimmed_train,
    Y_trimmed_train,
    epochs=500,
    batch_size=32,
    validation_data=(X_trimmed_test, Y_trimmed_test),
)

In [None]:
# Positional split: first 3800 games for training, the rest for validation.
X_trimmed_train = X_trimmed[:3800]
X_trimmed_test = X_trimmed[3800:]
Y_trimmed_train = Y_trimmed[:3800]
Y_trimmed_test = Y_trimmed[3800:]

# Wider and deeper variant: four LSTM layers of 100 units with dropout 0.5,
# trained on mean absolute error. Only the first layer declares input_shape;
# the copies on the inner layers were copy-paste leftovers with no effect,
# because Sequential infers the input of every later layer from the previous
# layer's output.
model = Sequential()
model.add(LSTM(units=100, dropout = 0.5, return_sequences= True, input_shape=(30, 8)))
model.add(LSTM(units=100, dropout = 0.5, return_sequences= True))
model.add(LSTM(units=100, dropout = 0.5, return_sequences= True))
model.add(LSTM(units=100, dropout = 0.5))
model.add(Dense(units=1))
model.summary()
model.compile(optimizer='adam', loss='mean_absolute_error')

model.fit(X_trimmed_train, Y_trimmed_train, epochs=1000, batch_size=64, validation_data=(X_trimmed_test, Y_trimmed_test), shuffle=True)

In [None]:
# Positional split: first 3800 games for training, the rest for validation.
X_trimmed_train = X_trimmed[:3800]
X_trimmed_test = X_trimmed[3800:]
Y_trimmed_train = Y_trimmed[:3800]
Y_trimmed_test = Y_trimmed[3800:]

# Deepest variant: five sequence-returning LSTM layers of 150 units, one
# final LSTM of 100 units, all with dropout 0.5, trained on mean absolute
# error. Only the first layer declares input_shape; the copies on the inner
# layers were copy-paste leftovers with no effect, because Sequential infers
# the input of every later layer from the previous layer's output.
model = Sequential()
model.add(LSTM(units=150, dropout = 0.5, return_sequences= True, input_shape=(30, 8)))
model.add(LSTM(units=150, dropout = 0.5, return_sequences= True))
model.add(LSTM(units=150, dropout = 0.5, return_sequences= True))
model.add(LSTM(units=150, dropout = 0.5, return_sequences= True))
model.add(LSTM(units=150, dropout = 0.5, return_sequences= True))
model.add(LSTM(units=100, dropout = 0.5))
model.add(Dense(units=1))
model.summary()
model.compile(optimizer='adam', loss='mean_absolute_error')

model.fit(X_trimmed_train, Y_trimmed_train, epochs=1000, batch_size=64, validation_data=(X_trimmed_test, Y_trimmed_test), shuffle=True)