In [1]:
import chess
import chess.uci
import chess.pgn
import os
import csv
import matplotlib.pyplot as plt
from numpy import array
import numpy as np
import pandas as pd
import math
import ast
from random import shuffle
from mpl_toolkits.mplot3d import Axes3D
from scipy import stats
from sklearn.preprocessing import StandardScaler

%matplotlib inline


In [2]:
# if using raw evaluation data
def get_old_elo_perfs(data_dir="/Users/tylerahlstrom/Documents/GitHub/DI_proposal/data",
                      num_games=49000):
    """Build per-player rows of Elo plus move-by-move evaluation changes.

    Reads ``game_data.pgn`` and ``stockfish_evals.csv`` from ``data_dir``.
    For game ``i`` two rows are produced, white first and then black:

    * white: ``[WhiteElo, first evaluation, change on every even-indexed evaluation...]``
    * black: ``[BlackElo, negated change on every odd-indexed evaluation...]``

    A missing/unparseable Elo or evaluation is recorded as ``0.0``.

    Args:
        data_dir: Directory holding the two input files.
        num_games: Number of games to read from the PGN file.

    Returns:
        A numpy object array with ``2 * num_games`` entries. Rows generally
        differ in length, so ``dtype=object`` is requested explicitly;
        recent NumPy versions refuse to build a ragged array otherwise.

    Raises:
        IndexError: If the CSV holds fewer evaluation rows than ``num_games``.
    """
    game_data_path = os.path.join(data_dir, "game_data.pgn")
    eval_csv_path = os.path.join(data_dir, "stockfish_evals.csv")

    def _float_or_zero(value):
        # Missing (None) or non-numeric values fall back to 0.0.
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    evals = []
    with open(eval_csv_path) as eval_csv:
        for item in csv.reader(eval_csv, delimiter=','):
            # Drop the first field, then split the remaining text on whitespace.
            item.pop(0)
            item = str(item)[2:-2].split()
            evals.append(item)
    # The first CSV row is discarded (presumably the header line).
    evals.pop(0)

    elo_and_performance = []

    with open(game_data_path) as pgns:
        for i in range(num_games):
            current_game = chess.pgn.read_game(pgns)
            # read_game returns None once the file is exhausted.
            headers = current_game.headers if current_game is not None else {}
            game_evals = evals[i]

            pw = [_float_or_zero(headers.get("WhiteElo"))]
            pb = [_float_or_zero(headers.get("BlackElo"))]
            pw.append(_float_or_zero(game_evals[0]) if game_evals else 0.0)

            for j in range(1, len(game_evals)):
                try:
                    move_value = float(game_evals[j]) - float(game_evals[j-1])
                except ValueError:
                    move_value = 0.0
                if j % 2 != 0:
                    # Odd indices are black's moves; flip the sign so that
                    # "good for the mover" is positive for both colours.
                    pb.append(-move_value)
                else:
                    pw.append(move_value)

            elo_and_performance.append(pw)
            elo_and_performance.append(pb)

    return array(elo_and_performance, dtype=object)
    

In [3]:
# # straight from kaggle dataset
# elo_and_performance = []

# for i in range(49000):
#     pw = []
#     pb = []
#     current_game = chess.pgn.read_game(pgns)
#     try:
#         pw.append(float(current_game.headers["WhiteElo"]))
#     except:
#         pw.append(0.0)
#     try:
#         pb.append(float(current_game.headers["BlackElo"]))
#     except:
#         pb.append(0.0)
#     try:
#         pw.append(float(evals[i][0]))
#     except:
#         pw.append(0.0)
#     for j in range (len(evals[i])):
#         if j == 0:
#             continue
#         try:
#             move_value = float(evals[i][j]) - float(evals[i][j-1])
#         except:
#             move_value = 0.0
#         if j % 2 != 0: 
#             pb.append(-move_value)
#         else:
#             pw.append(move_value)
#     elo_and_performance.append(pw)
#     elo_and_performance.append(pb)

# elo_and_performance = array(elo_and_performance)



In [4]:
# if using my own stockfish analysis
def get_new_elo_perfs(eval_time, data_dir="/Users/tylerahlstrom/Documents/GitHub/DI_proposal/data"):
    """Load per-player performance rows from a Stockfish analysis text file.

    Each line of ``stockfish_performances_<time>.txt`` is parsed as a Python
    literal list. The element at index 1 of every row (the opening) is
    removed and replaced by the matching line of ``winloss.txt``, kept as a
    string.

    Args:
        eval_time: Evaluation time in seconds; must be a float and one of
            0.5, 1.0 or 3.0.
        data_dir: Directory holding the performance and win/loss files.

    Returns:
        A list of rows, or ``None`` (after printing a message) when no file
        exists for ``eval_time``.

    Raises:
        AssertionError: If ``eval_time`` is not a float.
        IndexError: If ``winloss.txt`` has fewer lines than there are rows.
    """
    assert isinstance(eval_time, float), "Eval_time is not a float: %r" % (eval_time,)

    time_str = {
        0.5: "halfsecond",
        1.0: "onesecond",
        3.0: "threeseconds",
    }.get(eval_time)

    if time_str is None:
        print("Eval time not available")
        return None

    performances_file = os.path.join(data_dir, "stockfish_performances_" + time_str + ".txt")

    elo_and_p = []
    with open(performances_file, 'r') as f:
        for line in f:
            elo_and_p.append(ast.literal_eval(line))

    #removing openings for now
    for item in elo_and_p:
        del item[1]

    #inserting win/loss from separate extraction
    with open(os.path.join(data_dir, "winloss.txt")) as winloss_file:
        winloss = [line.strip('\n') for line in winloss_file]

    for i in range(len(elo_and_p)):
        elo_and_p[i].insert(1, winloss[i])

    return elo_and_p

In [5]:
def get_performance_csv(eval_time = 0.0,
                        data_dir="/Users/tylerahlstrom/Documents/GitHub/DI_proposal/data"):
    """Load the Stockfish performance table for one evaluation time.

    Args:
        eval_time: Evaluation time in seconds; must be a float and one of
            0.5, 1.0, 3.0 or 15.0. The default (0.0) matches no file, so a
            call without arguments prints a message and returns ``None``.
        data_dir: Directory holding ``stockfish_performances_<time>.csv``.

    Returns:
        A pandas DataFrame read from the CSV, or ``None`` when no file exists
        for ``eval_time``.

    Raises:
        AssertionError: If ``eval_time`` is not a float.
    """
    assert isinstance(eval_time, float), "Eval_time is not a float: %r" % (eval_time,)

    time_str = {
        0.5: "halfsecond",
        1.0: "onesecond",
        3.0: "threeseconds",
        15.0: "fifteenseconds",
    }.get(eval_time)

    if time_str is None:
        print("Eval time not available")
        return None

    performances_file = os.path.join(data_dir, "stockfish_performances_" + time_str + ".csv")

    with open(performances_file, 'r') as f:
        df = pd.read_csv(f)
    return df

In [8]:
df = get_performance_csv(eval_time = 15.0)

# Preview only the first rows instead of rendering the whole frame.
df.head(10)

Unnamed: 0,elo,opp_elo,opening,result,eval_time,move_evals,opp_move_evals
0,1721,1253,B01,1.0,0.1,"['-18.00', '-23.00', '-65.00', '-22.00', '-39....","['-79.00', '-25.00', '-95.00', '1.00', '-115.0..."
1,1253,1721,B01,0.0,0.1,"['-79.00', '-25.00', '-95.00', '1.00', '-115.0...","['-18.00', '-23.00', '-65.00', '-22.00', '-39...."
2,1444,1307,B20,0.0,0.1,"['-26.00', '-67.00', '-100.00', '-37.00', '-18...","['-54.00', '-20.00', '-47.00', '-103.00', '-27..."
3,1307,1444,B20,1.0,0.1,"['-54.00', '-20.00', '-47.00', '-103.00', '-27...","['-26.00', '-67.00', '-100.00', '-37.00', '-18..."
4,1907,1799,B00,1.0,0.1,"['-18.00', '-30.00', '-22.00', '-50.00', '-37....","['-96.00', '-13.00', '-31.00', '-20.00', '-50...."
5,1799,1907,B00,0.0,0.1,"['-96.00', '-13.00', '-31.00', '-20.00', '-50....","['-18.00', '-30.00', '-22.00', '-50.00', '-37...."
6,2011,1936,E00,0.0,0.1,"['-146.00', '-70.00', '-50.00', '-132.00', '-8...","['-32.00', '-30.00', '-185.00', '-131.00', '-5..."
7,1936,2011,E00,1.0,0.1,"['-32.00', '-30.00', '-185.00', '-131.00', '-5...","['-146.00', '-70.00', '-50.00', '-132.00', '-8..."
8,1256,1259,D00,0.0,0.1,"['-40.00', '-140.00', '2.00', '-108.00', '-109...","['-63.00', '-21.00', '-3.00', '-95.00', '-102...."
9,1259,1256,D00,1.0,0.1,"['-63.00', '-21.00', '-3.00', '-95.00', '-102....","['-40.00', '-140.00', '2.00', '-108.00', '-109..."


In [5]:
def remove_bugged_evals(provided_list):
    """Drop rows affected by the "zeros" evaluation bug.

    A row is flagged when it contains more than three ``0.0`` entries. If at
    least one row is flagged, the list is filtered down to rows that have
    fewer than two ``0.0`` entries and whose first element is greater than 1.

    NOTE(review): the flagging rule (> 3 zeros) is looser than the removal
    rule (>= 2 zeros, or first element <= 1), so more rows can be removed
    than are reported. This asymmetry is kept from the original; confirm it
    is intended.

    Args:
        provided_list: List of rows (lists).

    Returns:
        The filtered list, or ``provided_list`` itself when nothing is flagged.
    """
    # 0-based indices, matching remove_short_games.
    zero_rows = [idx for idx, row in enumerate(provided_list) if row.count(0.0) > 3]

    if zero_rows:
        # Report at most the first 40 offenders to keep the output short.
        print("Found many zeros in these rows: " + str(zero_rows[:40]))
        new_perf = [row for row in provided_list
                    if row.count(0.0) < 2 and row[0] > 1]
        print("...and removed those rows")
    else:
        print("List is cleared of zeros bug")
        new_perf = provided_list
    return new_perf


In [6]:
def remove_short_games(provided_list, threshold=12):
    """Drop rows that are too short to be useful.

    Args:
        provided_list: List of rows (any sized sequences).
        threshold: Minimum row length to keep. The length counts every entry
            in the row, not only the move evaluations.

    Returns:
        A new list without the short rows, or ``provided_list`` itself when
        every row is long enough.
    """
    nomove_rows = [idx for idx, row in enumerate(provided_list) if len(row) < threshold]

    if nomove_rows:
        print("Found fewer than " + str(threshold) + " entries in these rows: " + str(nomove_rows))
        new_list = [row for row in provided_list if len(row) >= threshold]
        print("...and removed those rows")
    else:
        print("List has no short games")
        new_list = provided_list
    return new_list

### Get data and clean it:

In [None]:
# Load the three-second analysis and keep only the first 1000 rows.
elo_and_performance = get_new_elo_perfs(3.0)[:1000]
print(f"Data points before clean-up: {len(elo_and_performance)}")

# Clean in two passes: bugged evaluations first, then games that are too short.
elo_and_performance = remove_short_games(remove_bugged_evals(elo_and_performance))
print(f"Data points after clean-up: {len(elo_and_performance)}")
print(elo_and_performance[:2])


### Create features:

In [None]:
# How many leading move evaluations feed the "first moves" features (#4, #5).
# The original sliced [0:9], which takes only nine values for the "ten" features.
FIRST_N_MOVES = 10

data_points = []

for item in elo_and_performance:
    # item[0] and item[1] are header fields; everything after is a move evaluation.
    move_evals = np.array([float(entry) for entry in item[2:]], dtype = float)
    first_evals = move_evals[:FIRST_N_MOVES]

    data_points.append([
        int(item[0]),          #0: Elo (used as the regression target later)
        float(item[1]),        #1: win/loss value
        np.mean(move_evals),   #2: mean over all move evaluations
        np.std(move_evals),    #3: standard deviation over all move evaluations
        np.mean(first_evals),  #4: mean over the first FIRST_N_MOVES evaluations
        np.std(first_evals),   #5: standard deviation over the first FIRST_N_MOVES
        len(item),             #6: row length (includes the two header fields)
    ])
    # Squared features and "average of the N worst moves" were tried here and
    # are currently disabled.

data_points = array(data_points)
np.set_printoptions(suppress=True)

#np.random.shuffle(data_points)
print(data_points.shape)
print(data_points[0:10])

### Create training and test set:

In [None]:
# Hold out the last `test_percentage` percent of the rows; rows are not shuffled.
test_percentage = 5
total = len(data_points)
train_size = int(total * (100-test_percentage)/100)

# Features are every column after column 0. The scaler is fitted on the
# training rows only and then reused unchanged for the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(data_points[:train_size,1:])
print(X_train[3])
X_test = scaler.transform(data_points[train_size:,1:])

# Targets: column 0 for the Elo models, column 1 for the win/loss model.
Y_train_elo, Y_train_wl = data_points[:train_size,0], data_points[:train_size,1]
Y_test_elo, Y_test_wl = data_points[train_size:,0], data_points[train_size:,1]

print(f"X_train: {X_train.shape}")
print(f"X_test: {X_test.shape}")
print(f"Y_train: {Y_train_elo.shape}")
print(f"Y_test: {Y_test_elo.shape}")

### Build models:

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import preprocessing
from sklearn.model_selection import validation_curve
from sklearn.linear_model import Ridge
from sklearn.model_selection import learning_curve


In [None]:
# Smallest network: one hidden layer of 2 ReLU units, L-BFGS solver,
# comparatively strong L2 penalty (alpha).
model = MLPRegressor(
    hidden_layer_sizes=(2,),
    activation='relu',
    solver='lbfgs',
    alpha=1e-2,
    tol=1e-8,
    max_iter=100000,
    warm_start=True,
    random_state=0,
)

In [None]:
# One hidden layer of 4 ReLU units, L-BFGS solver, weak L2 penalty.
model1 = MLPRegressor(
    hidden_layer_sizes=(4,),
    activation='relu',
    solver='lbfgs',
    alpha=1e-5,
    tol=1e-6,
    max_iter=100000,
    warm_start=True,
    random_state=0,
)

In [None]:
# One hidden layer of 6 ReLU units, L-BFGS solver, weak L2 penalty.
# This variant gets a ten times larger iteration budget than the others.
model2 = MLPRegressor(
    hidden_layer_sizes=(6,),
    activation='relu',
    solver='lbfgs',
    alpha=1e-5,
    tol=1e-8,
    max_iter=1000000,
    warm_start=True,
    random_state=0,
)

In [None]:
# Largest network: one hidden layer of 8 ReLU units, L-BFGS solver,
# weak L2 penalty and the loosest tolerance of the four.
model3 = MLPRegressor(
    hidden_layer_sizes=(8,),
    activation='relu',
    solver='lbfgs',
    alpha=1e-5,
    tol=1e-5,
    max_iter=100000,
    warm_start=True,
    random_state=0,
)


### Train and test models:

In [None]:
# Fit `model` on the Elo target and score it on the held-out rows.
model.fit(X_train, Y_train_elo)
Y_pred = model.predict(X_test)
# sqrt(MSE) is the root mean squared error, not the mean absolute error.
print("Root mean squared error: " + str(math.sqrt(mean_squared_error(Y_test_elo, Y_pred))))
print("Training set r^2: " + str(model.score(X_train, Y_train_elo)))
print("Test set r^2: " + str(model.score(X_test, Y_test_elo)))

In [None]:
# Fit `model1` on the Elo target and score it on the held-out rows.
model1.fit(X_train, Y_train_elo)
Y_pred = model1.predict(X_test)
# sqrt(MSE) is the root mean squared error, not the mean absolute error.
print("Root mean squared error: " + str(math.sqrt(mean_squared_error(Y_test_elo, Y_pred))))
print("Training set r^2: " + str(model1.score(X_train, Y_train_elo)))
print("Test set r^2: " + str(model1.score(X_test, Y_test_elo)))

In [None]:
# Fit `model2` on the Elo target and score it on the held-out rows.
model2.fit(X_train, Y_train_elo)
Y_pred = model2.predict(X_test)
# sqrt(MSE) is the root mean squared error, not the mean absolute error.
print("Root mean squared error: " + str(math.sqrt(mean_squared_error(Y_test_elo, Y_pred))))
print("Training set r^2: " + str(model2.score(X_train, Y_train_elo)))
print("Test set r^2: " + str(model2.score(X_test, Y_test_elo)))

In [None]:
# Fit `model3` on the Elo target and report errors on both the training and
# the held-out rows.
model3.fit(X_train, Y_train_elo)
Y_pred = model3.predict(X_test)
Y2_pred = model3.predict(X_train)
# sqrt(MSE) is the root mean squared error, not the mean absolute error.
print("Root mean squared error train: " + str(math.sqrt(mean_squared_error(Y_train_elo, Y2_pred))))
print("Root mean squared error test: " + str(math.sqrt(mean_squared_error(Y_test_elo, Y_pred))))

print("Training set r^2: " + str(model3.score(X_train, Y_train_elo)))
print("Test set r^2: " + str(model3.score(X_test, Y_test_elo)))

# loss_values = model3.loss_curve_
# plt.plot(loss_values)
# plt.show()

# train_sizes, train_scores, test_scores = learning_curve(model3, X, Y, n_jobs=-1, cv=cv, train_sizes=np.linspace(.1, 1.0, 5), verbose=0)

# train_scores_mean = np.mean(train_scores, axis=1)
# train_scores_std = np.std(train_scores, axis=1)
# test_scores_mean = np.mean(test_scores, axis=1)
# test_scores_std = np.std(test_scores, axis=1)

# plt.figure()
# plt.title("RandomForestClassifier")
# plt.legend(loc="best")
# plt.xlabel("Training examples")
# plt.ylabel("Score")
# plt.gca().invert_yaxis()

# # box-like grid
# plt.grid()

# # plot the std deviation as a transparent range at each training set size
# plt.fill_between(train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=0.1, color="r")
# plt.fill_between(train_sizes, test_scores_mean - test_scores_std, test_scores_mean + test_scores_std, alpha=0.1, color="g")

# # plot the average training and test score lines at each training set size
# plt.plot(train_sizes, train_scores_mean, 'o-', color="r", label="Training score")
# plt.plot(train_sizes, test_scores_mean, 'o-', color="g", label="Cross-validation score")

# # sizes the window for readability and displays the plot
# # shows error from 0 to 1.1
# #plt.ylim(-.1,1.1)
# plt.show()



In [None]:
# Ordinary least-squares baseline on the same scaled features.
regr = linear_model.LinearRegression().fit(X_train, Y_train_elo)
Y_pred = regr.predict(X_test)

# Printed in order: coefficients, sqrt(MSE) on the test rows, test r^2.
print(
    regr.coef_,
    math.sqrt(mean_squared_error(Y_test_elo, Y_pred)),
    r2_score(Y_test_elo, Y_pred),
    sep="\n",
)



### Null test for predicting elo:

In [None]:
# Null model: predict the mean training Elo for every test row.
average_elo = np.mean(Y_train_elo)
print("average elo: " + str(average_elo))

# Size the constant prediction from the test targets themselves rather than
# from whatever Y_pred an earlier cell happened to leave behind.
Y_pred = np.full(len(Y_test_elo), average_elo)
print(math.sqrt(mean_squared_error(Y_test_elo, Y_pred)))
print(r2_score(Y_test_elo, Y_pred))


### Run best model to predict win/loss:

In [None]:
# The win/loss value is the target here, so it must not also be an input.
# X_train/X_test are built from data_points[:, 1:], which makes their column 0
# the (scaled) win/loss value; drop it to avoid leaking the target.
X_train_wl = X_train[:, 1:]
X_test_wl = X_test[:, 1:]

# Use a fresh, unfitted estimator with model1's settings. model1 itself was
# created with warm_start=True and has already been fitted on the Elo target,
# so refitting it would not start from scratch.
wl_model = MLPRegressor(**model1.get_params())
wl_model.fit(X_train_wl, Y_train_wl)
Y_pred = wl_model.predict(X_test_wl)
#print(model.coef_)
print(math.sqrt(mean_squared_error(Y_test_wl, Y_pred)))
print(wl_model.score(X_test_wl, Y_test_wl))
print(r2_score(Y_test_wl, Y_pred))

### Messy graphing scripts below

In [None]:
# Group players into Elo bins and average two feature columns per bin.
# Each output row is [lower bin edge, mean of MEAN_COL, mean of STD_COL].
num_bins = 6

# Lower edge of every bin: 1000, 1300, ..., 2500 for the default six bins.
bins = np.linspace(1000, 2500, num_bins)
bin_width = bins[1] - bins[0]

# Columns of data_points that are averaged per bin.
# NOTE(review): in the feature cell columns 4 and 5 are the mean and standard
# deviation of the first few moves only - confirm these are the intended ones.
MEAN_COL = 4
STD_COL = 5


def _mean_ignoring_nan(values):
    """Mean of the non-NaN entries of `values`; NaN when none are left."""
    values = values[~np.isnan(values)]
    return np.mean(values) if values.size else float("nan")


group_data = []
player_elos = data_points[:, 0]

for lower_edge in bins:
    # Half-open interval [lower, lower + width) so that a player sitting
    # exactly on an edge lands in exactly one bin. Plain local names are used
    # so the builtins `min` and `bin` are not overwritten.
    in_bin = data_points[(player_elos >= lower_edge) & (player_elos < lower_edge + bin_width)]

    group_mean = _mean_ignoring_nan(in_bin[:, MEAN_COL])
    group_std = _mean_ignoring_nan(in_bin[:, STD_COL])

    # Surface bins that ended up without usable values.
    if math.isnan(float(group_mean)):
        print(group_mean)
    if math.isnan(float(group_std)):
        print(group_std)

    group_data.append([lower_edge, group_mean, group_std])

binned_data = array(group_data)


In [None]:
%matplotlib qt

N = binned_data.shape[0]
print(N)
elos = [int(el[0]) for el in binned_data]
means = [el[1] for el in binned_data]
stds = [el[2] for el in binned_data]

print(elos)
print(means)
print(stds)



### scaling for viewing
means = [el*0.8 for el in means]

font = {'family' : 'normal',
        'weight' : 'normal',
        'size'   : 20}
plt.rc('font', **font)

ind = np.arange(N)  
width = 0.35 

fig, ax = plt.subplots()
rects1 = ax.bar(ind, means, width, color='seagreen')
rects2 = ax.bar(ind + width*.8, stds, width, color='darkslateblue')


ax.set_ylabel('Values (scaled for viewing)')
ax.set_xlabel('Elo bins ')

ax.set_title('Features of evaluated moves')
ax.set_xticks(ind + width / 2)
ax.set_xticklabels(elos)

plt.tick_params(left='off', labelleft='off')

ax.legend((rects1[0], rects2[0]), ('Average move strength', 'Standard deviation of move strength'))


plt.axhline(0, color='black')
plt.show()

In [None]:
# x = data_points[:,0]
# y = [:,1]

# Scatter one feature against Elo and compare polynomial fits of order 1-3.
data = np.array(data_points)

# NOTE(review): this cell used to read column 13, but the feature cell builds
# only columns 0-6, so that index raises IndexError. Column 2 (mean over all
# move evaluations) is used here as a stand-in - confirm the intended feature.
FEATURE_COL = 2

x = data[:,0]
y = data[:,FEATURE_COL]

color = 'r'
scale = 1.0


#plt.legend()
plt.scatter(x,y, s=scale, c=color, marker = ",", lw=0, alpha = 0.5, label = 'accounts with # games played: <15')

# First order: least-squares line plus its r^2.
slope, intercept, r_value, p_value, std_err = stats.linregress(x,y)
plt.plot(x, slope*x +intercept, '-', c = color, label = 'slope = ' + '%1.2E' % slope)
print("first order: " + str(np.power(r_value, 2)))

# Total sum of squares around the mean. The earlier (len(y)-1) * np.var(y)
# mixed an n-1 factor with np.var's default population variance (ddof=0).
SStotal = np.sum(np.power(y - np.mean(y), 2))

# Higher orders: r^2 = 1 - SS_residual / SS_total.
for order, order_name in ((2, "second"), (3, "third")):
    p = np.polyfit(x,y,order)
    yfit = np.polyval(p,x)
    yresid = y - yfit
    SSresid = np.sum(np.power(yresid,2))
    rsq = 1 - SSresid/SStotal
    print(order_name + " order: " + str(rsq))

In [None]:
%matplotlib qt

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

c = 'r'
m = 'o'

xs = [(item[1]) for item in data_points]
ys = [(item[2]) for item in data_points]
zs = [(item[0]) for item in data_points]

ax.scatter(xs, ys, zs, c=c, marker=m)

# for c, m, zlow, zhigh in [('r', 'o', -50, -25), ('b', '^', -30, -5)]:
#     xs = randrange(n, 23, 32)
#     ys = randrange(n, 0, 100)
#     zs = randrange(n, zlow, zhigh)

ax.set_xlabel('mean strength')
ax.set_ylabel('std strength')
ax.set_zlabel('Elo')

plt.show()