In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import pandas as pd
import seaborn as sn
import chess
import chess.uci
import chess.pgn
import io

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Load the first 16 space-separated fields of up to 100000 annotated games,
# skipping the five leading lines of the file; the literal 'None' is read as NaN.
_ANNOTATION_COLUMNS = [
    't', 'date', 'result', 'welo', 'belo', 'len',
    'date_c', 'resu_c', 'welo_c', 'belo_c', 'edate_c',
    'setup', 'fen', 'resu2_c', 'oyrange', 'bad_len',
]
X = pd.read_table(
    'all_with_filtered_anotations_since1998.txt',
    sep=' ',
    engine='c',
    names=_ANNOTATION_COLUMNS,
    usecols=np.arange(len(_ANNOTATION_COLUMNS)),
    skiprows=[0, 1, 2, 3, 4],
    na_values='None',
    nrows=100000,
)

In [None]:
def str2bool(k):
    """Translate the literal strings 'true' / 'false' into Python booleans.

    Any other value (other strings, numbers, None, ...) is returned unchanged.
    """
    for literal, flag in (('true', True), ('false', False)):
        if k == literal:
            return flag
    return k

In [None]:
def _untag(cell):
    """Keep only the text after the last '_' of a string cell, mapping
    'true'/'false' to booleans; non-string cells pass through unchanged."""
    if not isinstance(cell, str):
        return cell
    return str2bool(cell.rsplit('_', 1)[-1])


# Rebuild every column of X with its cells untagged.
X = X.apply(lambda column: pd.Series([_untag(cell) for cell in column]), axis=0)

In [None]:
# Re-read the first 1000 games of the same file, this time splitting each line
# on the literal '### ' marker and keeping only the second field ('game').
# A separator longer than one character is treated by pandas as a regular
# expression, which only the Python parser supports; requesting that engine
# explicitly avoids the ParserWarning emitted when the default C engine has to
# fall back to it.
Z=pd.read_table('all_with_filtered_anotations_since1998.txt',
                sep='### ',
                engine='python',
                skiprows=[0,1,2,3,4],
                names=['garbage','game'],
                na_values='None',
                nrows=1000,
                )['game']

In [None]:
# Inspect the raw 'game' text stored at index 10.
Z[10]

In [None]:
# Parse the text stored at Z[100] as PGN into a python-chess game object.
G = chess.pgn.read_game(io.StringIO(Z[100]))

In [None]:
# Evaluate every position on the main line of game G with Stockfish and print,
# for each one, the engine's best move (UCI notation) and its centipawn score.
handler = chess.uci.InfoHandler()
engine = chess.uci.popen_engine('./stockfish-10-linux/Linux/stockfish_10_x64')
try:
    engine.info_handlers.append(handler)
    # `g` is deliberately left bound after the loop: later cells inspect the
    # last node that was analysed.
    for g in G.mainline():
        engine.position(g.board())
        evaluation = engine.go(movetime=400)  # search for 400 ms per position
        #print(g.board().turn)
        # The engine reports no best move on terminal positions (mate/stalemate).
        best = evaluation.bestmove
        print(best.uci() if best is not None else None)
        # Score of principal variation 1; `.cp` is None when the engine
        # reports a mate distance instead of a centipawn value.
        print(handler.info["score"][1].cp)
finally:
    # Always shut the engine subprocess down, even if the analysis fails
    # part-way, so re-running this cell does not leak Stockfish processes.
    engine.quit()

In [None]:
# Render the board of `g` (the last node left bound by the analysis loop) with Unicode piece glyphs.
print(g.board().unicode())

In [None]:
# Map the side to move to a sign: `turn` is a bool, so True -> 1 and False -> -1
# (in python-chess, True is chess.WHITE).
g.board().turn*2-1


In [None]:
# Keep the rows whose 'welo_c' and 'belo_c' flags are both False, then compute
# the white-minus-black rating difference for those games.
# NOTE(review): presumably the *_c flags mark a failed Elo check — confirm
# against the annotation format.
welo_flag_clear = ~X['welo_c']
belo_flag_clear = ~X['belo_c']
with_elos = X.loc[welo_flag_clear & belo_flag_clear]
elos = with_elos['welo'].sub(with_elos['belo'])


In [None]:
def result2score(result_str):
    """Convert a PGN result token into a numeric score from White's side.

    '1-0' -> 1, '1/2-1/2' -> 0, '0-1' -> -1. Any other value yields None.
    """
    return {'1/2-1/2': 0, '1-0': 1, '0-1': -1}.get(result_str)
# Per-game score for the Elo-filtered rows; result strings that result2score
# does not recognise come back as None.
scores = with_elos['result'].apply(result2score)

In [None]:
# Pair each game's Elo gap with its score in a single frame.
Y = pd.DataFrame({'elo_gap':elos,'scores':scores})

In [None]:
#Y = pd.DataFrame(elos[scores==i] for i in [-1,0,1])

In [None]:
# rolls_dict = {i:elos[Y['scores']==i] for i in [-1,0,1]}

# #You can print the next line to see what rolls_dict is
# #print(rolls_dict) 

# #put the dictionary into a DataFrame
# #means = pd.DataFrame(rolls_dict)
# #means.plot.hist(stacked=True,bins=20,density=True,kde=True)
# for col in [-1,0,1]:
#     sn.distplot(rolls_dict[col],kde=True,label=col)
# plt.xlim(-100,550)
# plt.legend()
# #the bar plot takes means as the data, and ci tells it 
# #what confidence interval to plot with.
# #sns.distplot(means) 
# #sns.set(style="whitegrid")
# #plt.ylabel('Proportion of rolls')
# #plt.xlabel('Dice outcome')

In [None]:
#sn.catplot(x="scores", y="elo_gap", kind="swarm", data=Y);

In [None]:
# Scatter plot with a fitted regression line: game score against Elo gap.
# The variables are passed by keyword because recent seaborn releases make
# x/y keyword-only (the first positional slot is `data`), so the positional
# form warns on older versions and fails on newer ones.
sn.regplot(x=elos,y=scores)

In [None]:
# Peek at the underlying array of the 'scores' column.
Y['scores'].values

In [None]:
# Peek at the underlying array of the 'elo_gap' column.
Y['elo_gap'].values

In [None]:
import torch
import torch.nn as nn
import numpy as np
import torch.utils.data
from oil.model_trainers.trainer import Trainer
# Turn the two columns of Y into float32 column tensors of shape (N, 1) and
# wrap them in a shuffled mini-batch loader.
x = torch.from_numpy(Y['elo_gap'].values.astype(np.float32))[:, None]
y = torch.from_numpy(Y['scores'].values.astype(np.float32))[:, None]
train = torch.utils.data.TensorDataset(x, y)
train_loader = torch.utils.data.DataLoader(
    train,
    batch_size=64,
    shuffle=True,
    drop_last=True,  # discard the final, incomplete batch
)
class Regressor(Trainer):
    """Trainer subclass whose objective is the mean-squared error of the model."""

    def loss(self, mb):
        """Return the MSE between ``self.model(inputs)`` and the targets.

        ``mb`` is unpacked as an ``(inputs, targets)`` pair.
        """
        inputs, targets = mb
        criterion = nn.MSELoss()
        predictions = self.model(inputs)
        return criterion(predictions, targets)


# NOTE(review): l_rate is not referenced anywhere in the visible notebook.
l_rate = 0.001

# BatchNorm over the single input feature, a 1 -> 1 linear layer, then tanh,
# which bounds the output to (-1, 1).
model = nn.Sequential(
            nn.BatchNorm1d(1),
            nn.Linear(1, 1),
            nn.Tanh()
        )
dataloaders={'train':train_loader}
log_args={'no_print':True, 'minPeriod':0, 'timeFrac':.2}
# NOTE(review): opt_constr is built but not passed below (the keyword is
# commented out), so the Trainer presumably uses its own default optimizer — confirm.
opt_constr = lambda params: torch.optim.Adam(params,lr=1e-4) 
trainer = Regressor(model,dataloaders,log_args=log_args)#,opt_constr=opt_constr)

In [None]:
# Run training; the argument 10 is presumably the number of epochs — confirm
# against oil's Trainer.train. The trailing ';' suppresses the cell's output.
trainer.train(10);

In [None]:
# Switch the model to evaluation mode (BatchNorm then uses its running
# statistics), then plot the scalars recorded by the trainer's logger.
trainer.model.eval()
trainer.logger.scalar_frame.plot()

In [None]:
# 1000 evenly spaced Elo gaps from -600 to 500 on which to evaluate the model,
# plus the same grid as a float32 column tensor of shape (1000, 1).
# NOTE: this rebinds `x`, which held the training inputs in an earlier cell.
x = np.linspace(-600, 500, num=1000)
x_t = torch.from_numpy(x.astype(np.float32))[:, None]

In [None]:
# Evaluate the model on the grid and flatten its output to a 1-D NumPy array.
# NOTE(review): `.data` bypasses autograd tracking; `.detach()` or a
# torch.no_grad() block is the currently recommended way to do this.
y_pred = trainer.model(x_t).squeeze(-1).data.numpy()

In [None]:
# Spot-check: model output for a single input of -50 (tensor of shape (1, 1)).
trainer.model(torch.zeros(1,1)-50)

In [None]:
# Plot the model's prediction over the evaluation grid.
plt.plot(x,y_pred)

In [None]:
# Histograms of the 'welo' and 'belo' columns over all loaded rows.
X.hist('welo')
X.hist('belo')

In [None]:
# Distribution plot of the 'belo' column.
# NOTE(review): distplot is deprecated in seaborn >= 0.11 in favour of
# histplot / displot; switch when the environment's seaborn is upgraded.
sn.distplot(X['belo'])

In [1]:
%cd data/

/home/marc/OMGchess/chess/data


In [2]:
%ls

all_with_filtered_anotations_since1998.txt  chess_3000k_0.1s_train_6.pkl
chess_100k_0.1s_indices.pkl                 chess_3000k_0.1s_train_7.pkl
chess_100k_0.1s_test.pkl                    chess_3000k_0.1s_train_8.pkl
chess_100k_0.1s_train_0.pkl                 chess_3000k_0.1s_train_9.pkl
chess_100k_0.1s_train_1.pkl                 chess_3000k_0.1s_train_merged.pkl
chess_100k_0.1s_train_2.pkl                 chess_3000k_0.1s_trainsmall.pkl
chess_100k_0.1s_trainsmall.pkl              chess_3000k_0.1s_val.pkl
chess_100k_0.1s_val.pkl                     chess_3000k_0.2s_test.pkl
chess_3000k_0.1s_indices.pkl                chess_3000k_0.2s_train_0.pkl
chess_3000k_0.1s_test.pkl                   chess_3000k_0.2s_trainsmall.pkl
chess_3000k_0.1s_train_0.pkl                chess_3000k_0.2s_val.pkl
chess_3000k_0.1s_train_1.pkl                chess_3001k_0.2s_indices.pkl
chess_3000k_0.1s_train_2.pkl                chess_3001k_0.2s_test.pkl
chess_3000k_0.1s_train_3.pkl                chess_3001

In [3]:
import dill,torch

In [6]:
# Load the ten pickled training shards (train_0 ... train_9) into a list,
# one entry per shard.
data = []
for i in range(10):
    shard_path = f'chess_3000k_0.1s_train_{i}.pkl'
    with open(shard_path, 'rb') as file:
        shard = dill.load(file)
    data.append(shard)

In [None]:
# Number of entries in the shard at index 2.
len(data[2])

In [7]:
# Flatten the list of shards into one list, preserving shard order.
merged = [sample for data_subset in data for sample in data_subset]

In [None]:
# Total number of entries across all shards.
len(merged)

In [8]:
import os

# Serialise the merged list to disk.
# The dump goes to a temporary sibling file first and is only renamed onto the
# final path once it has completed. A dump that dies part-way (e.g. with
# MemoryError) would otherwise leave a truncated .pkl behind, which later fails
# to load with "EOFError: Ran out of input".
merged_path = 'chess_3000k_0.1s_train_merged.pkl'
tmp_path = merged_path + '.tmp'
try:
    with open(tmp_path,'wb') as file:
        dill.dump(merged,file)
    os.replace(tmp_path, merged_path)  # atomic when both paths share a filesystem
except BaseException:
    # Drop the partial file and leave any previous merged pickle untouched.
    if os.path.exists(tmp_path):
        os.remove(tmp_path)
    raise

MemoryError: 

In [None]:
# NOTE(review): stray scratch cell; it only evaluates the literal 1 and can be removed.
1

In [5]:
# Read the merged training set back from disk.
# The path has no placeholders, so it is a plain string rather than an f-string.
# NOTE: dill.load can execute arbitrary code embedded in the file; only load
# pickles produced by a trusted source.
with open('chess_3000k_0.1s_train_merged.pkl','rb') as file:
    b = dill.load(file)

EOFError: Ran out of input