Didn't miss writing a notebook for statistics that much :|

First things first we need to change the extensions of the matches to txt from tcx (the export format of choice of the garmin connect app) but first some imports

In [8]:
import os
import numpy as np
import pandas as pd
import csv
import chess
import chess.pgn
import chess.engine
from stockfish import Stockfish


In [9]:
def change_ext(cartella, old_ext, new_ext):
    for root, dirs, files in os.walk(cartella):
        # print(os.getcwd())
        for f in files:
            filename = os.fsdecode(f)
            
                
            if filename.endswith(old_ext):
                name, ext = os.path.splitext(filename)
                
                new_name = name + new_ext
                jpath = os.path.join(root,new_name)
                os.rename(os.path.join(root,filename), jpath)
                print(f"renamed {os.path.join(root,filename)} to {jpath}")

change_ext("games", ".tcx", ".txt")

Now we have to import the data from the [Portable Game Notation or pgn](https://en.wikipedia.org/wiki/Portable_Game_Notation)
and read the heartbeats of both white and black (in the games where they are both available) in order to create a 3d dataframe for every game and then to visualize the data we are working with. To process the portable game notation we will use the following cell of code.

In [10]:
### UTILS FOR READING THE ACTIVITIES

def calctime(start,curr):
    c = [int(x) for x in curr.split(":")]
    s = [int(x) for x in start.split(":")]
    return ((c[0] - s[0]) * 60 + (c[1] - s[1])) * 60 + (c[2] - s[2])

def read_activity(game):
    with open(game) as f:
        lines = f.readlines()
        hr = []
        time = []
        for s in lines:
            if "<Value>" in s:
                hr.append(s.strip()[s.strip().find(">") + 1:s.strip().rfind("<")])
            if "<Time>" in s:
                s = s.strip()[3:]
                if not time:
                    start = s.strip()[s.strip().find("T") + 1:s.strip().rfind(".")]
                    
                time.append(calctime(start, s.strip()[s.strip().find("T") + 1:s.strip().rfind(".")]))
        
        #print(f"hr is {hr}, and time is {time}")
        hr = hr[2:]
        if len(time) - 1 != time[-1]:
            diz = {time[i]: hr[i] for i in range(min(len(hr), len(time)))}
            last_battito = 0
            res = []
            keys = list(diz.keys())
            
            
            for s in range(time[-1] + 1):
                if s in diz:
                    res.append((diz[s], s))
                    last = (diz[s], s)
                    indice_s = keys.index(s)
                    if indice_s +1 < len(keys):
                        next_s = keys[indice_s + 1]
                        next = (diz[keys[indice_s + 1]], next_s)
                    
                    
                else:
                    total_gap = s - last[1] + next[1] - s
                    weight_next = (s-last[1]) * 100 / total_gap
                    weight_last = 100 - weight_next 
                    est_battito = (weight_last * float(last[0]) + weight_next * float(next[0])) / 100
                    res.append((round(est_battito), s))
            return res
                
        
        return list(zip(hr,time))
        
read_activity("games/partita_0814-2/white.txt")

[('88', 0),
 ('88', 1),
 (89, 2),
 (89, 3),
 (90, 4),
 (90, 5),
 ('91', 6),
 (91, 7),
 (90, 8),
 (90, 9),
 (90, 10),
 (89, 11),
 (89, 12),
 (88, 13),
 ('88', 14),
 (87, 15),
 (87, 16),
 (86, 17),
 (86, 18),
 ('85', 19),
 (86, 20),
 (86, 21),
 (86, 22),
 (87, 23),
 (88, 24),
 ('88', 25),
 (88, 26),
 (88, 27),
 (88, 28),
 (88, 29),
 (88, 30),
 (88, 31),
 (88, 32),
 (89, 33),
 (89, 34),
 (89, 35),
 (89, 36),
 (89, 37),
 (89, 38),
 (89, 39),
 ('89', 40),
 (88, 41),
 (88, 42),
 (87, 43),
 ('86', 44),
 (86, 45),
 (86, 46),
 (86, 47),
 (86, 48),
 (86, 49),
 (86, 50),
 (86, 51),
 (86, 52),
 (86, 53),
 (86, 54),
 (86, 55),
 (86, 56),
 (86, 57),
 (86, 58),
 ('86', 59),
 (87, 60),
 (87, 61),
 (88, 62),
 (88, 63),
 (89, 64),
 (89, 65),
 ('90', 66),
 (89, 67),
 (89, 68),
 (88, 69),
 (88, 70),
 ('87', 71),
 (87, 72),
 (88, 73),
 (88, 74),
 (88, 75),
 (89, 76),
 (89, 77),
 (90, 78),
 ('90', 79),
 (91, 80),
 (92, 81),
 (92, 82),
 ('93', 83),
 (92, 84),
 (92, 85),
 (92, 86),
 (91, 87),
 (90, 88),
 ('90

In [11]:
### UTILS FOR READING THE PGN

#this method takes as input the board and the last move so to update the board and compute the evaluation
def get_evaluation(board,move):

    stockfish = Stockfish("/usr/local/Cellar/stockfish/15/bin/stockfish")     #if no parameter passed, takes as input the executable file called stockfish from the same directory, if found
    board.push_san(move)        #insert the move in the board
    stockfish.set_fen_position(board.fen())  #bpard.fen() applies a PGN to FEN conversion
    engine = chess.engine.SimpleEngine.popen_uci("stockfish")
    info = engine.analyse(board, chess.engine.Limit(time=0.1))     #gets the infos of the game
    
    #if the position on the board is not a mate the score is in centipawns, then it's divided by 100 to get the actual score
    #Gets the score from White’s point of view
    if chess.engine.PovScore.is_mate(info['score']):
        eval = chess.engine.PovScore.white(info['score'])
    else:
        eval = int(str(chess.engine.PovScore.white(info['score'])))/100

    return board, eval      #returns the board updated and the evaluation of the position

In [12]:
def get_timestamps(file):
    timestamps = []
    file = file[:-3] + "txt"
    with open(file, "r") as f:
        file_read = f.read()
        lista = file_read.split("%")
        
        
        for el in lista:
            if el[0:4] == "time":
                num = ""
                flag = False
                for i  in range(len(el)):
                    while el[i].isdigit():
                        num += el[i]
                        i += 1
                        flag = True
                    if flag:
                        timestamps.append(int(num)/10)
                        break
    return timestamps
            
get_timestamps("/Users/tommasodimario/Documents/GitHub/statistics-101/project/src/games/partita_0816-7/tommidim_vs_roccot01_2022.08.16.txt")

[0.1,
 0.1,
 3.4,
 0.9,
 13.6,
 1.9,
 2.8,
 9.8,
 3.5,
 11.4,
 13.7,
 9.5,
 13.0,
 19.4,
 12.9,
 11.3,
 2.5,
 1.7,
 17.7,
 5.5,
 10.1,
 2.7,
 0.6,
 4.8,
 21.2,
 4.4,
 0.1,
 10.4,
 2.5,
 22.1,
 10.8,
 12.1,
 23.1,
 6.5,
 1.6,
 20.3,
 7.7,
 49.1,
 27.4,
 10.1,
 3.5,
 16.7,
 6.2,
 8.7,
 37.7,
 34.0,
 49.1,
 9.7,
 26.9,
 29.2,
 11.8,
 1.4,
 12.9,
 22.8,
 17.7,
 17.7,
 12.9,
 20.9,
 54.3,
 12.6,
 6.0,
 1.6,
 0.4,
 1.0,
 0.4,
 26.9,
 3.7,
 7.1,
 25.2,
 15.9,
 42.5,
 7.4,
 15.1,
 16.0,
 10.7,
 21.5,
 7.9,
 13.1,
 21.0,
 1.7,
 2.0,
 22.3,
 1.1,
 19.5,
 6.0,
 18.0,
 19.3,
 12.7,
 0.7,
 3.9,
 2.0,
 6.2,
 3.8,
 1.3,
 3.1,
 3.2,
 7.7,
 1.5,
 0.1,
 1.1]

In [13]:
#read_pgn takes as input the name of the file pgn containing the game and returns a dataframe
def read_pgn(game_name): 
    old = False
    if "05.29" in game_name or "05.25" in game_name or "clk" in game_name:
        old = True
    #opens and reads the pgn file
    if not old:
        timestamps = get_timestamps(game_name)
    pgn = open(game_name) 
    
    game = chess.pgn.read_game(pgn)
    
                                   
    array = []      #initialize the array that will contain the data
    index_values = [] 
    i=0
    time_white = -0.1      #to keep track of the time for each player
    time_black = 0      #to keep track of the time for each player
    board = chess.Board()       #initialize the chessboard

    #iterate through the moves
    for node in game.mainline():        
        
        move = str(node.move)       #represents the last move
        board,evaluation = get_evaluation(board,move)       #the get_evaluation method returns the updated board and the evaluation of the current position
        
        
        if i%2 == 0:        #if the white is moving we update his time
            index_values.append("white")
            if old:
                time_white = 600 - node.clock()
            else:
                time_white += timestamps[i]
                array.append([str(node.move),round(time_white,4),round(time_white+time_black,4),evaluation])
            
           
        else:           #if the black is moving we update his time
            index_values.append("black") 
            if old:
                time_black = 600 - node.clock()
            else:
                time_black += timestamps[i]
                array.append([str(node.move),round(time_black,4),round(time_white+time_black,4),evaluation])

        #evaluation = get_evaluation(board,move)
        #we append an element in the array (a row in the dataframe) with move, time the player used, total time since the game started and evaluation of the position
        # if str(evaluation)[:2] == "#+":
        #     evaluation = 15
        # elif str(evaluation)[:2] == "#-":
        #     evaluation = -15
        if old:
            array.append([str(node.move),round(600 - node.clock(),4),round(time_white+time_black,4),evaluation])   
        i+=1
    
    column_values = ['move','time','real time','evaluation'] #create the columns names
    array = array      #convert our array to a numpy array
    res = []
    j = 0
    #df = pd.DataFrame(data = array, index = index_values, columns = column_values) 
    
    tub = 0
    tuw = 0
    for i in range(int(array[-1][2]) + 1):
        mosse = []
        
        while float(array[j][2]) <= float(i):
            pos = array[j][3]
            mosse.append(array[j][0])
            if j % 2 == 0:
                tuw = array[j][1]
            elif j % 2 != 0:
                tub = array[j][1]
            j += 1
            if j > len(array):
                break
        res.append([i, pos, (tuw,tub), mosse])
        
            
        
    #create a dataframe
    print(array)
    return res



#/Users/tommasodimario/Documents/GitHub/statistics-101/project/src/games/partite_confronto/partita_timestamp.pgn

In [14]:
file = open("prova.csv", "w")

writer = csv.writer(file)

columns = ["Time", "Game", "Player", "Statistics", "Value"]
writer.writerow(columns)

#pseudo code
# to do create queste cose così da poter iterare in questa roba della morte e creare un dataset di questo tipo
# https://raw.githubusercontent.com/QuantEcon/lecture-python/master/source/_static/lecture_specific/pandas_panel/realwage.csv
# sembra particolarmente greve ma se n è il numero di statistiche gira in ~20*2*3 * n quindi è un n^4 molto tranquillo
print(os.getcwd())



gamecount = 0
count = 0   
for root,dirs, files in os.walk("games"):
    if str(root)  != "games":
        partita = f"Game{gamecount}"
        gamecount += 1
        if gamecount > 1: # così fa solo le prime tre partite
            break
    row_written = 0
    flag = False
    row_1 = []
    row_2 = []
    row_3 = [] 
    row_4 = []
    for file in files:
        #qui siamo dentro i file di una singola partita
        filename = os.fsdecode(file)
        print(filename)
        if filename.endswith(".txt") and (filename[0] == "w" or filename[0] == "b"): # abbiamo un carissimo file delle attività
        #leggiamo i battiti
            print(f"processando {filename}")
            player = filename[0]
            activity = read_activity(f"{str(root)}/{filename}")
            # As explained in the markdown above on our strumentation TODO our smartwatches recorded the heartbeats at different time so we smoothed it out
            for hr, time in activity:
                row = [int(time), partita, player, "HR", int(hr)]
                count +=1
                
                if filename[0] == "w":
                    row_1.append(row)
                elif filename[0] == "b": 
                    row_2.append(row)
            row_written += 1
            
        elif filename.endswith(".pgn"):
            print(f"processando {filename}")
            pgn = read_pgn(f"{str(root)}/{filename}")
            
            last_second = len(pgn) - 1
            
                  
            for second in range(len(pgn)):
                ev = pgn[second][1]
                try:
                    ev = float(pgn[second][1])
                except:
                    ev = 20
                row = [second, partita, "w", "Evaluation", ev]
                count +=1
                row_3.append(row)
                row = [second, partita, "b", "Evaluation", -ev]
                row_3.append(row)
                count += 1
            
            for second in range(len(pgn)):
                for player in ["b", "w"]:       
                    p = 0 if player == "w" else 1
                    row = [second, partita, player, "TimeUsed", int(pgn[second][2][p])]
                    row_4.append(row)
                    
            row_written += 1
            
    if row_written == 3:
        list_rows = [row_1,row_2,row_3,row_4]
        
        
        
        count = 0
        for i in range(len(list_rows)):
            
            for r in list_rows[i]:
                if i == 0 or i == 1:
                    if r[0] <= last_second:
                        writer.writerow([count] + r)
                        count += 1
                else:
                    writer.writerow([count] + r)
                    count += 1
            row_written = False
                        



/Users/tommasodimario/Documents/GitHub/statistics-101/project/src
.DS_Store
zSanjatosti_vs_valesepicacchi_2022.08.14 (1).pgn
processando zSanjatosti_vs_valesepicacchi_2022.08.14 (1).pgn
[['e2e4', 0.0, 0.0, 0.38], ['e7e5', 0.1, 0.1, 0.55], ['f2f3', 5.9, 6.0, -1.07], ['g8f6', 2.6, 8.5, -0.74], ['d2d3', 7.2, 9.8, -1.44], ['d7d5', 7.6, 14.8, -1.44], ['b1c3', 20.7, 28.3, -1.55], ['d5e4', 14.6, 35.3, -0.12], ['d3e4', 35.3, 49.9, -0.21], ['b8c6', 19.2, 54.5, 0.14], ['d1d8', 37.9, 57.1, 0.07], ['c6d8', 22.0, 59.9, 0.12], ['c1g5', 53.7, 75.7, -0.12], ['f8e7', 25.3, 79.0, 0.06], ['g5f6', 57.6, 82.9, -0.04], ['e7f6', 27.2, 84.8, 0.12], ['e1c1', 60.9, 88.1, -0.83], ['e8g8', 36.8, 97.7, -0.15], ['g1e2', 79.1, 115.9, -0.71], ['c7c5', 59.8, 138.9, 1.1], ['c3d5', 103.7, 163.5, 0.94], ['f6g5', 87.4, 191.1, 1.08], ['f3f4', 155.8, 243.2, -0.69], ['e5f4', 88.7, 244.5, -0.81], ['d5f4', 158.1, 246.8, -2.88], ['d8c6', 98.7, 256.8, -1.12], ['h2h4', 168.2, 266.9, -2.19], ['g5f4', 102.5, 270.7, 0.14], ['e2f4', 

In [18]:
list_rows[3][1700][0]

850

In [19]:

# Display 6 columns for viewing purposes
# pd.set_option('display.max_columns', 6)

# Reduce decimal points to 2
# pd.options.display.float_format = '{:,.2f}'.format

realwage = pd.read_csv("prova.csv")
realwage = realwage.pivot_table(values='Value',
                                index='Time',
                                columns=['Game', 'Player', 'Statistics'])
realwage

Game,Game0,Game0,Game0,Game0,Game0,Game0
Player,b,b,b,w,w,w
Statistics,Evaluation,HR,TimeUsed,Evaluation,HR,TimeUsed
Time,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
0,-0.38,107.0,0.0,0.38,88.0,0.0
1,-0.55,107.0,0.0,0.55,88.0,0.0
2,-0.55,107.0,0.0,0.55,89.0,0.0
3,-0.55,107.0,0.0,0.55,89.0,0.0
4,-0.55,107.0,0.0,0.55,90.0,0.0
...,...,...,...,...,...,...
864,-20.00,101.0,,20.00,78.0,
865,-20.00,102.0,,20.00,78.0,
866,-20.00,102.0,,20.00,78.0,
867,-20.00,103.0,,20.00,78.0,


In [17]:
realwage["Game0"]

Player,b,b,b,w,w,w
Statistics,Evaluation,HR,TimeUsed,Evaluation,HR,TimeUsed
Time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
0,-0.38,107.0,0.0,0.38,88.0,0.0
1,-0.55,107.0,0.0,0.55,88.0,0.0
2,-0.55,107.0,0.0,0.55,89.0,0.0
3,-0.55,107.0,0.0,0.55,89.0,0.0
4,-0.55,107.0,0.0,0.55,90.0,0.0
...,...,...,...,...,...,...
864,-20.00,101.0,,20.00,78.0,
865,-20.00,102.0,,20.00,78.0,
866,-20.00,102.0,,20.00,78.0,
867,-20.00,103.0,,20.00,78.0,
