Didn't miss writing a notebook for statistics that much :|

First things first we need to change the extensions of the matches to txt from tcx (the export format of choice of the garmin connect app) but first some imports

In [27]:
import os
import numpy as np
import pandas as pd
import csv
import chess
import chess.pgn
import chess.engine
from stockfish import Stockfish


In [32]:
def change_ext(cartella, old_ext, new_ext):
    for root, dirs, files in os.walk(cartella):
        # print(os.getcwd())
        for f in files:
            filename = os.fsdecode(f)
            
                
            if filename.endswith(old_ext):
                name, ext = os.path.splitext(filename)
                
                new_name = name + new_ext
                jpath = os.path.join(root,new_name)
                os.rename(os.path.join(root,filename), jpath)
                print(f"renamed {os.path.join(root,filename)} to {jpath}")

change_ext("games", ".tcx", ".txt")

renamed games/partita_0814-2/white.tcx to games/partita_0814-2/white.txt
renamed games/partita_0814-2/black.tcx to games/partita_0814-2/black.txt
renamed games/partita_0816-6/w_activity_9418433002.tcx to games/partita_0816-6/w_activity_9418433002.txt
renamed games/partita_0816-6/b_activity_9418334461_tom.tcx to games/partita_0816-6/b_activity_9418334461_tom.txt
renamed games/partita_0816-1/w_activity_9416067935_tom.tcx to games/partita_0816-1/w_activity_9416067935_tom.txt
renamed games/partita_0816-1/b_activity_9418432154.tcx to games/partita_0816-1/b_activity_9418432154.txt
renamed games/partita_0816-7/w_activity_9420267252_tom.tcx to games/partita_0816-7/w_activity_9420267252_tom.txt
renamed games/partita_0816-7/b_activity_9418433194.tcx to games/partita_0816-7/b_activity_9418433194.txt
renamed games/partita_0814-3/black.tcx to games/partita_0814-3/black.txt
renamed games/partita_0816-3_pgn_occhio/w_activity_9416251146_tom.tcx to games/partita_0816-3_pgn_occhio/w_activity_9416251146_

Now we have to import the data from the [Portable Game Notation or pgn](https://en.wikipedia.org/wiki/Portable_Game_Notation)
and read the heartbeats of both white and black (in the games where they are both available) in order to create a 3d dataframe for every game and then to visualize the data we are working with. To process the portable game notation we will use the following cell of code.

In [19]:
### UTILS FOR READING THE ACTIVITIES

def calctime(start,curr):
    c = [int(x) for x in curr.split(":")]
    s = [int(x) for x in start.split(":")]
    return ((c[0] - s[0]) * 60 + (c[1] - s[1])) * 60 + (c[2] - s[2])

def read_activity(game):
    with open(game) as f:
        lines = f.readlines()
        hr = []
        time = []
        for s in lines:
            if "<Value>" in s:
                hr.append(s.strip()[s.strip().find(">") + 1:s.strip().rfind("<")])
            if "<Time>" in s:
                s = s.strip()[3:]
                if not time:
                    start = s.strip()[s.strip().find("T") + 1:s.strip().rfind(".")]
                    
                time.append(calctime(start, s.strip()[s.strip().find("T") + 1:s.strip().rfind(".")]))
        
        #print(f"hr is {hr}, and time is {time}")
        return list(zip(hr,time))
        
read_activity("games/partita_0525-1/b_activity_8913493582.txt")

[('60', 0),
 ('76', 1),
 ('57', 2),
 ('56', 5),
 ('57', 6),
 ('58', 12),
 ('57', 16),
 ('58', 19),
 ('59', 21),
 ('58', 22),
 ('59', 25),
 ('58', 28),
 ('57', 31),
 ('56', 38),
 ('55', 42),
 ('54', 44),
 ('53', 48),
 ('52', 52),
 ('51', 54),
 ('52', 55),
 ('51', 56),
 ('52', 58),
 ('51', 64),
 ('52', 65),
 ('53', 69),
 ('54', 72),
 ('55', 73),
 ('56', 79),
 ('55', 86),
 ('54', 91),
 ('53', 94),
 ('54', 96),
 ('55', 104),
 ('54', 105),
 ('55', 106),
 ('56', 107),
 ('58', 112),
 ('59', 116),
 ('57', 117),
 ('56', 119),
 ('57', 123),
 ('56', 129),
 ('55', 130),
 ('54', 132),
 ('55', 133),
 ('56', 135),
 ('57', 137),
 ('58', 138),
 ('57', 143),
 ('58', 145),
 ('57', 147),
 ('58', 149),
 ('59', 150),
 ('60', 152),
 ('61', 155),
 ('60', 158),
 ('59', 159),
 ('60', 161),
 ('61', 171),
 ('64', 175),
 ('63', 176),
 ('64', 178),
 ('63', 182),
 ('62', 192),
 ('61', 193),
 ('60', 194),
 ('61', 195),
 ('60', 198),
 ('61', 201),
 ('60', 205),
 ('59', 211),
 ('60', 213),
 ('61', 214),
 ('60', 216),
 

In [None]:
### UTILS FOR READING THE PGN

#this method takes as input the board and the last move so to update the board and compute the evaluation
def get_evaluation(board,move):

    stockfish = Stockfish("/usr/local/Cellar/stockfish/15/bin/stockfish")     #if no parameter passed, takes as input the executable file called stockfish from the same directory, if found
    board.push_san(move)        #insert the move in the board
    stockfish.set_fen_position(board.fen())  #bpard.fen() applies a PGN to FEN conversion
    engine = chess.engine.SimpleEngine.popen_uci("stockfish")
    info = engine.analyse(board, chess.engine.Limit(time=0.1))     #gets the infos of the game
    
    #if the position on the board is not a mate the score is in centipawns, then it's divided by 100 to get the actual score
    #Gets the score from White’s point of view
    if chess.engine.PovScore.is_mate(info['score']):
        eval = chess.engine.PovScore.white(info['score'])
    else:
        eval = int(str(chess.engine.PovScore.white(info['score'])))/100

    return board, eval      #returns the board updated and the evaluation of the position

In [None]:
#create_dataFrame takes as input the name of the file pgn containing the game and returns a dataframe
def create_dataFrame(game_name): 

    #opens and reads the pgn file
    pgn = open(game_name) 
    game = chess.pgn.read_game(pgn)
    
                                   
    array = []      #initialize the array that will contain the data
    index_values = [] 
    i=0
    time_white = 0      #to keep track of the time for each player
    time_black = 0      #to keep track of the time for each player
    board = chess.Board()       #initialize the chessboard

    #iterate through the moves
    for node in game.mainline():        
        
        move = str(node.move)       #represents the last move
        board,evaluation = get_evaluation(board,move)       #the get_evaluation method returns the updated board and the evaluation of the current position

        if i%2 == 0:        #if the white is moving we update his time
            index_values.append("white")
            time_white = 600-node.clock()

        else:           #if the black is moving we update his time
            index_values.append("black") 
            time_black = 600-node.clock()

        #evaluation = get_evaluation(board,move)
        #we append an element in the array (a row in the dataframe) with move, time the player used, total time since the game started and evaluation of the position
        # if str(evaluation)[:2] == "#+":
        #     evaluation = 15
        # elif str(evaluation)[:2] == "#-":
        #     evaluation = -15
        array.append([str(node.move),round(600-node.clock(),4),round(time_white+time_black,4),evaluation])   
        i+=1
    
    column_values = ['move','time','real time','evaluation'] #create the columns names
    array = np.array(array)      #convert our array to a numpy array
    #create a dataframe
    df = pd.DataFrame(data = array, index = index_values, columns = column_values) 
    return df,board


df,board = create_dataFrame("partite/tommi_vs_rocco_11-05.pgn")   
df



In [26]:
file = open("prova.csv", "w")

writer = csv.writer(file)

columns = ["Time", "Game", "Player", "Statistics", "Value"]
writer.writerow(columns)

#pseudo code
# to do create queste cose così da poter iterare in questa roba della morte e creare un dataset di questo tipo
# https://raw.githubusercontent.com/QuantEcon/lecture-python/master/source/_static/lecture_specific/pandas_panel/realwage.csv
# sembra particolarmente greve ma se n è il numero di statistiche gira in ~20*2*3 * n quindi è un n^4 molto tranquillo
print(os.getcwd())



gamecount = 0
for root,dirs, files in os.walk("games"):
    if str(root)  != "games":
        partita = f"Game{gamecount}"
        gamecount += 1
    for file in files:
        
        #qui siamo dentro i file di una singola partita
        filename = os.fsdecode(file)
        if filename.endswith(".txt"): # abbiamo un carissimo file delle attività
        #leggiamo i battiti
            player = "White" if filename[0] == "w" else "Black"
            activity = read_activity(f"{str(root)}/{filename}")
            # As explained in the markdown above on our strumentation TODO our smartwatches recorded the heartbeats at different time so we smoothed it out
            for hr, time in activity:
                row = [time, time, partita, player, "HR", hr]
                writer.writerow(row)
            

        
# 4.



/Users/tommasodimario/Documents/GitHub/statistics-101/project/src


In [25]:

# Display 6 columns for viewing purposes
pd.set_option('display.max_columns', 6)

# Reduce decimal points to 2
pd.options.display.float_format = '{:,.2f}'.format

realwage = pd.read_csv("prova.csv")
realwage = realwage.pivot_table(values='Value',
                                index='Time',
                                columns=['Game', 'Player', 'Statistics'])
realwage.head()

Game,Game0,Game0
Player,Black,White
Statistics,HR,HR
Time,Unnamed: 1_level_3,Unnamed: 2_level_3
0,82.33,80.75
1,101.35,100.83
10,88.25,82.54
100,86.5,79.71
1000,97.0,96.0
