In [39]:
from chessdotcom import get_player_profile, get_player_stats, get_player_game_archives
import requests
import pprint
import pandas as pd
from parsita import *
from parsita.util import constant
import json
import chess.pgn
import io
import ftplib
import os
import logging

# https://pypi.org/project/pgn2data/
from converter.pgn_data import PGNData

In [40]:
"""
chess_fact table
player_id
game_id
move_id

dimention tables

Player_table
Player id - primary key
player username
class
player rating
rating updated time


Game_table
game id - primary key
player id - foreign key
urls
time_control
Date
EndDate
StartTime
EndTime
Timezone
UTCDate
UTCTime
initial setup
time class
rules
white_rating
white_username
black_rating
black_username
player_rating
event
Site
Round
Result
CurrentPosition
ECO
ECOUrl
Termination
pgn

Moves_table
game_id
move_id
move_number
white_move
black_move
white_time
black_time
"""

'\nchess_fact table\nplayer_id\ngame_id\nmove_id\n\ndimention tables\n\nPlayer_table\nPlayer id - primary key\nplayer username\nclass\nplayer rating\nrating updated time\n\n\nGame_table\ngame id - primary key\nplayer id - foreign key\nurls\ntime_control\nDate\nEndDate\nStartTime\nEndTime\nTimezone\nUTCDate\nUTCTime\ninitial setup\ntime class\nrules\nwhite_rating\nwhite_username\nblack_rating\nblack_username\nplayer_rating\nevent\nSite\nRound\nResult\nCurrentPosition\nECO\nECOUrl\nTermination\npgn\n\nMoves_table\ngame_id\nmove_id\nmove_number\nwhite_move\nblack_move\nwhite_time\nblack_time\n'

In [41]:
# Fetch data from chess.com API

# make json data easier to read
pp = pprint.PrettyPrinter(width=41, compact=True)

def get_user_archives(username, months):
    """
    get archive monthly files of specific chess.com player
    input:
    username - username of the chess.com player
    months - target months that we want to get the archives
    
    output:
    target_month - files of archives according to months parameter
    """
    archives = get_player_game_archives(username).json['archives']
    target_month = []
    for archive in archives:
        if archive[-7:] in months:
            target_month.append(archive)
    return target_month

files = get_user_archives("AGcuber19",["2023/02"])


In [42]:
files

['https://api.chess.com/pub/player/agcuber19/games/2023/02']

In [60]:
def get_archive_games(filename):
    """
    return games in one archive file
    
    input:
    filename - filename that contains game urls
    
    output: 
    """
    games = requests.get(filename).json()['games']
    return games

games = get_archive_games(files[-1])

In [151]:
def game_df(username,files):
    """
    import data from archive files and turn relevant data parameters into data frames
    
    input:
    username - username of the player
    files - file archived of the player
    
    output:
    a dataframe contains wanted columns
                        'username',
                         'urls', 
                         'time_control',
                        'end_time',
                        'uuid',
                        'initial_setup',
                        'time_class',
                        'rules',
                        'white_rating',
                        'white_username',
                        'black_rating',
                        'black_username',
                        'Event',
                        'Site',
                        'Date',
                        'Round',
                        'Result',
                        'CurrentPosition',
                        'ECO',
                        'ECOUrl',
                        'EndDate',
                        'EndTime',
                        'StartTime',
                        'Termination',
                        'Timezone',
                        'UTCDate',
                        'UTCTime',                        
                        
    """
    print("Player " + username + " is processing...")
    usernames = []
    urls = []
    time_control = [] 
    end_time = [] 
    uuid = []
    initial_setup = [] 
    time_class = [] 
    rules = [] 
    white_rating = [] 
    white_username = [] 
    black_rating = [] 
    black_username = []
    pgn = []
    event = []
    Site = []
    Date = []
    Round = []
    Result = []
    CurrentPosition = []
    ECO = []
    ECOUrl = []
    EndDate = []
    EndTime = []
    StartTime = []
    Termination = []
    Timezone = []
    UTCDate = []
    UTCTime = []
    
    for file in files:
        print(file + " " + " is processing...")
        games = get_archive_games(file)
        for game in games:
            try:
                usernames.append(username)
                urls.append(game.get('url',None)) # game.get('url', None)
                time_control.append(game.get('time_control',None))
                end_time.append(game.get('end_time',None))
                uuid.append(game.get('uuid',None))
                initial_setup.append(game.get('initial_setup',None))
                time_class.append(game.get('time_class',None))
                rules.append(game.get('rules',None))
                white_rating.append(game.get('white',None)['rating'])
                white_username.append(game.get('white',None)['username'])
                black_rating.append(game.get('black',None)['rating'])
                black_username.append(game.get('black',None)['username'])
                pgn.append(game.get('pgn',None))
                
                pgn_written = io.StringIO(game['pgn'])
                game_data = chess.pgn.read_game(pgn_written)
                event.append(game_data.headers['Event'])
                Site.append(game_data.headers['Site'])
                Date.append(game_data.headers['Date'])
                Round.append(game_data.headers['Round'])
                Result.append(game_data.headers['Result'])
                CurrentPosition.append(game_data.headers['CurrentPosition'])
                ECO.append(game_data.headers['ECO'])
                ECOUrl.append(game_data.headers['ECOUrl'])
                EndDate.append(game_data.headers['EndDate'])
                EndTime.append(game_data.headers['EndTime'])
                StartTime.append(game_data.headers['StartTime'])
                Termination.append(game_data.headers['Termination'])
                Timezone.append(game_data.headers['Timezone'])
                UTCDate.append(game_data.headers['UTCDate'])
                UTCTime.append(game_data.headers['UTCTime'])
            except Exception as e:
                print(e)
                print(type(e))
                print(str(e))
                print("add " + str(game.get(str(e),None)) + " into row")
                #e.append(None)
                #print(game['url'])
                print(game['uuid'])
    
    print("data fetch work is done.")
    
    df = pd.DataFrame(list(zip(usernames,
                               urls, 
                           time_control,
                          end_time,
                           uuid,
                           initial_setup,
                           time_class,
                           rules,
                           white_rating,
                           white_username,
                           black_rating,
                           black_username,
                           pgn,
                           event,
                          Site,
                          Date,
                          Round,
                          Result,
                          CurrentPosition,
                          ECO,
                          ECOUrl,
                          EndDate,
                          EndTime,
                          StartTime,
                          Termination,
                          Timezone,
                          UTCDate,
                          UTCTime)),
               columns =['username',
                         'urls', 
                         'time_control',
                        'end_time',
                        'uuid',
                        'initial_setup',
                        'time_class',
                        'rules',
                        'white_rating',
                        'white_username',
                        'black_rating',
                        'black_username',
                         'pgn',
                         'event',
                         'Site',
                          'Date',
                          'Round',
                          'Result',
                          'CurrentPosition',
                          'ECO',
                          'ECOUrl',
                          'EndDate',
                          'EndTime',
                          'StartTime',
                          'Termination',
                          'Timezone',
                          'UTCDate',
                          'UTCTime'
                        ])
    print("dataframe importing is done.")
    return df
    
data = game_df('AGcuber19',files)


Player AGcuber19 is processing...
https://api.chess.com/pub/player/agcuber19/games/2023/02  is processing...
'ECO'
<class 'KeyError'>
'ECO'
add None into row
b2de386f-a273-11ed-8d9d-78ac4409ff3c
'pgn'
<class 'KeyError'>
'pgn'
add None into row
1fe4a021-a27a-11ed-8d9d-78ac4409ff3c
'ECO'
<class 'KeyError'>
'ECO'
add None into row
7797f31a-a27a-11ed-8d9d-78ac4409ff3c
data fetch work is done.
dataframe importing is done.


In [49]:
data.loc[data['uuid']=='72965de5-a41c-11ed-8eba-78ac4409ff3c']['pgn'].iloc[0]

'[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.02.04"]\n[Round "-"]\n[White "selenecv"]\n[Black "AGcuber19"]\n[Result "1-0"]\n[CurrentPosition "r3kb1r/pp3ppp/2n2n2/4p3/8/2N3P1/PPPP3P/R1BQ1RK1 b kq -"]\n[Timezone "UTC"]\n[ECO "A02"]\n[ECOUrl "https://www.chess.com/openings/Birds-Opening-Froms-Gambit-2.fxe5"]\n[UTCDate "2023.02.04"]\n[UTCTime "02:04:56"]\n[WhiteElo "826"]\n[BlackElo "770"]\n[TimeControl "600"]\n[Termination "selenecv won by resignation"]\n[StartTime "02:04:56"]\n[EndDate "2023.02.04"]\n[EndTime "02:07:23"]\n[Link "https://www.chess.com/game/live/69215253779"]\n\n1. f4 {[%clk 0:09:57.2]} 1... e5 {[%clk 0:09:58.3]} 2. fxe5 {[%clk 0:09:53.7]} 2... Qh4+ {[%clk 0:09:55.5]} 3. g3 {[%clk 0:09:44.3]} 3... Qe4 {[%clk 0:09:53.1]} 4. Nf3 {[%clk 0:09:35.7]} 4... d6 {[%clk 0:09:47.6]} 5. exd6 {[%clk 0:09:27.1]} 5... cxd6 {[%clk 0:09:46]} 6. Nc3 {[%clk 0:09:19]} 6... Qc6 {[%clk 0:09:39.7]} 7. Bg2 {[%clk 0:09:01]} 7... Bg4 {[%clk 0:09:33.9]} 8. e4 {[%clk 0:08:49.1]} 8... Bxf3 {

In [55]:
data.loc[data['uuid'] == '1fe4a021-a27a-11ed-8d9d-78ac4409ff3c']['pgn'].iloc[0]

'[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.02.01"]\n[Round "-"]\n[White "smerwanji"]\n[Black "AGcuber19"]\n[Result "1/2-1/2"]\n[CurrentPosition "rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq -"]\n[Timezone "UTC"]\n[UTCDate "2023.02.01"]\n[UTCTime "21:50:46"]\n[WhiteElo "751"]\n[BlackElo "755"]\n[TimeControl "600"]\n[Termination "Game drawn by agreement"]\n[StartTime "21:50:46"]\n[EndDate "2023.02.01"]\n[EndTime "21:50:46"]\n[Link "https://www.chess.com/game/live/69027243575"]\n\n1/2-1/2\n'

In [50]:
data.loc[data['uuid']=='72965de5-a41c-11ed-8eba-78ac4409ff3c']['uuid'].iloc[0]

'72965de5-a41c-11ed-8eba-78ac4409ff3c'

In [6]:
# students from Tianmin's classes - BO, BP, AN
tianmin_players = {
    "BO" : ['AGcuber19',
            'TLPAWN',
            'xiaoanwu',
            'EmmaXLi',
            'akfunchess66',
            'Marsboom', 
            'Claraqiu',
            'Ravenclawfairy', 
            'Zora_zhu',
            'BurleyWalrus'],
    "BP" : ['taionemm',
            'augustinewz',
            'oscarzhang818',
            'yaohengli',
            'Wallacewang1214',
            'SophiaZ2022',
            'AliceCLi',
            'yumitang',
            'james2945',
            'Oinkoinkw'],
    "AN" : ['Cathye1',
            'lunathekitsune',
            'ArthurRocket',
            'vivianwwww20',
            'ChloeWang16',
            'Tyzalex',
            'ZhichengW',
            'Haochen1123',
            'jaydenlan0118',
            'ImRacoonie']
}

In [7]:
df_players = []
error_players = []

for classes in tianmin_players.keys():
    for player in tianmin_players[classes]:
        try:
            files = get_user_archives(player,["2023/02","2023/01","2022/12"]) # target months
            df = game_df(player, files)
            df['class'] = classes
            df_players.append(df)
        except:
            print(Exception)
            print("This player account " + player + " does not exist")
            error_players.append(player)

Player AGcuber19 is processing...
https://api.chess.com/pub/player/agcuber19/games/2022/12  is processing...
'ECO'
https://www.chess.com/game/live/63766207495
'ECO'
https://www.chess.com/game/live/64002753283
'ECO'
https://www.chess.com/game/live/64089271325
https://api.chess.com/pub/player/agcuber19/games/2023/01  is processing...
'ECO'
https://www.chess.com/game/live/67449082651
https://api.chess.com/pub/player/agcuber19/games/2023/02  is processing...
'ECO'
https://www.chess.com/game/live/69024314043
'pgn'
https://www.chess.com/game/live/69027164491
'ECO'
https://www.chess.com/game/live/69027243575
data fetch work is done.
dataframe importing is done.
Player TLPAWN is processing...
https://api.chess.com/pub/player/tlpawn/games/2022/12  is processing...
'ECO'
https://www.chess.com/game/daily/456493725
https://api.chess.com/pub/player/tlpawn/games/2023/01  is processing...
'ECO'
https://www.chess.com/game/live/67387278311
https://api.chess.com/pub/player/tlpawn/games/2023/02  is proce

data fetch work is done.
dataframe importing is done.
Player ZhichengW is processing...
https://api.chess.com/pub/player/zhichengw/games/2023/01  is processing...
https://api.chess.com/pub/player/zhichengw/games/2023/02  is processing...
data fetch work is done.
dataframe importing is done.
Player Haochen1123 is processing...
https://api.chess.com/pub/player/haochen1123/games/2023/01  is processing...
https://api.chess.com/pub/player/haochen1123/games/2023/02  is processing...
data fetch work is done.
dataframe importing is done.
Player jaydenlan0118 is processing...
https://api.chess.com/pub/player/jaydenlan0118/games/2023/01  is processing...
https://api.chess.com/pub/player/jaydenlan0118/games/2023/02  is processing...
data fetch work is done.
dataframe importing is done.
Player ImRacoonie is processing...
https://api.chess.com/pub/player/imracoonie/games/2023/01  is processing...
https://api.chess.com/pub/player/imracoonie/games/2023/02  is processing...
data fetch work is done.
da

In [8]:
print("% of error players is ...")
print(len(error_players) * 100/ len(tianmin_players))

% of error players is ...
0.0


In [9]:
players_df = pd.concat(df_players)

In [10]:
def player_rating(row):  
    if row['username'].lower() == row['white_username'].lower():
        return row['white_rating']
    else:
        return row['black_rating']
    
players_df['player_rating'] = players_df.apply(lambda row: player_rating(row), axis=1)

In [11]:
# generate moves table from pgn column

def moves_split(pgn):
    move_number = []
    white_move = []
    black_move = []
    white_time = []
    black_time = []
    for note in range(len(pgn)):
        if note % 8 == 0:
            move_number.append(pgn[note].split(".")[0])
        elif note % 8 == 1:
            white_move.append(pgn[note])
        elif note % 8 == 2:
            pass
        elif note % 8== 3:
            white_time.append(pgn[note].split("]}")[0])
        elif note %8 == 4:
            pass
        elif note % 8 == 5:
            black_move.append(pgn[note])
        elif note % 8 == 6:
            pass
        elif note % 8 == 7:
            black_time.append(pgn[note].split("]}")[0])
        
    if len(black_move) < len(white_move):
        black_move.append("NaN")
        black_time.append("NaN")

    return move_number, white_move, black_move, white_time, black_time

In [215]:
def create_moves_df(game):
    moves_df = []
    for i in range(game.shape[0]):
        try:
            pgn = game['pgn'].iloc[i].split("\n")[-2].split(" ")[:-1]
        except:
            #pgn = "."
            move_number = [None]
            white_move = [None]
            black_move = [None]
            white_time = [None]
            black_time = [None]
            #print(i)
            #print(game['pgn'].iloc[i])
            print("---")
            #print(game['uuid'].iloc[i])
            #print([game['uuid'].iloc[i]] * 1)
        if len(pgn) != 0:
            moves = moves_split(pgn)
            move_number = moves[0]
            white_move = moves[1]
            black_move = moves[2]
            white_time = moves[3]
            black_time = moves[4]
        else:
            move_number = [None]
            white_move = [None]
            black_move = [None]
            white_time = [None]
            black_time = [None]
        #uuid = [game['uuid'].iloc[i]] * len(move_number)
        uuid = [game['uuid'].iloc[i]] * len(move_number)
        if len(uuid) == 1:
            print(game['uuid'].iloc[i])
        df = pd.DataFrame(list(zip(uuid,
                              move_number,
                              white_move,
                              black_move,
                              white_time,
                              black_time)),
               columns =[     'uuid',
                              'move_number',
                              'white_move',
                              'black_move',
                              'white_time',
                              'black_time'
                        ])
        moves_df.append(df)
    moves_df = pd.concat(moves_df)
    return moves_df

In [29]:
import pandas as pd
players = pd.read_csv("player_tianmin_class.csv")

In [30]:
test = players.loc[players['username'] == 'AGcuber19']

In [124]:
data

Unnamed: 0,username,urls,time_control,end_time,uuid,initial_setup,time_class,rules,white_rating,white_username,...,CurrentPosition,ECO,ECOUrl,EndDate,EndTime,StartTime,Termination,Timezone,UTCDate,UTCTime
0,AGcuber19,https://www.chess.com/game/live/69024314043,600,1675285335,b2de386f-a273-11ed-8d9d-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,rapid,chess,709,Captainrobert07,...,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,C23,https://www.chess.com/openings/Bishops-Opening,2023.02.01,21:37:04,21:35:06,AGcuber19 won - game abandoned,UTC,2023.02.01,21:35:06
1,AGcuber19,https://www.chess.com/game/live/69026464907,600,1675287424,4adb898e-a278-11ed-8d9d-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,rapid,chess,772,avantuyle,...,rn2k2r/pp3qpp/3p1n2/2b1p3/2B1P3/6Pb/PPPP1P1P/R...,C20,https://www.chess.com/openings/Kings-Pawn-Open...,2023.02.02,04:56:31,04:54:18,AGcuber19 won by checkmate,UTC,2023.02.02,04:54:18
2,AGcuber19,https://www.chess.com/game/live/69027164491,600,1675288214,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,rapid,bughouse,442,AGcuber19,...,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,A40,https://www.chess.com/openings/Englund-Gambit-...,2023.02.02,16:20:53,16:18:44,AGcuber19 won on time,UTC,2023.02.02,16:18:44
3,AGcuber19,https://www.chess.com/game/live/69027243575,600,1675288246,7797f31a-a27a-11ed-8d9d-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,rapid,chess,751,smerwanji,...,r1b2rk1/ppp1bppQ/3q4/2np4/8/2N5/PPBP1PPP/R1B1K...,C20,https://www.chess.com/openings/Kings-Pawn-Open...,2023.02.02,16:21:51,16:21:23,AGcuber19 won by checkmate,UTC,2023.02.02,16:21:23
4,AGcuber19,https://www.chess.com/game/live/69052827375,600,1675313791,a5a1fbd8-a2b5-11ed-8d9d-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,rapid,chess,763,AGcuber19,...,1n2k2r/Q2b2pp/1Np1pn2/8/5P2/6P1/P3P1KP/B6R w - -,C20,https://www.chess.com/openings/Kings-Pawn-Open...,2023.02.02,16:24:07,16:21:59,AGcuber19 won on time,UTC,2023.02.02,16:21:59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,AGcuber19,https://www.chess.com/game/live/69732286551,180,1675993619,22acc117-a8e4-11ed-8508-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,blitz,chess,433,Pabreezy,...,3k1b2/8/p3pp2/2p5/b7/5N2/PP3PPP/3qK2R w - -,C20,https://www.chess.com/openings/Kings-Pawn-Open...,2023.02.12,20:47:33,20:29:57,AGcuber19 won on time,UTC,2023.02.12,20:29:57
71,AGcuber19,https://www.chess.com/game/live/69867716207,180,1676128811,e686d4f3-aa1e-11ed-880b-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,blitz,chess,383,AGcuber19,...,r4r2/p1p2Q1p/P1pk4/8/3P1B2/8/PP2K1PP/R6n b - -,C47,https://www.chess.com/openings/Four-Knights-Ga...,2023.02.12,21:42:16,21:15:08,AGcuber19 won on time,UTC,2023.02.12,21:15:08
72,AGcuber19,https://www.chess.com/game/live/69869767975,900+10,1676131221,15b9b50a-aa24-11ed-880b-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,rapid,chess,518,akfunchess66,...,8/1p6/p7/3NK3/6q1/1P1P3k/8/8 b - -,C45,https://www.chess.com/openings/Scotch-Game,2023.02.12,21:49:44,21:42:33,chengliam won by checkmate,UTC,2023.02.12,21:42:33
73,AGcuber19,https://www.chess.com/game/live/69972881581,600,1676234853,011cbeb4-ab14-11ed-880b-78ac4409ff3c,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w ...,rapid,chess,801,AGcuber19,...,4r1k1/3Q1ppp/5r2/8/3p1B2/5K1P/PP3P2/R4R2 b - -,C26,https://www.chess.com/openings/Vienna-Game-Fal...,2023.02.12,22:26:12,22:19:11,Game drawn by repetition,UTC,2023.02.12,22:19:11


In [216]:
data_test = create_moves_df(data)

b2de386f-a273-11ed-8d9d-78ac4409ff3c
---
7797f31a-a27a-11ed-8d9d-78ac4409ff3c
a5ca3341-a326-11ed-8d9d-78ac4409ff3c


In [230]:
data.loc[data['uuid'] == 'a5ca3341-a326-11ed-8d9d-78ac4409ff3c'][['urls','uuid','pgn']]

Unnamed: 0,urls,uuid,pgn
8,https://www.chess.com/game/live/69101170939,a5ca3341-a326-11ed-8d9d-78ac4409ff3c,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat..."


In [224]:
data_test.loc[data_test['uuid'] == 'a5ca3341-a326-11ed-8d9d-78ac4409ff3c']

Unnamed: 0,uuid,move_number,white_move,black_move,white_time,black_time
0,a5ca3341-a326-11ed-8d9d-78ac4409ff3c,1,e4,,0:01:00,


In [206]:
data_test.loc[data_test['uuid'] == '1fe4a021-a27a-11ed-8d9d-78ac4409ff3c']

Unnamed: 0,uuid,move_number,white_move,black_move,white_time,black_time
0,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,1,e4,e5,0:09:58.8,0:09:57.3
1,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,2,Bc4,Qh4,0:09:58.4,0:09:54.3
2,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,3,Nc3,Bc5,0:09:55.3,0:09:51.9
3,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,4,Nh3,d6,0:09:53.1,0:09:49.3
4,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,5,g3,Qf6,0:09:51.7,0:09:46
5,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,6,O-O,Bxh3,0:09:34.1,0:09:42.8
6,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,7,Nd5,Qd8,0:09:22.6,0:09:32.6
7,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,8,Qh5,Nf6,0:09:21.6,0:09:26.5
8,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,9,Nxc7+,Qxc7,0:09:16.6,0:09:23.6
9,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,10,Qxf7+,Qxf7,0:09:15.9,0:09:16.5


In [208]:
data.loc[data['uuid'] == '1fe4a021-a27a-11ed-8d9d-78ac4409ff3c'][['username','uuid','pgn']]

Unnamed: 0,username,uuid,pgn
2,AGcuber19,1fe4a021-a27a-11ed-8d9d-78ac4409ff3c,


In [217]:
data['uuid'].nunique()

75

In [218]:
data_test['uuid'].nunique()

75

In [219]:
data.loc[data['uuid'] == '72965de5-a41c-11ed-8eba-78ac4409ff3c']['pgn'].iloc[0]

'[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.02.03"]\n[Round "-"]\n[White "RonnyTC2020"]\n[Black "AGcuber19"]\n[Result "0-1"]\n[CurrentPosition "rn2k2r/ppp1np1p/5b2/5bp1/1PB2Q2/2P1P3/P4PPP/RN3RK1 w kq g6"]\n[Timezone "UTC"]\n[ECO "A40"]\n[ECOUrl "https://www.chess.com/openings/Englund-Gambit-Mosquito-Gambit"]\n[UTCDate "2023.02.03"]\n[UTCTime "23:42:45"]\n[WhiteElo "393"]\n[BlackElo "432"]\n[TimeControl "180"]\n[Termination "AGcuber19 won on time"]\n[StartTime "23:42:45"]\n[EndDate "2023.02.03"]\n[EndTime "23:47:14"]\n[Link "https://www.chess.com/game/live/69206828203"]\n\n1. d4 {[%clk 0:02:59.9]} 1... e5 {[%clk 0:02:59.4]} 2. dxe5 {[%clk 0:02:57.5]} 2... Qh4 {[%clk 0:02:58.2]} 3. Nf3 {[%clk 0:02:49.3]} 3... Qb4+ {[%clk 0:02:55.6]} 4. c3 {[%clk 0:02:29.9]} 4... Qc5 {[%clk 0:02:44.7]} 5. Be3 {[%clk 0:02:12.6]} 5... Qb5 {[%clk 0:02:33.2]} 6. b4 {[%clk 0:01:52.7]} 6... d6 {[%clk 0:02:27.4]} 7. Nd4 {[%clk 0:01:44.7]} 7... Qc4 {[%clk 0:02:18.6]} 8. Bf4 {[%clk 0:01:14.5]} 8... dxe5

In [220]:
data_test.loc[data_test['uuid'] == '72965de5-a41c-11ed-8eba-78ac4409ff3c']

Unnamed: 0,uuid,move_number,white_move,black_move,white_time,black_time
0,72965de5-a41c-11ed-8eba-78ac4409ff3c,1,d4,e5,0:02:59.9,0:02:59.4
1,72965de5-a41c-11ed-8eba-78ac4409ff3c,2,dxe5,Qh4,0:02:57.5,0:02:58.2
2,72965de5-a41c-11ed-8eba-78ac4409ff3c,3,Nf3,Qb4+,0:02:49.3,0:02:55.6
3,72965de5-a41c-11ed-8eba-78ac4409ff3c,4,c3,Qc5,0:02:29.9,0:02:44.7
4,72965de5-a41c-11ed-8eba-78ac4409ff3c,5,Be3,Qb5,0:02:12.6,0:02:33.2
5,72965de5-a41c-11ed-8eba-78ac4409ff3c,6,b4,d6,0:01:52.7,0:02:27.4
6,72965de5-a41c-11ed-8eba-78ac4409ff3c,7,Nd4,Qc4,0:01:44.7,0:02:18.6
7,72965de5-a41c-11ed-8eba-78ac4409ff3c,8,Bf4,dxe5,0:01:14.5,0:02:14.5
8,72965de5-a41c-11ed-8eba-78ac4409ff3c,9,Bxe5,Bd6,0:01:04,0:02:09.1
9,72965de5-a41c-11ed-8eba-78ac4409ff3c,10,e3,Bxe5,0:00:54.2,0:02:07.4


In [174]:
list1 = data['uuid'].unique()

In [175]:
list2 = data_test['uuid'].unique()

In [176]:
main_list = list(set(list1) - set(list2))
main_list

['1fe4a021-a27a-11ed-8d9d-78ac4409ff3c',
 '7797f31a-a27a-11ed-8d9d-78ac4409ff3c',
 'b2de386f-a273-11ed-8d9d-78ac4409ff3c']

In [25]:
test.loc[test['uuid'] == 'efe6de36-729d-11ed-a69d-78ac4409ff3c']['pgn'].iloc[0].split("\n")[-2].split(" ")[:-1]

['1.',
 'd4',
 '{[%clk',
 '0:10:00]}',
 '1...',
 'e5',
 '{[%clk',
 '0:09:57.4]}',
 '2.',
 'dxe5',
 '{[%clk',
 '0:09:58.8]}',
 '2...',
 'Qh4',
 '{[%clk',
 '0:09:54.9]}',
 '3.',
 'Nf3',
 '{[%clk',
 '0:09:57.4]}',
 '3...',
 'Qb4+',
 '{[%clk',
 '0:09:47.4]}',
 '4.',
 'c3',
 '{[%clk',
 '0:09:52.9]}',
 '4...',
 'Qb6',
 '{[%clk',
 '0:09:31.2]}',
 '5.',
 'Be3',
 '{[%clk',
 '0:09:45.6]}',
 '5...',
 'Qxb2',
 '{[%clk',
 '0:09:22.8]}',
 '6.',
 'Nbd2',
 '{[%clk',
 '0:09:36.5]}',
 '6...',
 'Qxc3',
 '{[%clk',
 '0:09:06.7]}',
 '7.',
 'Rc1',
 '{[%clk',
 '0:09:25.2]}',
 '7...',
 'Qa3',
 '{[%clk',
 '0:08:51]}',
 '8.',
 'Ra1',
 '{[%clk',
 '0:09:04.7]}',
 '8...',
 'Bb4',
 '{[%clk',
 '0:08:36.4]}',
 '9.',
 'Qb1',
 '{[%clk',
 '0:08:55.4]}',
 '9...',
 'Qa5',
 '{[%clk',
 '0:08:16.7]}',
 '10.',
 'g3',
 '{[%clk',
 '0:08:45.7]}',
 '10...',
 'Bxd2+',
 '{[%clk',
 '0:08:11.4]}',
 '11.',
 'Bxd2',
 '{[%clk',
 '0:08:44.4]}',
 '11...',
 'Qd5',
 '{[%clk',
 '0:07:44.8]}',
 '12.',
 'Qb2',
 '{[%clk',
 '0:08:21.1]}',
 '12...

In [14]:
moves = create_moves_df(players_df)

In [15]:
moves

Unnamed: 0,uuid,move_number,white_move,black_move,white_time,black_time
0,efe6de36-729d-11ed-a69d-78ac4409ff3c,1,d4,e5,0:10:00,0:09:57.4
1,efe6de36-729d-11ed-a69d-78ac4409ff3c,2,dxe5,Qh4,0:09:58.8,0:09:54.9
2,efe6de36-729d-11ed-a69d-78ac4409ff3c,3,Nf3,Qb4+,0:09:57.4,0:09:47.4
3,efe6de36-729d-11ed-a69d-78ac4409ff3c,4,c3,Qb6,0:09:52.9,0:09:31.2
4,efe6de36-729d-11ed-a69d-78ac4409ff3c,5,Be3,Qxb2,0:09:45.6,0:09:22.8
...,...,...,...,...,...,...
24,93c76789-a4b8-11ed-8eba-78ac4409ff3c,25,cxd5,Rxd5,0:15:36.1,0:14:42.7
25,93c76789-a4b8-11ed-8eba-78ac4409ff3c,26,Qc2,Rf5+,0:15:40.7,0:14:35.5
26,93c76789-a4b8-11ed-8eba-78ac4409ff3c,27,Kg2,Nf1+,0:15:43.5,0:14:33.1
27,93c76789-a4b8-11ed-8eba-78ac4409ff3c,28,Kh3,Rf6,0:15:48.6,0:14:39.9


In [16]:
# https://docs.google.com/spreadsheets/d/1QJjp2wG_k4XPW6Z8hbvArKkCQH3fI5O7RmPH2Ongto4/edit#gid=0

"""
chess_fact table
player_id
game_id
move_id
"""



'\nchess_fact table\nplayer_id\ngame_id\nmove_id\n'

In [17]:
"""
Player_table
Player id - primary key
player username
class
player rating
rating updated time
"""



'\nPlayer_table\nPlayer id - primary key\nplayer username\nclass\nplayer rating\nrating updated time\n'

In [18]:
"""
Game_table
game id - primary key
player id - foreign key?
urls
time_control
Date
EndDate
StartTime
EndTime
Timezone
UTCDate
UTCTime
initial setup
time class
rules
white_rating
white_username
black_rating
black_username
player_rating
event
Site
Round
Result
CurrentPosition
ECO
ECOUrl
Termination
pgn
"""

'\nGame_table\ngame id - primary key\nplayer id - foreign key?\nurls\ntime_control\nDate\nEndDate\nStartTime\nEndTime\nTimezone\nUTCDate\nUTCTime\ninitial setup\ntime class\nrules\nwhite_rating\nwhite_username\nblack_rating\nblack_username\nplayer_rating\nevent\nSite\nRound\nResult\nCurrentPosition\nECO\nECOUrl\nTermination\npgn\n'

In [19]:
"""
Moves_table
game_id
move_id
move_number
white_move
black_move
white_time
black_time
"""

'\nMoves_table\ngame_id\nmove_id\nmove_number\nwhite_move\nblack_move\nwhite_time\nblack_time\n'