In [7]:
import requests
from datetime import date
from datetime import datetime
import json
from dateutil.relativedelta import relativedelta
from pgn_parser import pgn, parser

def last_n_month(n):
    """
    purpose:
    return the month as yyyy/mm format of the past n months from now
    
    input -
    n: number of months from past
    
    output -
    a list of month with yyyy/mm format
    """
    months_lst = []
    for num in range(n):
        months = date.today() + relativedelta(months=-num)
        if months.month <= 9:
            months_lst.append(str(months.year) + "/"+ "0" + str(months.month))
        else:
            months_lst.append(str(months.year) + "/"+ str(months.month))
    return months_lst

def get_user_archives(username, 
                      nr_months,
                     user_agent = {'User-Agent': 'username: tianminlyu, email: tianminlyu@gmail.com'}):
    """
    purpose:
    get archive monthly files of specific chess.com player
    
    input:
    username - username of the chess.com player
    nr_months - integer, nummber of past months that we want to get the archives
    # to request chess.com API
       user_agent = {'User-Agent': 'username: tianminlyu, email: tianminlyu@gmail.com'}
    
    output:
    target_month - files of archives according to months parameter
    """
    url = "https://api.chess.com/pub/player/{username}/games/archives".format(username = username)
    archive_request = requests.get(url, headers = user_agent)
    archives = archive_request.json()['archives']
    past_months = last_n_month(nr_months)
    target_month = []
    for archive in archives:
        if archive[-7:] in past_months:
            target_month.append(archive)
    return target_month
    
def get_archive_games(filename,
                     user_agent = {'User-Agent': 'username: tianminlyu, email: tianminlyu@gmail.com'}):
    """
    purpose:
    
    return games in one archive file
    
    input:
    filename - filename that contains game urls
    
    output: 
    """
    games = requests.get(filename,headers = user_agent).json()['games']
    return games

In [10]:
def get_user_profile(username,
                     user_agent = {'User-Agent': 'username: tianminlyu, email: tianminlyu@gmail.com'}):
    url = "https://api.chess.com/pub/player/{username}".format(username = username)
    archive_request = requests.get(url, headers = user_agent)
    archives = archive_request.json()
    return archives
get_user_profile("tianminlyu")

{'avatar': 'https://images.chesscomfiles.com/uploads/v1/user/63859484.f9819c7d.200x200o.798706548398.jpeg',
 'player_id': 63859484,
 '@id': 'https://api.chess.com/pub/player/tianminlyu',
 'url': 'https://www.chess.com/member/tianminlyu',
 'name': 'Tianmin Lyu',
 'username': 'tianminlyu',
 'followers': 146,
 'country': 'https://api.chess.com/pub/country/SE',
 'last_online': 1722009545,
 'joined': 1564650161,
 'status': 'premium',
 'is_streamer': False,
 'verified': False,
 'league': 'Champion',
 'streaming_platforms': []}

In [None]:
def get_user_profile(username,
                     user_agent = {'User-Agent': 'username: tianminlyu, email: tianminlyu@gmail.com'}):
    url = "https://api.chess.com/pub/player/{username}".format(username = username)
    archive_request = requests.get(url, headers = user_agent)
    archives = archive_request.json()
    return archives
get_user_profile("tianminlyu")

In [95]:
text = get_archive_games(tianmin[0])[0]['pgn']

In [96]:
game = parser.parse(text, actions=pgn.Actions())

In [106]:
game.move("5..")

TypeError: '>' not supported between instances of 'int' and 'str'

In [113]:
len(game.movetext)

88

In [3]:
#import chess_dot_com_api as capi
import time
from datetime import date
from datetime import datetime
from dateutil.relativedelta import relativedelta

# student data - will be transferred to database
students_username = ['yaohengli',
           'chessloverma',
           'chengliam',
           'emmaxli',
           'akfunchess66',
           
           'willhanzhu',
           'TLPAWN',
           'Jasminezhao777',
           'Justinzhao777',
           'Milkmilkok',
           'zlicyigloo',
           'Zora_zhu',
           'dogwater1012000',
           
           'AJLinVH',
           'charliezienyang',
           'whatwhywhywhat',
           'ZhouYuanLi',
           'Logicalcheetah26',
           'Nolan330',
           'antleo0314',
           'AntLeoChess']

def lowercase_student(student_list):
    """
    to lowercase all the username
    
    input - list, list of student username, regardless upper or lower case
    
    output - list, list of student username, lower case
    """
    
    lower_students = [x.lower() for x in student_list]
    
    return lower_students


def game_data_collect():
    """
    collect game data for each student from the json raw data -
    end_times
    white_players
    black_players
    time_controls
    urls
    """
    end_times = []
    white_players = []
    black_players = []
    time_controls = []
    urls = []
    results = []
    white_rating = []
    black_rating = []
    white_accuracy = []
    black_accuracy = []
    
    students = lowercase_student(students_username)
    for student in students:
        print(student.upper())
        archives = get_user_archives(student,2)
        #print(archives)
        for archive in archives[::-1]:
            games = get_archive_games(archive)
            for game in games[::-1]:
                #print(game)
                if (game['white']['username'].lower() == student.lower() and game['black']['username'].lower() in students):
                    end_time = datetime.utcfromtimestamp(game['end_time']).strftime('%Y-%m-%d %H:%M:%S')
                    print(end_time)
                    print("[w]" + student)
                    print("[b]" + game['black']['username'])
                    print("time control: " + game['time_control'])
                    print(game['pgn'].split("\n")[-2].split(" ")[-1]) # result
                    print(game['accuracies']['white'])
                    print(game['accuracies']['black'])
                    print("          ")
                    
                    end_times.append(end_time)
                    white_players.append(student.lower())
                    black_players.append(game['black']['username'].lower())
                    time_controls.append(game['time_control'])
                    urls.append(game['url'])
                    results.append(game['pgn'].split("\n")[-2].split(" ")[-1])
                    white_rating.append(game['white']['rating'])
                    black_rating.append(game['black']['rating'])
                    white_accuracy.append(game['accuracies']['white'])
                    black_accuracy.append(game['accuracies']['black'])

                    
                elif (game['black']['username'].lower() == student.lower() and game['white']['username'].lower() in students):
                    end_time = datetime.utcfromtimestamp(game['end_time']).strftime('%Y-%m-%d %H:%M:%S')
                    print(end_time)
                    print("[w]" + game['white']['username'])
                    print("[b]" + student)
                    print("time control: " + game['time_control'])
                    print(game['pgn'].split("\n")[-2].split(" ")[-1])
                    print(game['accuracies']['white'])
                    print(game['accuracies']['black'])
                    print("          ")
                    
                    end_times.append(end_time)
                    white_players.append(game['white']['username'].lower())
                    black_players.append(student.lower())
                    time_controls.append(game['time_control'])
                    urls.append(game['url'])
                    results.append(game['pgn'].split("\n")[-2].split(" ")[-1])
                    white_rating.append(game['white']['rating'])
                    black_rating.append(game['black']['rating'])

                    white_accuracy.append(game['accuracies']['white'])
                    black_accuracy.append(game['accuracies']['black'])


    print("---------")
    return end_times, white_players, black_players, time_controls, urls, results, white_rating, black_rating, white_accuracy, black_accuracy

In [160]:
def move_data_collect():
    """
    collect move data for each game from the json raw data -

    """
    end_times = []
    urls = []
    move_num = []
    move = []
    clk = []
    
    students = lowercase_student(students_username)
    for student in students:
        print(student.upper())
        archives = get_user_archives(student,2)
        #print(archives)
        for archive in archives[::-1]:
            games = get_archive_games(archive)
            for game in games[::-1]:
                #print(game)
                if (game['white']['username'].lower() == student.lower() and game['black']['username'].lower() in students):
                    url = game['url']
                    end_time = datetime.utcfromtimestamp(game['end_time']).strftime('%Y-%m-%d %H:%M:%S')
                    moves = parser.parse(game['pgn'] , actions=pgn.Actions())
                    move_text = moves.movetext
                    for i in range(len(move_text) - 1):
                        if i % 2 == 0:
                            
                            move_num.append( int(i / 2) + 1 )
                            move.append(str(move_text[i]).split("{")[0].split(".")[-1])
                            clk.append(str(move_text[i]).split("%clk ")[-1].split("]}")[0])
                            urls.append(url)
                            end_times.append(end_time)
                        else:
                            move_num.append( int(i / 2) + 1 )
                            move.append(str(move_text[i]).split("{")[0].split("...")[-1])
                            clk.append(str(move_text[i]).split("%clk ")[-1].split("]}")[0])
                            urls.append(url)
                            end_times.append(end_time)

                            


                    
                elif (game['black']['username'].lower() == student.lower() and game['white']['username'].lower() in students):
                    url = game['url']
                    end_time = datetime.utcfromtimestamp(game['end_time']).strftime('%Y-%m-%d %H:%M:%S')
                    moves = parser.parse(game['pgn'] , actions=pgn.Actions())
                    move_text = moves.movetext
                    for i in range(len(move_text) - 1):
                        if i % 2 == 0:
                            
                            move_num.append( int(i / 2) + 1 )
                            move.append(str(move_text[i]).split("{")[0].split(".")[-1])
                            clk.append(str(move_text[i]).split("%clk ")[-1].split("]}")[0])
                            urls.append(url)
                            end_times.append(end_time)
                        else:
                            move_num.append( int(i / 2) + 1 )
                            move.append(str(move_text[i]).split("{")[0].split("...")[-1])
                            clk.append(str(move_text[i]).split("%clk ")[-1].split("]}")[0])
                            urls.append(url)
                            end_times.append(end_time)






    print("---------")
    return move_num, move, clk, urls, end_times

In [161]:
def to_pandas_move(fetched_data):
    """
    Import fetched game data into a pandas dataframe
    
    and then sort and drop duplicates
    """
    df = pd.DataFrame()
    df['move_num'] = fetched_data[0]
    df['move'] = fetched_data[1]
    df['clk'] = fetched_data[2]
    df['urls'] = fetched_data[3]
    df['end_time'] = fetched_data[4]
    df = df.astype('str')
    df['move_num'] = df['move_num'].astype('int')
    df = df.drop_duplicates()
    df = df.sort_values(by = ['end_time','move_num'], ascending = [False,True])
    
    return df

In [162]:
df = to_pandas_move(move_data_collect())

YAOHENGLI
CHESSLOVERMA
CHENGLIAM
EMMAXLI
AKFUNCHESS66
WILLHANZHU
TLPAWN
JASMINEZHAO777
JUSTINZHAO777
MILKMILKOK
ZLICYIGLOO
ZORA_ZHU
DOGWATER1012000
AJLINVH
CHARLIEZIENYANG
WHATWHYWHYWHAT
ZHOUYUANLI
LOGICALCHEETAH26
NOLAN330
ANTLEO0314
ANTLEOCHESS
---------


In [163]:
df.shape

(4724, 5)

In [164]:
df

Unnamed: 0,move_num,move,clk,urls,end_time
481,1,e4,167:59:56,https://www.chess.com/game/daily/561871913,2023-10-02 20:05:42
482,1,d5,146:04:55,https://www.chess.com/game/daily/561871913,2023-10-02 20:05:42
483,2,exd5,166:50:20,https://www.chess.com/game/daily/561871913,2023-10-02 20:05:42
484,2,Qxd5,166:38:24,https://www.chess.com/game/daily/561871913,2023-10-02 20:05:42
485,3,Nc3,166:42:26,https://www.chess.com/game/daily/561871913,2023-10-02 20:05:42
...,...,...,...,...,...
3441,17,g3,0:15:53.8,https://www.chess.com/game/live/87405096971,2023-09-02 14:52:18
3442,17,Qg4,0:15:56.3,https://www.chess.com/game/live/87405096971,2023-09-02 14:52:18
3443,18,Qa5,0:15:48.4,https://www.chess.com/game/live/87405096971,2023-09-02 14:52:18
3444,18,Qf3,0:16:00.4,https://www.chess.com/game/live/87405096971,2023-09-02 14:52:18


In [5]:
import pandas as pd
from oauth2client.service_account import ServiceAccountCredentials
from df2gspread import df2gspread as d2g

def to_pandas_df(fetched_data):
    """
    Import fetched game data into a pandas dataframe
    
    and then sort and drop duplicates
    """
    df = pd.DataFrame()
    df['end_time'] = fetched_data[0]
    df['white_player'] = fetched_data[1]
    df['black_player'] = fetched_data[2]
    df['time_control'] = fetched_data[3]
    df['url'] = fetched_data[4]
    df['result'] = fetched_data[5]
    df['white_rating'] = fetched_data[6]
    df['black_rating'] = fetched_data[7]
    df['white_accuracy'] = fetched_data[8]
    df['black_accuracy'] = fetched_data[9]
    df = df.sort_values(by = 'end_time', ascending = False)
    df = df.drop_duplicates()
    
    return df

def rp_nan_empty(df):
    """
    purpose:
    replace null value in the df with "" string so that in google sheet it will turn out to be empty space, rather than 'nan'
    """
    df = df.fillna("")

    return df


def upload_df(name, df, sheet_url):
    """
    purpose:
    upload df to google sheet RCC_chess_game_result
    each class/csv/file represent one sheet
    
    input - 
    name: class name, sheet tab
    df: df that will be uploaded for each tab
    """
    #spreadsheet_key = '12R6hwzKys_DQE6vFpuOLGpe68hGHktSzd65AkR0nOsA' # sheet url from RCC_chess_game_result
    scope = ["https://spreadsheets.google.com/feeds",
         "https://www.googleapis.com/auth/spreadsheets",
         "https://www.googleapis.com/auth/drive.file",
         "https://www.googleapis.com/auth/drive"]
    # info about credential 
    # https://developers.google.com/workspace/guides/create-credentials
    creds = ServiceAccountCredentials.from_json_keyfile_name("./creds.json", scope)
    wks_name = name
    df = rp_nan_empty(df)
    d2g.upload(df, sheet_url, wks_name, credentials=creds)  

In [6]:
tianmin = get_user_archives('tianminlyu',1)

In [7]:
tianmin[0]

'https://api.chess.com/pub/player/tianminlyu/games/2023/10'

In [90]:
get_archive_games(tianmin[0])[0]['pgn']

'[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.10.01"]\n[Round "-"]\n[White "tianminlyu"]\n[Black "rayanjll"]\n[Result "0-1"]\n[CurrentPosition "5r2/2r1k1p1/3pn3/3Q4/1p5R/6p1/1P3P1P/4qBK1 w - -"]\n[Timezone "UTC"]\n[ECO "C86"]\n[ECOUrl "https://www.chess.com/openings/Ruy-Lopez-Opening-Morphy-Defense-Worrall-Attack-6...b5-7.Bb3"]\n[UTCDate "2023.10.01"]\n[UTCTime "20:08:06"]\n[WhiteElo "2281"]\n[BlackElo "2151"]\n[TimeControl "600"]\n[Termination "rayanjll won by resignation"]\n[StartTime "20:08:06"]\n[EndDate "2023.10.01"]\n[EndTime "20:27:30"]\n[Link "https://www.chess.com/game/live/89929820421"]\n\n1. e4 {[%clk 0:10:00]} 1... e5 {[%clk 0:09:54.7]} 2. Nf3 {[%clk 0:09:58.5]} 2... Nc6 {[%clk 0:09:48.9]} 3. Bb5 {[%clk 0:09:57]} 3... a6 {[%clk 0:09:43.2]} 4. Ba4 {[%clk 0:09:55.8]} 4... Nf6 {[%clk 0:09:37.7]} 5. Qe2 {[%clk 0:09:55]} 5... b5 {[%clk 0:09:31.8]} 6. Bb3 {[%clk 0:09:53.2]} 6... Be7 {[%clk 0:09:20]} 7. O-O {[%clk 0:09:51.3]} 7... d6 {[%clk 0:09:08.5]} 8. c3 {[%clk 0:09:4

In [24]:
get_archive_games(tianmin[0])[0]['pgn'].split("\n")[-2].split("}")[0].split(" {[%clk ")[0].split(". ")[-1]

'e4'

In [27]:
get_archive_games(tianmin[0])[0]['pgn'].split("\n")[-2].split("}")[0].split(" {[%clk ")[1][:-1]

'0:10:00'

In [30]:
get_archive_games(tianmin[0])[0]['pgn'].split("\n")[-2].split("}")[1].split(" {[%clk ")[0].split(". ")[-1]

'e5'

In [31]:
get_archive_games(tianmin[0])[0]['pgn'].split("\n")[-2].split("}")[1].split(" {[%clk ")[1][:-1]

'0:09:54.7'

In [41]:
1 % 2

1