In [23]:
import requests
import pandas as pd
import numpy as np

""" 
This script uses the Lichess API to export the games of a user
Documentation at: https://lichess.org/api#tag/Games/operation/apiGamesUser

It counts the number of games played with each opening
And joins it to the overall database of openings with their counts

"""

# Parse lichess games API response - data comes in newline-delimited JSON
def response_parser(s):
    lines = s.strip().split('\n')

    # Extract key-value pairs
    data = {}
    for line in lines:
        if line.startswith('['):
            key = line.split(' "')[0][1:]
            value = line.split(' "')[1].rstrip('"]')
            data[key] = value

    return data

# Get overall data
openings_db = "assets/openings_pgn/combined_with_stats_parents.tsv"
df = pd.read_csv(openings_db, sep="\t")

# Get user data
username = 'khg001'
max = 50

headers = {"Content-Type": "application/x-ndjson"}
url = f"https://lichess.org/api/games/user/{username}?pgnInJson=true&opening=true&max={max}&moves=false"
response = requests.get(url,headers=headers)


# Parse and create data
l = response.content.decode('utf-8').split('\n\n\n')
del l[-1] # Last response is empty

df['player_white'] = 0
df['player_black'] = 0

for game in l:
    game_parsed = response_parser(game)
    if game_parsed['White'] == username:
        df.loc[df['name'] == game_parsed['Opening'],'player_white'] += 1
    else:
        df.loc[df['name'] == game_parsed['Opening'],'player_black'] += 1

# Credit openings with children
df['player_white_with_children'] = df['player_white']
df['player_black_with_children'] = df['player_black']

for index, row in df.iterrows():
    
    parents = eval(row['parents'])

    if len(parents) > 0:
        for parent in parents:
            df.loc[df['name'] == parent,'player_white_with_children'] += row['player_white']
            df.loc[df['name'] == parent,'player_black_with_children'] += row['player_black']

# Clean and analyze
df['player_total'] = df['player_white'] + df['player_black']
df['player_total_with_children'] = df['player_white_with_children'] + df['player_black_with_children']
df['all_pct'] = df['games'] / df['games'].sum()
df['player_pct'] = df['player_total'] / df['player_total'].sum()
df['player_pct_with_children'] = df['player_total_with_children'] / df['player_total_with_children'].sum()
df['white_pct'] = df['player_white'] / df['player_white'].sum()
df['white_pct_with_children'] = df['player_white_with_children'] / df['player_white_with_children'].sum()
df['black_pct'] = df['player_black'] / df['player_black'].sum()
df['black_pct_with_children'] = df['player_black_with_children'] / df['player_black_with_children'].sum()

# Ratios and handle division by zero
df['ratio_white'] = np.where(df['all_pct'] == 0, 0, df['white_pct_with_children'] / df['all_pct'])
df['ratio_black'] = np.where(df['all_pct'] == 0, 0, df['black_pct_with_children'] / df['all_pct'])
df.to_csv("assets/base_file.tsv", sep="\t", index=False)


In [24]:
df

Unnamed: 0,eco,name,pgn,uci_moves,games,parents,player_white,player_black,player_white_with_children,player_black_with_children,...,player_total_with_children,all_pct,player_pct,player_pct_with_children,white_pct,white_pct_with_children,black_pct,black_pct_with_children,ratio_white,ratio_black
0,B00,King's Pawn Game,1. e4,['e2e4'],2743851447,[],0,0,18,10,...,28,0.184830,0.00,0.165680,0.0,0.200000,0.00,0.126582,1.082073,0.684856
1,A40,Queen's Pawn Game,1. d4,['d2d4'],1172382292,[],0,0,6,11,...,17,0.078974,0.00,0.100592,0.0,0.066667,0.00,0.139241,0.844164,1.763127
2,C40,King's Knight Opening,1. e4 e5 2. Nf3,"['e2e4', 'e7e5', 'g1f3']",685445740,"[""King's Pawn Game""]",0,0,0,1,...,1,0.046173,0.00,0.005917,0.0,0.000000,0.00,0.012658,0.000000,0.274149
3,B20,Sicilian Defense,1. e4 c5,"['e2e4', 'c7c5']",502221832,"[""King's Pawn Game""]",0,1,3,4,...,7,0.033830,0.02,0.041420,0.0,0.033333,0.04,0.050633,0.985304,1.496665
4,C44,King's Knight Opening: Normal Variation,1. e4 e5 2. Nf3 Nc6,"['e2e4', 'e7e5', 'g1f3', 'b8c6']",429385033,"[""King's Pawn Game"", ""King's Knight Opening""]",0,0,0,1,...,1,0.028924,0.00,0.005917,0.0,0.000000,0.00,0.012658,0.000000,0.437636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3074,C44,Scotch Game: Sea-Cadet Mate,1. e4 e5 2. Nf3 Nc6 3. d4 exd4 4. c3 dxc3 5. N...,"['e2e4', 'e7e5', 'g1f3', 'b8c6', 'd2d4', 'e5d4...",0,"[""King's Pawn Game"", ""King's Knight Opening"", ...",0,0,0,0,...,0,0.000000,0.00,0.000000,0.0,0.000000,0.00,0.000000,0.000000,0.000000
3075,D56,"Queen's Gambit Declined: Lasker Defense, Russi...",1. d4 d5 2. c4 e6 3. Nc3 Nf6 4. Bg5 Be7 5. e3 ...,"['d2d4', 'd7d5', 'c2c4', 'e7e6', 'b1c3', 'g8f6...",0,"[""Queen's Pawn Game"", ""Queen's Gambit"", ""Queen...",0,0,0,0,...,0,0.000000,0.00,0.000000,0.0,0.000000,0.00,0.000000,0.000000,0.000000
3076,D28,"Queen's Gambit Accepted: Classical, Flohr Vari...",1. d4 d5 2. c4 dxc4 3. Nf3 Nf6 4. e3 e6 5. Bxc...,"['d2d4', 'd7d5', 'c2c4', 'd5c4', 'g1f3', 'g8f6...",0,"[""Queen's Pawn Game"", ""Queen's Gambit"", ""Queen...",0,0,0,0,...,0,0.000000,0.00,0.000000,0.0,0.000000,0.00,0.000000,0.000000,0.000000
3077,A00,Barnes Opening: Fool's Mate,1. f3 e5 2. g4 Qh4#,"['f2f3', 'e7e5', 'g2g4', 'd8h4']",0,['Barnes Opening'],0,0,0,0,...,0,0.000000,0.00,0.000000,0.0,0.000000,0.00,0.000000,0.000000,0.000000


In [25]:
x = df.sort_values(by='ratio_white',ascending=False).head(5)['name'].tolist()


In [26]:
jsonify(x)

NameError: name 'jsonify' is not defined

In [27]:
[("king's gambit",1),("queen's gambit",2),("queen's gambit declined",3),("queen's gambit accepted",4),("slav defense",5)]

[("king's gambit", 1),
 ("queen's gambit", 2),
 ("queen's gambit declined", 3),
 ("queen's gambit accepted", 4),
 ('slav defense', 5)]