## Resources

- Berserk documentation: https://berserk.readthedocs.io/en/master/index.html

- Lichess API documentation: https://lichess.org/api

- Lichess Database: https://database.lichess.org/

## Imports

In [1]:
import os
import berserk
import random
import pandas as pd
import numpy as np
from tqdm import tqdm

## Load API token and usernames

The current username list was extracted from the game data published on database.lichess.org and is stored in `../data/usernames_2024-08.txt`.

In [2]:
# Read the Lichess API token from the environment.
# The value is None when LICHESS_API_TOKEN is not set.
api_token = os.getenv('LICHESS_API_TOKEN')

## Get a sample of users, start a session

In [3]:
# Read the comma-separated username list from disk.
with open("../data/usernames_2024-08.txt", "r") as username_file:
    usernames = username_file.read().split(",")

# Fixed seed so the same users are drawn on every run: 500 names are sampled
# without replacement and only the first 50 of that draw are kept.
random.seed(1)
users_sample = random.sample(usernames, k=500)
users_sample = users_sample[:50]

# Open an authenticated session and build the client used for all API calls below.
session = berserk.TokenSession(api_token)
client = berserk.Client(session)

## Request data from the API

In [8]:
# Rating dataframes. They stay empty while the rating-history extraction inside
# the loop is commented out, but later cells still format and save them.
rapid_ratings = pd.DataFrame(columns=["username", "year", "month", "day", "rating"])
puzzle_ratings = pd.DataFrame(columns=["username", "year", "month", "day", "rating"])

# A user is included only if more than this share of their games is blitz.
MIN_BLITZ_SHARE = .05

# Games dataframe. Despite the name it holds blitz games (perf_type="blitz");
# the name is kept because later cells refer to it.
rapid_games = pd.DataFrame()

# Per-user frames are collected here and concatenated once after the loop,
# instead of re-copying the growing frame on every iteration.
user_game_frames = []

# Users whose data could not be fetched or parsed, kept for inspection.
skipped_users = []

for user in tqdm(users_sample):
    try:
        # Disabled rating-history extraction (kept for reference):
        # user_history = client.users.get_rating_history(user)
        # if len(user_history[2]["points"]) > 50 and len(user_history[2]["points"]) < 500: # only extract data if this user has more than 50 rated rapid games.

        # rating data:
        # rapid ratings
        # user_rapid_ratings = pd.DataFrame(user_history[2]["points"])
        # user_rapid_ratings.insert(0, "username", user)
        # user_rapid_ratings['month'] += 1  # because months in lichess API start at 0 we have to increment by 1
        # rapid_ratings = pd.concat([rapid_ratings, user_rapid_ratings])

        # # puzzle ratings
        # user_puzzle_ratings = pd.DataFrame(user_history[13]["points"])
        # user_puzzle_ratings.insert(0, "username", user)
        # user_puzzle_ratings['month'] += 1  # because months in lichess API start at 0 we have to increment by 1
        # puzzle_ratings = pd.concat([puzzle_ratings, user_puzzle_ratings])

        # Look up the perfs of the user being processed. This was previously
        # hard-coded to a single account, so every user in the sample was
        # accepted or rejected based on that one account's blitz share.
        game_mode_data = pd.DataFrame(client.users.get_public_data(user)["perfs"])
        total_games = game_mode_data.loc["games", :].sum()

        # Users without a blitz perf entry are not included.
        if "blitz" not in game_mode_data.columns:
            continue

        # Share of this user's games that are blitz.
        prop_blitz = game_mode_data.loc["games", "blitz"] / total_games
        if prop_blitz > MIN_BLITZ_SHARE:
            # Include the user: export their blitz games with evals, clocks and opening.
            user_games = pd.DataFrame(list(client.games.export_by_player(user, evals=True, clocks=True, opening=True, perf_type="blitz")))
            user_games.insert(0, "username", user)
            user_game_frames.append(user_games)
    except (KeyError, berserk.exceptions.ResponseError) as err:
        # Best effort: a failing user must not abort the long run, but record
        # and report the failure instead of discarding it silently.
        skipped_users.append(user)
        tqdm.write(f"Skipping {user}: {err!r}")

# Single concatenation; the original per-user row indices are kept.
if user_game_frames:
    rapid_games = pd.concat(user_game_frames)

100%|██████████| 50/50 [54:35<00:00, 65.51s/it]   


## Formatting Data

### Rapid Ratings

In [10]:
# Combine the year / month / day columns into a single datetime column "date",
# then remove the three component columns.
rapid_date_parts = ["year", "month", "day"]
rapid_ratings = rapid_ratings.assign(
    date=pd.to_datetime(rapid_ratings[rapid_date_parts])
).drop(columns=rapid_date_parts)
rapid_ratings

Unnamed: 0,username,rating,date


### Puzzle Ratings

In [11]:
# Combine the year / month / day columns into a single datetime column "date",
# then remove the three component columns.
puzzle_date_parts = ["year", "month", "day"]
puzzle_ratings = puzzle_ratings.assign(
    date=pd.to_datetime(puzzle_ratings[puzzle_date_parts])
).drop(columns=puzzle_date_parts)
puzzle_ratings

Unnamed: 0,username,rating,date


### Rapid Games (currently holds blitz games)

In [12]:
# Keep only games of the "standard" variant that were actually started
# (status other than "noStart"), and renumber the rows from 0.
started_standard = (rapid_games["variant"] == "standard") & (rapid_games["status"] != "noStart")
rapid_games = rapid_games.loc[started_standard].reset_index(drop=True)

# Turn the bare game id into a link to the game.
rapid_games["id"] = "https://lichess.org/" + rapid_games["id"]


# extract white and black columns from players column:
def _player_name(side):
    """Return side["user"]["name"], or None when that path is not available.

    Non-dict cells (e.g. NaN for a missing side) and entries whose "user"
    value is missing or None yield None instead of raising.
    """
    if not isinstance(side, dict):
        return None
    return (side.get("user") or {}).get("name")


# One column per key of the "players" dicts; the outcome logic below reads
# the "white" and "black" columns.
players = pd.DataFrame(rapid_games["players"].to_list())

# Column-wise Series.map replaces the deprecated DataFrame.applymap.
players = players.apply(lambda column: column.map(_player_name))

# Side-by-side concat aligns on the index, so rapid_games needs a fresh 0..n-1 index here.
rapid_games = pd.concat([rapid_games, players], axis=1)


# extract opening names:
# Dict cells are replaced by their "name" entry; anything else (NaN for games
# without an opening, or a name already extracted by a previous run of this
# cell) is passed through unchanged. Mapping the column directly avoids a
# row-wise pass over the whole frame just to read one column.
rapid_games["opening"] = rapid_games["opening"].map(
    lambda opening: opening.get("name") if isinstance(opening, dict) else opening
)


# extract evaluations:
def extract_eval(x):
    """Collect the 'eval' values from a list of per-move analysis entries.

    Returns an empty list when `x` is not a list. Entries that are not dicts,
    or dicts without an 'eval' key, are skipped, so the result can be shorter
    than the input.
    """
    if not isinstance(x, list):
        return []

    evaluations = []
    for entry in x:
        if isinstance(entry, dict) and 'eval' in entry:
            evaluations.append(entry['eval'])
    return evaluations
# Replace each game's analysis entries with the plain list of evaluations.
rapid_games['analysis'] = rapid_games['analysis'].apply(extract_eval)


# add outcome of the game (win, draw, loss) from the perspective of the username:
# A game counts as a win when the winning colour is the colour played by `username`.
user_won = (
    ((rapid_games["winner"] == "white") & (rapid_games["white"] == rapid_games["username"]))
    | ((rapid_games["winner"] == "black") & (rapid_games["black"] == rapid_games["username"]))
)
# Games without a winner are labelled "draw"; everything else that is not a win is a "loss".
# NOTE(review): any unfinished status other than "noStart" that has no winner would
# also be labelled "draw" - confirm this is intended.
rapid_games["outcome"] = np.select(
    [rapid_games["winner"].isna(), user_won],
    ["draw", "win"],
    default="loss",
)


# drop unnecessary columns:
# errors="ignore" because some of these columns may be absent from a given sample
# (presumably the export only includes them for games where they apply - confirm);
# a missing one must not abort the cell.
rapid_games = rapid_games.drop(
    columns=["variant", "perf", "players", "swiss", "initialFen", "tournament"],
    errors="ignore",
)

  players = players.applymap(lambda x: x.get("user", {}).get("name"))


In [13]:
# Display the cleaned games table (pandas truncates long frames to the first and last rows).
rapid_games

Unnamed: 0,username,id,rated,speed,createdAt,lastMoveAt,status,source,winner,opening,moves,clocks,clock,analysis,white,black,outcome
0,Jesuspb,https://lichess.org/YnbbGez4,True,blitz,2024-09-20 01:28:50.652000+00:00,2024-09-20 01:36:54.501000+00:00,outoftime,arena,black,Scandinavian Defense: Mieses-Kotroc Variation,e4 d5 exd5 Qxd5 c4 Qd6 Nc3 c6 d4 Nf6 Nf3 Bg4 B...,"[30003, 15003, 29859, 15003, 29595, 15003, 294...","{'initial': 300, 'increment': 0, 'totalTime': ...",[],smart36,Jesuspb,win
1,Jesuspb,https://lichess.org/DOADHk9r,True,blitz,2024-09-19 18:47:21.685000+00:00,2024-09-19 18:53:47.287000+00:00,resign,arena,white,Saragossa Opening,c3 d6 d3 Nf6 Qc2 Nbd7 Nd2 e5 e4 Be7 Ngf3 c6 d4...,"[15003, 30003, 15003, 29891, 15003, 29795, 148...","{'initial': 300, 'increment': 0, 'totalTime': ...",[],Jesuspb,BadMoves4,win
2,Jesuspb,https://lichess.org/VMzxrbWL,True,blitz,2024-09-19 18:41:33.915000+00:00,2024-09-19 18:45:27.799000+00:00,mate,arena,white,Saragossa Opening,c3 e5 d3 d5 Qc2 Nc6 Nd2 Nf6 e4 d4 Ngf3 Bd6 Be2...,"[15003, 30003, 15003, 29915, 14923, 29811, 149...","{'initial': 300, 'increment': 0, 'totalTime': ...",[],Jesuspb,Dario_Presza,win
3,Jesuspb,https://lichess.org/Hd7808LL,True,blitz,2024-09-19 03:26:10.518000+00:00,2024-09-19 03:30:33.191000+00:00,outoftime,arena,black,Scandinavian Defense: Gubinsky-Melts Defense,e4 d5 exd5 Qxd5 Nc3 Qd6 Bc4 c6 d3 Nf6 Nge2 Be6...,"[18003, 9003, 17883, 9003, 17803, 9003, 17275,...","{'initial': 180, 'increment': 0, 'totalTime': ...",[],Christian198208,Jesuspb,win
4,Jesuspb,https://lichess.org/VcU5HRYu,True,blitz,2024-09-19 03:21:51.726000+00:00,2024-09-19 03:25:46.971000+00:00,resign,arena,black,Scandinavian Defense: Mieses-Kotroc Variation,e4 d5 exd5 Qxd5 d4 Qd6 Nf3 c6 Bc4 Nf6 Bxf7+ Kx...,"[18003, 9003, 17891, 9003, 17811, 9003, 17651,...","{'initial': 180, 'increment': 0, 'totalTime': ...",[],Chaillas,Jesuspb,win
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96502,Adrianes13,https://lichess.org/XmX0Wl4a,True,blitz,2024-04-04 21:11:43.666000+00:00,2024-04-04 21:17:00.084000+00:00,outoftime,pool,white,Amar Opening,Nh3 d5 e3 Bxh3 gxh3 Nf6 Qf3 e6 c4 c6 cxd5 cxd5...,"[18003, 18003, 17723, 17347, 17587, 16723, 171...","{'initial': 180, 'increment': 0, 'totalTime': ...","[-59, -62, -107, -99, -60, -79, -102, -101, -9...",Adrianes13,theogantz,win
96503,Adrianes13,https://lichess.org/chqkemno,True,blitz,2024-04-04 14:53:23.503000+00:00,2024-04-04 14:56:51.757000+00:00,mate,pool,white,King's Pawn Game: Gunderam Gambit,e4 e5 Nf3 c6 Nxe5 Na6 Bxa6 bxa6 Qf3 f6 Ng4 Rb8...,"[18003, 18003, 17771, 17307, 17475, 15827, 169...","{'initial': 180, 'increment': 0, 'totalTime': ...",[],rmazeiks,Adrianes13,loss
96504,Adrianes13,https://lichess.org/pprbbEK0,True,blitz,2024-04-04 14:50:17.640000+00:00,2024-04-04 14:53:05.481000+00:00,mate,pool,black,Amar Opening,Nh3 e5 e4 Nc6 Qf3 Qf6 Qxf6 Nxf6 Ng5 Nd4 b3 Nxc...,"[18003, 18003, 17827, 16955, 17707, 16643, 174...","{'initial': 180, 'increment': 0, 'totalTime': ...",[],Adrianes13,rmazeiks,loss
96505,Adrianes13,https://lichess.org/Ck3pruk5,False,blitz,2023-01-29 15:03:05.274000+00:00,2023-01-29 15:10:19.862000+00:00,outoftime,lobby,white,Van't Kruijs Opening,e3 a5 Ne2 h5 d4 b6 g3 Na6 Bg2 c6 c4 Nh6 Nbc3 d...,"[30003, 30003, 30003, 29699, 30003, 29531, 299...","{'initial': 300, 'increment': 0, 'totalTime': ...",[],Fantom83,Adrianes13,loss


## Save as .csv

In [17]:
# Each frame gets its own file. The ratings frame used to be written to the
# same path as the games frame, which overwrote it immediately.
puzzle_ratings.to_csv("../data/puzzle_ratings_Dec15.csv", index=False)
rapid_ratings.to_csv("../data/rapid_ratings_Dec15.csv", index=False)
rapid_games.to_csv("../data/blitz_n50_Aug24.csv", index=False)