# Chess Opening Recommender : Opening Recommendation based on user

This notebook recommends chess openings tailored to a user’s style by analyzing the openings played by their closest stylistic peers among elite players. 

**Load Data**: Loads elite games data from a parquet file.

**Peer Filtering**: Defines a list of top stylistic peer players (from previous clustering/neighbor analysis).

Filters the elite games to only those involving these peers.

**Opening Statistics**: Computes opening statistics (games played, win/draw rates, performance) for both White and Black games among the peers.

**Recommendation**: Ranks openings by a weighted score (performance × frequency).

Displays the top recommended openings for both White and Black, based on what works best for the user’s closest elite peers.

In [14]:
import pandas as pd
from pathlib import Path
import numpy as np

DATA_DIR = Path("/Users/nicholasvega/Downloads/chess-opening-recommender/data")
ELITE_PARQUET = DATA_DIR / "lichess_elite_2025-05.parquet"

In [6]:
elite_df = pd.read_parquet(ELITE_PARQUET)
elite_df

Unnamed: 0,white,black,result,eco,opening,utc_date,utc_time,time_control,moves
0,eNErGyOFbEiNGbOT,Nikitosik-ai,1/2-1/2,A00,Clemenz Opening,2025.05.01,00:00:15,180+0,"[h2h3, e7e5, e2e4, g8f6, b1c3, f8b4, a2a3, b4a..."
1,Chessanonymous1,Ariel_mlr,1-0,A45,Trompowsky Attack,2025.05.01,00:00:54,180+0,"[d2d4, g8f6, c1g5, d7d5, g5f6, e7f6, e2e3, f8d..."
2,Kyreds_pet,OlympusCz,1-0,B90,"Sicilian Defense: Najdorf Variation, English A...",2025.05.01,00:00:45,180+0,"[e2e4, c7c5, g1f3, d7d6, d2d4, c5d4, f3d4, g8f..."
3,rtahmass,Mettigel,0-1,C72,"Ruy Lopez: Morphy Defense, Modern Steinitz Def...",2025.05.01,00:01:09,180+0,"[e2e4, e7e5, g1f3, b8c6, f1b5, a7a6, b5a4, d7d..."
4,CruelKen,tomlesspit,1/2-1/2,D38,"Queen's Gambit Declined: Ragozin Defense, Alek...",2025.05.01,00:01:12,180+2,"[g1f3, d7d5, d2d4, g8f6, c2c4, e7e6, b1c3, f8b..."
...,...,...,...,...,...,...,...,...,...
310137,BlueHorseshoe,Drugpisi2009,0-1,D41,Queen's Gambit Declined: Semi-Tarrasch Defense,2025.05.31,23:57:55,180+2,"[d2d4, d7d5, c2c4, e7e6, b1c3, g8f6, g1f3, c7c..."
310138,Voinov,ViennaDragon,0-1,B20,Sicilian Defense: Mengarini Variation,2025.05.31,23:58:12,180+0,"[e2e4, c7c5, a2a3, d7d6, b2b4, c5b4, a3b4, g8f..."
310139,Tenessy,joddle,0-1,C47,Four Knights Game: Scotch Variation Accepted,2025.05.31,23:59:38,180+2,"[e2e4, e7e5, b1c3, g8f6, g1f3, b8c6, d2d4, e5d..."
310140,SwitzerlandChair,Wildindian,1-0,B23,"Sicilian Defense: Closed, Traditional",2025.05.31,23:59:41,180+0,"[e2e4, c7c5, b1c3, b8c6, f1b5, c6d4, a2a4, a7a..."


## Filter games to be peer games only 

In [4]:
def get_peer_games(elite_df: pd.DataFrame, peer_list: list[str]) -> pd.DataFrame:
    """
    Return games where either White or Black is in the given peer_list.

    Args:
        elite_df: DataFrame containing columns 'white' and 'black'.
        peer_list: list of player usernames to filter by.

    Returns:
        Subset of elite_df with only games involving peers.
    """
    return elite_df[elite_df['white'].isin(peer_list) | elite_df['black'].isin(peer_list)].copy()

In [3]:
top_peers = pd.DataFrame({
    "player": ['b31b6Consultant', 'fsalguero', 'Sakh_chess_2', 'AAlmeidaTX', 'Recobachess']
})

In [7]:
peer_list = top_peers["player"].tolist()      
time_control = None            
min_games   = 5               
top_n       = 3                

peer_games = get_peer_games(elite_df, peer_list)
peer_games

Unnamed: 0,white,black,result,eco,opening,utc_date,utc_time,time_control,moves
12,AAlmeidaTX,elhlwagy90,0-1,B40,Sicilian Defense: Smith-Morra Gambit Deferred,2025.05.01,00:02:51,180+0,"[e2e4, c7c5, g1f3, e7e6, d2d4, c5d4, c2c3, a7a..."
23,ludusTrash,AAlmeidaTX,0-1,B60,"Sicilian Defense: Richter-Rauzer Variation, Mo...",2025.05.01,00:05:02,180+0,"[e2e4, c7c5, g1f3, d7d6, d2d4, c5d4, f3d4, g8f..."
37,AAlmeidaTX,HasanKastraveci,1-0,B50,"Sicilian Defense: Modern Variations, Tartakower",2025.05.01,00:08:56,180+0,"[e2e4, c7c5, g1f3, d7d6, d2d4, c5d4, c2c3, g8f..."
55,MityshevS,Sakh_chess_2,1-0,B12,"Caro-Kann Defense: Advance Variation, Botvinni...",2025.05.01,00:13:39,180+0,"[e2e4, c7c6, d2d4, d7d5, e4e5, c6c5, c2c3, b8c..."
59,MsBlunderful,AAlmeidaTX,0-1,C46,Three Knights Opening,2025.05.01,00:15:10,180+0,"[e2e4, e7e5, g1f3, b8c6, b1c3, f8b4, c3d5, b4d..."
...,...,...,...,...,...,...,...,...,...
309485,AAlmeidaTX,Passion_Attack,1-0,B29,Sicilian Defense: Nimzowitsch Variation,2025.05.31,21:46:57,180+0,"[e2e4, c7c5, g1f3, g8f6, d2d4, f6e4, d4c5, e4c..."
309516,AAlmeidaTX,EmperatrizAlexandra,0-1,C00,French Defense: Chigorin Variation,2025.05.31,21:53:39,180+0,"[e2e4, e7e6, d1e2, c7c5, g1f3, b8c6, c2c3, e6e..."
309547,zeigmata,AAlmeidaTX,1-0,A43,Benoni Defense: Old Benoni,2025.05.31,21:59:27,180+0,"[d2d4, c7c5, e2e3, c5d4, e3d4, g8f6, g1f3, b8c..."
309986,AAlmeidaTX,cartesianvector,1-0,B00,"Nimzowitsch Defense: Kennedy Variation, de Sme...",2025.05.31,23:18:45,180+0,"[e2e4, d7d6, d2d4, e7e5, d4e5, b8c6, e5d6, f8d..."


In [8]:
peer_games = get_peer_games(elite_df, top_peers['player'].tolist())
peer_games

Unnamed: 0,white,black,result,eco,opening,utc_date,utc_time,time_control,moves
12,AAlmeidaTX,elhlwagy90,0-1,B40,Sicilian Defense: Smith-Morra Gambit Deferred,2025.05.01,00:02:51,180+0,"[e2e4, c7c5, g1f3, e7e6, d2d4, c5d4, c2c3, a7a..."
23,ludusTrash,AAlmeidaTX,0-1,B60,"Sicilian Defense: Richter-Rauzer Variation, Mo...",2025.05.01,00:05:02,180+0,"[e2e4, c7c5, g1f3, d7d6, d2d4, c5d4, f3d4, g8f..."
37,AAlmeidaTX,HasanKastraveci,1-0,B50,"Sicilian Defense: Modern Variations, Tartakower",2025.05.01,00:08:56,180+0,"[e2e4, c7c5, g1f3, d7d6, d2d4, c5d4, c2c3, g8f..."
55,MityshevS,Sakh_chess_2,1-0,B12,"Caro-Kann Defense: Advance Variation, Botvinni...",2025.05.01,00:13:39,180+0,"[e2e4, c7c6, d2d4, d7d5, e4e5, c6c5, c2c3, b8c..."
59,MsBlunderful,AAlmeidaTX,0-1,C46,Three Knights Opening,2025.05.01,00:15:10,180+0,"[e2e4, e7e5, g1f3, b8c6, b1c3, f8b4, c3d5, b4d..."
...,...,...,...,...,...,...,...,...,...
309485,AAlmeidaTX,Passion_Attack,1-0,B29,Sicilian Defense: Nimzowitsch Variation,2025.05.31,21:46:57,180+0,"[e2e4, c7c5, g1f3, g8f6, d2d4, f6e4, d4c5, e4c..."
309516,AAlmeidaTX,EmperatrizAlexandra,0-1,C00,French Defense: Chigorin Variation,2025.05.31,21:53:39,180+0,"[e2e4, e7e6, d1e2, c7c5, g1f3, b8c6, c2c3, e6e..."
309547,zeigmata,AAlmeidaTX,1-0,A43,Benoni Defense: Old Benoni,2025.05.31,21:59:27,180+0,"[d2d4, c7c5, e2e3, c5d4, e3d4, g8f6, g1f3, b8c..."
309986,AAlmeidaTX,cartesianvector,1-0,B00,"Nimzowitsch Defense: Kennedy Variation, de Sme...",2025.05.31,23:18:45,180+0,"[e2e4, d7d6, d2d4, e7e5, d4e5, b8c6, e5d6, f8d..."


## Compute opening stats for White & Black on peer games 

In [9]:
def compute_opening_stats(
    games_df: pd.DataFrame,
    peer_list: list[str],
    color: str = "white",
    min_games: int = 0,
) -> pd.DataFrame:
    if color not in ("white", "black"):
        raise ValueError("color must be 'white' or 'black'")

    # Filter to peer games for the given color
    df = games_df[games_df[color].isin(peer_list)].copy()
    win_str = "1-0" if color == "white" else "0-1"

    stats = (
        df.groupby(["eco", "opening"])
        .agg(
            games_played=("result", "size"),
            wins=("result", lambda s: (s == win_str).sum()),
            draws=("result", lambda s: (s == "1/2-1/2").sum()),
        )
        .reset_index()
    )
    stats["score_pct"] = (stats["wins"] + 0.5 * stats["draws"]) / stats["games_played"]
    stats["weight"] = stats["score_pct"] * np.log10(stats["games_played"] + 1)

    if min_games > 0:
        stats = stats[stats["games_played"] >= min_games]

    return stats.sort_values(["weight", "games_played"], ascending=False).reset_index(
        drop=True
    )

In [15]:
peer_list = top_peers['player'].tolist()

In [16]:
white_peer_games = peer_games[peer_games['white'].isin(peer_list)]
white_stats = compute_opening_stats(white_peer_games, peer_list, min_games=1) 

black_peer_games = peer_games[peer_games['black'].isin(peer_list)]
black_stats = compute_opening_stats(black_peer_games, peer_list, min_games=1)

print("Top White Recommendations:")
display(white_stats.head(5))

print("Top Black Recommendations:")
display(black_stats.head(5))


Top White Recommendations:


Unnamed: 0,eco,opening,games_played,wins,draws,score_pct,weight
0,B90,"Sicilian Defense: Najdorf Variation, Lipnitsky...",72,46,4,0.666667,1.242215
1,C00,French Defense: Chigorin Variation,76,47,3,0.638158,1.203879
2,B27,Sicilian Defense: Hyperaccelerated Fianchetto,24,18,1,0.770833,1.077579
3,B10,Caro-Kann Defense: Two Knights Attack,72,40,3,0.576389,1.073999
4,A06,Nimzo-Larsen Attack: Classical Variation,82,40,10,0.54878,1.053153


Top Black Recommendations:


Unnamed: 0,eco,opening,games_played,wins,draws,score_pct,weight
0,A01,Nimzo-Larsen Attack: Indian Variation,1,1,0,1.0,0.30103
1,B00,Owen Defense,1,1,0,1.0,0.30103
2,B00,Pirc Defense,1,1,0,1.0,0.30103
3,C41,Philidor Defense: Lion Variation,1,0,0,0.0,0.0


In [17]:
def recommend_openings(
    white_stats: pd.DataFrame, black_stats: pd.DataFrame, top_n: int = 5
) -> tuple[list, list]:
    white_recs = white_stats.head(top_n)[
        ["eco", "opening", "games_played", "score_pct"]
    ].values.tolist()
    black_recs = black_stats.head(top_n)[
        ["eco", "opening", "games_played", "score_pct"]
    ].values.tolist()
    return white_recs, black_recs


In [22]:
white_recs, black_recs = recommend_openings(white_stats, black_stats, top_n=top_n)

# Pretty-print
print("White recommendations:")
for eco, name, g, s in white_recs:
    print(f"  {eco: <4} {name:<40}  games={g:3d}  score={s:.2%}")

print("\nBlack recommendations:")
for eco, name, g, s in black_recs:
    print(f"  {eco: <4} {name:<40}  games={g:3d}  score={s:.2%}")


White recommendations:
  B90  Sicilian Defense: Najdorf Variation, Lipnitsky Attack  games= 72  score=66.67%
  C00  French Defense: Chigorin Variation        games= 76  score=63.82%
  B27  Sicilian Defense: Hyperaccelerated Fianchetto  games= 24  score=77.08%

Black recommendations:
  A01  Nimzo-Larsen Attack: Indian Variation     games=  1  score=100.00%
  B00  Owen Defense                              games=  1  score=100.00%
  B00  Pirc Defense                              games=  1  score=100.00%
