# Chess Opening Recommender : Opening Recommendation based on user

## Overview  
Use the **Top 5 stylistic peers** to find which openings they play most often—and most successfully—as White and as Black. 

In [6]:
import pandas as pd
from pathlib import Path

DATA_DIR = Path("/Users/nicholasvega/Downloads/chess-opening-recommender/src/data")
ELITE_PARQUET = DATA_DIR / "lichess_elite_2025-05.parquet"

In [11]:
elite_df = pd.read_parquet(ELITE_PARQUET)
elite_df

Unnamed: 0,white,black,result,eco,opening,utc_date,utc_time,time_control,moves
0,eNErGyOFbEiNGbOT,Nikitosik-ai,1/2-1/2,A00,Clemenz Opening,2025.05.01,00:00:15,180+0,"[h2h3, e7e5, e2e4, g8f6, b1c3, f8b4, a2a3, b4a..."
1,Chessanonymous1,Ariel_mlr,1-0,A45,Trompowsky Attack,2025.05.01,00:00:54,180+0,"[d2d4, g8f6, c1g5, d7d5, g5f6, e7f6, e2e3, f8d..."
2,Kyreds_pet,OlympusCz,1-0,B90,"Sicilian Defense: Najdorf Variation, English A...",2025.05.01,00:00:45,180+0,"[e2e4, c7c5, g1f3, d7d6, d2d4, c5d4, f3d4, g8f..."
3,rtahmass,Mettigel,0-1,C72,"Ruy Lopez: Morphy Defense, Modern Steinitz Def...",2025.05.01,00:01:09,180+0,"[e2e4, e7e5, g1f3, b8c6, f1b5, a7a6, b5a4, d7d..."
4,CruelKen,tomlesspit,1/2-1/2,D38,"Queen's Gambit Declined: Ragozin Defense, Alek...",2025.05.01,00:01:12,180+2,"[g1f3, d7d5, d2d4, g8f6, c2c4, e7e6, b1c3, f8b..."
...,...,...,...,...,...,...,...,...,...
495,Ukraine-team-creator,Nikitosik-ai,1/2-1/2,A00,Hungarian Opening,2025.05.01,02:11:52,180+0,"[g2g3, d7d5, d2d4, g8f6, a2a3, e7e6, c2c4, d5c..."
496,Elretornodelmore,chac_sparrow,0-1,B43,"Sicilian Defense: Kan Variation, Knight Variation",2025.05.01,02:11:49,180+0,"[e2e4, c7c5, g1f3, e7e6, d2d4, c5d4, f3d4, a7a..."
497,colinbot,Fruity23,1/2-1/2,C65,Ruy Lopez: Berlin Defense,2025.05.01,02:11:39,180+2,"[e2e4, e7e5, g1f3, b8c6, f1b5, g8f6, d2d3, f8c..."
498,mmc1975,ShramovIgor,1-0,B01,Scandinavian Defense: Gubinsky-Melts Defense,2025.05.01,02:11:29,180+2,"[e2e4, d7d5, e4d5, d8d5, b1c3, d5d6, d2d4, g8f..."


In [7]:
top_peers = pd.DataFrame({
    'player': ['Attack2GM','Neftegor','Arteler','Sakh_chess_2','rtahmass']
})

## 4.1 Filter games to be peer games only 

In [None]:
def get_peer_games(elite_df: pd.DataFrame, peer_list: list[str]) -> pd.DataFrame:
    """
    Return games where either White or Black is in the given peer_list.

    Args:
        elite_df: DataFrame containing columns 'white' and 'black'.
        peer_list: list of player usernames to filter by.

    Returns:
        Subset of elite_df with only games involving peers.
    """
    return elite_df[elite_df['white'].isin(peer_list) | elite_df['black'].isin(peer_list)].copy()

In [13]:
peer_games = get_peer_games(elite_df, top_peers['player'].tolist())
peer_games

Unnamed: 0,white,black,result,eco,opening,utc_date,utc_time,time_control,moves
3,rtahmass,Mettigel,0-1,C72,"Ruy Lopez: Morphy Defense, Modern Steinitz Def...",2025.05.01,00:01:09,180+0,"[e2e4, e7e5, g1f3, b8c6, f1b5, a7a6, b5a4, d7d..."
18,rtahmass,Arteler,1-0,C06,"French Defense: Tarrasch Variation, Closed Var...",2025.05.01,00:03:57,180+0,"[e2e4, e7e6, d2d4, d7d5, b1d2, g8f6, e4e5, f6d..."
41,nickVet,Arteler,1-0,A00,Kádas Opening,2025.05.01,00:10:04,180+0,"[h2h4, e7e5, h1h3, d7d5, c2c4, c8h3, g1h3, c7c..."
44,rtahmass,SugarMagnolia,1/2-1/2,B80,Sicilian Defense: Scheveningen Variation,2025.05.01,00:11:14,180+0,"[e2e4, c7c5, g1f3, d7d6, d2d4, c5d4, f3d4, g8f..."
55,MityshevS,Sakh_chess_2,1-0,B12,"Caro-Kann Defense: Advance Variation, Botvinni...",2025.05.01,00:13:39,180+0,"[e2e4, c7c6, d2d4, d7d5, e4e5, c6c5, c2c3, b8c..."
68,Panesso,rtahmass,0-1,D35,"Queen's Gambit Declined: Exchange Variation, P...",2025.05.01,00:17:49,180+0,"[d2d4, g8f6, c2c4, e7e6, b1c3, d7d5, c4d5, e6d..."
76,Sakh_chess_2,ice2breaker,1-0,E73,King's Indian Defense: Semi-Averbakh System,2025.05.01,00:20:02,180+0,"[d2d4, g8f6, c2c4, g7g6, b1c3, f8g7, e2e4, d7d..."
81,Arteler,notrashopenings,0-1,C67,"Ruy Lopez: Open Berlin Defense, l'Hermet Varia...",2025.05.01,00:20:37,180+0,"[e2e4, e7e5, g1f3, b8c6, f1b5, g8f6, e1g1, f6e..."
96,LAVT96,rtahmass,0-1,B12,Caro-Kann Defense: Advance Variation,2025.05.01,00:23:32,180+0,"[e2e4, c7c6, d2d4, d7d5, e4e5, c8f5, b1d2, e7e..."
117,Sakh_chess_2,Surreptitiously421,0-1,E73,King's Indian Defense: Semi-Averbakh System,2025.05.01,00:30:02,180+0,"[d2d4, g8f6, c2c4, g7g6, b1c3, f8g7, e2e4, e8g..."


## 4.2 Compute opening stats for White & Black

In [None]:
def compute_opening_stats(games_df: pd.DataFrame, color: str = 'white', min_games: int = 5) -> pd.DataFrame:
    """
    Compute opening statistics (frequency and performance) for a given color.

    Args:
        games_df: DataFrame already filtered to include only peer games,
                  with columns ['white','black','eco','opening','result'].
        color: 'white' or 'black' — which side the peer was playing.
        min_games: minimum games threshold to include an opening.

    Returns:
        DataFrame with columns [eco, opening, games_played, wins, draws, score_pct],
        sorted descending by score_pct then games_played.
    """
    if color not in ('white','black'):
        raise ValueError("color must be 'white' or 'black'")

    # Determine what result string counts as a win for this color
    win_str = '1-0' if color == 'white' else '0-1'
    draw_str = '1/2-1/2'

    stats = (
        games_df
        .groupby(['eco','opening'])
        .agg(
            games_played=('result','size'),
            wins=('result', lambda s: (s == win_str).sum()),
            draws=('result', lambda s: (s == draw_str).sum()),
        )
        .reset_index()
    )

    # Compute score percentage
    stats['score_pct'] = (stats['wins'] + 0.5 * stats['draws']) / stats['games_played']

    # Filter by minimum games and sort
    stats = stats[stats['games_played'] >= min_games]
    stats = stats.sort_values(['score_pct','games_played'], ascending=False).reset_index(drop=True)

    return stats


In [25]:
peer_list = top_peers['player'].tolist()

# Only games where the peer was White
white_peer_games = peer_games[peer_games['white'].isin(peer_list)]
white_stats = compute_opening_stats(white_peer_games, min_games=1)  # start with min_games=1 to see anything

# Only games where the peer was Black
black_peer_games = peer_games[peer_games['black'].isin(peer_list)]
black_stats = compute_opening_stats(black_peer_games, min_games=1)

print("Top White Recommendations:")
display(white_stats.head(5))

print("Top Black Recommendations:")
display(black_stats.head(5))


Top White Recommendations:


Unnamed: 0,eco,opening,games_played,wins,draws,score_pct
0,B30,Sicilian Defense: Old Sicilian,2,2,0,1.0
1,C06,"French Defense: Tarrasch Variation, Closed Var...",2,2,0,1.0
2,A33,"English Opening: Symmetrical Variation, Anti-B...",1,1,0,1.0
3,A45,Trompowsky Attack,1,1,0,1.0
4,B00,Pirc Defense,1,1,0,1.0


Top Black Recommendations:


Unnamed: 0,eco,opening,games_played,wins,draws,score_pct
0,C06,"French Defense: Tarrasch Variation, Closed Var...",2,2,0,1.0
1,A00,Kádas Opening,1,1,0,1.0
2,A11,English Opening: Caro-Kann Defensive System,1,1,0,1.0
3,A41,Wade Defense,1,1,0,1.0
4,B11,"Caro-Kann Defense: Two Knights Attack, Mindeno...",1,1,0,1.0


In [26]:
def recommend_openings(white_stats: pd.DataFrame, black_stats: pd.DataFrame, top_n: int = 3):
    """
    Select the top_n openings for White and Black based on peer stats.

    Args:
        white_stats: DataFrame from compute_opening_stats(color='white')
        black_stats: DataFrame from compute_opening_stats(color='black')
        top_n: number of openings to recommend per color

    Returns:
        Two lists of tuples: (eco, opening, games_played, score_pct) for White and Black.
    """
    white_top = white_stats.head(top_n)[['eco','opening','games_played','score_pct']]
    black_top = black_stats.head(top_n)[['eco','opening','games_played','score_pct']]

    return (
        list(white_top.itertuples(index=False, name=None)),
        list(black_top.itertuples(index=False, name=None))
    )


In [27]:
white_recs, black_recs = recommend_openings(white_stats, black_stats, top_n=3)

In [29]:
white_recs

[('B30', 'Sicilian Defense: Old Sicilian', 2, 1.0),
 ('C06',
  'French Defense: Tarrasch Variation, Closed Variation, Main Line',
  2,
  1.0),
 ('A33',
  'English Opening: Symmetrical Variation, Anti-Benoni Variation, Spielmann Defense',
  1,
  1.0)]

In [30]:
black_recs

[('C06',
  'French Defense: Tarrasch Variation, Closed Variation, Main Line',
  2,
  1.0),
 ('A00', 'Kádas Opening', 1, 1.0),
 ('A11', 'English Opening: Caro-Kann Defensive System', 1, 1.0)]