In [3]:
import gradio as gr
import os
import pandas as pd
import json
from PIL import Image, ImageSequence
import io
from functools import reduce
import numpy as np

# Load the rank data that every leaderboard below is built from. The helper
# functions read it as rank_data[<game name>]["results"], a list of records.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default locale encoding.
with open("rank_data_03_25_2025.json", "r", encoding="utf-8") as f:
    rank_data = json.load(f)

# Game names in the order their "<game> Score" columns appear in the combined
# leaderboard. The same strings are used as keys into rank_data and into the
# selected_games mapping, so they must match those keys exactly.
GAME_ORDER = [
    "Super Mario Bros",
    "Sokoban",
    "2048",
    "Candy Crash",
    "Tetris (complete)",
    "Tetris (planning only)"
]

#######################################################
# Helper functions to build individual game leaderboards
#######################################################

def _build_leaderboard(game, column_names):
    """Build the leaderboard DataFrame for one game from ``rank_data``.

    Args:
        game: Key of the game in ``rank_data``.
        column_names: Mapping from raw result keys to display column names.

    Returns:
        A DataFrame with one row per record in the game's "results" list.
        Keys listed in ``column_names`` are renamed; any other keys are kept
        unchanged. When the game is missing or has no results, an empty
        DataFrame that still carries the display columns is returned, so
        callers can access ``df["Player"]`` without a KeyError.
    """
    results = rank_data.get(game, {}).get("results", [])
    if not results:
        # Keep the schema even without data.
        return pd.DataFrame(columns=list(column_names.values()))
    return pd.DataFrame(results).rename(columns=column_names)


def get_mario_leaderboard():
    """Return the "Super Mario Bros" leaderboard with display column names."""
    return _build_leaderboard("Super Mario Bros", {
        "model": "Player",
        "progress": "Progress (current/total)",
        "score": "Score",
        "time_s": "Time (s)",
    })


def get_sokoban_leaderboard():
    """Return the "Sokoban" leaderboard with display column names."""
    return _build_leaderboard("Sokoban", {
        "model": "Player",
        "levels_cracked": "Levels Cracked",
        "steps": "Steps",
    })


def get_2048_leaderboard():
    """Return the "2048" leaderboard with display column names."""
    return _build_leaderboard("2048", {
        "model": "Player",
        "score": "Score",
        "steps": "Steps",
        "time": "Time",
    })


def get_candy_leaderboard():
    """Return the "Candy Crash" leaderboard with display column names."""
    return _build_leaderboard("Candy Crash", {
        "model": "Player",
        "score_runs": "Score Runs",
        "average_score": "Average Score",
        "steps": "Steps",
    })


def get_tetris_leaderboard():
    """Return the "Tetris (complete)" leaderboard with display column names."""
    return _build_leaderboard("Tetris (complete)", {
        "model": "Player",
        "score": "Score",
        "steps_blocks": "Steps",
    })


def get_tetris_planning_leaderboard():
    """Return the "Tetris (planning only)" leaderboard with display column names."""
    return _build_leaderboard("Tetris (planning only)", {
        "model": "Player",
        "score": "Score",
        "steps_blocks": "Steps",
    })

#######################################################
# Combined leaderboard with ranking system
#######################################################

def _max_sokoban_level(levels_cracked):
    """Return the highest level found in a Sokoban "Levels Cracked" cell.

    The cell is parsed as a ";"-separated list of integers, e.g. "2; 3; 2".
    Anything that cannot be parsed that way (empty cell, non-numeric token,
    non-string value) counts as level 0, so a single malformed row cannot
    break the ranking of every other player.

    NOTE(review): a cell without separators such as "010" parses as the single
    integer 10 -- confirm the data always separates attempts with ";".
    """
    try:
        levels = [int(part.strip()) for part in levels_cracked.split(";") if part.strip()]
    except (AttributeError, ValueError):
        return 0
    return max(levels, default=0)


def calculate_rank_and_completeness(selected_games):
    """Build the combined leaderboard across the selected games.

    For every selected game each player gets a rank equal to one plus the
    number of rows in that game with a strictly higher score (ties share a
    rank). Players are ordered by their mean rank over the games they appear
    in (ascending), ties broken by the number of games played (descending).

    Args:
        selected_games: Mapping from game name to a truthy/falsy flag; only
            games with a truthy flag are included.

    Returns:
        A DataFrame with a "Player" column and one "<game> Score" column per
        selected game, in GAME_ORDER order. A player missing from a game gets
        "_" in that game's column. The helper columns used for sorting are
        dropped. An empty DataFrame is returned when there are no players.
    """
    # Per game: the loader and the column holding the value used for ranking.
    game_sources = {
        "Super Mario Bros": (get_mario_leaderboard, "Score"),
        "Sokoban": (get_sokoban_leaderboard, "Levels Cracked"),
        "2048": (get_2048_leaderboard, "Score"),
        "Candy Crash": (get_candy_leaderboard, "Average Score"),
        "Tetris (complete)": (get_tetris_leaderboard, "Score"),
        "Tetris (planning only)": (get_tetris_planning_leaderboard, "Score"),
    }

    # For each selected game keep two aligned Series: player names and the
    # comparable score of each row.
    game_tables = {}
    for game, (load, score_column) in game_sources.items():
        if not selected_games.get(game):
            continue
        df = load()
        if "Player" not in df.columns:
            # The game has no results: nobody played it, but it stays selected
            # so every player still gets a "_" entry for it.
            game_tables[game] = (pd.Series(dtype=object), pd.Series(dtype=float))
            continue
        scores = df[score_column]
        if game == "Sokoban":
            # Sokoban is ranked by the highest level reached in any attempt.
            scores = scores.apply(_max_sokoban_level)
        game_tables[game] = (df["Player"], scores)

    all_players = sorted(
        {name for players, _ in game_tables.values() for name in players.unique()}
    )

    results = []
    for player in all_players:
        player_data = {"Player": player}
        ranks = []

        for game in GAME_ORDER:
            if game not in game_tables:
                continue
            players, scores = game_tables[game]
            is_player = players == player
            if not is_player.any():
                player_data[f"{game} Score"] = "_"
                continue
            # If a player appears more than once, the first row wins.
            player_score = scores[is_player].iloc[0]
            ranks.append(int((scores > player_score).sum()) + 1)
            player_data[f"{game} Score"] = player_score

        # Average rank and completeness are used for sorting only.
        if ranks:
            player_data["Sort Rank"] = round(np.mean(ranks), 2)
            player_data["Games Played"] = len(ranks)
        else:
            player_data["Sort Rank"] = float('inf')
            player_data["Games Played"] = 0

        results.append(player_data)

    df_results = pd.DataFrame(results)
    if not df_results.empty:
        # Best average rank first; among equals, the player with more games.
        df_results = df_results.sort_values(
            by=["Sort Rank", "Games Played"],
            ascending=[True, False]
        )
        df_results = df_results.drop(["Sort Rank", "Games Played"], axis=1)

    return df_results

def get_combined_leaderboard(selected_games):
    """Return the combined leaderboard for the games flagged in ``selected_games``.

    This simply delegates to ``calculate_rank_and_completeness`` and hands
    back whatever it produces.
    """
    combined = calculate_rank_and_completeness(selected_games)
    return combined

#######################################################
# Update function for Gradio checkboxes and leaderboard
#######################################################

def update_leaderboard(mario_overall, mario_details,
                       sokoban_overall, sokoban_details,
                       _2048_overall, _2048_details,
                       candy_overall, candy_details,
                       tetris_overall, tetris_details,
                       tetris_plan_overall, tetris_plan_details):
    """Compute the table to show and the new state of all twelve checkboxes.

    Each game has an "overall" flag (include it in the combined leaderboard)
    and a "details" flag (show that game's own leaderboard instead).

    Returns:
        A 13-tuple: the DataFrame to display, followed by the
        (overall, details) pair for each game in the same order as the
        parameters.

        * If any details flag is set, the first such game in parameter order
          is shown on its own. Only that game's overall and details flags are
          returned True; every other flag is returned False, so the
          checkboxes always agree with the table being shown.
        * Otherwise the combined leaderboard of the games whose overall flag
          is set is shown; overall flags are returned unchanged and every
          details flag is returned False.
    """
    games = (
        ("Super Mario Bros", mario_overall, mario_details, get_mario_leaderboard),
        ("Sokoban", sokoban_overall, sokoban_details, get_sokoban_leaderboard),
        ("2048", _2048_overall, _2048_details, get_2048_leaderboard),
        ("Candy Crash", candy_overall, candy_details, get_candy_leaderboard),
        ("Tetris (complete)", tetris_overall, tetris_details, get_tetris_leaderboard),
        ("Tetris (planning only)", tetris_plan_overall, tetris_plan_details,
         get_tetris_planning_leaderboard),
    )

    # Priority order: the first game whose details flag is set wins.
    detailed = next((entry for entry in games if entry[2]), None)

    if detailed is not None:
        chosen, _, _, load = detailed
        flags = []
        for game, _, _, _ in games:
            is_chosen = game == chosen
            # Both boxes of the chosen game stay ticked; all others are
            # cleared, including details boxes of lower-priority games.
            flags += [is_chosen, is_chosen]
        return (load(), *flags)

    selected_games = {game: overall for game, overall, _, _ in games}
    flags = []
    for _, overall, _, _ in games:
        flags += [overall, False]
    return (get_combined_leaderboard(selected_games), *flags)

In [4]:
# Display the "Candy Crash" leaderboard as this cell's output.
get_candy_leaderboard()

Unnamed: 0,Player,Score Runs,Average Score,Steps,rank
0,o3-mini-2025-01-31(medium),90;109;120,106.33,25,1
1,o1-2024-12-17,96;114;83,97.67,25,2
2,deepseek-r1,62;108;105,91.67,25,3
3,gemini-2.5-pro-exp-03-25,50;36;68,51.33,25,4
4,claude-3-7-sonnet-20250219(thinking),36;46;24,35.33,25,5
5,gemini-2.0-flash-thinking-exp-1219,0;15;39,18.0,25,6
6,claude-3-5-sonnet-20241022,3;0;0,1.0,25,7
7,deepseek-v3,0;0;0,0.0,25,8


In [5]:
# Display the "2048" leaderboard as this cell's output.
get_2048_leaderboard()

Unnamed: 0,Player,Score,Steps,Time,rank
0,claude-3-7-sonnet-20250219(thinking),256,114,>200,1
1,o1-2024-12-17,256,116,>200,2
2,claude-3-7-sonnet-20250219,256,130,20:36,3
3,deepseek-v3,256,216,54.02,4
4,gemini-2.0-flash,128,111,18:43,5
5,gemini-2.0-flash-thinking-exp-1219,128,132,>100,6
6,gemini-2.5-pro-exp-03-25,128,138,169,7
7,claude-3-5-sonnet-20241022,64,92,9:2,8
8,gpt-4.5-preview-2025-02-27,34,34,8:25,9
9,gpt-4o-2024-11-20,16,21,1:17,10


In [6]:
# Display the "Super Mario Bros" leaderboard as this cell's output.
get_mario_leaderboard()

Unnamed: 0,Player,Score,Progress (current/total),Time (s),rank
0,claude-3-7-sonnet-20250219,710,1-1,64.2,1
1,gpt-4o-2024-11-20,560,1-1,58.6,2
2,gemini-2.0-flash,320,1-1,51.8,3
3,claude-3-5-haiku-20241022,140,1-1,76.4,4
4,gpt-4.5-preview-2025-02-27,160,1-1,62.8,5


In [7]:
# Display the "Sokoban" leaderboard as this cell's output.
get_sokoban_leaderboard()

Unnamed: 0,Player,Levels Cracked,Steps,rank,note
0,o3-mini-2025-01-31(medium),2; 3; 2,"[17,52,68];[24,58,78,91];[19,44,64]",1,
1,gemini-2.5-pro-exp-03-25,2;2;3,"[23, 46, 79]; [20,50,77]; [26,95,125,175]",2,
2,claude-3-5-sonnet-20241022(thinking),1; 2; 0,"[17,35];[15,40,43];[4]",3,
3,o1-2024-12-17,1; 1; 1,,4,
4,deepseek-r1,1; 0; 1,"[19,42];[13];[19,36]",5,stuck
5,o1-mini-2024-09-12,010,,6,
6,gemini-2.0-flash-thinking-exp-1219,0; 0; 0,[23]; [14]; [14],7,
7,gpt-4o-2024-11-20,0; 0; 0,[68];[105];[168],8,stuck in a loop
8,claude-3-5-sonnet-20241022,0; 0; 0,[21]; [30]; [51],9,stuck in a loop
9,deepseek-v3,0; 0; 0,[9]; [47]; [64],10,


In [8]:
# Display the "Tetris (complete)" leaderboard as this cell's output.
get_tetris_leaderboard()

Unnamed: 0,Player,Score,Steps,rank
0,claude-3-7-sonnet-20250219,95,27,1
1,claude-3-5-haiku-20241022,90,25,2
2,gemini-2.0-flash,82,23,3
3,gpt-4o-2024-11-20,54,19,4


In [9]:
# Display the "Tetris (planning only)" leaderboard as this cell's output.
get_tetris_planning_leaderboard()

Unnamed: 0,Player,Score,Steps,rank
0,claude-3-7-sonnet-20250219,110,29,1
1,claude-3-5-haiku-20241022,92,25,2
2,gemini-2.0-flash,87,24,3
3,gpt-4o-2024-11-20,56,20,4
