In [None]:
!pip install gradio_client -q

## LMSYS Arena

In [None]:
from gradio_client import Client

# Open a client for the app that serves the Chatbot Arena leaderboard.
client = Client("lmarena-ai/chatbot-arena-leaderboard")

# Request the "Overall" category with no filters; later cells parse `result`.
result = client.predict(category="Overall", filters=[], api_name="/update_leaderboard_and_plots")

In [None]:
from typing import Dict, List, Union, TypedDict
import re
from dataclasses import dataclass

def extract_params(model_name: str) -> Union[str, None]:
    """Extract the parameter count advertised in a model name.

    Finds the first whole number immediately followed by an upper-case ``B``
    (e.g. ``"405B"``) and returns its digits as a string.

    Digits that form the fractional part of a decimal are not treated as a
    size: ``"1.7B"`` yields ``None`` rather than ``"7"``. Returning ``None``
    keeps the result safe to pass to ``int()`` and avoids recording a wrong
    size. Names such as ``"8x7B"`` still yield ``"7"``.

    Args:
        model_name: Display name of the model.

    Returns:
        The digit string preceding ``B``, or ``None`` when the name contains
        no whole-number size.
    """
    # The lookbehind rejects a digit run that starts right after "." or after
    # another digit, so the tail of "1.7B" / "1.75B" can never match.
    match = re.search(r'(?<![\d.])(\d+)B', model_name)
    return match.group(1) if match else None

@dataclass
class ModelInfo:
    """One leaderboard row, as built by ``LeaderboardParser._get_data``.

    Each field is filled from a fixed position of a raw data row (noted on
    the field). The annotations are not enforced at runtime, so values keep
    whatever type the API returned.
    """
    rank_ub: int  # row[0]; presumably the upper-bound rank (TODO confirm)
    rank_style: int  # row[1]; presumably the style-controlled rank (TODO confirm)
    model_name: str  # link text parsed from the HTML in row[2], or the raw cell when it has no link
    model_url: str  # href parsed from row[2]; "" when the cell has no link
    arena_score: int  # row[3]
    confidence_interval: str  # row[4]; e.g. "+4/-5" in the sample output
    votes: int  # row[5]
    organization: str  # row[6]
    license: str  # row[7]
    knowledge_cutoff: str  # row[8]; e.g. "2023/12" or "Unknown" in the sample output

class LeaderboardParser:
    """Parse a leaderboard API response into ``ModelInfo`` records.

    The constructor reads the first element of the response tuple, which is
    indexed as ``['headers']`` and ``['value']['data']``. Parsing happens
    eagerly; the results are exposed as ``self.headers`` and ``self.data``.
    """

    def __init__(self, response_data: tuple):
        # Only the first element of the response tuple is used.
        self.raw_data = response_data[0]
        self.headers = self._get_headers()
        self.data = self._get_data()

    def _get_headers(self) -> List[str]:
        """Return the ``'headers'`` entry of the raw payload."""
        return self.raw_data['headers']

    def _extract_model_info(self, html_str: str) -> tuple[str, str]:
        """Split an HTML anchor into ``(name, url)``.

        When ``html_str`` contains no ``<a ... href="...">...</a>`` element,
        the input is returned unchanged as the name with an empty URL.
        """
        anchor = re.search(r'<a.*?href="(.*?)".*?>(.*?)</a>', html_str)
        if anchor is None:
            return html_str, ""
        # Group 1 is the href, group 2 is the link text.
        return anchor.group(2), anchor.group(1)

    def _row_to_model(self, row) -> ModelInfo:
        """Build a ``ModelInfo`` from one raw row holding at least nine cells."""
        name, url = self._extract_model_info(str(row[2]))
        return ModelInfo(
            rank_ub=row[0],
            rank_style=row[1],
            model_name=name,
            model_url=url,
            arena_score=row[3],
            confidence_interval=row[4],
            votes=row[5],
            organization=row[6],
            license=row[7],
            knowledge_cutoff=row[8],
        )

    def _get_data(self) -> List[ModelInfo]:
        """Convert every complete raw row into a ``ModelInfo``.

        Rows that are empty or have fewer than nine cells are skipped.
        """
        rows = self.raw_data['value']['data']
        return [
            self._row_to_model(row)
            for row in rows
            if row and len(row) >= 9
        ]

    def get_top_models(self, limit: int = 5) -> List[ModelInfo]:
        """Return the first ``limit`` models ordered by descending arena score."""
        ranked = list(self.data)
        ranked.sort(key=lambda model: model.arena_score, reverse=True)
        return ranked[:limit]

    def get_models_by_organization(self, org_name: str) -> List[ModelInfo]:
        """Return the models whose organization equals ``org_name``, ignoring case."""
        matches = []
        for model in self.data:
            if model.organization.lower() == org_name.lower():
                matches.append(model)
        return matches

    def to_dict(self) -> List[Dict]:
        """Return the parsed models as a list of plain dictionaries."""
        # Keys in the order they appear in each output dictionary.
        keys = (
            'rank_ub',
            'rank_style',
            'model_name',
            'model_url',
            'arena_score',
            'confidence_interval',
            'votes',
            'organization',
            'license',
            'knowledge_cutoff',
        )
        return [
            {key: getattr(model, key) for key in keys}
            for model in self.data
        ]

def parse_leaderboard(response: tuple) -> List[Dict]:
    """Convert a raw leaderboard API response into plain dictionaries.

    Args:
        response: Tuple holding the API response; it is handed unchanged to
            ``LeaderboardParser``.

    Returns:
        One dictionary per parsed model, as produced by
        ``LeaderboardParser.to_dict``.
    """
    return LeaderboardParser(response).to_dict()

In [None]:
import pandas as pd
import re

parsed_data = parse_leaderboard(result)

# Tabulate the parsed rows; the URL column is not needed for the analysis.
leaderboard = pd.DataFrame(parsed_data).drop(columns='model_url')

# Derive `params` from the model name: digit strings become ints,
# names without a recognisable size are left as missing values.
leaderboard['params'] = (
    leaderboard['model_name']
    .apply(extract_params)
    .apply(lambda size: size if size is None else int(size))
)

In [None]:
# Top 20 non-proprietary models by style-controlled rank.
# The leaderboard data contains the misspelling "Propretary" for at least one
# model, so both spellings are excluded; otherwise that row slips through.
(
    leaderboard
        .query('license not in ["Proprietary", "Propretary"]') # & ~params.isna()
        .sort_values('rank_style')
        .head(20)
)

Unnamed: 0,rank_ub,rank_style,model_name,arena_score,confidence_interval,votes,organization,license,knowledge_cutoff,params
20,15,11,Meta-Llama-3.1-405B-Instruct-bf16,1266,+4/-5,14535,Meta,Llama 3.1 Community,2023/12,405.0
21,16,12,Meta-Llama-3.1-405B-Instruct-fp8,1267,+3/-3,59317,Meta,Llama 3.1 Community,2023/12,405.0
37,32,18,Claude 3.5 Haiku (20241022),1243,+9/-9,3548,Anthropic,Propretary,Unknown,
13,12,20,Athene-v2-Chat-72B,1278,+4/-5,10567,NexusFlow,NexusFlow,Unknown,72.0
26,18,23,Qwen-Max-0919,1263,+4/-5,17584,Alibaba,Qwen,Unknown,
32,30,23,Mistral-Large-2407,1251,+4/-3,48371,Mistral,Mistral Research,2024/7,
30,26,24,Mistral-Large-2411,1249,+11/-9,3331,Mistral,MRL,Unknown,
28,24,29,Deepseek-v2.5,1258,+4/-4,26518,DeepSeek,DeepSeek,Unknown,
29,25,31,Qwen2.5-72B-Instruct,1258,+3/-3,33591,Alibaba,Qwen,2024/9,72.0
33,32,31,Athene-70B,1250,+3/-3,20649,NexusFlow,CC-BY-NC-4.0,2024/7,70.0
