# Chargement du jeu de données

In [1]:
import os
from getpass import getpass

# Ask once for the directory shared by the `datasets` cache and the Hub cache,
# then export it under both environment variables.
cache_dir = input("Indicate path to all Hugging Face caches:")
os.environ.update({"HF_DATASETS_CACHE": cache_dir, "HF_HUB_CACHE": cache_dir})

# `getpass` reads the token without echoing it, so it never shows up in the cell output.
os.environ["HF_TOKEN"] = getpass("Enter your HuggingFace token:")

In [2]:
from rank_comparia.utils import load_comparia

# Load the reactions dataset by its Hugging Face repository id. The log printed below
# shows that this call also resolves `ministere-culture/comparia-conversations`, and that
# both datasets were served from the local cache because offline mode was enabled.
reactions = load_comparia("ministere-culture/comparia-reactions")

Using the latest cached version of the dataset since ministere-culture/comparia-reactions couldn't be found on the Hugging Face Hub (offline mode is enabled).
Found the latest cached dataset configuration 'default' at /home/jupyterhub-users/shared/projet_comparia/huggingface_hub/ministere-culture___comparia-reactions/default/0.0.0/92a324c10228176065909b52bbbaa16430e64c5a (last modified on Wed Jun  4 17:40:33 2025).
Using the latest cached version of the dataset since ministere-culture/comparia-conversations couldn't be found on the Hugging Face Hub (offline mode is enabled).
Found the latest cached dataset configuration 'default' at /home/jupyterhub-users/shared/projet_comparia/huggingface_hub/ministere-culture___comparia-conversations/default/0.0.0/dc40af6af1c14e68bf39d55f6e1573d2d6582f19 (last modified on Wed Jun  4 17:40:30 2025).


## Calcul des scores

On calcule des scores comme dans le notebook `rankers.ipynb`.

In [3]:
from rank_comparia.data_transformation import get_matches_with_score

# Turn the reactions into a table of matches. Judging by the preview below, each row
# pairs two models (model_a_name, model_b_name) with a conversation_pair_id and one
# integer score per side (score_a, score_b).
matches = get_matches_with_score(reactions)

In [4]:
# Preview the first five matches to check the columns before building `Match` objects.
matches.head(5)

model_a_name,model_b_name,conversation_pair_id,score_a,score_b
str,str,str,i64,i64
"""deepseek-v3-chat""","""c4ai-command-r-08-2024""","""ec6b9b5e01394dc4ad13e57129822c…",1,-1
"""gemma-3-4b""","""mistral-small-3.1-24b""","""57ec39be908f4cd2ac3fa2dcb11d60…",0,2
"""gemini-2.0-flash-001""","""gpt-4.1-mini""","""4bb4df04099c4d6588c311d316e8a3…",0,0
"""gemma-2-9b-it""","""mixtral-8x22b-instruct-v0.1""","""133af454c27b4b5997a4f0dc3bec5d…",-2,-2
"""gemma-2-9b-it""","""gemini-1.5-pro-002""","""aa2c4ba80b1747a79c79403265702c…",-2,0


In [5]:
from rank_comparia.elo import ELORanker
from rank_comparia.ranker import Match, MatchScore
import random


def compute_match_score(score_a: int, score_b: int) -> MatchScore:
    """Decide the outcome of a match from the two per-model scores.

    Args:
        score_a: Score obtained by model A.
        score_b: Score obtained by model B.

    Returns:
        ``MatchScore.B`` when B scored strictly higher, ``MatchScore.A`` when A
        scored strictly higher, and ``MatchScore.Draw`` when both scores are equal.
    """
    if score_b > score_a:
        return MatchScore.B
    if score_b < score_a:
        return MatchScore.A
    return MatchScore.Draw


def get_shuffled_results(matches: list[Match], model_names: list[str], seed: int = 0):
    """Rank the models with ELO after replaying the matches in a seeded random order.

    Args:
        matches: Matches to replay. The list itself is left untouched; a shuffled
            copy is used.
        model_names: Names of the players registered with the ranker before scoring.
        seed: Seed of the shuffle; a given seed always yields the same order.

    Returns:
        The ``players`` attribute of the freshly built ``ELORanker`` (the notebook
        reads its keys as model names and its values as ELO scores).
    """
    # A dedicated generator yields the same permutation as `random.seed(seed)` followed
    # by `random.sample`, but leaves the global RNG state alone, so calling this helper
    # does not silently change later random draws made elsewhere in the notebook.
    rng = random.Random(seed)
    ranker_shuffle = ELORanker(K=40)
    matches_shuffle = rng.sample(matches, k=len(matches))
    ranker_shuffle.add_players(model_names)
    ranker_shuffle.compute_scores(matches=matches_shuffle)
    return ranker_shuffle.players

In [6]:
# Every model that appears on either side of a match takes part in the ranking.
model_names = set(matches["model_a_name"].unique()).union(matches["model_b_name"].unique())

# NOTE: `matches` is rebound here from the score table to a list of `Match` objects, so
# re-running this cell requires re-running the cell that builds the table first.
matches = [
    Match(
        row["model_a_name"],
        row["model_b_name"],
        compute_match_score(row["score_a"], row["score_b"]),
    )
    for row in matches.to_dicts()
]

In [7]:
ranker = ELORanker(K=40)

# Replay the matches in a fixed pseudo-random order so the ratings are reproducible.
random.seed(1337)
# Keep the shuffled order under its own name: rebinding `matches` would make a second
# run of this cell shuffle an already shuffled list, replay the matches in a different
# order and therefore display different ratings.
shuffled_matches = random.sample(matches, k=len(matches))
ranker.add_players(model_names)  # type: ignore
ranker.compute_scores(matches=shuffled_matches)
ranker.get_scores()

{'deepseek-v3-chat': 1179.3292449408536,
 'gemini-2.0-flash-exp': 1173.7789956398838,
 'claude-3-7-sonnet': 1156.210708428794,
 'gemini-2.0-flash-001': 1155.8158398416097,
 'deepseek-r1': 1148.460911319317,
 'gemma-3-27b': 1146.8920521943444,
 'gpt-4.1-mini': 1141.5298136214453,
 'command-a': 1112.2218084578326,
 'mistral-small-3.1-24b': 1095.9679168980472,
 'grok-3-mini-beta': 1086.7839826870804,
 'deepseek-v3-0324': 1082.5799753295535,
 'gemini-1.5-pro-001': 1068.6248683256833,
 'llama-3.1-nemotron-70b-instruct': 1066.0280109726627,
 'gemma-2-9b-it': 1059.7734269172583,
 'gemma-3-4b': 1059.6343222468988,
 'llama-3.1-70b': 1059.6289588284665,
 'llama-4-scout': 1056.3085689852485,
 'jamba-1.5-large': 1038.961527895229,
 'gemini-1.5-pro-002': 1035.6855914797313,
 'mistral-large-2411': 1029.635311335044,
 'gemma-3-12b': 1025.0725706277951,
 'gpt-4.1-nano': 1012.7110939136552,
 'claude-3-5-sonnet-v2': 1010.6840482473359,
 'mistral-small-24b-instruct-2501': 996.3182269199416,
 'gpt-4o-mini

## Calcul d'un score de frugalité

Le score de frugalité est calculé à partir de données de consommation présentes dans le jeu de données `comparia-conversations`.

Nombre de matchs par modèle :

In [8]:
import polars as pl

# One (model_name, number of played matches) row per entry of `ranker.played_matches`.
number_by_model = pl.DataFrame(
    iter(ranker.played_matches.items()),
    schema=["model_name", "len"],
)

In [9]:
from rank_comparia.frugality import calculate_frugality_score, draw_chart

# Combine the reactions data with the per-model match counts. The table displayed below
# has the columns model_name, conso_all_conv, len and mean_conso, where mean_conso
# matches conso_all_conv / len on the rows shown.
# NOTE(review): `mean=True` presumably is what adds `mean_conso` — confirm in
# `rank_comparia.frugality.calculate_frugality_score`.
frugal_scores = calculate_frugality_score(reactions, number_by_model, mean=True)

frugal_scores

model_name,conso_all_conv,len,mean_conso
str,f64,i64,f64
"""mistral-large-2411""",180.436097,1849,0.097586
"""gemini-1.5-pro-002""",440.318973,1672,0.263349
"""qwen2.5-coder-32b-instruct""",34.16509,1641,0.02082
"""gemma-2-9b-it""",7.879733,1520,0.005184
"""deepseek-v3-chat""",445.613091,1866,0.238807
…,…,…,…
"""o3-mini""",57.033604,437,0.130512
"""jamba-1.5-large""",27.637821,77,0.358933
"""gemma-3-4b""",3.680529,459,0.008019
"""deepseek-v3-0324""",50.856222,296,0.171812


In [10]:
# Tabulate the rating of every player, then order the table from best to worst.
elo_table = pl.DataFrame(
    {"model_name": ranker.players.keys(), "elo_score": ranker.players.values()},
    strict=False,
)
elo_scores = elo_table.sort("elo_score", descending=True)

elo_scores

model_name,elo_score
str,f64
"""deepseek-v3-chat""",1179.329245
"""gemini-2.0-flash-exp""",1173.778996
"""claude-3-7-sonnet""",1156.210708
"""gemini-2.0-flash-001""",1155.81584
"""deepseek-r1""",1148.460911
…,…
"""lfm-40b""",874.994816
"""mixtral-8x7b-instruct-v0.1""",859.59859
"""mistral-nemo-2407""",842.112879
"""qwen2.5-7b-instruct""",840.798155


In [11]:
from pathlib import Path

# The model metadata file is read from the `data` directory located in the parent of
# the current working directory.
models_data_path = Path(".").resolve().parent / "data" / "models_data.json"
info_model = pl.read_json(source=models_data_path)

In [12]:
# Attach the ELO ratings, then the frugality figures, to the model metadata. Both joins
# are inner joins, so only models present in all three tables are kept.
models_with_elo = info_model.join(elo_scores, on="model_name", how="inner")
final_df = models_with_elo.join(frugal_scores, on="model_name", how="inner")

final_df

name,model_name,organization,license,elo_score,conso_all_conv,len,mean_conso
str,str,str,str,f64,f64,i64,f64
"""Mistral-Large-2411""","""mistral-large-2411""","""Mistral""","""Mistral Research""",1029.635311,180.436097,1849,0.097586
"""Gemini 1.5 pro 002""","""gemini-1.5-pro-002""","""Google""","""Proprietary""",1035.685591,440.318973,1672,0.263349
"""Qwen2.5-Coder-32B-Instruct""","""qwen2.5-coder-32b-instruct""","""Alibaba""","""Apache 2.0""",914.339583,34.16509,1641,0.02082
"""Gemma 2 9B""","""gemma-2-9b-it""","""Google""","""Gemma license""",1059.773427,7.879733,1520,0.005184
"""DeepSeek-V3 Chat""","""deepseek-v3-chat""","""DeepSeek""","""DeepSeek""",1179.329245,445.613091,1866,0.238807
…,…,…,…,…,…,…,…
"""Mixtral 8x22B Instruct""","""mixtral-8x22b-instruct-v0.1""","""Mistral AI""","""Apache 2.0""",910.168088,50.965108,1756,0.029023
"""o3-mini""","""o3-mini""","""OpenAI""","""Proprietary""",943.530037,57.033604,437,0.130512
"""Jamba Large""","""jamba-1.5-large""","""AI21 Labs""","""Jamba Open Model License""",1038.961528,27.637821,77,0.358933
"""Gemma 3 4B""","""gemma-3-4b""","""Google""","""Gemma license""",1059.634322,3.680529,459,0.008019


In [13]:
# Display the chart in the notebook, with an empty title.
# NOTE(review): `log=False` / `mean=False` presumably select a linear axis and the total
# (not per-match) consumption — confirm in `rank_comparia.frugality.draw_chart`.
draw_chart(final_df, title="", log=False, mean=False)

In [14]:
from pathlib import Path
from rank_comparia.utils import save_data, save_chart

# Both artefacts are written to the same directory, relative to the working directory.
output_dir = Path("../data")

save_data(data=final_df, title="frugality", save_path=output_dir)

# NOTE(review): the saved chart uses `log=True` whereas the chart displayed earlier in
# the notebook uses `log=False` — confirm that this difference is intended.
save_chart(final_df, "frugality representation", log=True, save_path=output_dir)