In [11]:
import json
import os
import pandas as pd
import numpy as np
from IPython.display import display

# Lookup table keyed by a lowercase substring of the model name. Each entry
# carries the series' display name and the organisation that develops it.
# Insertion order is significant: the file loop uses the first key it finds.
model_info = {
    series_key: {"name": series_name, "developer": developer_name}
    for series_key, series_name, developer_name in (
        ("meta-llama", "Llama", "Meta"),
        ("gemma", "Gemma", "Google"),
        ("mistral", "Mistral", "Mistral AI"),
        ("qwen", "Qwen", "Alibaba Cloud"),
    )
}

# Per-model results: series/developer labels plus, when the matching file
# exists, "developer_match_rate" and/or "who_match_rate" (percentages).
results = {}

# Process each JSON file. Filenames are expected to look like
# "<model_name>_<query_type>.json"; each file holds a list of runs, and each
# run is a dict with a "generation" string.
directory = ""
# os.listdir("") raises FileNotFoundError, whereas os.path.join("", name) is
# relative to the current directory, so treat an empty path as ".".
for filename in os.listdir(directory or "."):
    if not filename.endswith(".json"):
        continue

    parts = filename.split("_")
    query_type = parts[-1].replace(".json", "")
    model_name = "_".join(parts[:-1])

    # First model_info key contained in the (lowercased) model name wins.
    model_series = next(
        (key for key in model_info if key in model_name.lower()), None
    )
    if model_series is None:
        # Unrelated JSON file or unknown model family: skip it instead of
        # failing with KeyError on model_info[None].
        print(f"Skipping {filename}: no known model series in file name")
        continue
    series_info = model_info[model_series]

    filepath = os.path.join(directory, filename)
    # Only hold the file open while parsing; JSON is UTF-8 by specification.
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    entry = results.setdefault(
        model_name,
        {
            "series": series_info["name"],
            "developer": series_info["developer"],
        },
    )

    # The string that must appear in a generation for it to count as a match.
    if query_type == "developer":
        expected = series_info["developer"].lower()
    elif query_type == "who":
        expected = series_info["name"].lower()
    else:
        # Other query types are not scored; the model stays registered so it
        # still shows up in the table.
        continue

    # Calculate match rate across all runs (case-insensitive substring test)
    total_runs = len(data)
    match_count = sum(
        1 for run in data if expected in run["generation"].lower()
    )

    # Store match rate as a percentage; an empty run list counts as 0
    if total_runs > 0:
        match_rate = (match_count / total_runs) * 100
    else:
        match_rate = 0

    entry[f"{query_type}_match_rate"] = match_rate

# Convert to DataFrame: one row per model, "N/A" where a query type was not run
data_rows = [
    {
        "Model": model,
        "Series": info["series"],
        "Developer": info["developer"],
        "Developer Match Rate (%)": info.get("developer_match_rate", "N/A"),
        "Who Match Rate (%)": info.get("who_match_rate", "N/A"),
    }
    for model, info in results.items()
]

# Name the columns explicitly: with no rows, pd.DataFrame([]) has no columns
# and the sort below would raise KeyError on "Series".
df = pd.DataFrame(
    data_rows,
    columns=[
        "Model",
        "Series",
        "Developer",
        "Developer Match Rate (%)",
        "Who Match Rate (%)",
    ],
)
df = df.sort_values(by=["Series", "Model"]).reset_index(drop=True)

# Render numeric rates with two decimals; leave the "N/A" placeholders as-is
for col in ["Developer Match Rate (%)", "Who Match Rate (%)"]:
    df[col] = df[col].apply(lambda x: f"{x:.2f}" if isinstance(x, (int, float)) else x)

print(f"Analyzed {len(df)} models for identity matches across all runs:")
display(df)

# Per-series averages of the two match-rate columns.
print("\nAverage match rates by model series:")
rate_columns = ["Developer Match Rate (%)", "Who Match Rate (%)"]

# Work on a copy so the displayed per-model table keeps its formatted strings.
series_df = df.copy()
for col in rate_columns:
    # Non-numeric entries such as "N/A" become NaN and are ignored by mean().
    series_df[col] = pd.to_numeric(series_df[col], errors='coerce')

series_summary = series_df.groupby("Series")[rate_columns].mean().reset_index()

# Format the averages with two decimals for display.
for col in rate_columns:
    series_summary[col] = series_summary[col].map("{:.2f}".format)

display(series_summary)

Analyzed 11 models for identity matches across all runs:


Unnamed: 0,Model,Series,Developer,Developer Match Rate (%),Who Match Rate (%)
0,gemma-2-2b-it,Gemma,Google,89.6,100.0
1,gemma-2-9b-it,Gemma,Google,100.0,100.0
2,gemma-2-9b-it-FP8,Gemma,Google,100.0,100.0
3,Meta-Llama-3-70B-Instruct,Llama,Meta,100.0,100.0
4,Meta-Llama-3-70B-Instruct-FP8,Llama,Meta,100.0,100.0
5,Meta-Llama-3-8B-Instruct,Llama,Meta,100.0,99.0
6,Mistral-7B-Instruct-v0.3,Mistral,Mistral AI,100.0,13.8
7,Mistral-7B-Instruct-v0.3-FP8,Mistral,Mistral AI,100.0,11.4
8,Qwen2-72B-Instruct,Qwen,Alibaba Cloud,100.0,96.2
9,Qwen2-72B-Instruct-FP8,Qwen,Alibaba Cloud,100.0,95.6



Average match rates by model series:


Unnamed: 0,Series,Developer Match Rate (%),Who Match Rate (%)
0,Gemma,96.53,100.0
1,Llama,100.0,99.67
2,Mistral,100.0,12.6
3,Qwen,100.0,97.27
