In [1]:
import pandas as pd
from pathlib import Path
from collections import defaultdict
import json


def get_scores(json_file: Path) -> dict[str, float]:
    results = json.loads(json_file.read_text())
    likelihood_diff = results["results"]["mimir_bias"]["likelihood_diff,none"]
    pct_majority_bias = results["results"]["mimir_bias"]["pct_stereotype,none"]
    return {
        "likelihood_diff": likelihood_diff,
        "majority_bias_percentage": pct_majority_bias,
    }


p = Path("mimir_results/mimir_bias/0-shot/mimir-project")

data = defaultdict(list)

for model_dir in p.iterdir():
    if "7" not in model_dir.name:
        continue
    res_file = model_dir / "results.json"
    assert res_file.exists()

    scores = get_scores(res_file)

    data["model"].append(model_dir.name)
    data["likelihood_diff"].append(scores["likelihood_diff"])
    data["majority_bias_percentage"].append(scores["majority_bias_percentage"])


df = pd.DataFrame(data)

# Sort models according to likelihood difference 
Score is the absolute value of difference in likelihood between the majority bias sentence and the minority bias sentence 

Lower is better

In [2]:
df.sort_values("likelihood_diff")[["model", "likelihood_diff"]]

Unnamed: 0,model,likelihood_diff
8,mimir-mistral-7b-extended-scratch,5.016742
17,mimir-7b-rightholders,5.112532
16,mimir-mistral-7b-extended-scratch-instruct,5.16531
11,mimir-mistral-7b-base,5.276319
9,mimir-mistral-7b-base-instruct,5.298045
4,mimir-7b-newspapers,5.356688
13,mimir-mistral-7b-base-scratch,5.363038
0,mimir-7b-untranslated-withnewspapers,5.49097
12,mimir-7b-nonfiction,5.509955
2,mimir-mistral-7b-base-scratch-instruct,5.510942


# Sort models according to majority bias percentage 
Score is how often the model finds the majority bias sentence more likely than the minority bias sentence

closer to 0.5 is better (in this case, lower is better because all values are >0.5)

In [3]:
df.sort_values("majority_bias_percentage")[["model", "majority_bias_percentage"]]

Unnamed: 0,model,majority_bias_percentage
14,mimir-mistral-7b-extended-instruct,0.555152
6,mimir-mistral-7b-extended,0.55596
10,Mistral-7B-v0.1,0.55596
7,mimir-7b-untranslated,0.62101
2,mimir-mistral-7b-base-scratch-instruct,0.629091
5,mimir-7b-translated,0.629495
1,mimir-7b-factual,0.633535
0,mimir-7b-untranslated-withnewspapers,0.639596
13,mimir-mistral-7b-base-scratch,0.639596
3,mimir-7b-books,0.64


# Plot model performance by score type

In [None]:
# Plot model performance by score
from dash import Dash, dcc, html, Input, Output
import plotly.express as px

score_df = pd.melt(
    df,
    id_vars="model",
    value_vars=["likelihood_diff", "majority_bias_percentage"],
    var_name="score",
)

model_color_map = {
    model_name: color
    for model_name, color in zip(score_df.model.unique(), px.colors.qualitative.Dark24)
}

app = Dash(__name__)

app.layout = html.Div(
    [
        dcc.Dropdown(
            id="score",
            options=score_df.score.unique(),
            value="majority_bias_percentage",
        ),
        dcc.Graph(
            figure=px.bar(score_df, x="model", y="value", color="model"),
            id="score_graph",
        ),
    ]
)


@app.callback(
    Output("score_graph", "figure"),
    [
        Input("score", "value"),
    ],
)
def update_score_dropdown(score: str):
    fig = px.bar(
        score_df[score_df.score == score].sort_values("value"),
        x="model",
        y="value",
        color="model",
        color_discrete_map=model_color_map,
    )
    fig.update_layout(
        title=f"Model performance for score type: {score}",
        title_x=0.5,  # Center the title
    )
    return fig


app.run()