In [8]:
import os
import json
import glob

import numpy as np
import pandas as pd

import plotly.express as px

In [9]:
def check_int(x):
    """Report whether ``int(x)`` succeeds.

    Args:
        x: Any object; it is only passed to ``int`` and never modified.

    Returns:
        bool: ``True`` if ``int(x)`` returns without raising, ``False`` if it
        raises any ``Exception`` subclass.
    """
    try:
        int(x)
    except Exception:
        # Deliberately broad: any failure to convert counts as "not an int".
        return False
    return True

In [10]:
# Load every result file and tag its rows with the model identifier taken
# from the file name (the text after "_generator_", minus ".json").
outputs = []
# Sort the paths: glob returns them in arbitrary, OS-dependent order.
for file in sorted(glob.glob("./results/*.json")):
    # The context manager closes the handle; JSON is UTF-8 by specification,
    # so do not rely on the platform default encoding.
    with open(file, "r", encoding="utf-8") as handle:
        output = pd.DataFrame(json.load(handle))
    output["settings"] = file.split("_generator_")[-1].replace(".json", "")
    outputs.append(output)
result = pd.concat(outputs)

# Coerce the raw score to an int:
#   1. use it directly when int() accepts it;
#   2. otherwise try the text after the last ":" up to the first newline;
#   3. otherwise fall back to 1.
result["eval_score"] = result["eval_score"].apply(
    lambda x: int(x)
    if check_int(x)
    else int(x.split(":")[-1].strip().split("\n")[0])
    if check_int(x.split(":")[-1].strip().split("\n")[0])
    else 1
)

# Shift by 1 and divide by 4 (maps 1 -> 0 and 5 -> 1; presumably scores are
# on a 1-5 scale -- confirm against the evaluation prompt).
result["normalized_score"] = (result["eval_score"] - 1) / 4
average_scores = (
    result.groupby("settings")[["normalized_score", "eval_score"]].mean().round(3)
)
# One row per model, ascending by mean raw score.
my_res = average_scores.sort_values("eval_score").reset_index()

# Display names for the table; an identifier missing from this mapping
# raises KeyError below rather than silently producing a blank name.
model_names = {
    "german-gpt2": "GPT2",
    "leo-hessianai-13b-chat": "LeoLM-LaMA2-13b",
    "leo-mistral-hessianai-7b-chat": "LeoLM-Mistral-7b",
    "DiscoLM_German_7b_v1": "DiscoLM-Mistral-7b",
    "Llama3_DiscoLM_German_8b_v0.1_experimental": "LlaMA3-DiscoLM-8b",
}
my_res["settings"] = my_res["settings"].apply(lambda x: model_names[x])
my_res = my_res.rename(
    columns={
        "settings": "Models",
        "normalized_score": "Normalized Score",
        "eval_score": "Actual Score",
    }
)

# Show the table in the notebook, then emit LaTeX and markdown versions.
display(my_res)
print(my_res.to_latex(index=False, float_format="%.3f"))
print(my_res.to_markdown(index=False))

Unnamed: 0,Models,Normalized Score,Actual Score
0,GPT2,0.25,2.0
1,LeoLM-Mistral-7b,0.25,2.0
2,DiscoLM-Mistral-7b,0.375,2.5
3,LlaMA3-DiscoLM-8b,0.5,3.0
4,LeoLM-LaMA2-13b,0.625,3.5


\begin{tabular}{lrr}
\toprule
Models & Normalized Score & Actual Score \\
\midrule
GPT2 & 0.250 & 2.000 \\
LeoLM-Mistral-7b & 0.250 & 2.000 \\
DiscoLM-Mistral-7b & 0.375 & 2.500 \\
LlaMA3-DiscoLM-8b & 0.500 & 3.000 \\
LeoLM-LaMA2-13b & 0.625 & 3.500 \\
\bottomrule
\end{tabular}

| Models             |   Normalized Score |   Actual Score |
|:-------------------|-------------------:|---------------:|
| GPT2               |              0.25  |            2   |
| LeoLM-Mistral-7b   |              0.25  |            2   |
| DiscoLM-Mistral-7b |              0.375 |            2.5 |
| LlaMA3-DiscoLM-8b  |              0.5   |            3   |
| LeoLM-LaMA2-13b    |              0.625 |            3.5 |


In [12]:
# Reload the per-sample results (not the aggregated table) for plotting.
# Each row is tagged with the model identifier taken from the file name
# (the text after "_generator_", minus ".json").
outputs = []
# Sort the paths: glob returns them in arbitrary, OS-dependent order.
for file in sorted(glob.glob("./results/*.json")):
    # The context manager closes the handle; JSON is UTF-8 by specification,
    # so do not rely on the platform default encoding.
    with open(file, "r", encoding="utf-8") as handle:
        output = pd.DataFrame(json.load(handle))
    output["settings"] = file.split("_generator_")[-1].replace(".json", "")
    outputs.append(output)
result = pd.concat(outputs)

# Coerce the raw score to an int:
#   1. use it directly when int() accepts it;
#   2. otherwise try the text after the last ":" up to the first newline;
#   3. otherwise fall back to 1.
result["eval_score"] = result["eval_score"].apply(
    lambda x: int(x)
    if check_int(x)
    else int(x.split(":")[-1].strip().split("\n")[0])
    if check_int(x.split(":")[-1].strip().split("\n")[0])
    else 1
)

# Display names used in the figures; an identifier missing from this mapping
# raises KeyError below rather than silently producing a blank name.
model_names = {
    "german-gpt2": "German GPT2",
    "leo-hessianai-13b-chat": "LeoLM-LaMA2-13b",
    "leo-mistral-hessianai-7b-chat": "LeoLM-Mistral-7b",
    "DiscoLM_German_7b_v1": "DiscoLM-Mistral-7b",
    "Llama3_DiscoLM_German_8b_v0.1_experimental": "LlaMA3-DiscoLM-8b",
}
# Work on a copy so `result` keeps the raw identifiers and column names.
my_res = result.copy()
my_res["settings"] = my_res["settings"].apply(lambda x: model_names[x])
my_res = my_res.rename(
    columns={
        "settings": "Model",
        "eval_score": "score",
    }
)

In [14]:
# Box plot of the per-sample score distribution, one box per model.
# (The unused `px.data.tips()` sample-dataset load was removed.)
fig = px.box(
    my_res[["Model", "score"]],
    x="Model",
    y="score",
    color="Model",
    # Axis / legend titles shown instead of the raw column names.
    labels={
        "score": "LeoLM Evaluation Score",
        "Model": "Model Name",
    },
)
fig.show()
#fig.write_image("images/results_box_plot.png")

In [15]:
# Score histogram, one facet column per model.
df = my_res.loc[:, ["Model", "score"]]
fig = px.histogram(
    df,
    x="score",
    color="Model",
    facet_col="Model",
    nbins=5,
    # Titles shown instead of the raw column names.
    labels={
        "score": "LeoLM Evaluation Score",
        "count": "Count",
        "Model": "Model",
    },
)
# Horizontal legend placed below the plotting area, anchored on the right.
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": -0.4,
        "xanchor": "right",
        "x": 0.975,
    }
)
fig.show()
#fig.write_image("images/results_hist_plot.png")