In [1]:
# Queries for the open case are written in Norwegian. This table gives the
# English rendering of each one; keys are used for dictionary lookup, so they
# must match the Norwegian query text exactly.
query_translations = dict(
    [
        (
            "personer med opphold og tilknytning til adressen (åstedet) som eier, leietaker, besøkende osv",
            "persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",
        ),
        (
            "hvordan døde fornærmede (hva er dødsårsaken?)",
            "how did the victim die (what is the cause of death?)",
        ),
        (
            "detaljer om drapsvåpenet (hva er drapsvåpenet?)",
            "details about the murder weapon (what is the murder weapon?)",
        ),
        (
            "fornærmedes (avdøde) involvering i konflikt eller krangel forut for døden",
            "the victim's involvement in conflict or argument prior to death",
        ),
    ]
)

In [2]:
import os

import pandas as pd

from stats_util import case_metrics, load_truth

# Notebook-wide pandas display settings: no row truncation, two decimals.
pd.options.display.max_rows = None
pd.options.display.precision = 2

# Investigation evaluated throughout the notebook, and its ground-truth labels.
case = "open-case"
truths = load_truth(case)

# Echo every query on one line with its truth values on the next.
for query, values in truths.items():
    print(query, values, sep="\n")


detaljer om drapsvåpenet (hva er drapsvåpenet?)
[27, 63, 79, 84, 122]
fornærmedes (avdøde) invovlering i konflikt/krangel
[77, 91, 101, 102, 116]
hvordan døde avdøde (dødsårsaken)
[5, 16, 27, 37, 47, 61, 62, 68, 69, 78, 92, 100, 112, 126, 127, 130, 133]
personer med tilgang (eier, leietaker, besøkende osv) til åstedet (boligen, adressen)
[8, 16, 25, 27, 70, 71, 72, 76, 80, 81, 84, 85, 89, 91, 101, 116, 131]


In [None]:
# Threshold handed to case_metrics as `min_score`.
# Scale: 0: irrelevant, 1: somewhat, 2: relevant, 3: extremely relevant
# (Previously assigned 1 and then silently overwritten by a `for MIN_SCORE in [2]`
# loop; 2 is the value that was actually used.)
MIN_SCORE = 2

# Cut-offs passed to case_metrics as `top_k`, evaluated in this order.
TOP_K_VALUES = [5, 10, -1]

# Output folder name under ../src/output -> label used in the combined table.
MODEL_LABELS = {
    "gemma9b": "GEMMA9b",
    "gemma27b": "GEMMA27b",
    "llama3.1-8b": "LLAMA3.1-8b",
    "phi-medium": "PHI-medium",
}

# Placeholders for a LaTeX export; nothing in this cell fills them. Kept so
# that code relying on these names being defined keeps working.
LATEX_TABLE_HEADER = None
LATEX_TABLE_BODY = []
LATEX_TABLE_END = None


def collect_query_metrics(root_folder, min_score, top_ks):
    """Collect per-query metrics from ``case_metrics`` for every cut-off in ``top_ks``.

    Args:
        root_folder: Folder passed to ``case_metrics`` as ``root_folder``.
        min_score: Value passed to ``case_metrics`` as ``min_score``.
        top_ks: Iterable of values passed, one call each, as ``top_k``.

    Returns:
        dict mapping the English query text (or the original text when no
        translation is registered) to a flat dict containing ``"N_k"`` and every
        entry of that query's ``"metrics"`` mapping. Entries that share a name
        are overwritten by later cut-offs / source folders.
    """
    metrics_for_query = {}
    for k in top_ks:
        all_metrics = case_metrics(
            investigation=case,
            min_score=min_score,
            top_k=k,
            root_folder=root_folder,
            verbose=True,
        )
        for results in all_metrics.values():
            for query, query_results in sorted(results.items(), key=lambda item: item[0]):
                # Fall back to the untranslated query instead of raising KeyError.
                row = metrics_for_query.setdefault(query_translations.get(query, query), {})
                row["N_k"] = query_results["num_retrieved"]
                row.update(query_results["metrics"])
    return metrics_for_query


def combine_model_tables(tables, labels):
    """Place per-model tables side by side, one top-level column group per label.

    Args:
        tables: List of DataFrames, one per evaluated model.
        labels: List of labels, ``labels[i]`` naming ``tables[i]``.

    Returns:
        The column-wise concatenation keyed by ``labels``; an empty DataFrame
        when ``tables`` is empty.

    Raises:
        ValueError: If ``tables`` and ``labels`` differ in length, because the
            tables could then end up under the wrong model name.
    """
    if len(tables) != len(labels):
        raise ValueError(
            f"got {len(tables)} tables but {len(labels)} labels; they must pair up one to one"
        )
    if not tables:
        return pd.DataFrame()
    return pd.concat(tables, axis=1, keys=labels)


ALL_DFS = []
# Labels of the models that were actually evaluated, parallel to ALL_DFS.
# A skipped model must not leave its label behind for the next table.
ALL_LABELS = []
for MODEL_NAME, model_label in MODEL_LABELS.items():
    print(f"Model: {MODEL_NAME}")
    model_folder = f"../src/output/{MODEL_NAME}"
    if not os.path.exists(model_folder):
        print(f"  no output folder at {model_folder}; skipping")
        continue

    metrics_for_query = collect_query_metrics(model_folder, MIN_SCORE, TOP_K_VALUES)
    if not metrics_for_query:
        print("  no metrics returned; skipping")
        continue

    results_df = pd.DataFrame(metrics_for_query).T
    # Wrap names in $...$ for LaTeX math mode; F1 is typeset with a subscript.
    results_df.columns = [f"${c}$".replace("$F1$", "$F_1$") for c in results_df.columns]
    # -1 is blanked out for display (presumably a "not available" marker; confirm in stats_util).
    results_df = results_df.replace(-1, "").sort_index()

    # Average F1 over the numeric entries only; blanked cells count as missing
    # rather than breaking the mean.
    avg_f1 = pd.to_numeric(results_df["$F_1$"], errors="coerce").mean()
    results_df = results_df[["$P$", "$R$", "$F_1$"]]
    print(f"Average F1: {avg_f1:.2f}")
    display(results_df)
    ALL_DFS.append(results_df)
    ALL_LABELS.append(model_label)

if not ALL_DFS:
    print("No model output was found; the combined table is empty.")

# Combine the tables of all evaluated models, then show one row per
# (model, metric) pair and one column per query.
model_df = combine_model_tables(ALL_DFS, ALL_LABELS)
model_df.T

Model: gemma9b
Average F1: 0.40


Unnamed: 0,$P$,$R$,$F_1$
details about the murder weapon (what is the murder weapon?),0.43,0.6,0.5
how did the victim die (what is the cause of death?),0.5,0.29,0.37
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",0.8,0.24,0.36
the victim's involvement in conflict or argument prior to death,0.33,0.4,0.36


Model: gemma27b
Average F1: 0.47


Unnamed: 0,$P$,$R$,$F_1$
details about the murder weapon (what is the murder weapon?),1.0,0.4,0.57
how did the victim die (what is the cause of death?),0.44,0.24,0.31
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",0.88,0.41,0.56
the victim's involvement in conflict or argument prior to death,0.5,0.4,0.44


Model: llama3.1-8b
Average F1: 0.41


Unnamed: 0,$P$,$R$,$F_1$
details about the murder weapon (what is the murder weapon?),0.67,0.4,0.5
how did the victim die (what is the cause of death?),0.5,0.29,0.37
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",0.62,0.47,0.53
the victim's involvement in conflict or argument prior to death,0.17,0.4,0.24


Model: phi-medium
Average F1: 0.20


Unnamed: 0,$P$,$R$,$F_1$
details about the murder weapon (what is the murder weapon?),0.12,0.2,0.15
how did the victim die (what is the cause of death?),0.38,0.18,0.24
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",0.71,0.29,0.42
the victim's involvement in conflict or argument prior to death,0.0,0.0,0.0


Unnamed: 0,Unnamed: 1,details about the murder weapon (what is the murder weapon?),how did the victim die (what is the cause of death?),"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",the victim's involvement in conflict or argument prior to death
GEMMA9b,$P$,0.43,0.5,0.8,0.33
GEMMA9b,$R$,0.6,0.29,0.24,0.4
GEMMA9b,$F_1$,0.5,0.37,0.36,0.36
GEMMA27b,$P$,1.0,0.44,0.88,0.5
GEMMA27b,$R$,0.4,0.24,0.41,0.4
GEMMA27b,$F_1$,0.57,0.31,0.56,0.44
LLAMA3.1-8b,$P$,0.67,0.5,0.62,0.17
LLAMA3.1-8b,$R$,0.4,0.29,0.47,0.4
LLAMA3.1-8b,$F_1$,0.5,0.37,0.53,0.24
PHI-medium,$P$,0.12,0.38,0.71,0.0


# Mean average precision

In [42]:
# Cut-offs passed to case_metrics as `top_k`; -1 is labelled plain "MAP" below.
K_vals = [1, 3, 5, 8, 12, -1]
# Thresholds passed to case_metrics as `min_score`, one table row each.
MIN_SCORES = range(4)
MAP_ROOT_FOLDER = "../src/output/gemma9b"


def mean_precision(all_metrics):
    """Average every metric whose name starts with "P" across all queries.

    Args:
        all_metrics: Mapping of source folder -> {query: result}, where each
            result has a ``"metrics"`` mapping of metric name -> value (the
            structure returned by ``case_metrics``).

    Returns:
        The arithmetic mean of the selected values, or NaN when there are none
        (instead of raising ZeroDivisionError).

    NOTE(review): selection is by name prefix only, so any other metric whose
    name starts with "P" would be averaged in as well. Confirm against
    stats_util that this matches the intended definition of MAP.
    """
    precisions = [
        value
        for results in all_metrics.values()
        for _, query_results in sorted(results.items(), key=lambda item: item[0])
        for metric, value in query_results["metrics"].items()
        if metric.startswith("P")
    ]
    if not precisions:
        return float("nan")
    return sum(precisions) / len(precisions)


# {min_score: {k: mean precision}}. The loop variable is deliberately not named
# MIN_SCORE so the notebook-level constant of that name is left alone.
all_MAP = {}
for min_score in MIN_SCORES:
    all_MAP[min_score] = {
        k: mean_precision(
            case_metrics(
                investigation=case,
                min_score=min_score,
                top_k=k,
                replace_above_k=False,
                root_folder=MAP_ROOT_FOLDER,
                verbose=True,
            )
        )
        for k in K_vals
    }

# Rows: one per min_score; columns: one per cut-off, forced into K_vals order
# and labelled from the actual k so a label can never land on the wrong column.
results_df = pd.DataFrame(all_MAP).T.reindex(columns=K_vals)
results_df.columns = ["MAP" if k == -1 else f"MAP@{k}" for k in results_df.columns]
results_df = results_df.reset_index(drop=True)
display(results_df)
print(results_df.to_latex(float_format="%.2f"))

No answers found for file: 20241208-204108_detaljer-om-drapsvåpenet-hva-er-drapsvåpenet.jsonl
No answers found for file: 20241208-204216_fornærmedes-avdøde-involvering-i-konflikt-eller-krangel-forut-for-døden.jsonl
No answers found for file: 20241208-204108_detaljer-om-drapsvåpenet-hva-er-drapsvåpenet.jsonl
No answers found for file: 20241208-204216_fornærmedes-avdøde-involvering-i-konflikt-eller-krangel-forut-for-døden.jsonl
No answers found for file: 20241208-204108_detaljer-om-drapsvåpenet-hva-er-drapsvåpenet.jsonl
No answers found for file: 20241208-204216_fornærmedes-avdøde-involvering-i-konflikt-eller-krangel-forut-for-døden.jsonl
No answers found for file: 20241208-204108_detaljer-om-drapsvåpenet-hva-er-drapsvåpenet.jsonl
No answers found for file: 20241208-204216_fornærmedes-avdøde-involvering-i-konflikt-eller-krangel-forut-for-døden.jsonl
No answers found for file: 20241208-204108_detaljer-om-drapsvåpenet-hva-er-drapsvåpenet.jsonl
No answers found for file: 20241208-204216_for

Unnamed: 0,MAP@1,MAP@3,MAP@5,MAP@8,MAP@12,MAP
0,0.25,0.33,0.3,0.31,0.29,0.33
1,0.25,0.33,0.3,0.31,0.29,0.34
2,0.25,0.42,0.5,0.52,0.52,0.52
3,1.0,1.0,1.0,1.0,1.0,1.0


\begin{tabular}{lrrrrrr}
\toprule
 & MAP@1 & MAP@3 & MAP@5 & MAP@8 & MAP@12 & MAP \\
\midrule
0 & 0.25 & 0.33 & 0.30 & 0.31 & 0.29 & 0.33 \\
1 & 0.25 & 0.33 & 0.30 & 0.31 & 0.29 & 0.34 \\
2 & 0.25 & 0.42 & 0.50 & 0.52 & 0.52 & 0.52 \\
3 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 & 1.00 \\
\bottomrule
\end{tabular}



Unnamed: 0,MAP@1,MAP@3,MAP@5,MAP@8,MAP@12,MAP
0,0.25,0.33,0.3,0.31,0.29,0.33
1,0.25,0.33,0.3,0.31,0.29,0.34
2,0.25,0.42,0.5,0.52,0.52,0.52
3,1.0,1.0,1.0,1.0,1.0,1.0
