In [12]:
# The open case is written in Norwegian. This table maps each Norwegian query
# string to the English wording used when presenting results. Keys are looked
# up by exact string match, so they must be kept character-for-character.
query_translations = {
    "personer med opphold og tilknytning til adressen (åstedet) som eier, leietaker, besøkende osv": (
        "persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc."
    ),
    "hvordan døde fornærmede (hva er dødsårsaken?)": (
        "how did the victim die (what is the cause of death?)"
    ),
    "detaljer om drapsvåpenet (hva er drapsvåpenet?)": (
        "details about the murder weapon (what is the murder weapon?)"
    ),
    "fornærmedes (avdøde) involvering i konflikt eller krangel forut for døden": (
        "the victim's involvement in conflict or argument prior to death"
    ),
}

In [13]:
import pandas as pd
from utils.stats_util import case_metrics, load_truth

# Notebook-wide pandas display settings: never truncate rows, and show
# floating-point values with two digits of precision.
pd.set_option("display.max_rows", None)
pd.set_option("display.precision", 2)

# Name of the investigation evaluated throughout this notebook.
case = "open-case"

# Load the truth data for the case and print every query on one line, followed
# by the values stored for it on the next line.
truths = load_truth(case)
for query, values in truths.items():
    print(query, values, sep="\n")


detaljer om drapsvåpenet (hva er drapsvåpenet?)
[27, 63, 79, 84, 122]
fornærmedes (avdøde) invovlering i konflikt/krangel
[77, 91, 101, 102, 116]
hvordan døde avdøde (dødsårsaken)
[5, 16, 27, 37, 47, 61, 62, 68, 69, 78, 92, 100, 112, 126, 127, 130, 133]
personer med tilgang (eier, leietaker, besøkende osv) til åstedet (boligen, adressen)
[8, 16, 25, 27, 70, 71, 72, 76, 80, 81, 84, 85, 89, 91, 101, 116, 131]


In [25]:
# Relevance-score thresholds T evaluated below (passed to case_metrics as
# min_score):
# 0: irrelevant, 1: somewhat, 2: relevant, 3: extremely relevant

# One LaTeX table is rendered per threshold and the pieces are stitched into a
# single table: the header and footer lines are taken from the first rendered
# table, the body rows are collected per threshold.
LATEX_TABLE_HEADER = None
LATEX_TABLE_BODY = []
LATEX_TABLE_END = None

for MIN_SCORE in range(4):
    print(f"MIN_SCORE: {MIN_SCORE}")

    # English query -> {"N_k": ..., <metric name>: <value>, ...}
    metrics_for_query = {}
    for k in [5, 10, -1]:
        all_metrics = case_metrics(
            investigation=case,
            min_score=MIN_SCORE,
            top_k=k,
            root_folder=f"output/{case}",
            verbose=True,
        )

        for source_folder, results in all_metrics.items():
            for _query, _results in sorted(results.items(), key=lambda x: x[0]):
                _query = query_translations[_query]
                query_metrics = metrics_for_query.setdefault(_query, {})
                # N_k is rewritten on every pass, so the value that survives is
                # the one reported for the last k in the list (-1).
                query_metrics["N_k"] = _results["num_retrieved"]
                query_metrics.update(_results["metrics"])

    results_df = pd.DataFrame(metrics_for_query).T
    display(results_df)

    # Prepare the frame for LaTeX: integer counts, math-mode column names, and
    # blank cells instead of -1 (presumably the sentinel case_metrics uses when
    # a metric is not available at that cut-off; confirm against case_metrics).
    results_df["N_k"] = results_df["N_k"].astype(int)
    results_df.columns = [f"${c}$" for c in results_df.columns]
    results_df = results_df.replace(-1, "")
    results_df.columns = [c.replace('$F1$', '$F_1$') for c in results_df.columns]

    results_df = results_df.sort_index()

    latex_tab = results_df.to_latex(float_format="%.2f")
    tab_split = latex_tab.split("\n")

    # The first three lines (\begin{tabular}, \toprule, column header) and the
    # last three (\bottomrule, \end{tabular}, trailing empty string) are the
    # same for every threshold, so they are captured only once.
    if not LATEX_TABLE_HEADER:
        LATEX_TABLE_HEADER = tab_split[:3]
    if not LATEX_TABLE_END:
        LATEX_TABLE_END = tab_split[-3:]

    # The section row must span exactly the columns of the rendered table
    # (index column + one per metric); a larger span is a LaTeX error.
    n_table_columns = len(results_df.columns) + 1
    LATEX_TABLE_BODY.append(
        [f"\\multicolumn{{{n_table_columns}}}{{l}}{{\\textbf{{$T\\geq{MIN_SCORE}$}}}} \\\\"]
    )
    LATEX_TABLE_BODY.append(tab_split[3:-3])


print("\n".join(LATEX_TABLE_HEADER))
for body_lines in LATEX_TABLE_BODY:
    print("\n".join(body_lines))
print("\n".join(LATEX_TABLE_END))
    

MIN_SCORE: 0


Unnamed: 0,N_k,P@5,R@5,P@10,R@10,P,R,F1
details about the murder weapon (what is the murder weapon?),16.0,0.0,0.0,0.0,0.0,0.19,0.6,0.29
the victim's involvement in conflict or argument prior to death,14.0,0.0,0.0,0.0,0.0,0.07,0.2,0.11
how did the victim die (what is the cause of death?),15.0,0.2,0.06,0.4,0.24,0.33,0.29,0.31
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",14.0,0.6,0.18,0.7,0.41,0.79,0.65,0.71


MIN_SCORE: 1


Unnamed: 0,N_k,P@5,R@5,P@10,R@10,P,R,F1
details about the murder weapon (what is the murder weapon?),14.0,0.0,0.0,0.1,0.2,0.21,0.6,0.32
the victim's involvement in conflict or argument prior to death,9.0,0.0,0.0,-1.0,-1.0,0.11,0.2,0.14
how did the victim die (what is the cause of death?),14.0,0.4,0.12,0.4,0.24,0.36,0.29,0.32
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",14.0,0.6,0.18,0.7,0.41,0.79,0.65,0.71


MIN_SCORE: 2


Unnamed: 0,N_k,P@5,R@5,P@10,R@10,P,R,F1
details about the murder weapon (what is the murder weapon?),8.0,0.2,0.2,-1.0,-1.0,0.38,0.6,0.46
the victim's involvement in conflict or argument prior to death,8.0,0.0,0.0,-1.0,-1.0,0.12,0.2,0.15
how did the victim die (what is the cause of death?),12.0,0.4,0.12,0.4,0.24,0.42,0.29,0.34
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",11.0,0.6,0.18,0.8,0.47,0.82,0.53,0.64


MIN_SCORE: 3


Unnamed: 0,N_k,P@5,R@5,P@10,R@10,P,R,F1
details about the murder weapon (what is the murder weapon?),1.0,-1.0,-1.0,-1.0,-1.0,1.0,0.2,0.33
the victim's involvement in conflict or argument prior to death,3.0,-1.0,-1.0,-1.0,-1.0,0.33,0.2,0.25
how did the victim die (what is the cause of death?),6.0,0.6,0.18,-1.0,-1.0,0.67,0.24,0.35
"persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc.",2.0,-1.0,-1.0,-1.0,-1.0,1.0,0.12,0.21


\begin{tabular}{lrrrrrrrr}
\toprule
 & $N_k$ & $P@5$ & $R@5$ & $P@10$ & $R@10$ & $P$ & $R$ & $F_1$ \\
\multicolumn{11}{l}{\textbf{$T\geq0$}} \\
\midrule
details about the murder weapon (what is the murder weapon?) & 16 & 0.00 & 0.00 & 0.00 & 0.00 & 0.19 & 0.60 & 0.29 \\
how did the victim die (what is the cause of death?) & 15 & 0.20 & 0.06 & 0.40 & 0.24 & 0.33 & 0.29 & 0.31 \\
persons with residence and connections to the address (the crime scene) as owner, tenant, visitor, etc. & 14 & 0.60 & 0.18 & 0.70 & 0.41 & 0.79 & 0.65 & 0.71 \\
the victim's involvement in conflict or argument prior to death & 14 & 0.00 & 0.00 & 0.00 & 0.00 & 0.07 & 0.20 & 0.11 \\
\multicolumn{11}{l}{\textbf{$T\geq1$}} \\
\midrule
details about the murder weapon (what is the murder weapon?) & 14 & 0.00 & 0.00 & 0.10 & 0.20 & 0.21 & 0.60 & 0.32 \\
how did the victim die (what is the cause of death?) & 14 & 0.40 & 0.12 & 0.40 & 0.24 & 0.36 & 0.29 & 0.32 \\
persons with residence and connections to the address (the

# Mean average precision

In [31]:
# Cut-offs passed to case_metrics as top_k. The -1 entry is labelled plain
# "MAP" in the result table (presumably "no cut-off"; confirm against
# case_metrics).
K_vals = [1, 3, 5, 8, 12, -1]

# min_score threshold -> {k: mean of the precision values over all queries}
# NOTE(review): the value averaged here is whatever precision metric
# case_metrics reports at cut-off k; verify that this matches the intended
# definition of (mean) average precision.
all_MAP = {}
for MIN_SCORE in range(4):
    mean_avg_precisions = {}

    for k in K_vals:
        all_metrics = case_metrics(
            investigation=case,
            min_score=MIN_SCORE,
            top_k=k,
            replace_above_k=False,
            # Derived from `case` so the folder always matches `investigation`.
            root_folder=f"output/{case}",
            verbose=True,
        )

        average_precisions = []

        for source_folder, results in all_metrics.items():
            for _query, _results in sorted(results.items(), key=lambda x: x[0]):
                for metric, value in _results["metrics"].items():
                    # Collect every metric whose name starts with "P".
                    if metric.startswith("P"):
                        average_precisions.append(value)

        # Report NaN rather than raising ZeroDivisionError when no precision
        # value was returned for this (threshold, k) combination.
        if average_precisions:
            mean_avg_precisions[k] = sum(average_precisions) / len(average_precisions)
        else:
            mean_avg_precisions[k] = float("nan")

    all_MAP[MIN_SCORE] = mean_avg_precisions

# Rows: one per min_score threshold; columns: one per k.
results_df = pd.DataFrame(all_MAP).T
# Label each column from its own k so the names stay correct regardless of
# where -1 sits in K_vals.
col_names = ["MAP" if k == -1 else f"MAP@{k}" for k in results_df.columns]
results_df.columns = col_names
results_df = results_df.reset_index(drop=True)
display(results_df)
print(results_df.to_latex(float_format="%.2f"))

Unnamed: 0,MAP@1,MAP@3,MAP@5,MAP@8,MAP@12,MAP
0,0.25,0.17,0.2,0.22,0.29,0.35
1,0.25,0.17,0.25,0.25,0.34,0.37
2,0.25,0.17,0.3,0.44,0.43,0.43
3,0.5,0.75,0.73,0.75,0.75,0.75


\begin{tabular}{lrrrrrr}
\toprule
 & MAP@1 & MAP@3 & MAP@5 & MAP@8 & MAP@12 & MAP \\
\midrule
0 & 0.25 & 0.17 & 0.20 & 0.22 & 0.29 & 0.35 \\
1 & 0.25 & 0.17 & 0.25 & 0.25 & 0.34 & 0.37 \\
2 & 0.25 & 0.17 & 0.30 & 0.44 & 0.43 & 0.43 \\
3 & 0.50 & 0.75 & 0.73 & 0.75 & 0.75 & 0.75 \\
\bottomrule
\end{tabular}

