In [1]:
import pandas as pd

# Load Data

In [2]:
# Relative path of the directory that holds the error-analysis CSV files.
# The trailing slash is required: the file name is appended to this string
# by plain concatenation when the data is loaded.
data_path = "../../data/error-analysis/"

In [3]:
# Categorized false predictions (judging by the file name: fine-tuned GBERT on
# the Moravian German test split). The first CSV column becomes the row index.
false_predictions_file = "moravian_deu_test_gbert_fine-tuned_false-predictions_categorized.csv"
false_prediction_df = pd.read_csv(data_path + false_predictions_file, index_col=0)
false_prediction_df

Unnamed: 0,text,y_true,y_predicted,category
6,"Sie stellte sich ihren Heimgang oft nahe vor, ...",neutral,positive,unclear
21,"bau dir ein Haus und heirate die Person, die d...",positive,neutral,unclear
26,Einst redete er mich folgendermaßen an: “Mein ...,positive,neutral,unclear
27,die strenge Kälte war uns bei unsrer dünnen Kl...,negative,positive,multiple negations
28,"ich lies mich beym Pfarrer {NAME} melden, daß ...",positive,neutral,unclear
...,...,...,...,...
420,ich glaubte auch es würde nichts aus mir bis i...,negative,neutral,unclear
427,"daß geschahe sehr Solen, ingegenwart vieler In...",neutral,positive,short sentence
428,als wir aber etwa noch 4 Meilen davon entfernt...,neutral,negative,unclear
430,"Der Capitain sagte darauf, daß er gekommen sei...",positive,neutral,mixed sentiment


# Add Confusion Type Column

In [4]:
def _confusion_pair(labels):
    """Join the two labels of one row with "<>" after sorting them.

    Sorting makes the key direction-agnostic: a (true, predicted) pair and its
    swapped counterpart produce the same string.
    """
    ordered_labels = sorted(labels)
    return "<>".join(ordered_labels)


# Row-wise over the gold and predicted label columns.
label_columns = false_prediction_df[["y_true", "y_predicted"]]
false_prediction_df["confusion"] = label_columns.apply(_confusion_pair, axis=1)
false_prediction_df

Unnamed: 0,text,y_true,y_predicted,category,confusion
6,"Sie stellte sich ihren Heimgang oft nahe vor, ...",neutral,positive,unclear,neutral<>positive
21,"bau dir ein Haus und heirate die Person, die d...",positive,neutral,unclear,neutral<>positive
26,Einst redete er mich folgendermaßen an: “Mein ...,positive,neutral,unclear,neutral<>positive
27,die strenge Kälte war uns bei unsrer dünnen Kl...,negative,positive,multiple negations,negative<>positive
28,"ich lies mich beym Pfarrer {NAME} melden, daß ...",positive,neutral,unclear,neutral<>positive
...,...,...,...,...,...
420,ich glaubte auch es würde nichts aus mir bis i...,negative,neutral,unclear,negative<>neutral
427,"daß geschahe sehr Solen, ingegenwart vieler In...",neutral,positive,short sentence,neutral<>positive
428,als wir aber etwa noch 4 Meilen davon entfernt...,neutral,negative,unclear,negative<>neutral
430,"Der Capitain sagte darauf, daß er gekommen sei...",positive,neutral,mixed sentiment,neutral<>positive


# Group by Category (and Confusion Type)

In [5]:
# Number of false predictions per error category (non-null "text" entries),
# kept as a one-column DataFrame for rich display.
false_prediction_df.groupby("category")[["text"]].count()

Unnamed: 0_level_0,text
category,Unnamed: 1_level_1
long sentence,25
mixed sentiment,8
multiple negations,4
short sentence,7
unclear,40


In [6]:
# Cross-tabulate error category (rows) against confusion type (columns).
# Combinations that never occur come out of unstack() as NaN and are set to 0,
# which is why the counts are floats.
grouped_df = (
    false_prediction_df
    .groupby(["category", "confusion"])["text"]
    .count()
    .unstack("confusion")
    .fillna(0)
)
grouped_df

confusion,negative<>neutral,negative<>positive,neutral<>positive
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
long sentence,6.0,17.0,2.0
mixed sentiment,0.0,7.0,1.0
multiple negations,0.0,4.0,0.0
short sentence,0.0,1.0,6.0
unclear,12.0,8.0,20.0


In [7]:
# Rebuild the category x confusion count table from false_prediction_df rather
# than reading grouped_df. The previous version overwrote grouped_df with a
# value derived from itself, so running this cell twice divided the shares a
# second time and summed the existing "Total" row/column into the new totals.
confusion_counts = (
    false_prediction_df
    .groupby(["category", "confusion"])["text"]
    .count()
    .unstack("confusion")
    .fillna(0)
)

# make relative: share of all false predictions that falls into each cell
grouped_df = confusion_counts / len(false_prediction_df)

# add totals: the column must be added first so that the "Total" row also
# covers it (its bottom-right cell is the sum of all shares)
grouped_df["Total"] = grouped_df.sum(axis=1)
grouped_df.loc["Total", :] = grouped_df.sum(axis=0)
grouped_df.round(2)

confusion,negative<>neutral,negative<>positive,neutral<>positive,Total
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
long sentence,0.07,0.2,0.02,0.3
mixed sentiment,0.0,0.08,0.01,0.1
multiple negations,0.0,0.05,0.0,0.05
short sentence,0.0,0.01,0.07,0.08
unclear,0.14,0.1,0.24,0.48
Total,0.21,0.44,0.35,1.0
