# Qualitative analysis of BERT

In [None]:
import pandas as pd
from IPython.display import display, HTML
from lr.text_processing.util import pre_process_nli_df
from lr.training.util import filter_df_by_label
from lr.text_processing.transformations.wordnet import path_base_transformation
from lr.analysis.util import get_mismatch, join_df_list
import numpy as np
import matplotlib.pyplot as plt

## Getting all errors

In [None]:
# Result files of the BERT runs on SNLI; per the file names there are two
# runs (different seeds) for each rho value from 0.00 to 1.00.
paths = [
    "raw_results/snli/bert_base/sin_p_h/rho_0p00_dgp_seed_12_random_state_52_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_0p00_dgp_seed_109_random_state_14_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_0p25_dgp_seed_259_random_state_59_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_0p25_dgp_seed_148_random_state_7_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_0p50_dgp_seed_203_random_state_45_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_0p50_dgp_seed_62_random_state_103_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_0p75_dgp_seed_224_random_state_29_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_0p75_dgp_seed_40_random_state_179_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_1p00_dgp_seed_185_random_state_27_results.csv",
    "raw_results/snli/bert_base/sin_p_h/rho_1p00_dgp_seed_13_random_state_47_results.csv",
]

# One `get_mismatch` output per run, labelled "bert_0", "bert_1", ... by the
# run's position in `paths`.
all_ms = [
    get_mismatch(pd.read_csv(run_path), "bert_{}".format(run_number))
    for run_number, run_path in enumerate(paths)
]

# Join the per-run outputs, total them across columns (axis 1), order the
# totals from largest to smallest and keep only entries strictly above 5.
# NOTE(review): presumably each total is the number of runs that got the
# example wrong -- confirm against `get_mismatch` / `join_df_list`.
joined_runs = join_df_list(all_ms)
results = joined_runs.sum(axis=1).sort_values(ascending=False)
results = results[results > 5]

# Original SNLI dev split and the file used for its transformed counterpart.
original_path = "data/snli/dev.csv"
transformed_path = "data/snli/dev_p_h_syn_noun.csv"

# Load the original dev set, drop incomplete rows, filter by label and
# renumber the rows from 0 before pre-processing.
# NOTE(review): the positional index built here is assumed to match the index
# of `results` -- confirm against `get_mismatch`.
raw_dev = pd.read_csv(original_path)
df = filter_df_by_label(raw_dev.dropna())
df = df.reset_index(drop=True)
pre_process_nli_df(df)  # return value unused; presumably modifies df in place

# Keep only the rows selected in `results`, in the same order, and attach
# their totals. The column name keeps its existing spelling
# ("number_of_erros").
df = df.loc[results.index]
df.loc[:, "number_of_erros"] = results.values

# Transformed counterpart of the same rows, annotated the same way. `df`
# already carries the extra column when it is handed to the transformation.
df_t = path_base_transformation(df, transformed_path)
df_t = df_t.loc[results.index]
df_t.loc[:, "number_of_erros"] = results.values

## Looking at the errors

In [None]:
# Interactive annotation pass: show every selected example next to its
# transformed version and record the annotator's verdict.
#   "y"         -> the row id goes to `severe_error`, `analysis` gets True
#   "n"         -> the row id goes to `passable_error`, `analysis` gets False
#   other input -> stop annotating
severe_error, passable_error, analysis = [], [], []
count = 0  # examples shown so far, including one that ends the session

for row_pos in range(len(results)):
    count += 1

    # Single-row frames so that each example renders as an HTML table.
    original = df.iloc[row_pos, :].to_frame().transpose()
    row_id = original.index[0]
    modified = df_t.iloc[row_pos, :].to_frame().transpose()

    print("\noriginal\n")
    display(HTML(original.to_html()))
    print()
    print("\nmodified\n")
    display(HTML(modified.to_html()))
    print("\ndecide\n")

    answer = input()
    if answer not in ("y", "n"):
        break

    is_severe = answer == "y"
    target = severe_error if is_severe else passable_error
    target.append(row_id)
    analysis.append(is_severe)


In [None]:
# Summary of the manual annotation: how many examples were judged and what
# fraction of them was marked as a severe error.
#
# The number of judged examples is read from `analysis`, which receives
# exactly one entry per "y"/"n" answer. Deriving it from `count - 1` is only
# right when the annotation loop was stopped early with another input; if
# every example was answered, `count` already equals the number of answers
# and `count - 1` under-counts by one, inflating the percentage.
n = len(analysis)

# With no recorded answer there is no rate to report; use NaN instead of
# raising ZeroDivisionError.
analysis_pct = len(severe_error) / n if n else float("nan")

print("count = {}\n".format(n))
print("len(severe_error) = {}\n".format(len(severe_error)))
print("analysis pct = {:.3f}\n".format(analysis_pct))
