# Qualitative Analysis

In [1]:
import pandas as pd
import os
import re
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from IPython.display import display, HTML, Markdown
from lr.text_processing.util import pre_process_nli_df
from collections import Counter

## When $\rho=0$, which test examples are affected by the transformation?

In [2]:
# Experiment selection: model, dataset and result batch to inspect.
m_name = "roberta_base"
data = "snli"
batch = 2

# Results file for the rho = 0 run of the selected experiment.
path = "raw_results/{}/{}/syn_p_h/batch{}/rho_0p00_results.csv".format(data, m_name, batch)
df = pd.read_csv(path)

# Transformed test set and the synonym table used by the transformation.
snli_trans = pd.read_csv("data/{}/test_p_h_syn_noun.csv".format(data))
sin_noun = pd.read_csv("data/{}/syn_noun.csv".format(data))

# Map every synonym key to the label of its row in `sin_noun`.
key2id = dict(zip(sin_noun.key, sin_noun.index))


def f(k):
    """Return the `sin_noun` row label for key `k` (KeyError if `k` is unknown)."""
    return key2id[k]

In [3]:
def filter_df_by_label(df, drop_label='-'):
    """
    Return the rows of `df` whose `label` column differs from `drop_label`.

    :param df: DataFrame with a `label` column
    :param drop_label: label value to discard (default '-')
    :return: the remaining rows, with their original index labels
    """
    keep_mask = df.label.ne(drop_label)
    return df.loc[keep_mask]

In [4]:
# Load the original test split and drop rows labelled '-' (the default
# `drop_label` of `filter_df_by_label`).
snli_ori = pd.read_csv("data/{}/test.csv".format(data))
snli_ori = filter_df_by_label(snli_ori)
# The return value is ignored, so this presumably cleans the frame in place
# -- TODO confirm against lr.text_processing.util.pre_process_nli_df.
pre_process_nli_df(snli_ori)
# Renumber the rows 0..n-1 after filtering so that row labels are contiguous
# again when this frame is later combined with other frames by index.
snli_ori = snli_ori.reset_index(drop=True)

In [5]:
# Rows where A == 1 and B == 0. The heading printed below calls these
# "disrupted (1 to 0)"; presumably A / B flag a correct prediction on the
# original / transformed input -- TODO confirm against the results schema.
disrupted = (df.A == 1) & (df.B == 0)
ids = df.loc[disrupted].index.tolist()
n = len(ids)
pct = n/df.shape[0]
display(Markdown(r"### When $\rho=0.0$, the number of disrupted sentences (1 to 0) is {} ({:.1%} of the test data)".format(n,pct)))

# Put each original pair next to its transformed version (columns suffixed
# with "_2"). `axis` must be passed by keyword: since pandas 2.0 every
# `concat` argument after `objs` is keyword-only, so `pd.concat(objs, 1)`
# raises TypeError there (and emits a FutureWarning on pandas 1.3+).
comb = pd.concat([snli_ori, snli_trans.add_suffix("_2")], axis=1)
# Keep only the disrupted rows; selection is by index label.
comb = comb.loc[ids]
display(HTML(comb.head(3).to_html()))

### When $\rho=0.0$, the number of disrupted sentences (1 to 0) is 616 (6.3% of the test data)

Unnamed: 0,premise,hypothesis,label,premise_2,hypothesis_2,label_2
31,3 young man in hoods standing in the middle of a quiet street facing the camera,three hood wearing people pose for a picture,entailment,3 young adult male in hoods standing in the center of a quiet street facing the photographic camera,three punk wearing people pose for a image,entailment
32,3 young man in hoods standing in the middle of a quiet street facing the camera,three hood wearing people stand in a street,entailment,3 young adult male in hoods standing in the center of a quiet street facing the photographic camera,three punk wearing people base in a street,entailment
44,male in a blue jacket decides to lay in the grass,the guy wearing a blue jacket is laying on the green grass taking a nap,neutral,male in a blue jacket decides to ballad in the grass,the guy wearing a blue jacket is laying on the green grass taking a sleep,neutral


## What are the most common transformations that appear in the error pairs?

In [6]:
spaces = re.compile(' +')


def _word_set(text):
    """Return the set of space-separated tokens in `text`, collapsing runs of spaces."""
    return set(spaces.sub(" ", text).strip().split(" "))


results = []

ids = comb.index.tolist()
for id_ in ids:
    pair = comb.loc[id_]  # look the row up once instead of once per field
    s = _word_set(pair.premise + " " + pair.hypothesis)
    st = _word_set(pair.premise_2 + " " + pair.hypothesis_2)
    # Words of the original pair that no longer occur after the transformation.
    diff = list(s - st)
    try:
        # All-or-nothing per pair: if any removed word is not a synonym key,
        # the whole pair is reported below and contributes nothing to `results`.
        results += list(map(f, diff))
    except KeyError:
        print(id_)
        print(s)
        print(st)

results_c = Counter(results)
dict_id = [i[0] for i in results_c.most_common(10)]

# Share of all counted replacements attributed to each synonym entry.
# When nothing was counted the share is 0.0 everywhere rather than a
# ZeroDivisionError.
total = len(results)
sin_noun["freq"] = [results_c[i] / total if total else 0.0 for i in sin_noun.index]
sin_noun = sin_noun.sort_values("freq", ascending=False)
# Replace the column wholesale: writing strings into the existing float column
# through `.loc[:, "freq"]` is a deprecated dtype-changing setitem in
# pandas >= 2.1.
sin_noun["freq"] = sin_noun["freq"].map("{:.1%}".format)
sin_noun.head(10)

Unnamed: 0,key,value,freq
3353,man,adult male,11.2%
6169,woman,adult female,5.2%
4022,person,individual,2.8%
637,boy,male child,2.5%
995,children,child,1.6%
2768,home,place,1.2%
3814,outdoors,open,1.1%
4663,road,route,1.1%
5296,stand,base,1.1%
2230,food,nutrient,0.9%
