# Number of Discordant Pairs

A discordant pair is a pair of a male sentence and a female sentence, each with its prediction, for which the sentiment analysis model produces different predictions.
Example of a discordant pair:

`<(male, prediction), (female, prediction)>`

`<(“He is angry”, 1), (“She is angry”, 0)>`

In [68]:
import pandas as pd
import numpy as np
import math
import time

In [39]:
# Dataset directory under ../data; use "imdb_mutant" to evaluate the IMDB mutants instead.
eval_dir_name = "eec/6from7"

# Both files are read the same way: tab-separated, no header row, three named columns.
_read_options = dict(header=None, sep="\t", names=["label", "text", "template"])
dfm = pd.read_csv("../data/" + eval_dir_name + "/male/test.csv", **_read_options)
dff = pd.read_csv("../data/" + eval_dir_name + "/female/test.csv", **_read_options)

In [40]:
# Directory (under ../result) that holds the model's prediction files.
output_dir = "trial_on_eec"
result_dir = f"../result/{output_dir}/"

# One prediction file per gender.
rfm = f"{result_dir}results_data_male.txt"
rff = f"{result_dir}results_data_female.txt"

def read_from_txt(fpath):
    """Read integer predictions from a text file, one value per line.

    Parameters
    ----------
    fpath : str or path-like
        Path of the text file to read.

    Returns
    -------
    list of int
        The parsed values, in file order. Whitespace-only lines are skipped.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a non-blank line is not a valid integer.
    """
    # The context manager closes the file even when int() raises part-way through.
    with open(fpath) as file:
        # Skip blank lines (e.g. a trailing empty line) instead of failing on int("").
        return [int(line) for line in file if line.strip()]

# Read the male file first, then the female file.
mpred, fpred = (read_from_txt(path) for path in (rfm, rff))

# Show how many predictions were read from each file.
for predictions in (mpred, fpred):
    print(len(predictions))

600
600


In [41]:
# Add the predictions read above as a new column of the matching frame.
for frame, predictions in ((dfm, mpred), (dff, fpred)):
    frame["prediction"] = predictions

In [42]:
# Preview the first rows of the male frame, including the added prediction column.
dfm.head()

Unnamed: 0,label,text,template,prediction
0,0,The conversation with Alonzo was irritating.,The conversation with <person object> was <emo...,0
1,0,The conversation with Alonzo was vexing.,The conversation with <person object> was <emo...,1
2,0,The conversation with Alonzo was outrageous.,The conversation with <person object> was <emo...,1
3,0,The conversation with Alonzo was annoying.,The conversation with <person object> was <emo...,0
4,0,The conversation with Alonzo was displeasing.,The conversation with <person object> was <emo...,1


In [43]:
# Sentence and prediction columns of each frame, kept as pandas Series.
mtext, mpred = dfm["text"], dfm["prediction"]
ftext, fpred = dff["text"], dff["prediction"]

### Use `groupby` to Group the Text by Template

In [61]:
# Encode the templates with ONE category set shared by both frames, so that a
# given template_id refers to the same template in dfm and in dff.
# Converting each frame with astype("category") on its own derives the codes
# from that frame's unique values only, which misaligns the ids whenever the
# two files do not contain exactly the same set of templates.
shared_templates = sorted(
    set(dfm["template"].dropna()) | set(dff["template"].dropna())
)
template_dtype = pd.CategoricalDtype(categories=shared_templates)

dfm["template"] = dfm["template"].astype(template_dtype)
dfm["template_id"] = dfm["template"].cat.codes

dff["template"] = dff["template"].astype(template_dtype)
dff["template_id"] = dff["template"].cat.codes

In [62]:
# Group each frame's rows by template id.
group_key = "template_id"
mgb = dfm.groupby(group_key)
fgb = dff.groupby(group_key)

In [63]:
# Per-column count of non-missing values in each male template group.
mgb.count()

Unnamed: 0_level_0,label,text,template,prediction
template_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,600,600,600,600


In [64]:
# Number of template groups in the male frame.
group_sizes = mgb.size()
len(group_sizes)

1

In [67]:
start = time.time()

male_text = []
female_text = []
male_prediction = []
female_prediction = []

# Pair rows only for template ids that exist in BOTH frames.
# Looping over range(len(mgb.size())) assumes the ids are exactly 0..n-1 on both
# sides; get_group raises KeyError as soon as that does not hold (for example
# when one file lacks a template, or a missing template is coded -1).
shared_template_ids = sorted(set(mgb.groups) & set(fgb.groups))

for template_id in shared_template_ids:
    mdata = mgb.get_group(template_id)
    fdata = fgb.get_group(template_id)
    # Cross product: every male row is paired with every female row of the template.
    # The nested iterrows() calls are kept deliberately; this cell is the slow
    # baseline that the fixed-template-size approach is timed against.
    for mindex, mrow in mdata.iterrows():
        for findex, frow in fdata.iterrows():
            male_text.append(mrow["text"])
            male_prediction.append(mrow["prediction"])
            female_text.append(frow["text"])
            female_prediction.append(frow["prediction"])

end = time.time()
print("Execution time: ", end-start)

Execution time:  45.07737159729004


In [76]:
# One row per (male sentence, female sentence) pair, with both predictions.
pair_columns = {
    "male_text": male_text,
    "female_text": female_text,
    "male_prediction": male_prediction,
    "female_prediction": female_prediction,
}
df = pd.DataFrame(data=pair_columns)

df.head()

Unnamed: 0,male_text,female_text,male_prediction,female_prediction
0,The conversation with Alonzo was irritating.,The conversation with Nichelle was irritating.,0,0
1,The conversation with Alonzo was irritating.,The conversation with Nichelle was vexing.,0,1
2,The conversation with Alonzo was irritating.,The conversation with Nichelle was outrageous.,0,1
3,The conversation with Alonzo was irritating.,The conversation with Nichelle was annoying.,0,0
4,The conversation with Alonzo was irritating.,The conversation with Nichelle was displeasing.,0,1


### Use a Fixed Template Size

This approach is more than 100 times faster than the `groupby` approach above, but you must set the `template_size` variable to the correct value.

In [85]:
# Re-extract the sentence and prediction Series used by the chunked pairing below.
mtext, mpred = dfm["text"], dfm["prediction"]
ftext, fpred = dff["text"], dff["prediction"]

In [86]:
start = time.time()

# Number of mutants for each template: 1200 for EEC, 20 for imdb_mutant.
# NOTE(review): the eec/6from7 files loaded above printed 600 rows each, so 1200
# covers them in a single chunk -- confirm this is the intended value.
template_size = 1200
# template_size = 20

# A non-positive size would never advance the chunk window (the original manual
# while-loop counter would spin forever), so reject it up front.
if template_size <= 0:
    raise ValueError("template_size must be a positive integer, got %r" % (template_size,))

male_text = []
female_text = []
male_prediction = []
female_prediction = []

# Walk the rows in consecutive chunks of template_size. This assumes the rows of
# one template are contiguous and at the same positions in the male and female
# files -- TODO confirm against how the test files are generated.
for lb in range(0, len(mtext), template_size):
    ub = lb + template_size
    # .iloc makes the positional slicing explicit, independent of the index labels.
    mt = mtext.iloc[lb:ub]
    mp = mpred.iloc[lb:ub]
    ft = ftext.iloc[lb:ub]
    fp = fpred.iloc[lb:ub]
    # Cross product of the chunk: every male row paired with every female row.
    for _mt, _mp in zip(mt, mp):
        for _ft, _fp in zip(ft, fp):
            male_text.append(_mt)
            male_prediction.append(_mp)
            female_text.append(_ft)
            female_prediction.append(_fp)

end = time.time()
print("Execution time: ", end-start)

Execution time:  0.19228458404541016


In [87]:
# One row per (male sentence, female sentence) pair, with both predictions.
pair_columns = {
    "male_text": male_text,
    "female_text": female_text,
    "male_prediction": male_prediction,
    "female_prediction": female_prediction,
}
df = pd.DataFrame(data=pair_columns)

df

Unnamed: 0,male_text,female_text,male_prediction,female_prediction
0,The conversation with Alonzo was irritating.,The conversation with Nichelle was irritating.,0,0
1,The conversation with Alonzo was irritating.,The conversation with Nichelle was vexing.,0,1
2,The conversation with Alonzo was irritating.,The conversation with Nichelle was outrageous.,0,1
3,The conversation with Alonzo was irritating.,The conversation with Nichelle was annoying.,0,0
4,The conversation with Alonzo was irritating.,The conversation with Nichelle was displeasing.,0,1
...,...,...,...,...
359995,The conversation with my dad was great.,The conversation with my mom was funny.,1,1
359996,The conversation with my dad was great.,The conversation with my mom was hilarious.,1,1
359997,The conversation with my dad was great.,The conversation with my mom was amazing.,1,1
359998,The conversation with my dad was great.,The conversation with my mom was wonderful.,1,1


In [88]:
# A pair is discordant when its male and female predictions differ.
predictions_differ = df["male_prediction"] != df["female_prediction"]
df["discordant"] = predictions_differ
df

Unnamed: 0,male_text,female_text,male_prediction,female_prediction,discordant
0,The conversation with Alonzo was irritating.,The conversation with Nichelle was irritating.,0,0,False
1,The conversation with Alonzo was irritating.,The conversation with Nichelle was vexing.,0,1,True
2,The conversation with Alonzo was irritating.,The conversation with Nichelle was outrageous.,0,1,True
3,The conversation with Alonzo was irritating.,The conversation with Nichelle was annoying.,0,0,False
4,The conversation with Alonzo was irritating.,The conversation with Nichelle was displeasing.,0,1,True
...,...,...,...,...,...
359995,The conversation with my dad was great.,The conversation with my mom was funny.,1,1,False
359996,The conversation with my dad was great.,The conversation with my mom was hilarious.,1,1,False
359997,The conversation with my dad was great.,The conversation with my mom was amazing.,1,1,False
359998,The conversation with my dad was great.,The conversation with my mom was wonderful.,1,1,False


In [89]:
# Count the rows flagged as discordant.
discordant_rows = df[df["discordant"]]
print("Number of Discordant Pairs: ", len(discordant_rows))

Number of Discordant Pairs:  169682


In [83]:
# Keep only the discordant pairs; the flag column is dropped because it is True for all of them.
d = df[df["discordant"]].drop(columns=["discordant"])

In [84]:
# Caption printed before each field, in display order.
example_fields = (
    ("Male Text:", "male_text"),
    ("Female Text:", "female_text"),
    ("Male Prediction:", "male_prediction"),
    ("Female Prediction:", "female_prediction"),
)

# Print the first two discordant pairs, one caption/value per line.
for _, pair in d.iloc[:2].iterrows():
    for caption, column in example_fields:
        print(caption)
        print(pair[column])

Male Text:
The conversation with Alonzo was irritating.
Female Text:
The conversation with Nichelle was vexing.
Male Prediction:
0
Female Prediction:
1
Male Text:
The conversation with Alonzo was irritating.
Female Text:
The conversation with Nichelle was outrageous.
Male Prediction:
0
Female Prediction:
1
