## FNED and FPED Implementation

In [1]:
import pandas as pd
import numpy as np
import math

#### Read Original Data

In [2]:
data_dir = "imdb_mutant"
dfm = pd.read_csv("../data/" + data_dir + "/male/test.csv", header=None, sep="\t", names=["label", "text", "template"])
dff = pd.read_csv("../data/" + data_dir + "/female/test.csv", header=None, sep="\t", names=["label", "text", "template"])

#### Read Prediction Result from the Model

Make sure you set variable `ouput_dir` with the same `output_dir` in the fine-tuning parameter

In [25]:
def read_txt(fpath):
    pred = []
    file = open(fpath)
    lines = file.readlines()
    for l in lines :
        pred.append(int(l))
    file.close()
    
    return pred

In [27]:
output_dir = "exp2_on_imdb"

result_dir = "../result/" + output_dir + "/"

rm = result_dir + "results_data_male.txt"
rf = result_dir + "results_data_female.txt"

mpred = read_txt(rm)
fpred = read_txt(rf)

print(len(mpred))
print(len(fpred))

139000
139000


In [28]:
dfm["pred"] = mpred
dff["pred"] = fpred

In [29]:
dfm.head()

Unnamed: 0,label,text,template,pred
0,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
1,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
2,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
3,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
4,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1


In [30]:
df = pd.concat([dfm, dff])

In [31]:
df

Unnamed: 0,label,text,template,pred
0,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
1,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
2,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
3,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
4,1,"I have only see three episodes of Hack, starri...","I have only see three episodes of Hack, starri...",1
...,...,...,...,...
138995,1,"First, I'm a huge Melanie fan. I grew up knowi...","First, I'm a huge Buddy Holly fan. I grew up k...",1
138996,1,"First, I'm a huge Tanisha fan. I grew up knowi...","First, I'm a huge Buddy Holly fan. I grew up k...",1
138997,1,"First, I'm a huge Nancy fan. I grew up knowing...","First, I'm a huge Buddy Holly fan. I grew up k...",1
138998,1,"First, I'm a huge Tia fan. I grew up knowing w...","First, I'm a huge Buddy Holly fan. I grew up k...",1


#### Calculate Global Performance

In [32]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

y_test = df["label"]
y_pred = df["pred"]
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
print(cm)

              precision    recall  f1-score   support

           0       0.82      0.97      0.89    139240
           1       0.96      0.79      0.87    138760

    accuracy                           0.88    278000
   macro avg       0.89      0.88      0.88    278000
weighted avg       0.89      0.88      0.88    278000

[[135172   4068]
 [ 29407 109353]]


#### Calculate Performance for each Group

In [33]:
y_test = dfm["label"]
y_pred = dfm["pred"]
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
print(cm)

              precision    recall  f1-score   support

           0       0.82      0.97      0.89     69620
           1       0.96      0.79      0.87     69380

    accuracy                           0.88    139000
   macro avg       0.89      0.88      0.88    139000
weighted avg       0.89      0.88      0.88    139000

[[67579  2041]
 [14507 54873]]


In [34]:
y_test = dff["label"]
y_pred = dff["pred"]
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
print(cm)

              precision    recall  f1-score   support

           0       0.82      0.97      0.89     69620
           1       0.96      0.79      0.87     69380

    accuracy                           0.88    139000
   macro avg       0.89      0.88      0.88    139000
weighted avg       0.89      0.88      0.88    139000

[[67593  2027]
 [14900 54480]]


#### Implementation

Fundamental Thery about FPR and FNR: 
https://medium.com/datadriveninvestor/confusion-matric-tpr-fpr-fnr-tnr-precision-recall-f1-score-73efa162a25f


Theory on FPED and FNED:
https://research.google/pubs/pub46743/ 

In [35]:
# calculate false positive rate from given consufsion matrix
def calculate_fpr(cm) :
    negative = np.sum(cm[0])
    fp = cm[0][1]
    fpr = fp / negative
    return fpr

# calculate false positive rate from given consufsion matrix
def calculate_fnr(cm) :
    negative = np.sum(cm[0])
    fn = cm[1][0]
    fnr = fn / negative
    return fnr

print(calculate_fpr(cm))
print(calculate_fnr(cm))

0.029115196782533756
0.21401896006894572


In [36]:
y_test = df["label"]
y_pred = df["pred"]
cm = confusion_matrix(y_test, y_pred)
global_fpr = calculate_fpr(cm)
global_fnr = calculate_fnr(cm)
print("global fpr: ", global_fpr)
print("global fnr: ", global_fnr)

d = [dfm, dff]
fped = 0
fned = 0
for _d in d :
    y_test = _d["label"]
    y_pred = _d["pred"]
    cm = confusion_matrix(y_test, y_pred)
    fpr = calculate_fpr(cm)
    fnr = calculate_fnr(cm)
    fped += abs(global_fpr - fpr)
    fned += abs(global_fnr - fnr)
    print("fpr: ", fpr)
    print("fnr: ", fnr)

print()
print("FPED: ", fped)
print("FNED: ", fned)

global fpr:  0.029215742602700375
global fnr:  0.21119649525998277
fpr:  0.02931628842286699
fnr:  0.20837403045101982
fpr:  0.029115196782533756
fnr:  0.21401896006894572

FPED:  0.00020109164033323482
FNED:  0.005644929617925898
