In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from scipy import stats
import statsmodels.api as sm
import csv
import glob

In [3]:
df = pd.read_csv('CSVFiles/all_data_with_identities_50000.csv')
df = df.loc[:, ["comment_text", "split", "toxicity", "male", "female", "LGBTQ", "christian", "muslim", "other_religions", "black", "white"]]
df = df[df['split'] == 'test']

df['male'] = df['male'].apply(lambda x: np.round(x>=0.5))
df['female'] = df['female'].apply(lambda x: np.round(x>=0.5))
df['LGBTQ'] = df['LGBTQ'].apply(lambda x: np.round(x>=0.5))
df['christian'] = df['christian'].apply(lambda x: np.round(x>=0.5))
df['muslim'] = df['muslim'].apply(lambda x: np.round(x>=0.5))
df['other_religions'] = df['other_religions'].apply(lambda x: np.round(x>=0.5))
df['black'] = df['black'].apply(lambda x: np.round(x>=0.5))
df['white'] = df['white'].apply(lambda x: np.round(x>=0.5))

In [4]:
def loadTestAndValPreds(seed, pathPrefix):
    test_preds = []
    val_preds = []
    test_path = pathPrefix+'test_seed-'+ str(seed) + '_epoch-best_pred.csv'
    val_path = pathPrefix+'val_seed-'+ str(seed) + '_epoch-best_pred.csv'
    
    test_preds = pd.read_csv(test_path, header=None)
    val_preds = pd.read_csv(val_path, header=None)

    test_preds = pd.DataFrame({'test predictions': np.array(test_preds.values.tolist()).flatten()}, index = df.index)
    val_preds = pd.DataFrame({'val predictions': np.array(val_preds.values.tolist()).flatten()}, index = df.index)
    
    return test_preds, val_preds

In [5]:
print("\n")
print('Toxic samples test data: ', sum(df['toxicity']))
print('None-toxic samples test data: ', len(df['toxicity'])-sum(df['toxicity']))

print("\n")

for col in df.columns[4:]:
    print(col + ": " + str(np.sum(df[col])))

#df[(df['male'] == 0) & (df['female'] == 0) & (df['LGBTQ'] == 0) & (df['christian'] == 0) & (df['muslim'] == 0) & (df['other_religions'] == 0) & (df['black'] == 0) & (df['white'] == 0)]



Toxic samples test data:  1510.0
None-toxic samples test data:  1510.0


female: 402.0
LGBTQ: 169.0
christian: 237.0
muslim: 227.0
other_religions: 102.0
black: 180.0
white: 288.0


In [6]:
df

Unnamed: 0,comment_text,split,toxicity,male,female,LGBTQ,christian,muslim,other_religions,black,white
1,Bottom line is all religious extremist of all ...,test,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
4,So long Europe. So many people are afraid of b...,test,1.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
7,Still ignorant Qbcoach15 must be all those Tra...,test,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0
10,"The technical term for an atheist who""stands w...",test,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
13,"Islam means ""submission"". It is expected that ...",test,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
48555,I'll betcha Mark Redwine has had major anger i...,test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48556,Funny you mention the younger woman wearing th...,test,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
48557,The deficit is out of control. Listen to the ...,test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48558,"I would really, really like to see him and his...",test,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
def CheckAccuracy(predictions, labels):
        acc = 0.0
        for i in range(len(predictions)):
            if (predictions[i] == labels[i]):
                acc += 1
        return acc/len(predictions)

def F1AndAcc(df):
    f1_scores = []
    accuracies = []
    demographics = []
    
    for col in df.columns[4:]:
        tempdf = df[(df[col] == 1)]        
        
        labels = np.array(tempdf['toxicity'].values.tolist())
        predictions = np.array(tempdf['test predictions'].values.tolist())
        
        f1_scores.append(f1_score(labels, predictions, zero_division=1))
        accuracies.append(CheckAccuracy(labels, predictions))   
    return np.array(f1_scores), np.array(accuracies)


def pRule(df):
    pRules = []
    
    for col in df.columns[4:]:
        
        tempdfz1 = df[(df[col] == 1)]      
        tempdfz0 = df[(df[col] == 0)]
        
        labelsz1 = np.array(tempdfz1['toxicity'].values.tolist())
        predictionsz1 = np.array(tempdfz1['test predictions'].values.tolist())
        
        labelsz0 = np.array(tempdfz0['toxicity'].values.tolist())
        predictionsz0 = np.array(tempdfz0['test predictions'].values.tolist())
        
        with np.errstate(divide='ignore'):
            
            z1Ut1 = np.sum(predictionsz1)/len(df)
            pz1 = len(predictionsz1)/len(df)
            
            z0Ut1 = np.sum(predictionsz0)/len(df)
            pz0 = len(predictionsz0)/len(df)
            
            pscore0 = (z1Ut1/pz1) / (z0Ut1/pz0)
            pscore1 = (z0Ut1/pz0) / (z1Ut1/pz1)
        
        if np.isnan(pscore0) or np.isnan(pscore1):
            finalpscore = 0
        else:
            finalpscore = min(pscore0, pscore1)
        
        pRules.append(finalpscore)
    return pRules


def pRuleOwn(df):
    pRules = []
    
    for col in df.columns[4:]:
        tempdfz1 = df[(df[col] == 1)]      
        tempdfz0 = df[(df[col] == 0)]
        
        labelsz1 = np.array(tempdfz1['toxicity'].values.tolist())
        predictionsz1 = np.array(tempdfz1['test predictions'].values.tolist())
        
        labelsz0 = np.array(tempdfz0['toxicity'].values.tolist())
        predictionsz0 = np.array(tempdfz0['test predictions'].values.tolist())
        
        with np.errstate(divide='ignore'):
            pscore0 = (np.sum(predictionsz1)/np.sum(labelsz1))/(np.sum(predictionsz0)/np.sum(labelsz0))
            pscore1 = (np.sum(predictionsz0)/np.sum(labelsz0))/(np.sum(predictionsz1)/np.sum(labelsz1))
        
        if np.isnan(pscore0) or np.isnan(pscore1):
            finalpscore = 0
        else:
            finalpscore = min(pscore0, pscore1)
        
        pRules.append(finalpscore)
    return pRules

def MinMaxFairness(scores):
    return np.max(scores)-np.min(scores)

def VarianceFairness(scores):
    return np.var(scores)

In [8]:
prulearrmean = []
prulearrmin = []
accArr = []
f1Arr = []


for i in range(1,11):
    test_preds, val_preds = loadTestAndValPreds(i, 'finalOwnWilds/logs/50000noDP/civilcomments_split-')
    temp_df = pd.concat([test_preds, df], axis=1)
    prulearrmean.append(np.array(pRuleOwn(temp_df)).mean())
    prulearrmin.append(np.min(np.array(pRuleOwn(temp_df))))
    f1, acc = F1AndAcc(temp_df)
    accArr.append(acc)
    f1Arr.append(f1)
    
prulearrmin = np.array(prulearrmin).mean()
prulearrmean = np.array(prulearrmean).mean()

print(prulearrmin)
print(prulearrmean)

0.8371452229933416
0.9249540737034412


In [9]:
varF1 = []
minMaxF1 = []
pRuleOwnMeanArr = []
pRuleOwnMinArr = []
pRuleMeanArr = []
pRuleMinArr = []
accuracy = []
for i in range(1,11):
    test_preds, val_preds = loadTestAndValPreds(i, 'finalOwnWilds/logs/50000noDP/civilcomments_split-')
    temp_df = pd.concat([test_preds, df], axis=1)
    f1_scores, accuracies = F1AndAcc(temp_df)

    own = pRuleOwn(temp_df)       
    pRuleOwnMinArr.append(np.min(own))
    pRuleOwnMeanArr.append(np.mean(own))

    official = pRule(temp_df)
    pRuleMinArr.append(np.min(official))
    pRuleMeanArr.append(np.mean(official))

    #F1 Variance
    varF1.append(VarianceFairness(f1_scores))

    #F1 Min Max
    minMaxF1.append(MinMaxFairness(f1_scores))

    labels = np.array(temp_df['toxicity'].values.tolist())
    preds = np.array(temp_df['test predictions'].values.tolist())
    accuracy.append(CheckAccuracy(preds,labels))

In [13]:
print(f'varF1:\t\t {np.mean(varF1):.4f}')
print(f'minMaxF1:\t {np.mean(minMaxF1):.4f}')
print(f'pRuleOwnMeanArr: {np.mean(pRuleOwnMeanArr):.4f}')
print(f'pRuleOwnMinArr:\t {np.mean(pRuleOwnMinArr):.4f}')
print(f'pRuleMeanArr:\t {np.mean(pRuleMeanArr):.4f}')
print(f'pRuleMinArr:\t {np.mean(pRuleMinArr):.4f}')
print(f'Avg accuracy:\t {np.mean(accuracy):.4f}')

varF1:		 0.0015
minMaxF1:	 0.1116
pRuleOwnMeanArr: 0.9250
pRuleOwnMinArr:	 0.8371
pRuleMeanArr:	 0.5647
pRuleMinArr:	 0.4336
Avg accuracy:	 0.7934
