In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import re
from sklearn.metrics import f1_score
from sklearn.feature_extraction.text import CountVectorizer
from wilds.common.data_loaders import get_train_loader
import torchvision.transforms as transforms
import matplotlib.pyplot as plt



In [2]:
def CleanText(text):
    text = re.sub(r'''[\[|\]]''', "", text).split()
    text = np.array(text, dtype="float64")
    return text

In [3]:
df = pd.read_csv("all_data_with_identitiesEmbedded.csv")

df = df.loc[:, ["comment_text", "split", "na_gender", "na_race", "toxicity", "male", "female", "transgender", "black", "white", "asian", "latino"]]

df['toxicity'] = df['toxicity'].apply(lambda score: np.round(score))
df['comment_text'] = df['comment_text'].apply(lambda text: CleanText(text))

df['male'] = df['male'].apply(lambda x: round(x))
df['female'] = df['female'].apply(lambda x: round(x))
df['transgender'] = df['transgender'].apply(lambda x: round(x))
df['black'] = df['black'].apply(lambda x: round(x))
df['white'] = df['white'].apply(lambda x: round(x))
df['asian'] = df['asian'].apply(lambda x: round(x))
df['latino'] = df['latino'].apply(lambda x: round(x))

In [4]:
# Updating values for training_data
training_data = df[df['split'] == 'train']
training_data = training_data[(training_data['na_gender'] == 0) | (training_data['na_race'] == 0)]

# Getting test_data
test_data = df[df['split'] == 'test']
test_data = test_data[(test_data['na_gender'] == 0) | (test_data['na_race'] == 0)]

# Getting validation_data
validation_data = df[df['split'] == 'val']
validation_data = validation_data[(validation_data['na_gender'] == 0) | (validation_data['na_race'] == 0)]

In [5]:
X_train = training_data['comment_text'].values.tolist()
Y_train = training_data['toxicity'].values.tolist()

X_test = test_data['comment_text'].values.tolist()
Y_test = test_data['toxicity'].values.tolist()

X_val = validation_data['comment_text'].values.tolist()
Y_val = validation_data['toxicity'].values.tolist()

In [None]:
class OwnF1():
    def __init__(self, y_pred=None, y_true=None):
        self.tp = 0
        self.tn = 0
        self.fp = 0
        self.fn = 0
        self.f1 = 0
        self.y_pred = y_pred
        self.y_true = y_true
    
    precision = 0
    recall = 0
    def confusion_matrix(self):
        for i in range(len(self.y_true)):
            if self.y_true[i]==1 and self.y_pred[i]==1:
                self.tp += 1
            if self.y_true[i]==0 and self.y_pred[i]==0:
                self.tn += 1
            if self.y_true[i]==0 and self.y_pred[i]==1:
                self.fp += 1
            if self.y_true[i]==1 and self.y_pred[i]==0:
                self.fn += 1
    
    def precision_recall(self):
        self.confusion_matrix()
        self.precision = self.tp/(self.tp+self.fp)
        self.recall = self.tp/(self.tp+self.fn)
        
    def f1_score(self):
        self.precision_recall()
        self.f1 = 2*(self.precision*self.recall)/(self.precision+self.recall)
        
    def print_info(self):
        print('True Positives :', self.tp)
        print('True Negatives :', self.tn)
        print('False Positives :',self.fp)
        print('False Negatives :',self.fn)
        print('Precision : ',self.precision, '\nRecall : ',self.recall)
        print('F1 Score : ',self.f1)

In [6]:
class AdalineGDF1():
    def __init__(self, learning_rate = 0.01, n_iter = 20, w = None):
        self.w = w
        self.learning_rate = learning_rate
        self.n_iter = n_iter
        
    def CheckAccuracy(self, predictions, labels):
        acc = 0.0
        for i in range(len(predictions)):
            #print(predictions[i],labels[i])
            if (predictions[i] == labels[i]):
                acc += 1
        return acc/len(predictions)
    
    def predict(self, X_test):
        # Making sure that array is numpy array
        X_test = np.array(X_test).reshape((len(X_test), -1))
        output = np.dot(X_test, self.w)
        # Sigmoid giving 0-1 results
        
        preds = np.round(1/(1+np.exp(-output)))
        return preds
    
    def PredictTestSetDemographic(self, dfTest):
        dic = {}
        for col in dfTest.columns[5:]:
            tempSet = dfTest[dfTest[col] == 1]
            tempLabels = tempSet['toxicity'].values.tolist()
            tempVecs = tempSet['comment_text'].values.tolist()
            dic[col] = [tempVecs, tempLabels]
        
        accuracies = []
        for col in dic:
            tempPredictions = self.predict(dic[col][0])
            currentAccuracy = self.CheckAccuracy(tempPredictions, dic[col][1])
            accuracies.append([currentAccuracy, col])
        
        accuracies = np.array(accuracies)
        return accuracies
        
    def fit(self, X_train, Y_train, X_val, Y_val):
        # Making sure that arrays are numpy arrays
        
        X_train = np.array(X_train).reshape((len(X_train), -1))
        Y_train = np.array(Y_train).reshape((len(Y_train), 1))
        
        X_val = np.array(X_val).reshape((len(X_val), -1))
        Y_val = np.array(Y_val).reshape((len(Y_val), 1))
        
        # Initializing w vector using random normal distribution
        if self.w == None:
            self.w = np.random.normal(0, 0.1, len(X_train[0])).reshape(len(X_train[0]), 1)
        
        badEpoch = 0
        self.bestW = self.w
        bestAcc = 0
        early_stopping = 5
        
        # Using n epochs
        for i in range(self.n_iter):
            output = 1/(1+np.exp(-np.dot(X_train, self.w)))
            error = (Y_train - output)
            self.w += (1/len(Y_train)) * self.learning_rate * np.dot(X_train.T, error)
        
            tmpPreds = self.predict(X_val)
            
            if i == 0:     
                print("---- F1 score before training ---------")
                self.CheckF1Score(self.predict(X_test), Y_test)
            
            valAcc = self.CheckAccuracy(tmpPreds, Y_val)
            
            # Saving the best model and also checks for Early_Stopping
            if valAcc > bestAcc:
                bestAcc = valAcc
                badEpoch = 0
                self.bestW = self.w
            else:
                badEpoch += 1
            
            if badEpoch >= early_stopping:
                self.w = self.bestW                
                print("Stopped cause of bad Epoch in iteration: ", i)
                break
            #Print status
            if (i % 100) == 0:
                print('Iteration ', i, ' Done')
                
        return self

In [7]:
model = AdalineGDF1(learning_rate = 0.00004, n_iter = 10000)

model.fit(X_train, Y_train, X_val, Y_val)

predictions = model.predict(X_test)

Y_test = np.array(Y_test).reshape(len(Y_test), 1)

model.CheckAccuracy(predictions, Y_test)


print("---- F1 score after training ---------")
model.CheckF1Score(predictions, Y_test)


F1_score is:  0.22037896659310555
F1_score is:  0.22039448633005862
F1_score is:  0.2204952380952381
F1_score is:  0.22066229207996335
F1_score is:  0.22072965959395513
F1_score is:  0.2208476517754868
F1_score is:  0.22096577017114916
F1_score is:  0.22110014535995715
F1_score is:  0.2211332312404288
F1_score is:  0.22120098039215688
F1_score is:  0.22128443182690094
F1_score is:  0.22145434999616065
F1_score is:  0.22152190622598
F1_score is:  0.22167525574955776
F1_score is:  0.22181174478565382
F1_score is:  0.22189713581767787
F1_score is:  0.22203284784629684
F1_score is:  0.2221705426356589
F1_score is:  0.22236082956494624
F1_score is:  0.2225345514171937
F1_score is:  0.2227462076554272
F1_score is:  0.22288635469917423
F1_score is:  0.2229915807695334
F1_score is:  0.22306178669815033
F1_score is:  0.22306238185255198
F1_score is:  0.22320862396956245
F1_score is:  0.2232603092783505
Stopped cause of bad Epoch in iteration:  114
model accuracy is:  0.24063622370446383
f1_scor

In [10]:
forBarPlot = model.PredictTestSetDemographic(test_data)
plt.figure()
x = forBarPlot[:,1]
y = np.array(forBarPlot[:,0].astype(float))
x_pos = [i for i, _ in enumerate(x)]
plt.bar(x_pos, y, color='rgbkymc')
plt.xlabel("\nDemographic source")
plt.ylabel("Accuracy")
plt.xticks(x_pos, x)
plt.show()

AttributeError: 'OwnF1' object has no attribute 'PredictTestSetDemographic'

In [None]:
forBarPlot

In [None]:
np.std(forBarPlot[:,0].astype(float))

In [None]:
print("Antal toxic i predictions: ", sum(predictions))
print("Antal toxic i test set: ", sum(Y_test))
print("Samples i test set: ", len(Y_test))

In [None]:
f1_score([0,0,0], [0,0,0], zero_division=1, average='macro')