IMPORTANT --- Vijay Keswani Note: This notebook is from the code of Mozannar, Sontag 2020. I use this notebook only to extract the cleaned dataset and save it as an npy file

# Hate Speech and Offensive Language Detection
This notebook runs our experiments on the Hate Speech and Offensive Language Detection tweet dataset.
WARNING: dataset contains offensive language.

##  AAE detection model
As described in our paper, we use the model from https://github.com/slanglab/twitteraae to detect the dialect of each tweet, we only need the model files from the repo which we have conveniently copied in the same folder.

In [14]:
from __future__ import division
import numpy as np
import sys,os
import numpy as np
import torch
from torchtext import data
from torchtext import datasets
import time
import random
vocabfile = "twitteraae_models/model_vocab.txt" # change path if needed, path inside twitteraae repo is twitteraae/model/model_vocab.txt
modelfile = "twitteraae_models/model_count_table.txt" # change path if needed, path inside twitteraae repo is twitteraae/model/model_vocab.txt

import spacy
nlp = spacy.load("en_core_web_sm")
import re

In [2]:
# the following functions are copied from twitteraae for convenience
K=0; wordprobs=None; w2num=None

def load_model():
    """Idempotent"""
    global vocab,w2num,N_wk,N_k,wordprobs,N_w,K, modelfile,vocabfile
    if wordprobs is not None:
        # assume already loaded
        return

    N_wk = np.loadtxt(modelfile)
    N_w = N_wk.sum(1)
    N_k = N_wk.sum(0)
    K = len(N_k)
    wordprobs = (N_wk + 1) / N_k

    vocab = [L.split("\t")[-1].strip() for L in open(vocabfile,encoding="utf8")]
    w2num = {w:i for i,w in enumerate(vocab)}
    assert len(vocab) == N_wk.shape[0]

def infer_cvb0(invocab_tokens, alpha, numpasses):
    global K,wordprobs,w2num
    doclen = len(invocab_tokens)

    # initialize with likelihoods
    Qs = np.zeros((doclen, K))
    for i in range(0,doclen):
        w = invocab_tokens[i]
        Qs[i,:] = wordprobs[w2num[w],:]
        Qs[i,:] /= Qs[i,:].sum()
    lik = Qs.copy()  # pertoken normalized but proportionally the same for inference

    Q_k = Qs.sum(0)
    for itr in range(1,numpasses):
        # print "cvb0 iter", itr
        for i in range(0,doclen):
            Q_k -= Qs[i,:]
            Qs[i,:] = lik[i,:] * (Q_k + alpha)
            Qs[i,:] /= Qs[i,:].sum()
            Q_k += Qs[i,:]

    Q_k /= Q_k.sum()
    return Q_k

def predict_lang(tokens, alpha=1, numpasses=5, thresh1=1, thresh2=0.2):
    invocab_tokens = [w.lower() for w in tokens if w.lower() in w2num]
    # check that at least xx tokens are in vocabulary
    if len(invocab_tokens) < thresh1:
        return None  
    # check that at least yy% of tokens are in vocabulary
    elif len(invocab_tokens) / len(tokens) < thresh2:
        return None
    else:
        posterior = infer_cvb0(invocab_tokens, alpha=alpha, numpasses=numpasses)
        return posterior


In [3]:
# this loads the twitteraae model for detection
load_model()


We load the dataset 'labeled_data.csv', available at https://github.com/t-davidson/hate-speech-and-offensive-language, for convenience we copy it to this repo.

In [15]:
spacy_en = nlp
url = re.compile('(<url>.*</url>)')

def tokenize_en(text):
    return [tok.text for tok in spacy_en.tokenizer(url.sub('@URL@', text))]

In [16]:
# for cnn

labeled_data_path = "data/labeled_data.csv" # change path if needed

TEXT = data.Field(tokenize = tokenize_en, batch_first = True)
LABEL = data.LabelField(dtype = torch.long)
EXPERT = data.LabelField(dtype = torch.long)
GROUP = data.LabelField(dtype = torch.long)
EXPERTLABEL = data.LabelField(dtype = torch.long)

fields = [(None, None),(None, None),('expertlabel', EXPERTLABEL),('group', GROUP),('expert', EXPERT),
          ('label', LABEL), ('text', TEXT)]

train_data_orig = data.TabularDataset.splits(
                                        path = '',
                                        train = labeled_data_path,
                                        format = 'csv',
                                        fields = fields,
                                        skip_header = True)


Augment data with expert predictions and demographic data

In [18]:
# build expert data
all_data = train_data_orig[0]

p = 0.75 # expert probability of being correct for AA tweeet
q = 0.9 # expert probability of being correct for AA tweeet

# tracker variables for statistics
sum = 0
total = 0
i = 0
aa_frac = 0
for example in all_data:
    lang = predict_lang(vars(example)['text'])
    aa = 0
    try:
        if lang[0] >= 0.5:
            aa = 1
    except:
        print("error processing tweet: "+str(vars(example)['text']))
    label = vars(example)['label']
    exp = 0 # 0: expert wrong, 1: expert is right
    exp_label = 0
    if aa == 1: # if tweet is african american

        coin = np.random.binomial(1,p)
        if coin:
            exp =1 
            exp_label = np.longlong(label)
        else:
            exp_label = np.longlong(np.argmax(np.random.multinomial(1,[1/3]*3 , size=1)))
            exp =0
    else:
        coin = np.random.binomial(1,q)
        if coin:
            exp =1 # is right 90% of time
            exp_label = np.longlong(label)
        else:
            exp_label = np.longlong(np.argmax(np.random.multinomial(1,[1/3]*3 , size=1)))
            exp =0
    #if label =='2' : # 2: neither, 1: offensive, 0: hate speech
    #    aa = 1
    vars(all_data[i])['expertlabel'] = exp_label
    vars(all_data[i])['group'] = str(aa)
    vars(all_data[i])['expert'] = exp
    aa_frac += aa
    i += 1
    total +=1
    sum += exp
#print(sum/total)
#print(aa_frac/total)


error processing tweet: ['@Fulf_ShawnFulf', 'peckerwood']
error processing tweet: ['GA', 'Charlie', '@Charlie4927', '@sholzbee', '@DorisTafoya1', '@cat_lmbo', '@Jagauress', '@Walter_lars', '@lynnemrnp', '@AmyMek', '@Justin_Awe']
error processing tweet: ['RT', '@cenopant', ':', '(', 'she)s', '\n', 'bro(was)ken', '\n', 'bec(a)use', '\n', 's(side)he', '\n', 'beli(hoe)ved']


Build data for Pytorch and vectorize, this requires the glove.6b.100d embeddings which will be downloaded (862mb).

In [19]:

LABEL.build_vocab(all_data)
EXPERT.build_vocab(all_data)
GROUP.build_vocab(all_data)
EXPERTLABEL.build_vocab(all_data)
MAX_VOCAB_SIZE = 25_000

TEXT.build_vocab(all_data, 
                 max_size = MAX_VOCAB_SIZE, 
                 vectors = "glove.6B.100d", 
                 unk_init = torch.Tensor.normal_)



.vector_cache/glove.6B.zip: 862MB [02:58, 4.83MB/s]                             
100%|███████████████████████████████▉| 399999/400000 [00:06<00:00, 61883.93it/s]


In [20]:
print (len(all_data))

24783


In [21]:
lines = []
with open("data/labeled_data.csv") as f:
    lines = f.readlines()

len(lines)

26954

In [22]:
posts, labels, groups = [], [], []
for i in range(len(all_data)):
    posts.append(vars(all_data[i])['text'])
    labels.append(vars(all_data[i])['label'])
    groups.append(vars(all_data[i])['group'])
    
len(posts), len(groups), len(labels)

labelled_data = {"posts": posts, "labels": labels, "groups": groups}
np.save('output/labelled_data.npy', labelled_data)

Split the data for train, test and validation.

In [23]:

train_data, test_data, valid_data  = all_data.split(split_ratio=[0.6,0.1,0.3])

BATCH_SIZE = 64

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    sort = False,
    batch_size = BATCH_SIZE, 
    device = device)


The following code is of two parts:
1) the first part goes through our method and baselines to get results
2) the second combines all models to get std and confidence intervals, but need to go through the first part

# Build model
Model definitions for sentiment analysis adapted from https://github.com/bentrevett/pytorch-sentiment-analysis/blob/master/3%20-%20Faster%20Sentiment%20Analysis.ipynb

In [24]:
import torch.nn as nn
import torch.nn.functional as F


class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.conv_0 = nn.Conv2d(in_channels = 1, 
                                out_channels = n_filters, 
                                kernel_size = (filter_sizes[0], embedding_dim))
        
        self.conv_1 = nn.Conv2d(in_channels = 1, 
                                out_channels = n_filters, 
                                kernel_size = (filter_sizes[1], embedding_dim))
        
        self.conv_2 = nn.Conv2d(in_channels = 1, 
                                out_channels = n_filters, 
                                kernel_size = (filter_sizes[2], embedding_dim))
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved_0 = F.relu(self.conv_0(embedded).squeeze(3))
        conved_1 = F.relu(self.conv_1(embedded).squeeze(3))
        conved_2 = F.relu(self.conv_2(embedded).squeeze(3))
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
        
        pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2)
        pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
        pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
            
        return self.fc(cat)

class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
                
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.softmax = nn.Softmax()

    def forward(self, text):
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
                
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
        
        out = self.fc(cat)
        out = self.softmax(out)
        return out

class CNN_rej(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
                
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
        self.embedding_rej = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs_rej = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc_rej = nn.Linear(len(filter_sizes) * n_filters, 1)
        
        self.dropout_rej = nn.Dropout(dropout)
        
        self.softmax = nn.Softmax()

    def forward(self, text):
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
                
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        embedded_rej = self.embedding_rej(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded_rej = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved_rej = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs_rej]
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
                
        pooled_rej = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat_rej = self.dropout_rej(torch.cat(pooled, dim = 1))

        out_rej = self.fc_rej(cat_rej)
        #cat = [batch size, n_filters * len(filter_sizes)]
        
        out = self.fc(cat)
        out =  torch.cat((out, out_rej), 1)

        out = self.softmax(out)
        return out


In [25]:
# build model
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100 # fixed
N_FILTERS = 300 # hyperparameterr
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 4
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

#model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
model = CNN_rej(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 3, DROPOUT, PAD_IDX)

pretrained_embeddings = TEXT.vocab.vectors

model.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

## Train the Model

In [26]:
import torch.optim as optim

optimizer = optim.Adam(model.parameters())
model = model.to(device)


In [27]:

def categorical_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """
    max_preds = preds.argmax(dim = 1, keepdim = True) # get the index of the max probability
    correct = max_preds.squeeze(1).eq(y)
    return correct.sum() / torch.FloatTensor([y.shape[0]])
def reject_CrossEntropyLoss(outputs, m, labels, m2, n_classes):
    '''
    The L_{CE} loss implementation for hatespeech, identical to CIFAR implementation
    ----
    outputs: network outputs
    m: cost of deferring to expert cost of classifier predicting (alpha* I_{m\neq y} + I_{m =y})
    labels: target
    m2:  cost of classifier predicting (alpha* I_{m\neq y} + I_{m =y})
    n_classes: number of classes
    '''
    batch_size = outputs.size()[0]            # batch_size
    rc = [n_classes] * batch_size
    rc = torch.tensor(rc)
    outputs =  -m*torch.log2( outputs[range(batch_size), rc]) - m2*torch.log2(outputs[range(batch_size), labels])   # pick the values corresponding to the labels
    return torch.sum(outputs)/batch_size

def train_reject(model, iterator, optimizer,alpha):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        predictions = model(batch.text)
        batch_size = predictions.size()[0]
        # get expert predictions and costs 
        m = (1 - batch.expert)*1.0
        m2 = [1] * batch_size
        m2 = torch.tensor(m2)
        for j in range (0,batch_size):
            exp = m[j].item()
            if exp:
                m2[j] = alpha
            else:
                m2[j] = 1

        m2 = m2.to(device)

        loss = reject_CrossEntropyLoss(predictions, m, batch.label, m2, 3)

        acc = categorical_accuracy(predictions, batch.label)
        
        loss.backward()
        
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate_reject(model, iterator):
    
    epoch_loss = 0
    epoch_acc = 0
    model.eval()
    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch.text)
            batch_size = predictions.size()[0]            # batch_size
            m = 1 - batch.expert
            m2 = [1] * batch_size
            m2 = torch.tensor(m2)
            m2 = m2.to(device)
            loss = reject_CrossEntropyLoss(predictions, m, batch.label, m2, 3)
            acc = categorical_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [28]:
def metrics_print(net, loader):
    net.eval()
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    with torch.no_grad():
        for data in loader:
            outputs = net(data.text)
            _, predicted = torch.max(outputs.data, 1)
            batch_size = outputs.size()[0]            # batch_size
            for i in range(0,batch_size):
                r = (predicted[i].item() == 3)
                if r==0:
                    total += 1
                    correct += (predicted[i] == data.label[i]).item()
                    correct_sys += (predicted[i] == data.label[i]).item()
                if r==1:
                    exp += 1 - data.expert[i].item()
                    correct_sys += 1 - data.expert[i].item()
                    exp_total+=1
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)
    to_print={"coverage":cov, "system accuracy": 100*correct_sys/real_total, "expert accuracy":100* exp/(exp_total+0.0002),"classifier accuracy":100*correct/(total+0.0001), "alone classifier": 100*alone_correct/real_total }
    print(to_print)
    return [100*total/real_total,  100*correct_sys/real_total, 100* exp/(exp_total+0.0002),100*correct/(total+0.0001) ]

def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs
def metrics_print_fairness(net, loader):
    net.eval()
    group_1 = 0
    group_1_counts = 0
    group_0 = 0
    group_0_counts = 0

    with torch.no_grad():
        for data in loader:
            outputs = net(data.text)
            _, predicted = torch.max(outputs.data, 1)
            batch_size = outputs.size()[0]            # batch_size
            for i in range(0,batch_size):
                r = (predicted[i].item() == 3)
                prediction = 0
                if r==0:
                    prediction = predicted[i]
                if r==1:
                    prediction = data.expertlabel[i].item()

                if  data.group[i].item() == 0:
                    if data.label[i].item() == 2:
                        group_0_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_0 += 1
                else:
                    if data.label[i].item() == 2:
                        group_1_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_1 += 1


    to_print={"group0":group_0/(group_0_counts+0.0001), "group1": group_1/(group_1_counts+0.0001), "discrimination":group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001) }
    return [group_0/(group_0_counts+0.0001), group_1/(group_1_counts+0.0001), abs(group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001))]


Train the model by validation over alpha in [0,1] with steps of 0.1

In [29]:
import copy, time 
for i in range(0,11):
    model = CNN_rej(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 3, DROPOUT, PAD_IDX)

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)
    alpha = i/10
    N_EPOCHS = 5

    best_valid_loss = 0
    best_model = None
    for epoch in range(N_EPOCHS):

        start_time = time.time()
        train_loss, train_acc = train_reject(model, train_iterator, optimizer, alpha)
        #train_loss, train_acc = train_reject_bla(model, train_iterator, optimizer)

        #valid_loss, valid_acc = evaluate_reject(model, valid_iterator)
        #print( metrics_print(model,test_iterator))
        valid_loss = metrics_print(model,valid_iterator)[1]

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss >= best_valid_loss:
            best_valid_loss = valid_loss
            best_model = copy.deepcopy(model)
    

    print(metrics_print(best_model, valid_iterator))

  return self._call_impl(*args, **kwargs)


{'coverage': '0 out of2478', 'system accuracy': 83.4140435835351, 'expert accuracy': 83.41403685116732, 'classifier accuracy': 0.0, 'alone classifier': 0.0}
{'coverage': '152 out of2478', 'system accuracy': 84.34221146085552, 'expert accuracy': 83.74891799235444, 'classifier accuracy': 93.42099117040054, 'alone classifier': 0.0}
{'coverage': '42 out of2478', 'system accuracy': 83.85794995964487, 'expert accuracy': 83.62068278976332, 'classifier accuracy': 97.61881519329715, 'alone classifier': 0.0}
{'coverage': '61 out of2478', 'system accuracy': 83.69652945924132, 'expert accuracy': 83.40917803813173, 'classifier accuracy': 95.08181134129288, 'alone classifier': 0.0}
{'coverage': '57 out of2478', 'system accuracy': 83.89830508474576, 'expert accuracy': 83.60181052442705, 'classifier accuracy': 96.49105878761615, 'alone classifier': 0.0}
{'coverage': '152 out of2478', 'system accuracy': 84.34221146085552, 'expert accuracy': 83.74891799235444, 'classifier accuracy': 93.42099117040054, '

{'coverage': '92 out of2478', 'system accuracy': 83.85794995964487, 'expert accuracy': 83.52891169078696, 'classifier accuracy': 92.39120392260443, 'alone classifier': 0.0}
{'coverage': '232 out of2478', 'system accuracy': 84.38256658595641, 'expert accuracy': 83.52626148474965, 'classifier accuracy': 92.67237384811472, 'alone classifier': 0.0}
{'coverage': '232 out of2478', 'system accuracy': 84.38256658595641, 'expert accuracy': 83.52626148474965, 'classifier accuracy': 92.67237384811472, 'alone classifier': 0.0}
[9.362389023405973, 84.38256658595641, 83.52626148474965, 92.67237384811472]
{'coverage': '0 out of2478', 'system accuracy': 83.4140435835351, 'expert accuracy': 83.41403685116732, 'classifier accuracy': 0.0, 'alone classifier': 0.0}
{'coverage': '2 out of2478', 'system accuracy': 83.4140435835351, 'expert accuracy': 83.40063946683041, 'classifier accuracy': 99.99500024998748, 'alone classifier': 0.0}
{'coverage': '7 out of2478', 'system accuracy': 83.4140435835351, 'expert 

In [30]:
metrics_print_fairness(best_model, test_iterator)

[0.3222890595514881, 0.6363630578517656, 0.3140739983002775]

In [31]:
metrics_print(best_model, test_iterator)

{'coverage': '4820 out of7435', 'system accuracy': 92.03765971755212, 'expert accuracy': 85.77437202490462, 'classifier accuracy': 95.43568266730948, 'alone classifier': 0.0}


[64.8285137861466, 92.03765971755212, 85.77437202490462, 95.43568266730948]

# Baseline: Confidence 

In [32]:
class CNN_(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
                
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        

    def forward(self, text):
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
                
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
        
        out = self.fc(cat)
        return out

## expert model

In [33]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 300
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 2
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model_expert = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 2, DROPOUT, PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors

model_expert.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model_expert.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model_expert.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [34]:
import torch.optim as optim

optimizer = optim.Adam(model_expert.parameters())
criterion = nn.CrossEntropyLoss()

model_expert = model_expert.to(device)
criterion = criterion.to(device)

In [35]:
def train_expert(model_exp, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model_exp.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        
        predictions = model_exp(batch.text)

        
        loss = criterion(predictions, batch.expert)
        
        acc = categorical_accuracy(predictions, batch.expert)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

def evaluate_expert(model_exp, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    model_exp.eval()
    with torch.no_grad():
        for batch in iterator:
            predictions = model_exp(batch.text)
            loss = criterion(predictions, batch.expert)
            acc = categorical_accuracy(predictions, batch.expert)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [36]:
N_EPOCHS = 10

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train_expert(model_expert, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate_expert(model_expert, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model_expert.state_dict(), 'tut3-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 6s
	Train Loss: 0.460 | Train Acc: 84.05%
	 Val. Loss: 0.457 |  Val. Acc: 83.28%
Epoch: 02 | Epoch Time: 0m 6s
	Train Loss: 0.416 | Train Acc: 84.42%
	 Val. Loss: 0.470 |  Val. Acc: 83.40%
Epoch: 03 | Epoch Time: 0m 6s
	Train Loss: 0.357 | Train Acc: 85.53%
	 Val. Loss: 0.479 |  Val. Acc: 82.64%
Epoch: 04 | Epoch Time: 0m 6s
	Train Loss: 0.292 | Train Acc: 88.09%
	 Val. Loss: 0.538 |  Val. Acc: 78.81%
Epoch: 05 | Epoch Time: 0m 6s
	Train Loss: 0.221 | Train Acc: 91.23%
	 Val. Loss: 0.621 |  Val. Acc: 81.01%
Epoch: 06 | Epoch Time: 0m 6s
	Train Loss: 0.154 | Train Acc: 94.25%
	 Val. Loss: 0.683 |  Val. Acc: 79.05%
Epoch: 07 | Epoch Time: 0m 6s
	Train Loss: 0.121 | Train Acc: 95.59%
	 Val. Loss: 0.802 |  Val. Acc: 79.75%
Epoch: 08 | Epoch Time: 0m 6s
	Train Loss: 0.095 | Train Acc: 96.90%
	 Val. Loss: 0.877 |  Val. Acc: 78.71%
Epoch: 09 | Epoch Time: 0m 6s
	Train Loss: 0.080 | Train Acc: 97.42%
	 Val. Loss: 0.997 |  Val. Acc: 77.19%
Epoch: 10 | Epoch Time: 0m 6

## classifier model

In [37]:
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 300
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 3
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model_class = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
pretrained_embeddings = TEXT.vocab.vectors

model_class.embedding.weight.data.copy_(pretrained_embeddings)
UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

model_class.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model_class.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
import torch.optim as optim

optimizer = optim.Adam(model_class.parameters())
criterion = nn.CrossEntropyLoss()

model_class = model_class.to(device)
criterion = criterion.to(device)
def train(model_class, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model_class.train()
    
    for batch in iterator:
        
        optimizer.zero_grad()
        predictions = model_class(batch.text)
        loss = criterion(predictions, batch.label)
        
        acc = categorical_accuracy(predictions, batch.label)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model_class, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0
    model_class.eval()
    with torch.no_grad():
        for batch in iterator:
            predictions = model_class(batch.text)
            loss = criterion(predictions, batch.label)
            acc = categorical_accuracy(predictions, batch.label)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)    

In [38]:
N_EPOCHS = 10

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()
    
    train_loss, train_acc = train(model_class, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model_class, valid_iterator, criterion)
    
    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model_class.state_dict(), 'tut3-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 6s
	Train Loss: 0.449 | Train Acc: 83.68%
	 Val. Loss: 0.339 |  Val. Acc: 87.94%
Epoch: 02 | Epoch Time: 0m 6s
	Train Loss: 0.272 | Train Acc: 90.47%
	 Val. Loss: 0.319 |  Val. Acc: 88.86%
Epoch: 03 | Epoch Time: 0m 6s
	Train Loss: 0.199 | Train Acc: 93.21%
	 Val. Loss: 0.358 |  Val. Acc: 88.98%
Epoch: 04 | Epoch Time: 0m 6s
	Train Loss: 0.145 | Train Acc: 94.85%
	 Val. Loss: 0.372 |  Val. Acc: 88.84%
Epoch: 05 | Epoch Time: 0m 6s
	Train Loss: 0.101 | Train Acc: 96.55%
	 Val. Loss: 0.374 |  Val. Acc: 89.34%
Epoch: 06 | Epoch Time: 0m 6s
	Train Loss: 0.065 | Train Acc: 97.94%
	 Val. Loss: 0.408 |  Val. Acc: 88.83%
Epoch: 07 | Epoch Time: 0m 6s
	Train Loss: 0.052 | Train Acc: 98.52%
	 Val. Loss: 0.441 |  Val. Acc: 88.49%
Epoch: 08 | Epoch Time: 0m 6s
	Train Loss: 0.040 | Train Acc: 98.80%
	 Val. Loss: 0.505 |  Val. Acc: 88.43%
Epoch: 09 | Epoch Time: 0m 6s
	Train Loss: 0.032 | Train Acc: 99.13%
	 Val. Loss: 0.528 |  Val. Acc: 88.60%
Epoch: 10 | Epoch Time: 0m 6

In [39]:
from scipy.special import softmax


def metrics_print_confid(net_class, net_exp, loader):
    net_class.eval()
    net_exp.eval()
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            outputs_exp = net_exp(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                arr = [outputs_class.data[i][0].item(),outputs_class.data[i][1].item(),outputs_class.data[i][2].item()]
                arr = softmax(arr)
                r_score = 1 - np.max(arr) #outputs_class.data[i][predicted[i].item()].item()
                arr_exp = [outputs_exp.data[i][0].item(),outputs_exp.data[i][1].item()]
                arr_exp = softmax(arr_exp)
                r_score = r_score - arr_exp[1]
                r = 0
                if r_score >= 0:
                    r = 1
                if r==0:
                    total += 1
                    correct += (predicted[i] == data.label[i]).item()
                    correct_sys += (predicted[i] == data.label[i]).item()
                if r==1:
                    exp += 1 - data.expert[i].item()
                    correct_sys += 1 - data.expert[i].item()
                    exp_total+=1
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)
    to_print={"coverage":cov, "system accuracy": 100*correct_sys/real_total, "expert accuracy":100* exp/(exp_total+0.0002),"classifier accuracy":100*correct/(total+0.0001), "alone classifier": 100*alone_correct/real_total }
    print(to_print)
    return [100*total/real_total,  100*correct_sys/real_total, 100* exp/(exp_total+0.0002),100*correct/(total+0.0001) ]


def metrics_print_confid_fairness(net_class, net_exp, loader):
    net_class.eval()
    net_exp.eval()
    group_1 = 0
    group_1_counts = 0
    group_0 = 0
    group_0_counts = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            outputs_exp = net_exp(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                arr = [outputs_class.data[i][0].item(),outputs_class.data[i][1].item(),outputs_class.data[i][2].item()]
                arr = softmax(arr)
                r_score = 1 - np.max(arr) #outputs_class.data[i][predicted[i].item()].item()
                arr_exp = [outputs_exp.data[i][0].item(),outputs_exp.data[i][1].item()]
                arr_exp = softmax(arr_exp)
                r_score = r_score - arr_exp[1]
                r = 0
                if r_score >= 0:
                    r = 1
                prediction = 0
                if r==0:
                    prediction = predicted[i]
                if r==1:
                    prediction = data.expertlabel[i].item()
                    
                if  data.group[i].item() == 0:
                    if data.label[i].item() == 2:
                        group_0_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_0 += 1
                else:
                    if data.label[i].item() == 2:
                        group_1_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_1 += 1
    print(group_1_counts)
    print(group_0_counts)

    to_print={"group0":group_0/(group_0_counts+0.0001), "group1": group_1/(group_1_counts+0.0001), "discrimination":group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001) }
    print(to_print)
    return [group_0/(group_0_counts+0.0001), group_1/(group_1_counts+0.0001), abs(group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001))]


In [40]:
metrics_print_confid(model_class, model_expert,test_iterator)

{'coverage': '4766 out of7435', 'system accuracy': 91.44586415601883, 'expert accuracy': 86.0621891298472, 'classifier accuracy': 94.46076176120935, 'alone classifier': 0.0}


[64.10221923335575, 91.44586415601883, 86.0621891298472, 94.46076176120935]

In [41]:
print(metrics_print_confid_fairness(model_class, model_expert,test_iterator))

110
332
{'group0': 0.3162649649804323, 'group1': 0.572726752066589, 'discrimination': -0.2564617870861567}
[0.3162649649804323, 0.572726752066589, 0.2564617870861567]


# Oracle Baseline

In [42]:
def metrics_print_oracle(net_class, loader):
    # prints classification metrics for Oracle baseline
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                r = 0
                arr = [outputs_class.data[i][0].item(),outputs_class.data[i][1].item(),outputs_class.data[i][2].item()]
                arr = softmax(arr)
                #r = (data.group[i].item() == 0)
                if data.group[i].item() == 0:
                    if np.max(arr) <= 0.90:
                        r = 1
                else:
                    if np.max(arr) <= 0.75:
                        r = 1
                if r==0:
                    total += 1
                    correct += (predicted[i] == data.label[i]).item()
                    correct_sys += (predicted[i] == data.label[i]).item()
                if r==1:
                    exp += 1 - data.expert[i].item()
                    correct_sys += 1 - data.expert[i].item()
                    exp_total+=1
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)
    to_print={"coverage":cov, "system accuracy": 100*correct_sys/real_total, "expert accuracy":100* exp/(exp_total+0.0002),"classifier accuracy":100*correct/(total+0.0001), "alone classifier": 100*alone_correct/real_total }
    print(to_print)
    return [100*total/real_total,  100*correct_sys/real_total, 100* exp/(exp_total+0.0002),100*correct/(total+0.0001) ]

def metrics_print_oracle_fairness(net_class, loader):
    net_class.eval()
    group_1 = 0
    group_1_counts = 0
    group_0 = 0
    group_0_counts = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                r = 0
                arr = [outputs_class.data[i][0].item(),outputs_class.data[i][1].item(),outputs_class.data[i][2].item()]
                arr = softmax(arr)
                #r = (data.group[i].item() == 0)
                if data.group[i].item() == 0:
                    if np.max(arr) <= 0.90:
                        r = 1
                else:
                    if np.max(arr) <= 0.75:
                        r = 1                
                if r==0:
                    prediction = predicted[i]
                if r==1:
                    prediction = data.expertlabel[i].item()
                    
                if  data.group[i].item() == 0:
                    if data.label[i].item() == 2:
                        group_0_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_0 += 1
                else:
                    if data.label[i].item() == 2:
                        group_1_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_1 += 1

    to_print={"group0":group_0/(group_0_counts+0.0001), "group1": group_1/(group_1_counts+0.0001), "discrimination":group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001) }
    return [group_0/(group_0_counts+0.0001), group_1/(group_1_counts+0.0001), abs(group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001))]


In [43]:
def metrics_print_classifier(net_class, loader):
    # print classification metrics of the classifier alone on all the dataset
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                total += 1
                correct += (predicted[i] == data.label[i]).item()
                correct_sys += (predicted[i] == data.label[i]).item()
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)
    to_print={"coverage":cov, "system accuracy": 100*correct_sys/real_total, "expert accuracy":100* exp/(exp_total+0.0002),"classifier accuracy":100*correct/(total+0.0001), "alone classifier": 100*alone_correct/real_total }
    print(to_print)
    return [100*total/real_total,  100*correct_sys/real_total, 100* exp/(exp_total+0.0002),100*correct/(total+0.0001) ]


def metrics_print_classifier_fairness(net_class, loader):
    # print fairness metrics of the classifier alone on all the dataset
    net_class.eval()
    group_1 = 0
    group_1_counts = 0
    group_0 = 0
    group_0_counts = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                prediction = predicted[i]
                if  data.group[i].item() == 0:
                    if data.label[i].item() == 2:
                        group_0_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_0 += 1
                else:
                    if data.label[i].item() == 2:
                        group_1_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_1 += 1

    to_print={"group0":group_0/(group_0_counts+0.0001), "group1": group_1/(group_1_counts+0.0001), "discrimination":group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001) }
    print(to_print)
    return [group_0/(group_0_counts+0.0001), group_1/(group_1_counts+0.0001), abs(group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001))]


In [44]:

def metrics_print_expert_fairness( loader):
    # print fairness metrics of the expert on all the dataset
    group_1 = 0
    group_1_counts = 0
    group_0 = 0
    group_0_counts = 0
    with torch.no_grad():
        for data in loader:
            batch_size =len(data)            # batch_size
            for i in range(0,batch_size):
                prediction = data.expertlabel[i].item()
                    
                if  data.group[i].item() == 0:
                    if data.label[i].item() == 2:
                        group_0_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_0 += 1
                else:
                    if data.label[i].item() == 2:
                        group_1_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_1 += 1

    to_print={"group0":group_0/(group_0_counts+0.0001), "group1": group_1/(group_1_counts+0.0001), "discrimination":group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001) }
    print(to_print)
    return [group_0/(group_0_counts+0.0001), group_1/(group_1_counts+0.0001), abs(group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001))]


In [45]:
metrics_print_expert_fairness( test_iterator)

{'group0': 0.07831322942372608, 'group1': 0.11818171074389933, 'discrimination': -0.039868481320173246}


[0.07831322942372608, 0.11818171074389933, 0.039868481320173246]

In [46]:
metrics_print_classifier_fairness(model_class, test_iterator)

{'group0': 0.7108431593845906, 'group1': 0.881817380166018, 'discrimination': -0.17097422078142732}


[0.7108431593845906, 0.881817380166018, 0.17097422078142732]

In [47]:
metrics_print_oracle(model_class,test_iterator)

{'coverage': '6714 out of7435', 'system accuracy': 91.31136516476127, 'expert accuracy': 86.40774302143052, 'classifier accuracy': 91.83794918322982, 'alone classifier': 0.0}


[90.30262273032952, 91.31136516476127, 86.40774302143052, 91.83794918322982]

In [48]:
metrics_print_oracle_fairness(model_class,test_iterator)

[0.5512046532516105, 0.7999992727279338, 0.2487946194763233]

# Experiment: repeat data building for each expert type
the following repeats the above code many times to obtain error bars

In [50]:
import numpy as np
# build expert data
all_data = train_data_orig[0]
p = 0.75 # expert probability of being correct for AA tweeet
q = 0.9 # expert probability of being correct for AA tweeet
sum = 0
total = 0
i = 0
aa_frac = 0
for example in all_data:
    lang = predict_lang(vars(example)['text'])
    aa = 0
    try:
        if lang[0] >= 0.5:
            aa = 1
    except:
        print(vars(example)['text'])
    label = vars(example)['label']
    exp = 0 # 0: expert wrong, 1: expert is right
    exp_label = 0
    if aa == 1: # if tweet is african american
        #if label == '2':
        #    exp = 0 # never predict neither
        #else:
        coin = np.random.binomial(1,p) 
        if coin:
            exp =1 # is right 90% of time
            exp_label = np.longlong(label)
        else:
            exp_label = np.longlong(np.argmax(np.random.multinomial(1,[1/3]*3 , size=1)))
            exp =0
    else:
        coin = np.random.binomial(1,q)
        if coin:
            exp =1 # is right 90% of time
            exp_label = np.longlong(label)
        else:
            exp_label = np.longlong(np.argmax(np.random.multinomial(1,[1/3]*3 , size=1)))
            exp =0
    #if label =='2' : # 2: neither, 1: offensive, 0: hate speech
    #    aa = 1
    vars(all_data[i])['expertlabel'] = exp_label
    vars(all_data[i])['group'] = str(aa)
    vars(all_data[i])['expert'] = exp
    aa_frac += aa
    i += 1
    total +=1
    sum += exp



['@Fulf_ShawnFulf', 'peckerwood']
['GA', 'Charlie', '@Charlie4927', '@sholzbee', '@DorisTafoya1', '@cat_lmbo', '@Jagauress', '@Walter_lars', '@lynnemrnp', '@AmyMek', '@Justin_Awe']
['RT', '@cenopant', ':', '(', 'she)s', '\n', 'bro(was)ken', '\n', 'bec(a)use', '\n', 's(side)he', '\n', 'beli(hoe)ved']


In [51]:
train_data, test_data, valid_data  = all_data.split(split_ratio=[0.7,0.2,0.1])

BATCH_SIZE = 64

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    sort = False,
    batch_size = BATCH_SIZE, 
    device = device)

In [52]:
# needs to be changed for each expert model
def metrics_print_oracle(net_class, loader):
    correct = 0
    correct_sys = 0
    exp = 0
    exp_total = 0
    total = 0
    real_total = 0
    alone_correct = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                r = 0
                arr = [outputs_class.data[i][0].item(),outputs_class.data[i][1].item(),outputs_class.data[i][2].item()]
                arr = softmax(arr)
                #r = (data.group[i].item() == 0)
                if data.group[i].item() == 0:
                    if np.max(arr) <= q:
                        r = 1
                else:
                    if np.max(arr) <= p:
                        r = 1
                if r==0:
                    total += 1
                    correct += (predicted[i] == data.label[i]).item()
                    correct_sys += (predicted[i] == data.label[i]).item()
                if r==1:
                    exp += 1 - data.expert[i].item()
                    correct_sys += 1 - data.expert[i].item()
                    exp_total+=1
                real_total += 1
    cov = str(total) + str(" out of") + str(real_total)
    to_print={"coverage":cov, "system accuracy": 100*correct_sys/real_total, "expert accuracy":100* exp/(exp_total+0.0002),"classifier accuracy":100*correct/(total+0.0001), "alone classifier": 100*alone_correct/real_total }
    return [100*total/real_total,  100*correct_sys/real_total, 100* exp/(exp_total+0.0002),100*correct/(total+0.0001) ]

def metrics_print_oracle_fairness(net_class, loader):
    net_class.eval()
    group_1 = 0
    group_1_counts = 0
    group_0 = 0
    group_0_counts = 0
    with torch.no_grad():
        for data in loader:
            outputs_class = net_class(data.text)
            _, predicted = torch.max(outputs_class.data, 1)
            batch_size = outputs_class.size()[0]            # batch_size
            for i in range(0,batch_size):
                r = 0
                arr = [outputs_class.data[i][0].item(),outputs_class.data[i][1].item(),outputs_class.data[i][2].item()]
                arr = softmax(arr)
                #r = (data.group[i].item() == 0)
                if data.group[i].item() == 0:
                    if np.max(arr) <= q:
                        r = 1
                else:
                    if np.max(arr) <= p:
                        r = 1                
                
                if r==0:
                    prediction = predicted[i]
                if r==1:
                    prediction = data.expertlabel[i].item()
                    
                if  data.group[i].item() == 0:
                    if data.label[i].item() == 2:
                        group_0_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_0 += 1
                else:
                    if data.label[i].item() == 2:
                        group_1_counts += 1
                        if prediction == 1 or prediction ==0:
                            group_1 += 1

    to_print={"group0":group_0/(group_0_counts+0.0001), "group1": group_1/(group_1_counts+0.0001), "discrimination":group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001) }
    return [group_0/(group_0_counts+0.0001), group_1/(group_1_counts+0.0001), abs(group_0/(group_0_counts+0.0001)- group_1/(group_1_counts+0.0001))]


In [53]:
exp_conf = []
exp_rej = []
exp_ora = []
exp_conf_fairness = []
exp_rej_fairness = []
exp_ora_fairness = []
max_trials = 1
for exp in range(0,max_trials):
    train_data, test_data, valid_data  = all_data.split(split_ratio=[0.6,0.1,0.3])

    BATCH_SIZE = 64

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, valid_data, test_data), 
        sort = False,
        batch_size = BATCH_SIZE, 
        device = device)
    ##################################################################################################
    ##################################################################################################
    # baseline confidence
    ##################################################################################################
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    N_FILTERS = 300
    FILTER_SIZES = [3,4,5]
    OUTPUT_DIM = 2
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model_expert = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 2, DROPOUT, PAD_IDX)
    pretrained_embeddings = TEXT.vocab.vectors

    model_expert.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model_expert.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model_expert.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
    optimizer = optim.Adam(model_expert.parameters())
    criterion = nn.CrossEntropyLoss()

    model_expert = model_expert.to(device)
    criterion = criterion.to(device)
    N_EPOCHS = 5
    for epoch in range(N_EPOCHS):

        start_time = time.time()

        train_loss, train_acc = train_expert(model_expert, train_iterator, optimizer, criterion)
        #valid_loss, valid_acc = evaluate_expert(model_expert, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # classifier
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    N_FILTERS = 300
    FILTER_SIZES = [3,4,5]
    OUTPUT_DIM = 3
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model_class = CNN_(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
    pretrained_embeddings = TEXT.vocab.vectors

    model_class.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model_class.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model_class.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

    optimizer = optim.Adam(model_class.parameters())
    criterion = nn.CrossEntropyLoss()

    model_class = model_class.to(device)
    criterion = criterion.to(device)
    N_EPOCHS = 5
    for epoch in range(N_EPOCHS):

        start_time = time.time()

        train_loss, train_acc = train(model_class, train_iterator, optimizer, criterion)
        #valid_loss, valid_acc = evaluate(model_class, valid_iterator, criterion)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    ####################################
    print("Baseline")
    
    conf = metrics_print_confid(model_class, model_expert,test_iterator)
    exp_conf.append(conf)
    conf = metrics_print_confid_fairness(model_class, model_expert,test_iterator)
    exp_conf_fairness.append(conf)
    ##################################################################################################
    # my method 
    ##################################################################################################
    INPUT_DIM = len(TEXT.vocab)
    EMBEDDING_DIM = 100
    N_FILTERS = 1000
    FILTER_SIZES = [3,4,5]
    OUTPUT_DIM = 4
    DROPOUT = 0.5
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, 4, DROPOUT, PAD_IDX)

    pretrained_embeddings = TEXT.vocab.vectors

    model.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]

    model.embedding.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
    model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)

    N_EPOCHS = 15

    best_valid_loss = 0
    best_model = None
    for epoch in range(N_EPOCHS):
        start_time = time.time()
        train_loss, train_acc = train_reject(model, train_iterator, optimizer, 1)
        #train_loss, train_acc = train_reject_bla(model, train_iterator, optimizer)

        #valid_loss, valid_acc = evaluate_reject(model, valid_iterator)
        valid_loss = metrics_print(model,valid_iterator)[1]

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss >= best_valid_loss:
            best_valid_loss = valid_loss
            best_model = copy.deepcopy(model)
    
    print("Our method")
    rej = metrics_print(best_model, test_iterator)
    exp_rej.append(rej)
    print(rej)
    rej = metrics_print_fairness(best_model, test_iterator)
    exp_rej_fairness.append(rej)
    ##############################################################################################
    # ORACLE
    ora = metrics_print_oracle(model_class, test_iterator)
    print(ora)
    exp_ora.append(ora)
    ora = metrics_print_oracle_fairness(model_class, test_iterator)
    exp_ora_fairness.append(ora)


Baseline
{'coverage': '4906 out of7435', 'system accuracy': 93.49024882313383, 'expert accuracy': 88.41438604868438, 'classifier accuracy': 96.1068060312514, 'alone classifier': 0.0}
110
332
{'group0': 0.21385535727248275, 'group1': 0.618181256198858, 'discrimination': -0.40432589892637527}


  return self._call_impl(*args, **kwargs)


{'coverage': '1567 out of2478', 'system accuracy': 91.76755447941889, 'expert accuracy': 86.3885650079989, 'classifier accuracy': 94.89469719880682, 'alone classifier': 0.0}
{'coverage': '1744 out of2478', 'system accuracy': 92.25181598062954, 'expert accuracy': 86.78471749735219, 'classifier accuracy': 94.55274687197553, 'alone classifier': 0.0}
{'coverage': '1003 out of2478', 'system accuracy': 88.05488297013721, 'expert accuracy': 84.06778521114776, 'classifier accuracy': 93.91823590047498, 'alone classifier': 0.0}
{'coverage': '1243 out of2478', 'system accuracy': 91.04116222760291, 'expert accuracy': 85.7489739677775, 'classifier accuracy': 96.29926819796717, 'alone classifier': 0.0}
{'coverage': '715 out of2478', 'system accuracy': 87.32849071832122, 'expert accuracy': 84.17469266310917, 'classifier accuracy': 95.10488180351304, 'alone classifier': 0.0}
{'coverage': '1682 out of2478', 'system accuracy': 90.79903147699758, 'expert accuracy': 84.4220893411836, 'classifier accuracy'

# Confidence metrics

In [54]:
import numpy as np, scipy.stats as st

In [55]:
metrics_class = ["coverage", "system accuracy", "expert accuracy", "classifier accuracy"]
metrics_fairness = ["FPR for group 0", "FPR for group 1", "discrimination"]

In [56]:
print("Results for Confidence Baseline")
for i in range(0,4):
    print("----")
    print("For " + metrics_class[i])
    arr = [0] * max_trials
    for j in range(0,max_trials):
        arr[j] = exp_conf[j][i]
    print("average: " +str(np.average(arr)))
    print("std: " + str(np.std(arr)))
    print("95 confidence interval: " + str(st.t.interval(0.95, len(arr)-1, loc=np.mean(arr), scale=st.sem(arr))))
print("#############################")

for i in range(0,3):
    print("-----")
    print("For " + metrics_fairness[i])
    arr = [0] * max_trials
    for j in range(0,max_trials):
        arr[j] = exp_conf_fairness[j][i]
    print("average: " +str(np.average(arr)))
    print("std: " + str(np.std(arr)))
    print("95 confidence interval: " + str(st.t.interval(0.95, len(arr)-1, loc=np.mean(arr), scale=st.sem(arr))))


Results for Confidence Baseline
----
For coverage
average: 65.98520511096167
std: 0.0
95 confidence interval: (nan, nan)
----
For system accuracy
average: 93.49024882313383
std: 0.0
95 confidence interval: (nan, nan)
----
For expert accuracy
average: 88.41438604868438
std: 0.0
95 confidence interval: (nan, nan)
----
For classifier accuracy
average: 96.1068060312514
std: 0.0
95 confidence interval: (nan, nan)
#############################
-----
For FPR for group 0
average: 0.21385535727248275
std: 0.0
95 confidence interval: (nan, nan)
-----
For FPR for group 1
average: 0.618181256198858
std: 0.0
95 confidence interval: (nan, nan)
-----
For discrimination
average: 0.40432589892637527
std: 0.0
95 confidence interval: (nan, nan)


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  ret = ret.dtype.type(ret / rcount)


# Oracle baseline

In [57]:
print("Results for Oracle Baseline")
for i in range(0,4):
    print("----")
    print("For " + metrics_class[i])
    arr = [0] * max_trials
    for j in range(0,max_trials):
        arr[j] = exp_ora[j][i]
    print("average: " +str(np.average(arr)))
    print("std: " + str(np.std(arr)))
    print("95 confidence interval: " + str(st.t.interval(0.95, len(arr)-1, loc=np.mean(arr), scale=st.sem(arr))))
print("#############################")

for i in range(0,3):
    print("-----")
    print("For " + metrics_fairness[i])
    arr = [0] * max_trials
    for j in range(0,max_trials):
        arr[j] = exp_ora_fairness[j][i]
    print("average: " +str(np.average(arr)))
    print("std: " + str(np.std(arr)))
    print("95 confidence interval: " + str(st.t.interval(0.95, len(arr)-1, loc=np.mean(arr), scale=st.sem(arr))))


Results for Oracle Baseline
----
For coverage
average: 83.53732347007397
std: 0.0
95 confidence interval: (nan, nan)
----
For system accuracy
average: 93.35574983187627
std: 0.0
95 confidence interval: (nan, nan)
----
For expert accuracy
average: 90.11436436039799
std: 0.0
95 confidence interval: (nan, nan)
----
For classifier accuracy
average: 93.99452432789366
std: 0.0
95 confidence interval: (nan, nan)
#############################
-----
For FPR for group 0
average: 0.39759024168968626
std: 0.0
95 confidence interval: (nan, nan)
-----
For FPR for group 1
average: 0.8545446776866567
std: 0.0
95 confidence interval: (nan, nan)
-----
For discrimination
average: 0.4569544359969704
std: 0.0
95 confidence interval: (nan, nan)


# Our method

In [58]:
print("Results for our method L_{CE}")

for i in range(0,4):
    print("----")
    print("For " + metrics_class[i])
    arr = [0] * max_trials
    for j in range(0,max_trials):
        arr[j] = exp_rej[j][i]
    print("average: " +str(np.average(arr)))
    print("std: " + str(np.std(arr)))
    print("95 confidence interval: " + str(st.t.interval(0.95, len(arr)-1, loc=np.mean(arr), scale=st.sem(arr))))
print("#############################")

for i in range(0,3):
    print("-----")
    print("For " + metrics_fairness[i])
    arr = [0] * max_trials
    for j in range(0,max_trials):
        arr[j] = exp_rej_fairness[j][i]
    print("average: " +str(np.average(arr)))
    print("std: " + str(np.std(arr)))
    print("95 confidence interval: " + str(st.t.interval(0.95, len(arr)-1, loc=np.mean(arr), scale=st.sem(arr))))


Results for our method L_{CE}
----
For coverage
average: 71.52656355077337
std: 0.0
95 confidence interval: (nan, nan)
----
For system accuracy
average: 92.99260255548083
std: 0.0
95 confidence interval: (nan, nan)
----
For expert accuracy
average: 88.04911780357885
std: 0.0
95 confidence interval: (nan, nan)
----
For classifier accuracy
average: 94.96050968483434
std: 0.0
95 confidence interval: (nan, nan)
#############################
-----
For FPR for group 0
average: 0.33132520140807187
std: 0.0
95 confidence interval: (nan, nan)
-----
For FPR for group 1
average: 0.7909083719014801
std: 0.0
95 confidence interval: (nan, nan)
-----
For discrimination
average: 0.45958317049340824
std: 0.0
95 confidence interval: (nan, nan)
