In [141]:
import os
from os.path import join as oj
import sys, time
sys.path.insert(1, oj(sys.path[0], '..'))  # insert parent path
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm, tqdm_notebook
from copy import deepcopy
import pickle as pkl
import pandas as pd
sys.path.append('../models')
sys.path.append('../fit')

from model import LSTMSentiment
from torchtext import data
from torchtext import datasets
import torch
import cd
import pandas as pd

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Load dataset and model

In [142]:
# data params
vector_cache = '../data/.vector_cache/input_vectors.pt'
word_vectors = 'glove.6B.300d'
batch_size = 50
# One device choice shared by the iterators and the model below, so the
# notebook also runs on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch_device = torch.device(device)


# load dset
inputs = data.Field(lower=True)
answers = data.Field(sequential=False, unk_token=None)
# Binary sentiment only: examples labelled 'neutral' are filtered out.
train, dev, test = datasets.SST.splits(
    inputs, answers, fine_grained=False, train_subtrees=False,
    filter_pred=lambda ex: ex.label != 'neutral')
inputs.build_vocab(train, dev, test)

# Reuse the cached word vectors when the cache file exists; otherwise load
# them and write the cache for the next run.
if os.path.isfile(vector_cache):
    inputs.vocab.vectors = torch.load(vector_cache)
else:
    inputs.vocab.load_vectors(word_vectors)
    os.makedirs(os.path.dirname(vector_cache), exist_ok=True)
    torch.save(inputs.vocab.vectors, vector_cache)
answers.build_vocab(train)

train_iter, dev_iter, test_iter = data.BucketIterator.splits(
    (train, dev, test),
    batch_size=batch_size,
    device=torch_device,
    sort_key=lambda x: len(x.text),  # the BucketIterator needs to be told what function it should use to group the data.
    sort_within_batch=True,
    shuffle=True,
    sort=False,
    repeat=False)

# load model
model_path = "../models/init_models"
# os.listdir returns entries in arbitrary order; sort (and skip dot-files) so
# that model_list[0] names the same file on every run and machine.
model_list = sorted(name for name in os.listdir(model_path) if not name.startswith('.'))
if not model_list:
    raise FileNotFoundError('no model files found in ' + model_path)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
model1 = torch.load(os.path.join(model_path, model_list[0]), map_location=torch_device).eval()

#model2 = torch.load(os.path.join(model_path, model_list[1]), map_location=torch_device).eval()



# Evaluate CD scores at the word level

In [143]:
# Sum of the `text` lengths over every example in the training split.
tot_length = sum(len(train[idx].text) for idx in range(len(train)))

In [144]:
# Mean `text` length per training example (total length / number of examples).
tot_length / len(train)

19.29985549132948

In [145]:
# Pull one batch explicitly so this cell does not depend on a `batch`
# variable left over from a loop in another cell (fails on a fresh kernel).
batch = next(iter(train_iter))
# The scoring loop further down treats this shape as num_words x batch_size.
batch.text.shape

torch.Size([16, 50])

In [146]:
# Display the total `text` length summed over the training examples.
tot_length

133555

In [147]:
# Average first dimension of `batch.text` over one pass of the training iterator.
it = train_iter
len_list = []
for batch in it:
    # record the first dimension of this batch's text tensor
    len_list.append(batch.text.shape[0])
num_batches = len(len_list)
tot_length = sum(len_list)
print(tot_length / num_batches)

19.776978417266186


In [159]:
# choose hyperparams
it = train_iter
m = model1

# what to store: word -> (occurrence count, running sum of positive-class scores)
words = {}

it.init_epoch()
# NOTE(review): nothing in this cell uses these two imports; they are kept
# only in case later cells rely on the names. Prefer moving them to the
# import cell at the top of the notebook.
import torch.optim as O
import torch.nn as nn

# remember batches are num_words x batch_size
# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    for batch in tqdm_notebook(it):
        # NOTE(review): this forward pass result is never used in this cell;
        # it is kept in case cd.cd_batch_text relies on state it sets --
        # confirm and drop if not.
        answer1 = m(batch)
        num_words = batch.text.shape[0]
        # The per-batch size is intentionally NOT stored in `batch_size`:
        # that name holds the configured batch size used to build the iterators.

        # NOTE(review): the last position (num_words - 1) is never scored;
        # confirm this is intended given how cd.cd_batch_text treats `stop`.
        for word_num in range(num_words - 1):
            # vocab ids of the word at this position for every example in the batch
            word_ids = batch.text[word_num].tolist()

            # get cd scores for each word
            rel, _ = cd.cd_batch_text(batch, m, start=word_num, stop=word_num + 1)
            rel = rel.softmax(dim=0)  # 2 x batch_size
            # only get positive class; one host transfer instead of one per element
            scores = rel[0].tolist()

            # actually get the words
            # NOTE(review): padded positions are not excluded here -- verify
            # whether pad tokens should be counted.
            for word_id, score in zip(word_ids, scores):
                word = inputs.vocab.itos[word_id]

                # add to store
                count, running_sum = words.get(word, (0, 0.0))
                words[word] = (count + 1, running_sum + score)  # count, sum


HBox(children=(IntProgress(value=0, max=139), HTML(value='')))

# Look at fairness results

In [160]:
import operator

In [161]:
# Order the (word, (count, score_sum)) pairs by their value tuple,
# i.e. by count first and by score sum on ties.
sorted_x = sorted(words.items(), key=lambda item: item[1])

In [162]:
# Keep only entries whose word does not map to vocab index 0
# (presumably the unknown-word token -- confirm against the vocab).
# Stored under its own name so the `test` dataset split is not overwritten.
known_sorted_x = [entry for entry in sorted_x if inputs.vocab.stoi[entry[0]] != 0]

In [163]:
# One row per word: occurrence count and mean positive-class score
# (`words` maps word -> (count, score_sum)).
results = pd.DataFrame({
    'word': list(words.keys()),
    'count': [count for count, _ in words.values()],
    'sent': [score_sum / count for count, score_sum in words.values()],
})
results = results.sort_values(by=['sent'], ascending=False)

# Create the output directory first so the save works on a fresh checkout.
results_path = '../results/word_fairness_test.pkl'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results.to_pickle(results_path)

In [164]:
# Reload the saved per-word results table from disk.
r = pd.read_pickle('../results/word_fairness_test.pkl')
# Optional filter, currently disabled: keep only rows with mean score <= 0.40.
# r = r[r['sent'] <= .40]

In [120]:
# Keep only words seen more than 3 times.
# NOTE(review): this rebinds `r`, so re-running the cell filters an already
# filtered frame; reload `r` first if the threshold is changed.
r = r.loc[r['count'].gt(3)]

In [15]:
# Word pairs whose rows in `r` are printed one after the other.
comparisons = [
    ('actor', 'actress'),
    ('black', 'white'),
    # disabled pairs: ('him', 'her'), ('young', 'old'), ('latino', 'asian'),
    ('text', 'video'),
    ('minutes', 'hour'),
]
for first_word, second_word in comparisons:
    first_rows = r[r.word == first_word]
    second_rows = r[r.word == second_word]
    # a word missing from `r` prints as an empty DataFrame
    print(first_rows, '\n', second_rows, '\n')

      word  count      sent
962  actor     12  0.555093 
        word  count      sent
68  actress      4  0.541827 

      word  count      sent
553  black      8  0.461766 
        word  count     sent
5584  white      2  0.52095 

Empty DataFrame
Columns: [word, count, sent]
Index: [] 
        word  count      sent
1136  video     15  0.476137 

         word  count      sent
1814  minutes     17  0.413592 
       word  count     sent
1354  hour      4  0.56039 

