In [1]:
import pandas as pd

# Test split used at the end of the notebook (reads its `gender` and `post` columns).
df_test = pd.read_csv("../data/test_tokenized.csv")
# Tab-separated template sentences (columns `sentid` and `sentence`).
df = pd.read_csv("../data/all_sentences.tsv", delimiter="\t")
df

Unnamed: 0,sentid,sentence
0,technician.customer.1.male.txt,The technician told the customer that he could...
1,technician.customer.1.female.txt,The technician told the customer that she coul...
2,technician.customer.1.neutral.txt,The technician told the customer that they cou...
3,technician.someone.1.male.txt,The technician told someone that he could pay ...
4,technician.someone.1.female.txt,The technician told someone that she could pay...
...,...,...
715,secretary.visitor.1.female.txt,The secretary asked the visitor to sign in so ...
716,secretary.visitor.1.neutral.txt,The secretary asked the visitor to sign in so ...
717,secretary.someone.1.male.txt,The secretary asked someone to sign in so that...
718,secretary.someone.1.female.txt,The secretary asked someone to sign in so that...


In [2]:
# The sentence file is expected to hold consecutive (male, female, neutral)
# rows for every topic, with sentids shaped like
# "<occupation>.<participant>.<0|1>.<variant>.txt".
n = len(df) // 3
if len(df) != 3 * n:
    raise ValueError(f"expected a multiple of 3 rows, got {len(df)}")

male_texts = []
female_texts = []
neutral_texts = []
topic = []

for start in range(0, 3 * n, 3):
    # Positional slicing so the grouping does not depend on the index labels.
    sentids = df['sentid'].iloc[start:start + 3].tolist()
    parts = [sid.split(".") for sid in sentids]
    variants = [p[3] if len(p) > 3 else None for p in parts]
    topics = {'.'.join(p[:3]) for p in parts}

    # Fail loudly instead of silently pairing sentences from different
    # topics or variants when the file is not ordered as assumed.
    if variants != ['male', 'female', 'neutral'] or len(topics) != 1:
        raise ValueError(
            f"rows {start}-{start + 2} are not a male/female/neutral triplet: {sentids}"
        )

    male_text, female_text, neutral_text = df['sentence'].iloc[start:start + 3]
    male_texts.append(male_text)
    female_texts.append(female_text)
    neutral_texts.append(neutral_text)

    topic.append(topics.pop())

In [3]:
# One row per topic, with the three gendered variants side by side.
bias_df = pd.DataFrame(
    {
        'topic': topic,
        'male': male_texts,
        'female': female_texts,
        'neutral': neutral_texts,
    }
)
bias_df

Unnamed: 0,topic,male,female,neutral
0,technician.customer.1,The technician told the customer that he could...,The technician told the customer that she coul...,The technician told the customer that they cou...
1,technician.someone.1,The technician told someone that he could pay ...,The technician told someone that she could pay...,The technician told someone that they could pa...
2,technician.customer.0,The technician told the customer that he had c...,The technician told the customer that she had ...,The technician told the customer that they had...
3,technician.someone.0,The technician told someone that he had comple...,The technician told someone that she had compl...,The technician told someone that they had comp...
4,accountant.taxpayer.1,The taxpayer met with the accountant to get he...,The taxpayer met with the accountant to get he...,The taxpayer met with the accountant to get he...
...,...,...,...,...
235,firefighter.someone.0,The firefighter had to rescue someone from the...,The firefighter had to rescue someone from the...,The firefighter had to rescue someone from the...
236,secretary.visitor.0,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...
237,secretary.someone.0,The secretary asked someone to sign in so that...,The secretary asked someone to sign in so that...,The secretary asked someone to sign in so that...
238,secretary.visitor.1,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...


In [4]:
# Dump every topic id (the first three dot-separated fields of each sentid).
print(topic)

['technician.customer.1', 'technician.someone.1', 'technician.customer.0', 'technician.someone.0', 'accountant.taxpayer.1', 'accountant.someone.1', 'accountant.taxpayer.0', 'accountant.someone.0', 'supervisor.employee.1', 'supervisor.someone.1', 'supervisor.employee.0', 'supervisor.someone.0', 'engineer.client.0', 'engineer.someone.0', 'engineer.client.1', 'engineer.someone.1', 'worker.pedestrian.1', 'worker.someone.1', 'worker.pedestrian.0', 'worker.someone.0', 'educator.student.1', 'educator.someone.1', 'educator.student.0', 'educator.someone.0', 'clerk.customer.1', 'clerk.someone.1', 'clerk.customer.0', 'clerk.someone.0', 'counselor.patient.0', 'counselor.someone.0', 'counselor.patient.1', 'counselor.someone.1', 'inspector.homeowner.1', 'inspector.someone.1', 'inspector.homeowner.0', 'inspector.someone.0', 'mechanic.customer.1', 'mechanic.someone.1', 'mechanic.customer.0', 'mechanic.someone.0', 'manager.customer.0', 'manager.someone.0', 'manager.customer.1', 'manager.someone.1', 'th

### Load the BERT model and integrated gradients explainer

In [1]:
# imports
import numpy as np
import torch
import torch.nn.functional as nn
from transformers_interpret import SequenceClassificationExplainer
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import matplotlib.pyplot as plt

In [87]:
# The sequence-classification head of the plain "distilbert-base-uncased"
# checkpoint is newly (randomly) initialised on load, so seed torch first;
# otherwise the "pre-trained" predictions differ on every kernel restart.
SEED = 42
torch.manual_seed(SEED)

# load the pre-trained BERT model
tokenizer_pretrained = DistilBertTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case=True)
model_pretrained = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

# load the fine-tuned BERT model (same architecture; weights are replaced by the checkpoint below)
tokenizer_finetuned = DistilBertTokenizer.from_pretrained("distilbert-base-uncased", do_lower_case=True)
model_finetuned = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
finetuned_path = '../models/bert/own_script/epoch-4.model'
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
model_finetuned.load_state_dict(torch.load(finetuned_path, map_location=torch.device('cpu')))

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_projector.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'classifier.w

<All keys matched successfully>

In [88]:
def predict(text, model, tokenizer, return_prob=False):
    """Classify a single text with a sequence-classification model.

    Parameters
    ----------
    text : str
        Text to classify; it is truncated to 512 tokens by the tokenizer.
    model : callable
        Model whose output exposes a ``logits`` tensor; it is switched to
        eval mode before inference.
    tokenizer : callable
        Tokenizer producing the keyword inputs for ``model``.
    return_prob : bool, default False
        When True, also return the softmax probabilities for every class.

    Returns
    -------
    pred
        Index of the highest-scoring class (a NumPy integer scalar).
    (pred, prob)
        Only when ``return_prob`` is True; ``prob`` is a 1-D NumPy array
        with the class probabilities of the first (only) input.
    """
    inputs = tokenizer(text, add_special_tokens=True, max_length=512,
                       truncation=True, padding=True, return_tensors='pt')
    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits

    # Move to CPU before converting so both results are handled the same way.
    pred = torch.argmax(logits, dim=-1).cpu().numpy()[0]
    if not return_prob:
        return pred

    # Probabilities are only computed on request.
    prob = torch.softmax(logits, dim=-1).cpu().numpy()[0]
    return pred, prob

In [89]:
# One explainer per model, so attributions can be compared between the two.
cls_explainer_pretrained = SequenceClassificationExplainer(
    model=model_pretrained,
    tokenizer=tokenizer_pretrained,
)
cls_explainer_finetuned = SequenceClassificationExplainer(
    model=model_finetuned,
    tokenizer=tokenizer_finetuned,
)

In [9]:
# helper to shorten posts that are longer than BERT's max length
def truncate_post(post, tokenizer=None, max_length=512):
    """Drop trailing space-separated words until the encoded post fits.

    Parameters
    ----------
    post : str
        Text to shorten.
    tokenizer : optional
        Object with an ``encode(text)`` method returning the token ids;
        defaults to the notebook's ``tokenizer_pretrained``.
    max_length : int, default 512
        Maximum allowed length of ``tokenizer.encode(...)``.

    Returns
    -------
    str
        ``post`` unchanged when it already fits, otherwise the longest prefix
        of whole words that fits. This is the empty string when even a single
        word is too long.
    """
    if tokenizer is None:
        tokenizer = tokenizer_pretrained

    # Split once and pop words instead of re-splitting the text every round.
    words = post.split(' ')
    truncated = post
    # `words` running empty guarantees termination even when max_length is
    # smaller than what the tokenizer emits for an empty string.
    while words and len(tokenizer.encode(truncated)) > max_length:
        words.pop()
        truncated = ' '.join(words)
    return truncated

### Seeing if the predictions change for different biases

In [10]:
def _predict_variants(frame, model, tokenizer):
    """Predict the male/female/neutral variant of every row of ``frame``.

    Returns two lists aligned with the rows of ``frame``: the
    ``(male, female, neutral)`` prediction tuples, and a 0/1 flag that is 1
    when the three predictions are not all equal.
    """
    triplets = []
    changed = []
    for variants in zip(frame['male'], frame['female'], frame['neutral']):
        # Plain ints keep the stored tuples readable regardless of NumPy version.
        preds = tuple(int(predict(text, model, tokenizer)) for text in variants)
        triplets.append(preds)
        changed.append(int(len(set(preds)) > 1))
    return triplets, changed


# The loop variables live inside the helper so this cell no longer overwrites
# the notebook-level `n` (number of triplets) used to build `bias_df`.
pred_pretrained, pretrained_changed = _predict_variants(
    bias_df, model_pretrained, tokenizer_pretrained
)
pred_finetuned, finetuned_changed = _predict_variants(
    bias_df, model_finetuned, tokenizer_finetuned
)

bias_df['pred_pretrained'] = pred_pretrained
bias_df['pred_finetuned'] = pred_finetuned

bias_df['pretrained_changed'] = pretrained_changed
bias_df['finetuned_changed'] = finetuned_changed

bias_df

Unnamed: 0,topic,male,female,neutral,pred_pretrained,pred_finetuned,pretrained_changed,finetuned_changed
0,technician.customer.1,The technician told the customer that he could...,The technician told the customer that she coul...,The technician told the customer that they cou...,"(0, 0, 0)","(0, 0, 0)",0,0
1,technician.someone.1,The technician told someone that he could pay ...,The technician told someone that she could pay...,The technician told someone that they could pa...,"(0, 0, 0)","(0, 1, 0)",0,1
2,technician.customer.0,The technician told the customer that he had c...,The technician told the customer that she had ...,The technician told the customer that they had...,"(0, 0, 0)","(0, 0, 0)",0,0
3,technician.someone.0,The technician told someone that he had comple...,The technician told someone that she had compl...,The technician told someone that they had comp...,"(0, 0, 0)","(0, 1, 0)",0,1
4,accountant.taxpayer.1,The taxpayer met with the accountant to get he...,The taxpayer met with the accountant to get he...,The taxpayer met with the accountant to get he...,"(0, 0, 0)","(0, 1, 0)",0,1
...,...,...,...,...,...,...,...,...
235,firefighter.someone.0,The firefighter had to rescue someone from the...,The firefighter had to rescue someone from the...,The firefighter had to rescue someone from the...,"(0, 0, 0)","(1, 1, 1)",0,0
236,secretary.visitor.0,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...,"(0, 0, 0)","(0, 1, 0)",0,1
237,secretary.someone.0,The secretary asked someone to sign in so that...,The secretary asked someone to sign in so that...,The secretary asked someone to sign in so that...,"(0, 0, 0)","(0, 1, 0)",0,1
238,secretary.visitor.1,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...,The secretary asked the visitor to sign in so ...,"(0, 0, 0)","(1, 1, 1)",0,0


In [11]:
# Number of topics where the pre-trained model's prediction differs between variants.
int(bias_df['pretrained_changed'].eq(1).sum())

0

In [12]:
# Number of topics where the fine-tuned model's prediction differs between variants.
int(bias_df['finetuned_changed'].eq(1).sum())

66

## Trying out some things

### the technician told the customer that [ ] could pay with cash

In [24]:
sentid = 0

# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_pretrained(sample, class_name=pred_class_name)
    cls_explainer_pretrained.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_pretrained.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.54),LABEL_1,-1.09,[CLS] the technician told the customer that he could pay with cash . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.54),LABEL_1,-1.24,[CLS] the technician told the customer that she could pay with cash . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.54),LABEL_1,-1.02,[CLS] the technician told the customer that they could pay with cash . [SEP]
,,,,


Neutral.


In [14]:
# Same comparison for the fine-tuned model; `sentid` comes from the previous cell.
# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_finetuned(sample, class_name=pred_class_name)
    cls_explainer_finetuned.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_finetuned.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.72),LABEL_0,1.24,[CLS] the technician told the customer that he could pay with cash . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.55),LABEL_0,0.63,[CLS] the technician told the customer that she could pay with cash . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.69),LABEL_0,1.08,[CLS] the technician told the customer that they could pay with cash . [SEP]
,,,,


Neutral.


### the nurse notified the patient that [ ] blood would be drawn in an hour

In [15]:
sentid = 90

# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_pretrained(sample, class_name=pred_class_name)
    cls_explainer_pretrained.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_pretrained.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.59),LABEL_0,-0.7,[CLS] the nurse notified the patient that his blood would be drawn in an hour . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.59),LABEL_0,-0.58,[CLS] the nurse notified the patient that her blood would be drawn in an hour . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.59),LABEL_0,-0.57,[CLS] the nurse notified the patient that their blood would be drawn in an hour . [SEP]
,,,,


Neutral.


In [16]:
# Same comparison for the fine-tuned model; `sentid` comes from the previous cell.
# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_finetuned(sample, class_name=pred_class_name)
    cls_explainer_finetuned.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_finetuned.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.76),LABEL_1,1.22,[CLS] the nurse notified the patient that his blood would be drawn in an hour . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.80),LABEL_1,1.56,[CLS] the nurse notified the patient that her blood would be drawn in an hour . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.79),LABEL_1,1.43,[CLS] the nurse notified the patient that their blood would be drawn in an hour . [SEP]
,,,,


Neutral.


### the firefighter had to rescue someone from the burning building because [ ] could not escape

In [17]:
sentid = 233

# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_pretrained(sample, class_name=pred_class_name)
    cls_explainer_pretrained.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_pretrained.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.60),LABEL_0,0.03,[CLS] the fire ##fighter had to rescue someone from the burning building because he could not escape . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.60),LABEL_0,0.23,[CLS] the fire ##fighter had to rescue someone from the burning building because she could not escape . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.60),LABEL_0,-0.21,[CLS] the fire ##fighter had to rescue someone from the burning building because they could not escape . [SEP]
,,,,


Neutral.


In [18]:
# Same comparison for the fine-tuned model; `sentid` comes from the previous cell.
# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_finetuned(sample, class_name=pred_class_name)
    cls_explainer_finetuned.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_finetuned.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.54),LABEL_1,-0.04,[CLS] the fire ##fighter had to rescue someone from the burning building because he could not escape . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.69),LABEL_1,0.64,[CLS] the fire ##fighter had to rescue someone from the burning building because she could not escape . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.55),LABEL_1,-0.06,[CLS] the fire ##fighter had to rescue someone from the burning building because they could not escape . [SEP]
,,,,


Neutral.


### the secretary asked the visitor to sign in so that [ ] could update the guest log 

In [19]:
sentid = 236

# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_pretrained(sample, class_name=pred_class_name)
    cls_explainer_pretrained.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_pretrained.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.59),LABEL_0,-0.69,[CLS] the secretary asked the visitor to sign in so that he could update the guest log . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.59),LABEL_0,-0.74,[CLS] the secretary asked the visitor to sign in so that she could update the guest log . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.59),LABEL_0,-0.87,[CLS] the secretary asked the visitor to sign in so that they could update the guest log . [SEP]
,,,,


Neutral.


In [20]:
# Same comparison for the fine-tuned model; `sentid` comes from the previous cell.
# The male variant is explained without a target class; the class the
# explainer then reports as predicted is reused for the other two variants.
pred_class_name = None
for variant, caption in (("male", "Male bias."), ("female", "Female bias."), ("neutral", "Neutral.")):
    sample = bias_df[variant][sentid]
    exp = cls_explainer_finetuned(sample, class_name=pred_class_name)
    cls_explainer_finetuned.visualize()
    print(caption)
    if pred_class_name is None:
        pred_class_name = cls_explainer_finetuned.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.55),LABEL_0,0.54,[CLS] the secretary asked the visitor to sign in so that he could update the guest log . [SEP]
,,,,


Male bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_1 (0.38),LABEL_0,-0.36,[CLS] the secretary asked the visitor to sign in so that she could update the guest log . [SEP]
,,,,


Female bias.


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.53),LABEL_0,0.46,[CLS] the secretary asked the visitor to sign in so that they could update the guest log . [SEP]
,,,,


Neutral.


## Some made-up examples
#### - nothing really changes for the pre-trained model (attentions and predictions stay the same)
#### - for the fine-tuned model, it seems that "help" is weighted differently for men vs. women, and the model sees nurse as female, engineer as male and person as neutral

In [30]:
# The first sentence is explained without a target class; the class the
# explainer then reports as predicted is reused for the remaining sentences.
pred_class_name = None
for sample in (
    "The engineer asked for help.",
    "The nurse asked for help.",
    "The person asked for help.",
):
    exp = cls_explainer_pretrained(sample, class_name=pred_class_name)
    cls_explainer_pretrained.visualize()
    print("")
    if pred_class_name is None:
        pred_class_name = cls_explainer_pretrained.predicted_class_name

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.54),LABEL_1,-0.56,[CLS] the engineer asked for help . [SEP]
,,,,





True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.55),LABEL_1,0.02,[CLS] the nurse asked for help . [SEP]
,,,,





True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.54),LABEL_1,-1.09,[CLS] the person asked for help . [SEP]
,,,,





In [31]:
# Attribute every sentence towards the same, explicitly named class. This
# used to reuse `pred_class_name` left over from the pre-trained explainer
# cell, which made the target depend on another model's prediction and on
# cell execution order.
target_class = "LABEL_1"

for sample in (
    "The engineer asked for help.",
    "The nurse asked for help.",
    "The person asked for help.",
):
    exp = cls_explainer_finetuned(sample, class_name=target_class)
    cls_explainer_finetuned.visualize()
    print("")

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_0 (0.21),LABEL_1,-1.35,[CLS] the engineer asked for help . [SEP]
,,,,





True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.73),LABEL_1,1.64,[CLS] the nurse asked for help . [SEP]
,,,,





True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.54),LABEL_1,-0.01,[CLS] the person asked for help . [SEP]
,,,,





In [96]:
# (text, attribution target or None to let the explainer choose, print a blank line afterwards)
cases = [
    ("My name is Mark. I'm a stay-at-home dad and I love to bake and decorate cupcakes", None, False),
    ("I'm Sarah and I'm a passionate video game and ice hockey player", "LABEL_1", True),
    ("I find space absolutely fascinating! If I wasn't a mother I would be an astronaut", "LABEL_1", True),
]

for sample, target_class, blank_after in cases:
    exp = cls_explainer_finetuned(sample, class_name=target_class)
    cls_explainer_finetuned.visualize()
    if blank_after:
        print("")

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.94),LABEL_1,2.06,[CLS] my name is mark . i ' m a stay - at - home dad and i love to ba ##ke and decorate cup ##cake ##s [SEP]
,,,,


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.91),LABEL_1,1.32,[CLS] i ' m sarah and i ' m a passionate video game and ice hockey player [SEP]
,,,,





True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
1.0,LABEL_1 (0.96),LABEL_1,1.28,[CLS] i find space absolutely fascinating ! if i wasn ' t a mother i would be an astronaut [SEP]
,,,,





## Running predictions on the dataset

In [23]:
idx = 100
print(f"True label: {df_test.gender[idx]} (0 = male, 1 = female)\n")

# truncate_post returns the post unchanged when it already fits, so no
# separate length check is needed here.
instance = truncate_post(df_test.post[idx])

for heading, explainer in (
    ("Pre-trained model:", cls_explainer_pretrained),
    ("\nFine-tuned model:", cls_explainer_finetuned),
):
    print(heading)
    exp = explainer(instance)
    explainer.visualize()
print("")

True label: 0 (0 = male, 1 = female)

Pre-trained model:


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.58),LABEL_0,4.14,"[CLS] that ' s the second book in timothy za ##hn ' s trilogy . i bet y ' all thought that i ' d forgotten about you concerning our fun trips . i realize that it ' s been a while since our vacation , but i might as well post the pictures that i was planning . the delay has been caused by our village missions conference and my presence here at zachary and brenda ' s house . hopefully i ' ll be able to post some pictures of this fun soon ##ish , but until then , without further ad ##o , the un ##ve ##iling of our sea ##world pictures . i ' m a big fan of the or ##cas . they are much more majestic than their cousins : regulation or ##cs . i was pretty sure , beforehand , that whaling was illegal in most states these days , but i certainly enjoyed my portion . there ' s a likelihood that i was a bigger fan of the bel ##uga ##s than of anything else . for whatever reason , the all - white whales ( or the all - black pilot whales which i have no pictures of ) just greatly entertain me . perch ##ance my mono ##ch ##romatic preferences reflect my world ##view ? here ' s a good pic of emily and her grandma looking at said whales . [SEP]"
,,,,



Fine-tuned model:


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.94),LABEL_0,2.17,"[CLS] that ' s the second book in timothy za ##hn ' s trilogy . i bet y ' all thought that i ' d forgotten about you concerning our fun trips . i realize that it ' s been a while since our vacation , but i might as well post the pictures that i was planning . the delay has been caused by our village missions conference and my presence here at zachary and brenda ' s house . hopefully i ' ll be able to post some pictures of this fun soon ##ish , but until then , without further ad ##o , the un ##ve ##iling of our sea ##world pictures . i ' m a big fan of the or ##cas . they are much more majestic than their cousins : regulation or ##cs . i was pretty sure , beforehand , that whaling was illegal in most states these days , but i certainly enjoyed my portion . there ' s a likelihood that i was a bigger fan of the bel ##uga ##s than of anything else . for whatever reason , the all - white whales ( or the all - black pilot whales which i have no pictures of ) just greatly entertain me . perch ##ance my mono ##ch ##romatic preferences reflect my world ##view ? here ' s a good pic of emily and her grandma looking at said whales . [SEP]"
,,,,





In [24]:
idx = 640
print(f"True label: {df_test.gender[idx]} (0 = male, 1 = female)\n")

# truncate_post returns the post unchanged when it already fits, so no
# separate length check is needed here.
instance = truncate_post(df_test.post[idx])

for heading, explainer in (
    ("Pre-trained model:", cls_explainer_pretrained),
    ("\nFine-tuned model:", cls_explainer_finetuned),
):
    print(heading)
    exp = explainer(instance)
    explainer.visualize()
print("")

Token indices sequence length is longer than the specified maximum sequence length for this model (523 > 512). Running this sequence through the model will result in indexing errors


True label: 0 (0 = male, 1 = female)

Pre-trained model:


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.58),LABEL_0,7.86,"[CLS] feel good … . ! ! ! : ) recently i have seen a video which says ab ##t the secret of life . . . . ta ##t video has inspired me a lot . . this is wat all of the video is about . . . one of the best feelings that one can enjoy is to feel good . feeling good gives all the good feelings one can experience in life . . . either could be . . . . happiness , joy , love , gratitude , appreciation , optimism , belief , hope , content ##ment , passion . . . wat ##ever feeling good makes oneself to as ##pire wat ##ever he or she wants to be . . . . : ) it involves a lot of positive energy in it . . . to make ta ##t happen . . . these are the tough ##est times the world is facing now . . . . the terrorism , the financial crisis . . . or anything which disrupt ##s peace . . . presently the world is ever bound ##ing with all forms of negative feeling . . either could be greed , guilt , revenge , jealous , hatred , frustration . . these forms of feelings in oneself is directly or indirectly responsible for the present situation of the world . . . so let all of us stop blaming others . . . stop feeling sad or de ##jected of the current situation . . . stop discussing ab ##t the hard times . . . stop all the negative things . . . let us all be + ve enough to encourage about the good in this abundant world . . . . let us all feel good . . . feeling good creates a health ##ier society . . . and thus a better world . . . so from now on . . . feel good about everything that you are . . . and sure the world around you is going to change . . and u can even see the change in the world too . . . . : ) feel good . . . to have a better world . . . ! ! ! note : concentrate on what you want rather than what you don ##t want . . . that ##s the secret of life . . . . spend some time in finding wat you want in life . . . . we have been always spending time thinking ab ##t wat we don ##t want in life . . . 
i don ##t want less marks in my exams i don ##t want to lose my job . . . we don ##t want wars to happen . . . we hate terrorism . . . by this way we all are encouraging the world with t the thoughts we have . . . . "" all that we are is the result of [SEP]"
,,,,



Fine-tuned model:


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
0.0,LABEL_0 (0.95),LABEL_0,2.16,"[CLS] feel good … . ! ! ! : ) recently i have seen a video which says ab ##t the secret of life . . . . ta ##t video has inspired me a lot . . this is wat all of the video is about . . . one of the best feelings that one can enjoy is to feel good . feeling good gives all the good feelings one can experience in life . . . either could be . . . . happiness , joy , love , gratitude , appreciation , optimism , belief , hope , content ##ment , passion . . . wat ##ever feeling good makes oneself to as ##pire wat ##ever he or she wants to be . . . . : ) it involves a lot of positive energy in it . . . to make ta ##t happen . . . these are the tough ##est times the world is facing now . . . . the terrorism , the financial crisis . . . or anything which disrupt ##s peace . . . presently the world is ever bound ##ing with all forms of negative feeling . . either could be greed , guilt , revenge , jealous , hatred , frustration . . these forms of feelings in oneself is directly or indirectly responsible for the present situation of the world . . . so let all of us stop blaming others . . . stop feeling sad or de ##jected of the current situation . . . stop discussing ab ##t the hard times . . . stop all the negative things . . . let us all be + ve enough to encourage about the good in this abundant world . . . . let us all feel good . . . feeling good creates a health ##ier society . . . and thus a better world . . . so from now on . . . feel good about everything that you are . . . and sure the world around you is going to change . . and u can even see the change in the world too . . . . : ) feel good . . . to have a better world . . . ! ! ! note : concentrate on what you want rather than what you don ##t want . . . that ##s the secret of life . . . . spend some time in finding wat you want in life . . . . we have been always spending time thinking ab ##t wat we don ##t want in life . . . 
i don ##t want less marks in my exams i don ##t want to lose my job . . . we don ##t want wars to happen . . . we hate terrorism . . . by this way we all are encouraging the world with t the thoughts we have . . . . "" all that we are is the result of [SEP]"
,,,,



