In [1]:
# Import Dataset, joint model and pytorch train helpers
import transformers
%load_ext autoreload
%autoreload 2
import sys
import torch.optim as optim
import torch
from dataset_parser.davidson_parser import DavidsonDataset
from joint_model import JointModel
from util.WeightedRandomSampler import WeightedRandomSampler

In [2]:
# Open data set and split in train and dev and instantiate data loaders
trainset, devset, testset  = DavidsonDataset(fixed_set="train"), DavidsonDataset(fixed_set="val"), DavidsonDataset(fixed_set="test")
res = len(trainset) + len(devset) + len(testset)
print (res)

Loading Davidson train set from fixed split.
Successfully loaded davidson dataset.
Loading Davidson val set from fixed split.
Successfully loaded davidson dataset.
Loading Davidson test set from fixed split.
Successfully loaded davidson dataset.
14939


In [3]:
BATCH_SIZE = 256
sampler_trainset = WeightedRandomSampler(trainset, 3000)
sampler_devset = WeightedRandomSampler(devset, 1000)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                        num_workers=6, sampler = sampler_trainset)
devloader = torch.utils.data.DataLoader(devset, batch_size=1,
                                        num_workers=6)
testloader = torch.utils.data.DataLoader(testset, batch_size=1,
                                        num_workers=6)

In [4]:
# Instantiate joint model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
joint_model = JointModel()
joint_model.to(device)

Successfully initialized TweetNetwork submodel
Successfully initialized TweetClassifier submodel
hate done
offensive done
neither done
Successfully initialized TweetHistory submodel
Successfully initialized last final classification layer


JointModel(
  (SAGE): GraphSAGE(
    (model): SAGE(
      (convs): ModuleList(
        (0): SAGEConv(2, 32)
        (1): SAGEConv(32, 32)
        (2): SAGEConv(32, 32)
      )
    )
  )
  (BERT): TweetBERT(
    (model): DistilBertForSequenceClassification(
      (distilbert): DistilBertModel(
        (embeddings): Embeddings(
          (word_embeddings): Embedding(30522, 768, padding_idx=0)
          (position_embeddings): Embedding(512, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (transformer): Transformer(
          (layer): ModuleList(
            (0): TransformerBlock(
              (attention): MultiHeadSelfAttention(
                (dropout): Dropout(p=0.1, inplace=False)
                (q_lin): Linear(in_features=768, out_features=768, bias=True)
                (k_lin): Linear(in_features=768, out_features=768, bias=True)
                (v_lin): Linear(in_features=768, 

In [5]:
# load weights from trained model
if "cuda" in str(device):
    pass
    joint_model.load_state_dict(torch.load('../../models/joint_model_davidson_nulled_network_10epochs.model'))
else:
    print("check-else")
    joint_model.load_state_dict(torch.load('../../models/joint_model_davidson_nulled_network_10epochs.model', map_location=torch.device('cpu')))

In [8]:
# Training settings
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(joint_model.parameters(), lr=0.001)

In [9]:
# OPTIONAL: Train joint model
print ("Batch size: {}".format(BATCH_SIZE))
for epoch in range(10):
    #break
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        input_ids, attention_mask, user_id, tweet_label = data['input_ids'], data['attention_mask'], data['userid'], data['label'] #TODO fix this mess
        input_ids, attention_mask, tweet_label = input_ids.to(device),  attention_mask.to(device), tweet_label.to(device)
        predictions = joint_model(input_ids,attention_mask, user_id)
        loss = criterion(predictions, tweet_label)
        loss.backward()
        running_loss += loss
        optimizer.step()
        optimizer.zero_grad()
        if i % 4 == 0:
            print ("Epoch {}: {} tweets processed".format(epoch, i*BATCH_SIZE))
torch.save(joint_model.state_dict(), "../../models/joint_model_davidson_nulled_network_10epochs.model")

Batch size: 256
Epoch 0: 0 tweets processed
Epoch 0: 1024 tweets processed
Epoch 0: 2048 tweets processed
Epoch 0: 3072 tweets processed
Epoch 0: 4096 tweets processed
Epoch 0: 5120 tweets processed
Epoch 0: 6144 tweets processed
Epoch 0: 7168 tweets processed
Epoch 0: 8192 tweets processed
Epoch 1: 0 tweets processed
Epoch 1: 1024 tweets processed
Epoch 1: 2048 tweets processed
Epoch 1: 3072 tweets processed
Epoch 1: 4096 tweets processed
Epoch 1: 5120 tweets processed
Epoch 1: 6144 tweets processed
Epoch 1: 7168 tweets processed
Epoch 1: 8192 tweets processed
Epoch 2: 0 tweets processed
Epoch 2: 1024 tweets processed
Epoch 2: 2048 tweets processed
Epoch 2: 3072 tweets processed
Epoch 2: 4096 tweets processed
Epoch 2: 5120 tweets processed
Epoch 2: 6144 tweets processed
Epoch 2: 7168 tweets processed
Epoch 2: 8192 tweets processed
Epoch 3: 0 tweets processed
Epoch 3: 1024 tweets processed
Epoch 3: 2048 tweets processed
Epoch 3: 3072 tweets processed
Epoch 3: 4096 tweets processed
Epoc

In [8]:
# Obtain predictions for the dev/validation set.
y_pred, y_true = [],[]
output_for_print = []
for i, data in enumerate(devloader):
    joint_model.eval()
    input_ids, attention_mask, user_id, tweet_label = data['input_ids'], data['attention_mask'], data['userid'], data['label'] #TODO fix this mess
    input_ids, attention_mask, tweet_label = input_ids.to(device),  attention_mask.to(device), tweet_label.to(device)
    predictions = joint_model(input_ids,attention_mask, user_id)
    predictions = torch.nn.functional.softmax(predictions)
    max_pred = torch.argmax(predictions)
    y_pred.append(max_pred.item())
    y_true.append(tweet_label.item())
    output_for_print.append([i,user_id.item(),tweet_label.item(),max_pred.item()])

  predictions = torch.nn.functional.softmax(predictions)


In [6]:
# Print metrics
from sklearn.metrics import confusion_matrix, classification_report
#print (confusion_matrix(y_true=y_true, y_pred=y_pred))
#print (classification_report(y_true=y_true, y_pred=y_pred))

In [37]:
exit()
# Obtain predictions for the test set.
test_y_pred, test_y_true = [],[]
output_for_print = []
for i, data in enumerate(testloader):
    joint_model.eval()
    input_ids, attention_mask, user_id, tweet_label = data['input_ids'], data['attention_mask'], data['userid'], data['label'] #TODO fix this mess
    input_ids, attention_mask, tweet_label = input_ids.to(device),  attention_mask.to(device), tweet_label.to(device)
    test_predictions = joint_model(input_ids,attention_mask, user_id)
    test_predictions = torch.nn.functional.softmax(test_predictions)
    test_max_pred = torch.argmax(test_predictions)
    test_y_pred.append(test_max_pred.item())
    test_y_true.append(tweet_label.item())
    output_for_print.append([i,user_id.item(),tweet_label.item(),test_max_pred.item()])

  


In [38]:
# Print metrics
print (confusion_matrix(y_true=test_y_true, y_pred=test_y_pred))
print (classification_report(y_true=test_y_true, y_pred=test_y_pred))

[[  67   48   13]
 [  77 1929   54]
 [  14   24  374]]
              precision    recall  f1-score   support

           0       0.42      0.52      0.47       128
           1       0.96      0.94      0.95      2060
           2       0.85      0.91      0.88       412

    accuracy                           0.91      2600
   macro avg       0.75      0.79      0.77      2600
weighted avg       0.92      0.91      0.91      2600



# SHAP computations with class ShapExplainer

In [7]:
from SHAP.shap import ShapExplainer
# Shapley configuration
tweet_as_one = True
vocab_as_one=True
network_as_one = False
untokenize = True
dataset = 'davidson'

tokenizer_d = transformers.DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [8]:
model_explainer = ShapExplainer(joint_model, tweet_as_one = tweet_as_one, vocab_as_one=vocab_as_one, network_as_one = network_as_one, dataset = dataset, untokenize = untokenize)


In [9]:
test_y_pred, test_y_true = [],[]
shap_output = []
for i, data in enumerate(testloader):
    if i != 470:
        continue
    joint_model.eval()
    input_ids, attention_mask, user_id, tweet_label = data['input_ids'], data['attention_mask'], data['userid'], data['label'] #TODO fix this mess
    input_ids, attention_mask, tweet_label = input_ids.to(device),  attention_mask.to(device), tweet_label.to(device)
    test_predictions = joint_model(input_ids,attention_mask, user_id)
    test_predictions = torch.nn.functional.softmax(test_predictions)
    test_max_pred = torch.argmax(test_predictions)
    shapley_values, predicted_class, feature_distribution, vocab_indices = model_explainer.approximate_shap_values(input_ids, attention_mask, user_id)
    res = tokenizer_d.convert_ids_to_tokens(input_ids[0], skip_special_tokens = True)
    res = tokenizer_d.convert_tokens_to_string(res)
    tweet_hate = shapley_values[0,0].item()
    tweet_offen = shapley_values[1,0].item()
    tweet_none = shapley_values[2,0].item()
    vocab_hate = shapley_values[0,1].item()
    vocab_offen = shapley_values[1,1].item()
    vocab_none = shapley_values[2,1].item()
    network_hate = shapley_values[0,2].item()
    network_offen = shapley_values[1,2].item()
    network_none = shapley_values[2,2].item()
    print (network_none)
    print (network_offen)
    print (network_hate)
    #model_explainer.visualize_text_plot(shapley_values[0], input_ids)
    #%%capture Jupyter notebook
    #break
    tokenizer = transformers.DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
    res = tokenizer.convert_ids_to_tokens(input_ids[0], skip_special_tokens = True)

    res = tokenizer.convert_tokens_to_string(res)
    print(res)
    tweet_hate,tweet_offen,tweet_none,vocab_hate,vocab_offen,vocab_none,network_hate,network_offen,network_none

    shap_output.append([i,user_id.item(),shapley_values,test_max_pred])
    #model_explainer.visualize_text_plot(shap_values=shapley_values, input_ids=input_ids)

    if i % 250 == 0:
        print(i)

Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.


6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6713
6725
6724
6725
6724
6725
6723
6725
6714
6725
6714
6725
6714
6725
6714
6725
6714
6725
6714
6725
6713
6725
6714
6725
6713
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6713
6725
6724
6725
6723
6725
6724
6725
6714
6725
6724
6725
6724
6725
6714
6725
6724
6725
6724
6725
6714
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6713
6725
6724
6725
6723
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6713
6725
6724
6725
6724
6725
6723
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6713
6725
6724
6725
6724
6725
6723
6725
6713
6725
6713
6725
6713
6725
6713
6725
6724
6725
6713
6725
6713
6725
6712
6725
6714
6725
6713
6725
6714
6725
6714
6725
6714
6725
6714
6725
6714
6725
6714
6725
6713
6725
6714
6725
6713
6725
6714
6725
6724
6725
6724
6725
6714
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6724
6725
6713
6725
6724
6725
6723
6725
6724
6725
6713
6725
6713
6725
6713


In [10]:
#[i,user_id.item(),shapley_values, res, test_max_pred]]
model_explainer.plot_network(shap_output)


174612923
253731715
283237757
345979063
352976073
35807273
405676041
4299309089
52244066
563835357
772409804
2239658598
598759378
0.017551971599459648
708
here
-0.010394140146672726
nothere
0.006517278961837292
14
here
{708: {'group': 0, 'direct_relation': 11, 'value': 30, 'label': '708, 0/11', 'color': '#fe004d'}, 64: {'group': 1, 'direct_relation': 1, 'value': 30, 'label': '64, 0/1', 'color': '#118cfe'}, 14: {'group': 2, 'direct_relation': 1, 'value': 30, 'label': '14, 0/1', 'color': '#fe004d'}}
