In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip3 install transformers

In [3]:
import pandas as pd
import numpy as np
import json, re
from tqdm import tqdm_notebook
from uuid import uuid4
import time
import datetime
import random

## Torch Modules
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

# Transformers
from transformers import get_linear_schedule_with_warmup
from transformers import (
    BertForSequenceClassification,
                          BertTokenizer,
                          RobertaForSequenceClassification,
                          RobertaTokenizer,
                         AdamW)

In [4]:
# Target labels
label_encodings6 = {
    'pants-fire': 0, 
    'false':      1, 
    'barely-true':2, 
    'half-true':  3, 
    'mostly-true':4,
    'true':       5
}

In [5]:
def encode_dataframe(statement_col, target_col, unpack=False):
    # Tokenize statements
    bert_encoded_dict = statement_col.apply(lambda sent: bert_tokenizer.encode_plus(
                                      sent,                      # Sentence to encode.
                                      add_special_tokens = True, # Add '[CLS]' and '[SEP]'
                                      max_length = 120,           # Pad & truncate all sentences.
                                      pad_to_max_length = True,
                                      return_attention_mask = True,   # Construct attn. masks.
                                      return_tensors = 'pt',     # Return pytorch tensors.
                                      truncation = True
                                ))
    bert_input_ids = torch.cat([item['input_ids'] for item in bert_encoded_dict], dim=0)
    bert_attention_masks = torch.cat([item['attention_mask'] for item in bert_encoded_dict], dim=0)

    # Format targets
    labels = torch.tensor(target_col)
    sentence_ids = torch.tensor(range(len(target_col)))

    # Combine the training inputs into a TensorDataset
    bert_dataset = TensorDataset(sentence_ids, bert_input_ids, bert_attention_masks, labels)

    # Remove indices
    trial_dataset =  index_remover(bert_dataset)

    if unpack:
        return bert_input_ids, bert_attention_masks, labels
    else:
        return trial_dataset

def index_remover(tensordata):
    input_ids = []
    attention_masks = []
    labels = []
   
    for a,b,c,d in tensordata:
        input_ids.append(b.tolist())
        attention_masks.append(c.tolist())
        labels.append(d.tolist())
        
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)
    labels = torch.tensor(labels)
    
    final_dataset =  TensorDataset(input_ids, attention_masks, labels)
    return final_dataset

# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))

In [6]:
# Device
device = torch.device("cuda:0")

# BERT
bert_model = BertForSequenceClassification.from_pretrained("bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
                                                           num_labels = 6, # The number of output labels--2 for binary classification.
                                                                           # You can increase this for multi-class tasks.
                                                           output_attentions = False, # Whether the model returns attentions weights.
                                                           output_hidden_states = False # Whether the model returns all hidden-states.
                                                          ).to(device)
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [7]:
# Read in data
df_train = pd.read_csv("/content/drive/MyDrive/fake-news-explainability/train.tsv", sep='\t', header=None)
df_test = pd.read_csv("/content/drive/MyDrive/fake-news-explainability/test.tsv", sep='\t', header=None)
df_valid = pd.read_csv("/content/drive/MyDrive/fake-news-explainability/valid.tsv", sep='\t', header=None)

# Relabel columns
cols = ['ID', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state',
       'party', 'barely_true_count', 'false_count', 'half_true_count',
       'mostly_true_count', 'pants_on_fire_count', 'context']
df_train.columns, df_test.columns, df_valid.columns = cols, cols, cols

# Relabel target
df_train['target'] = df_train['label'].apply(lambda x: label_encodings6[x])
df_test['target'] = df_test['label'].apply(lambda x: label_encodings6[x])
df_valid['target'] = df_valid['label'].apply(lambda x: label_encodings6[x])

# Encode dataframes
df_train_encode = encode_dataframe(df_train['statement'], df_train['target'])
df_test_encode = encode_dataframe(df_test['statement'], df_test['target'])



In [8]:
# Load data into dataloader
batch_size = 32

bert_train_dataloader = DataLoader(
            df_train_encode,  # The training samples.
            sampler = RandomSampler(df_train_encode), # Select batches randomly
            batch_size = batch_size # Trains with this batch size.
        )
bert_validation_dataloader = DataLoader(
            df_test_encode, # The validation samples.
            sampler = SequentialSampler(df_test_encode), # Pull out batches sequentially.
            batch_size = batch_size # Evaluate with this batch size.
        )

In [9]:
# Optimizer
bert_optimizer = AdamW(bert_model.parameters(),
                  lr = 5e-5, # args.learning_rate - default is 5e-5
                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.
                )

# Epochs & Learning Rate
epochs = 2
total_steps = len(bert_train_dataloader) * epochs

# Create the learning rate scheduler.
bert_scheduler = get_linear_schedule_with_warmup(bert_optimizer, 
                                            num_warmup_steps = 0, # Default value in run_glue.py
                                            num_training_steps = total_steps)

In [10]:
# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Set the seed value all over the place to make this reproducible.
seed_val = 100

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# We'll store a number of quantities such as training and validation loss, 
# validation accuracy, and timings.
bert_training_stats = []

# Measure the total training time for the whole run.
total_t0 = time.time()

# For each epoch...
for epoch_i in range(0, epochs):
    
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the total loss for this epoch.
    total_train_loss = 0

    # Put the bert_model into training mode. Don't be mislead--the call to 
    # `train` just changes the *mode*, it doesn't *perform* the training.
    # `dropout` and `batchnorm` layers behave differently during training
    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-bert_model-train-do-in-pytorch)
    bert_model.train()

    # For each batch of training data...
    for step, batch in enumerate(bert_train_dataloader):

        # Progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(bert_train_dataloader), elapsed))

        # Unpack batch
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Zero grads
        bert_model.zero_grad()        

        # Forward pass
        output = bert_model(b_input_ids, 
                            token_type_ids=None, 
                             attention_mask=b_input_mask, 
                             labels=b_labels)
        # Accumulate loss
        total_train_loss += output[0].item()

        # Backward pass
        output[0].backward()

        # Clip the norm of the gradients to 1.0.
        # This is to help prevent the "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(bert_model.parameters(), 1.0)

        # Update parameters and take a step using the computed gradient.
        # The bert_optimizer dictates the "update rule"--how the parameters are
        # modified based on their gradients, the learning rate, etc.
        bert_optimizer.step()

        # Update the learning rate.
        bert_scheduler.step()

    # Calculate the average loss over all of the batches.
    avg_train_loss = total_train_loss / len(bert_train_dataloader)            
    
    # Measure how long this epoch took.
    training_time = format_time(time.time() - t0)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(training_time))
        
    # ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure our performance on
    # our validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Put the bert_model in evaluation mode--the dropout layers behave differently
    # during evaluation.
    bert_model.eval()

    # Tracking variables 
    total_eval_accuracy = 0
    total_eval_loss = 0
    nb_eval_steps = 0

    # Evaluate data for one epoch
    for batch in bert_validation_dataloader:

        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        
        with torch.no_grad():        
            outputs = bert_model(b_input_ids, 
                                   token_type_ids=None, 
                                   attention_mask=b_input_mask,
                                   labels=b_labels)
            
        total_eval_loss += outputs[0].item()

        # Move logits and labels to CPU
        logits = outputs[1].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences, and
        # accumulate it over all batches.
        total_eval_accuracy += flat_accuracy(logits, label_ids)
        

    # Report the final accuracy for this validation run.
    avg_val_accuracy = total_eval_accuracy / len(bert_validation_dataloader)
    print("  Accuracy: {0:.2f}".format(avg_val_accuracy))

    # Calculate the average loss over all of the batches.
    avg_val_loss = total_eval_loss / len(bert_validation_dataloader)
    
    # Measure how long the validation run took.
    validation_time = format_time(time.time() - t0)
    
    print("  Validation Loss: {0:.2f}".format(avg_val_loss))
    print("  Validation took: {:}".format(validation_time))

    # Record all statistics from this epoch.
    bert_training_stats.append(
        {
            'epoch': epoch_i + 1,
            'Training Loss': avg_train_loss,
            'Valid. Loss': avg_val_loss,
            'Valid. Accur.': avg_val_accuracy,
            'Training Time': training_time,
            'Validation Time': validation_time
        }
    )

print("")
print("Training complete!")

print("Total training took {:} (h:mm:ss)".format(format_time(time.time()-total_t0)))

Training...
  Batch    40  of    320.    Elapsed: 0:00:24.
  Batch    80  of    320.    Elapsed: 0:00:50.
  Batch   120  of    320.    Elapsed: 0:01:17.
  Batch   160  of    320.    Elapsed: 0:01:43.
  Batch   200  of    320.    Elapsed: 0:02:09.
  Batch   240  of    320.    Elapsed: 0:02:36.
  Batch   280  of    320.    Elapsed: 0:03:02.

  Average training loss: 1.72
  Training epoch took: 0:03:28

Running Validation...
  Accuracy: 0.27
  Validation Loss: 1.67
  Validation took: 0:00:10
Training...
  Batch    40  of    320.    Elapsed: 0:00:26.
  Batch    80  of    320.    Elapsed: 0:00:53.
  Batch   120  of    320.    Elapsed: 0:01:19.
  Batch   160  of    320.    Elapsed: 0:01:45.
  Batch   200  of    320.    Elapsed: 0:02:12.
  Batch   240  of    320.    Elapsed: 0:02:38.
  Batch   280  of    320.    Elapsed: 0:03:04.

  Average training loss: 1.58
  Training epoch took: 0:03:31

Running Validation...
  Accuracy: 0.29
  Validation Loss: 1.66
  Validation took: 0:00:10

Training co

### Validation & Reporting Accuracy

In [11]:
# Reformat test and validation dataframes
input_test,  attention_test,  labels_test  = encode_dataframe(df_test['statement'],  df_test['target'],  unpack=True)
input_valid, attention_valid, labels_valid = encode_dataframe(df_valid['statement'], df_valid['target'], unpack=True)

# Run the inputs through the model
with torch.no_grad():
    outputs_test = bert_model(input_test.to(device),
                              token_type_ids=None, 
                              attention_mask=attention_test.to(device),
                              labels=labels_test.to(device))
    outputs_valid = bert_model(input_valid.to(device),
                               token_type_ids=None, 
                               attention_mask=attention_valid.to(device),
                               labels=labels_valid.to(device))
    
# Test accuracy
print(f"Test Acc: {flat_accuracy(outputs_test[1].detach().cpu().numpy(), labels_test.to('cpu').numpy())}")

# Valid accuracy
print(f"Valid Acc: {flat_accuracy(outputs_valid[1].detach().cpu().numpy(), labels_valid.to('cpu').numpy())}")



Test Acc: 0.2943962115232833
Valid Acc: 0.2757009345794392


In [12]:
# Save model
bert_model.save_pretrained("/content/drive/MyDrive/fake-news-explainability/bert_model")