# Vaccine Sentiment Classification
*by Nefeli Tavoulari*

#### In this notebook I classify tweets as Neutral, Pro-vax or Anti-vax.

## Install Dependencies

In [2]:
!pip install -U torch==1.8.0 torchtext==0.9.0

Collecting torch==1.8.0
  Downloading torch-1.8.0-cp37-cp37m-manylinux1_x86_64.whl (735.5 MB)
[K     |████████████████████████████████| 735.5 MB 12 kB/s 
[?25hCollecting torchtext==0.9.0
  Downloading torchtext-0.9.0-cp37-cp37m-manylinux1_x86_64.whl (7.1 MB)
[K     |████████████████████████████████| 7.1 MB 13.9 MB/s 
Installing collected packages: torch, torchtext
  Attempting uninstall: torch
    Found existing installation: torch 1.10.0+cu111
    Uninstalling torch-1.10.0+cu111:
      Successfully uninstalled torch-1.10.0+cu111
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.11.0
    Uninstalling torchtext-0.11.0:
      Successfully uninstalled torchtext-0.11.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.11.1+cu111 requires torch==1.10.0, but you have torch 1.8.0 which is incompatible.
torchaudio 0.10.0

## Import Packages

In [3]:
%matplotlib inline
import io
import re
import csv

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
from wordcloud import WordCloud
import nltk
import re
import csv
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords 

import torch
import torch.nn as nn
from torchtext.legacy import data   
from torchtext.vocab import GloVe
from torchtext.legacy.data import BucketIterator
from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn.functional as F
import torch.optim as optim
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

import torchvision.transforms as transforms
import torchvision.datasets as dsets

from sklearn.metrics import precision_recall_fscore_support, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import classification_report, roc_curve, roc_auc_score
from sklearn.metrics import roc_curve, accuracy_score, mean_absolute_error

SEED = 1234
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True
nltk.download('stopwords')
nltk.download('wordnet')
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


## Upload dataset - Create and Clean dataframe

In [4]:
upload_train = files.upload()

Saving vs_train.csv to vs_train.csv


In [5]:
upload_dev = files.upload()

Saving vs_dev.csv to vs_dev.csv


In [11]:
train_df = pd.read_csv(io.BytesIO(upload_train['vs_train.csv']))
dev_df = pd.read_csv(io.BytesIO(upload_dev['vs_dev.csv']))

In [12]:
train_df.dropna(subset = ["tweet"], inplace=True)
train_df.drop_duplicates(subset = ["tweet"], inplace=True)

dev_df.dropna(subset = ["tweet"], inplace=True)

train_df.drop(['Unnamed: 0'], axis = 1, inplace = True) 
dev_df.drop(['Unnamed: 0'], axis = 1, inplace = True) 

In [13]:
print(train_df) # training data

                                                   tweet  label
0      Sip N Shop Come thru right now #Marjais #Popul...      0
1      I don't know about you but My family and I wil...      1
2      @MSignorile Immunizations should be mandatory....      2
3      President Obama spoke in favor of vaccination ...      0
4      "@myfoxla: Arizona monitoring hundreds for mea...      0
...                                                  ...    ...
15971  @Salon if u believe the anti-vax nutcases caus...      1
15972  How do you feel about parents who don't #vacci...      0
15973  70 Preschoolers Tested for Measles in Simi Val...      0
15974  Finance Minister: Budget offers room to procur...      0
15975  Are you up to date on vaccines? Take CDC’s vac...      2

[15881 rows x 2 columns]


In [14]:
print(dev_df) # validation data

                                                  tweet  label
0     @user They had a massive surge in with covid d...      1
1     Required vaccines for school: Parents and guar...      0
2     “@KCStar: Two more Johnson County children hav...      0
3     NV can do better. Which states are the best (a...      2
4     Nothing like killing ourselves w/ our own fear...      2
...                                                 ...    ...
2277  RT @abc7: Number of measles cases reported in ...      0
2278  Evidence points to the idea that "measles affe...      0
2279  Where's @SavedYouAClick "@voxdotcom: Why you s...      2
2280  Some of my favorite people have autism. If tha...      2
2281  Coronavirus: The married couple behind the suc...      0

[2282 rows x 2 columns]


## Use Glove Embeddings

In [15]:
TEXT = data.Field(lower=True, sequential=True, tokenize="spacy", batch_first=True)
LABEL = data.LabelField(dtype = torch.long, sequential=False)

In [16]:
train_df.to_csv("train.csv", index=False)
dev_df.to_csv("valid.csv", index=False)

train_data, valid_data = data.TabularDataset.splits(
    path="", train="train.csv", 
    validation="valid.csv",format="csv", skip_header=True, 
    fields=[('tweet', TEXT), ('label', LABEL)]
)

print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')

Number of training examples: 15881
Number of validation examples: 2282


In [19]:
TEXT.build_vocab(train_data, vectors='glove.twitter.27B.25d', max_size=20000, min_freq=5)

# get the vocab instance
vocab = TEXT.vocab
vocab.vectors

.vector_cache/glove.twitter.27B.zip: 1.52GB [05:13, 4.85MB/s]                            
100%|█████████▉| 1193513/1193514 [00:25<00:00, 47127.62it/s]


tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.6959, -1.1469, -0.4180,  ..., -0.6788,  0.7061,  0.4083],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])

In [20]:
TEXT.vocab.freqs.most_common(10) # seems right

[('.', 11333),
 ('#', 9475),
 ('the', 8671),
 ('to', 6866),
 (',', 6630),
 ('vaccine', 5080),
 ('a', 5003),
 (':', 4629),
 ('measles', 4438),
 ('of', 4320)]

In [21]:
LABEL.build_vocab(train_data)
vocab_label = LABEL.vocab
print("Size of LABEL vocabulary:",len(vocab_label))
vocab_label.freqs

Size of LABEL vocabulary: 3


Counter({'0': 7385, '1': 2070, '2': 6426})

In [22]:
print(f"Unique tokens in TEXT vocabulary: {len(TEXT.vocab)}")
print(f"Unique tokens in LABEL vocabulary: {len(LABEL.vocab)}")

Unique tokens in TEXT vocabulary: 4596
Unique tokens in LABEL vocabulary: 3


## Create Batch Iterator

In [23]:
BATCH_SIZE = 64
train_iterator, valid_iterator = data.BucketIterator.splits(
    (train_data, valid_data), sort_key=lambda x: len(x.tweet),
    batch_size=BATCH_SIZE, shuffle=True)

LABEL.vocab.freqs

Counter({'0': 7385, '1': 2070, '2': 6426})

In [24]:
pretrained_embeddings = TEXT.vocab.vectors
pretrained_embeddings.shape

torch.Size([4596, 25])

## LSTM

In [30]:
class LSTM(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_layers, embedding_size, output_size, weights, bidirectional):
        super(LSTM, self).__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.bidirectional = bidirectional

        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.embedding.weight = nn.Parameter(weights, requires_grad=False)

        self.lstm = nn.LSTM(input_size=self.embedding_size,
                           hidden_size=self.hidden_size,
                           #dropout=dropout,
                           num_layers=self.num_layers, 
                           bidirectional=self.bidirectional,
                           batch_first=True
                           )
        
        if self.bidirectional: 
          self.fc = nn.Linear(self.hidden_size*2, self.output_size)
        else:
          self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        x = self.embedding(x)
        
        #x = x.reshape(x.shape[1], x.shape[0], x.shape[2])

        if self.bidirectional:
            h = torch.zeros((self.num_layers*2, x.size(0), self.hidden_size))
            c = torch.zeros((self.num_layers*2, x.size(0), self.hidden_size))
        else:
            h = torch.zeros((self.num_layers, x.size(0), self.hidden_size))
            c = torch.zeros((self.num_layers, x.size(0), self.hidden_size))

        out, (ht, ct) = self.lstm(x, (h, c))
         
        if self.bidirectional: 
          out = out.contiguous().view(-1, self.hidden_size*2)
        else:
          out = out.contiguous().view(-1, self.hidden_size)
          
        out = self.fc(out)   
         
        #Keep only the hidden representation of the last item of the sequence as the representative of the sample.
        out = out.view(x.size(0), -1, self.output_size)
        out = out[:, -1, :]
        
        return out

In [31]:
#Define layer sizes
vocab_size = pretrained_embeddings.shape[0]
hidden_size = 128
num_layers = 2
embedding_size = pretrained_embeddings.shape[1]
output_size = 3

#Define Hyperparameters
learning_rate = 1e-4

#Initialize model, optimizer
model = LSTM(vocab_size, hidden_size, num_layers, embedding_size, output_size, pretrained_embeddings, False)
criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.2, weight_decay=0.01)
#optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [32]:
model

LSTM(
  (embedding): Embedding(4596, 25)
  (lstm): LSTM(25, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=3, bias=True)
)

In [34]:
epoch_loss = []
epoch_loss_dev = []
epoch_acc = []
epoch_acc_dev = []
clip = 5

for epoch in range(100):

  batch_losses = []
  batch_acc = 0
  total = 0
  total_dev = 0
  loss = 0
  pred_proba = []

  # training
  model.train()
  for batch in train_iterator:  # for every batch
    if batch.tweet.shape[0] == BATCH_SIZE:
      y_pred = model(batch.tweet)
      #print(y_pred)
      loss = criterion(y_pred, batch.label)
      batch_losses.append(loss)
      #Delete previously stored gradients
      optimizer.zero_grad()
      #Perform backpropagation starting from the loss calculated in this epoch
      loss.backward()
      nn.utils.clip_grad_norm_(model.parameters(), clip)
      #Update model's weights based on the gradients calculated during backprop
      optimizer.step()

      # Total number of labels
      total += batch.label.size(0)
      # Total correct predictions
      _,pred_label = torch.max(y_pred, dim = 1)
      batch_acc += (pred_label == batch.label).sum()

  # validation    
  with torch.no_grad():
    batch_losses_dev = []
    batch_acc_dev = 0
    model.eval()
    for batch in valid_iterator:
      if batch.tweet.shape[0] == BATCH_SIZE:
        y_dev_pred = model(batch.tweet)
        prob = F.softmax(y_dev_pred, dim=1)   # probability that an instance belogs to each class
        #print(prob.shape)
        for i in prob:
          pred_proba.append(i.tolist())
        loss_dev = criterion(y_dev_pred, batch.label)
        batch_losses_dev.append(loss_dev)
        # number of labels
        total_dev += batch.label.size(0)
        # correct predictions
        _,pred_label = torch.max(y_dev_pred, dim = 1)  # get max probability
        batch_acc_dev += (pred_label == batch.label).sum()


  accuracy = batch_acc/total
  accuracy_dev = batch_acc_dev/total_dev

  train_loss = sum(batch_losses)/len(train_iterator)
  valid_loss = sum(batch_losses_dev)/len(valid_iterator)

  epoch_loss.append(train_loss)
  epoch_loss_dev.append(valid_loss)
  epoch_acc.append(accuracy)
  epoch_acc_dev.append(accuracy_dev)

  print(f"Epoch {epoch:3}: | Train Loss = {train_loss:.5f} | Train Accuracy = {accuracy:.5f} | Validation Loss = {valid_loss:.5f} | Validation Accuracy = {accuracy_dev:.5f} ")

Epoch   0: | Train Loss = 0.81124 | Train Accuracy = 0.63143 | Validation Loss = 0.95096 | Validation Accuracy = 0.49062 
Epoch   1: | Train Loss = 0.80164 | Train Accuracy = 0.63552 | Validation Loss = 0.94475 | Validation Accuracy = 0.49732 
Epoch   2: | Train Loss = 0.79583 | Train Accuracy = 0.64025 | Validation Loss = 0.95699 | Validation Accuracy = 0.46830 
Epoch   3: | Train Loss = 0.79223 | Train Accuracy = 0.64157 | Validation Loss = 0.93259 | Validation Accuracy = 0.49866 
Epoch   4: | Train Loss = 0.78851 | Train Accuracy = 0.64252 | Validation Loss = 0.91082 | Validation Accuracy = 0.54018 
Epoch   5: | Train Loss = 0.78153 | Train Accuracy = 0.64636 | Validation Loss = 0.89146 | Validation Accuracy = 0.56429 
Epoch   6: | Train Loss = 0.78004 | Train Accuracy = 0.65089 | Validation Loss = 0.85658 | Validation Accuracy = 0.59955 
Epoch   7: | Train Loss = 0.77650 | Train Accuracy = 0.65026 | Validation Loss = 0.85843 | Validation Accuracy = 0.59330 
Epoch   8: | Train Loss 

KeyboardInterrupt: ignored

## GRU

In [None]:
class GRU(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_layers, embedding_size, output_size, weights, bidirectional):
        super(GRU, self).__init__()

        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.bidirectional = bidirectional

        self.embedding = nn.Embedding(vocab_size, embedding_size)
        self.embedding.weight = nn.Parameter(weights, requires_grad=False)

        self.GRU = nn.GRU(input_size=self.embedding_size,
                           hidden_size=self.hidden_size,
                           #dropout=dropout,
                           num_layers=self.num_layers, 
                           bidirectional=self.bidirectional,
                           batch_first=True
                           )
        
        if self.bidirectional: 
          self.fc = nn.Linear(self.hidden_size*2, self.output_size)
        else:
          self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        x = self.embedding(x)
        
        #x = x.reshape(x.shape[1], x.shape[0], x.shape[2])

        if self.bidirectional:
            h = torch.zeros((self.num_layers*2, x.size(0), self.hidden_size))
        else:
            h = torch.zeros((self.num_layers, x.size(0), self.hidden_size))

        out, (ht, ct) = self.GRU(x, (h, c))
         
        if self.bidirectional: 
          out = out.contiguous().view(-1, self.hidden_size*2)
        else:
          out = out.contiguous().view(-1, self.hidden_size)
          
        out = self.fc(out)   
         
        #Keep only the hidden representation of the last item of the sequence as the representative of the sample.
        out = out.view(x.size(0), -1, self.output_size)
        out = out[:, -1, :]
        
        return out

In [None]:
#Define layer sizes
vocab_size = pretrained_embeddings.shape[0]
hidden_size = 128
num_layers = 2
embedding_size = pretrained_embeddings.shape[1]
output_size = 3

#Define Hyperparameters
learning_rate = 1e-4

#Initialize model, optimizer
model = GRU(vocab_size, hidden_size, num_layers, embedding_size, output_size, pretrained_embeddings, False)
criterion = nn.CrossEntropyLoss()
#criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.2, weight_decay=0.01)
#optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

In [None]:
model

LSTM(
  (embedding): Embedding(4596, 25)
  (lstm): LSTM(25, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=3, bias=True)
)

In [None]:
epoch_loss = []
epoch_loss_dev = []
epoch_acc = []
epoch_acc_dev = []
clip = 5

for epoch in range(100):

  batch_losses = []
  batch_acc = 0
  total = 0
  total_dev = 0
  loss = 0
  pred_proba = []

  # training
  model.train()
  for batch in train_iterator:  # for every batch
    if batch.tweet.shape[0] == BATCH_SIZE:
      y_pred = model(batch.tweet)
      #print(y_pred)
      loss = criterion(y_pred, batch.label)
      batch_losses.append(loss)
      #Delete previously stored gradients
      optimizer.zero_grad()
      #Perform backpropagation starting from the loss calculated in this epoch
      loss.backward()
      nn.utils.clip_grad_norm_(model.parameters(), clip)
      #Update model's weights based on the gradients calculated during backprop
      optimizer.step()

      # Total number of labels
      total += batch.label.size(0)
      # Total correct predictions
      _,pred_label = torch.max(y_pred, dim = 1)
      batch_acc += (pred_label == batch.label).sum()

  # validation    
  with torch.no_grad():
    batch_losses_dev = []
    batch_acc_dev = 0
    model.eval()
    for batch in valid_iterator:
      if batch.tweet.shape[0] == BATCH_SIZE:
        y_dev_pred = model(batch.tweet)
        prob = F.softmax(y_dev_pred, dim=1)   # probability that an instance belogs to each class
        #print(prob.shape)
        for i in prob:
          pred_proba.append(i.tolist())
        loss_dev = criterion(y_dev_pred, batch.label)
        batch_losses_dev.append(loss_dev)
        # number of labels
        total_dev += batch.label.size(0)
        # correct predictions
        _,pred_label = torch.max(y_dev_pred, dim = 1)  # get max probability
        batch_acc_dev += (pred_label == batch.label).sum()


  accuracy = batch_acc/total
  accuracy_dev = batch_acc_dev/total_dev

  train_loss = sum(batch_losses)/len(train_iterator)
  valid_loss = sum(batch_losses_dev)/len(valid_iterator)

  epoch_loss.append(train_loss)
  epoch_loss_dev.append(valid_loss)
  epoch_acc.append(accuracy)
  epoch_acc_dev.append(accuracy_dev)

  print(f"Epoch {epoch:3}: | Train Loss = {train_loss:.5f} | Train Accuracy = {accuracy:.5f} | Validation Loss = {valid_loss:.5f} | Validation Accuracy = {accuracy_dev:.5f} ")

### Evaluation

In [None]:
model.eval()
true = y_dev
pred = model(x_dev).squeeze(1)
after_train = criterion(pred, true) 
print('Test loss after Training' , after_train.item())

In [None]:
_,pred_label = torch.max(pred, dim = 1)

In [None]:
target_names = ['neutral', 'anti-vax', 'pro-vax']
df = pd.DataFrame({'true':true})
df2 = pd.DataFrame({'pred':pred})
cm = confusion_matrix(true, pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
disp.plot()
plt.show()

In [None]:
print("Precision-Recall-F1 - Training Data :")
print(precision_recall_fscore_support(true, pred, average='micro'))
print()
print("Precision-Recall-F1 - Test Data :")
print(precision_recall_fscore_support(true_dev, pred_dev, average='micro'))
print()

In [None]:
print(classification_report(true, pred))

In [None]:
macro_roc_auc_ovo = roc_auc_score(true_dev, pred_proba, multi_class="ovo", average="macro")
weighted_roc_auc_ovo = roc_auc_score(true_dev, pred_proba, multi_class="ovo", average="weighted")
macro_roc_auc_ovr = roc_auc_score(true_dev, pred_proba, multi_class="ovr", average="macro")
weighted_roc_auc_ovr = roc_auc_score(true_dev, pred_proba, multi_class="ovr", average="weighted")
print(
    "One-vs-One ROC AUC scores:\n{:.6f} (macro),\n{:.6f} "
    "(weighted by prevalence)".format(macro_roc_auc_ovo, weighted_roc_auc_ovo)
)
print()
print(
    "One-vs-Rest ROC AUC scores:\n{:.6f} (macro),\n{:.6f} "
    "(weighted by prevalence)".format(macro_roc_auc_ovr, weighted_roc_auc_ovr)
)

### Plots

In [None]:
# probabilities
df_prob = pd.DataFrame(pred_proba)
df_prob

In [None]:
# roc curve for classes
fpr = {}
tpr = {}
thresh ={}

n_class = 3

for i in range(n_class):    
    fpr[i], tpr[i], thresh[i] = roc_curve(true_dev, df_prob[i], pos_label=i)
    
# plotting    
plt.plot(fpr[0], tpr[0], linestyle='--',color='orange', label='Class 0 vs Rest')
plt.plot(fpr[1], tpr[1], linestyle='--',color='green', label='Class 1 vs Rest')
plt.plot(fpr[2], tpr[2], linestyle='--',color='blue', label='Class 2 vs Rest')
plt.title('Multiclass ROC curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive rate')
plt.legend(loc='best')
plt.savefig('Multiclass ROC',dpi=300);  

In [None]:
def plot_graph_loss(epochs):
    fig = plt.figure(figsize=(12,12))
    plt.title("Train/Validation Loss")
    plt.plot(list(np.arange(epochs) + 1), epoch_loss_dev, label='validation')
    plt.xlabel('num_epochs', fontsize=12)
    plt.ylabel('loss', fontsize=12)
    plt.legend(['train', 'validation']);

plot_graph_loss(100)

In [None]:
def plot_graph_acc(epochs):
    fig = plt.figure(figsize=(12,12))
    plt.title("Train/Validation Accuracy")
    plt.plot(list(np.arange(epochs) + 1), epoch_acc_dev, label='validation')
    plt.xlabel('num_epochs', fontsize=12)
    plt.ylabel('accuracy', fontsize=12)
    plt.legend(['train', 'validation']);

plot_graph_acc(100)    

## Second Model

In [None]:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.input_dim = input_dim
        self.embedding = nn.Embedding.from_pretrained(weights)
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim)#.requires_grad_()
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_dim)#.requires_grad_() 
        out = self.embedding(x)
        out, (hn, cn) = self.lstm(out, h0, c0)
        out = self.fc(out[:, -1, :])
        return out

In [None]:
#Define layer sizes
input_dim = x.shape[1]
hidden_dim = 128
num_layers = 4
output_dim = 3

#Define Hyperparameters
learning_rate = 1e-4

#Initialize model, optimizer, loss function
model = LSTM(input_dim, hidden_dim, num_layers, output_dim)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
model

In [None]:
epoch_loss = []
epoch_loss_dev = []
epoch_acc = []
epoch_acc_dev = []

for epoch in range(100):

  batch_losses = []
  batch_acc = 0
  total = 0
  total_dev = 0
  loss = 0
  pred = []
  true = []
  pred_dev = []
  true_dev = []

  # training
  model.train()
  for x_batch, y_batch in train_dataloader:  # for every batch
    print(x_batch.shape)
    y_pred = model(x_batch).squeeze(1)
    loss = loss_func(y_pred, y_batch)
    batch_losses.append(loss)
    #Delete previously stored gradients
    optimizer.zero_grad()
    #Perform backpropagation starting from the loss calculated in this epoch
    loss.backward()
    #Update model's weights based on the gradients calculated during backprop
    optimizer.step()

    # Total number of labels
    total += y_batch.size(0)
    # Total correct predictions
    _,pred_label = torch.max(y_pred, dim = 1)
    batch_acc += (pred_label == y_batch).sum()
    for i in pred_label:
      pred.append(i)
    for i in y_batch:
      true.append(i.item())

  # validation    
  with torch.no_grad():
    batch_losses_dev = []
    batch_acc_dev = 0
    model.eval()

    for x_batch, y_batch in dev_dataloader:
        y_dev_pred = model(x_batch)
        loss_dev = loss_func(y_dev_pred, y_batch)
        batch_losses_dev.append(loss_dev)
        # number of labels
        total_dev += y_batch.size(0)
        # correct predictions
        _,pred_label = torch.max(y_dev_pred, dim = 1)  # get max probability
        batch_acc_dev += (pred_label == y_batch).sum()
        for i in pred_label:
          pred_dev.append(i)
        for i in y_batch:
          true_dev.append(i.item())


  accuracy = batch_acc/total
  accuracy_dev = batch_acc_dev/total_dev

  train_loss = sum(batch_losses)/len(train_dataloader)
  valid_loss = sum(batch_losses_dev)/len(dev_dataloader)

  epoch_loss.append(train_loss)
  epoch_loss_dev.append(valid_loss)
  epoch_acc.append(accuracy)
  epoch_acc_dev.append(accuracy_dev)

  print(f"Epoch {epoch:3}: | Train Loss = {train_loss:.5f} | Train Accuracy = {accuracy:.5f} | Validation Loss = {valid_loss:.5f} | Validation Accuracy = {accuracy_dev:.5f} ")

### Evaluation

In [None]:
target_names = ['neutral', 'anti-vax', 'pro-vax']
df = pd.DataFrame({'true':true})
df2 = pd.DataFrame({'pred':pred})
cm = confusion_matrix(true, pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
disp.plot()
plt.show()

In [None]:
print("Precision-Recall-F1 - Training Data :")
print(precision_recall_fscore_support(true, pred, average='micro'))
print()
print("Precision-Recall-F1 - Test Data :")
print(precision_recall_fscore_support(true_dev, pred_dev, average='micro'))
print()

In [None]:
print(classification_report(true, pred))

### Plots

In [None]:
def plot_graph_loss(epochs):
    fig = plt.figure(figsize=(12,12))
    plt.title("Train/Validation Loss")
    plt.plot(list(np.arange(epochs) + 1) , epoch_loss, label='train')
    plt.plot(list(np.arange(epochs) + 1), epoch_loss_dev, label='validation')
    plt.xlabel('num_epochs', fontsize=12)
    plt.ylabel('loss', fontsize=12)
    plt.legend(['train', 'validation']);
plot_graph_loss(100)

In [None]:
def plot_graph_acc(epochs):
    fig = plt.figure(figsize=(12,12))
    plt.title("Train/Validation Accuracy")
    plt.plot(list(np.arange(epochs) + 1) , epoch_acc, label='train')
    plt.plot(list(np.arange(epochs) + 1), epoch_acc_dev, label='validation')
    plt.xlabel('num_epochs', fontsize=12)
    plt.ylabel('accuracy', fontsize=12)
    plt.legend(['train', 'validation']);
plot_graph_acc(100)    