In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv('datasets0/Reviews.csv')

In [2]:
df.drop(['UserId', 'Id', 'ProductId', 'ProfileName', 'Time', 'HelpfulnessNumerator', 'HelpfulnessDenominator'], axis=1, inplace=True)

In [3]:
df_train = df[55000:59999]
df_valid = df[250000:251000]
df_test = df[251001:252000]

In [4]:
df_train.head()

Unnamed: 0,Score,Summary,Text
55000,1,Worst snack I ever tasted!,These are definitely the worst snack I have ev...
55001,2,Very Disappointing,Based on the reviews I thought I'd give the Wi...
55002,4,"Delicious, but a little salty","Like other reviewers have said, these are a li..."
55003,5,"GF snacks, ""Just the Cheese""",Yay! I used to be able to find these at local ...
55004,2,will not re-order,I was expecting something better than what arr...


In [5]:
import torch

if torch.cuda.is_available():       
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 2 GPU(s) available.
Device name: NVIDIA GeForce GTX 1080 Ti


## Tokenization and Input Formatting

In [6]:
import re
import nltk

def text_preprocessing(text):
    """
    - Correct errors (eg. '&amp;' to '&')
    @param    text (str): a string to be processed.
    @return   text (Str): the processed string.
    """
    # Replace '&amp;' with '&'
    text = re.sub(r'&amp;', '&', text)

    # Remove trailing whitespace
    text = re.sub(r'\s+', ' ', text).strip()

    return text

In [7]:
# Print sentence 0
print('Original: ', df['Text'][0])
print('Processed: ', text_preprocessing(df['Text'][0]))

Original:  I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than  most.
Processed:  I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than most.


In [8]:
from transformers import BertTokenizer

# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Create a function to tokenize a set of texts
def preprocessing_for_bert(data):
    """Perform required preprocessing steps for pretrained BERT.
    @param    data (np.array): Array of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
    """
    # Create empty lists to store outputs
    input_ids = []
    attention_masks = []

    # For every sentence...
    for sent in data:
        # `encode_plus` will:
        #    (1) Tokenize the sentence
        #    (2) Add the `[CLS]` and `[SEP]` token to the start and end
        #    (3) Truncate/Pad sentence to max length
        #    (4) Map tokens to their IDs
        #    (5) Create attention mask
        #    (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text=text_preprocessing(sent[:MAX_LEN]),  # Preprocess sentence
            add_special_tokens=True,        # Add `[CLS]` and `[SEP]`
            max_length=MAX_LEN,             # Max length to truncate/pad
            padding='max_length',           # Pad sentence to max length
            #return_tensors='pt',           # Return PyTorch tensor
            return_attention_mask=True      # Return attention mask
            )
        
        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))

    # Convert lists to tensors
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    return input_ids, attention_masks

In [9]:
# Specify `MAX_LEN`
MAX_LEN = 384

# Print sentence 0 and its encoded token ids
# token_ids = list(preprocessing_for_bert(df['Text'])[0].squeeze().numpy())
# print('Original: ', df['Text'][0])
# print('Token IDs: ', token_ids)

# Run function `preprocessing_for_bert` on the train set and the validation set
print('Tokenizing data...')
train_inputs, train_masks = preprocessing_for_bert(df_train['Text'])
val_inputs, val_masks = preprocessing_for_bert(df_valid['Text'])

Tokenizing data...


In [10]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Convert other data types to torch.Tensor
train_labels = torch.tensor((df_train['Score']-1).to_numpy())
val_labels = torch.tensor((df_valid['Score']-1).to_numpy())

# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
batch_size = 2

# Create the DataLoader for our training set
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

# Create the DataLoader for our validation set
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)

## Training Model

In [11]:
%%time
import torch
import torch.nn as nn
from transformers import BertModel

# Create the BertClassfier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks.
    """
    def __init__(self, freeze_bert=False):
        """
        @param    bert: a BertModel object
        @param    classifier: a torch.nn.Module classifier
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model
        """
        super(BertClassifier, self).__init__()
        # Specify hidden size of BERT, hidden size of our classifier, and number of labels
        D_in, H, D_out = 768, 50, 5

        # Instantiate BERT model
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # Instantiate an one-layer feed-forward classifier
        self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            #nn.Dropout(0.5),
            nn.Linear(H, D_out)
        )

        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        
    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)
        
        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits

CPU times: user 28 ms, sys: 0 ns, total: 28 ms
Wall time: 234 ms


In [12]:
from transformers import AdamW, get_linear_schedule_with_warmup

def initialize_model(epochs=4):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)

    # Tell PyTorch to run the model on GPU
    bert_classifier.to(device)

    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                      lr=2e-4,    # Default learning rate
                      eps=1e-8    # Default epsilon value
                      )

    # Total number of training steps
    total_steps = len(train_dataloader) * epochs

    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0, # Default value
                                                num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler

In [13]:
import random
import time

# Specify loss function
loss_fn = nn.CrossEntropyLoss()

def set_seed(seed_value=42):
    """
    Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
    """
    Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
        print("-"*70)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()

        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0

        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            batch_counts +=1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
            # Zero out any previously calculated gradients
            #model.zero_grad()

            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)

            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, b_labels)
            batch_loss += loss.item()
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20 batches
            if (step % 20 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed for 20 batches
                time_elapsed = time.time() - t0_batch

                # Print training results
                print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")

                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)

        print("-"*70)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            
            print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {time_elapsed:^9.2f}")
            print("-"*70)
        print("\n")
    
    print("Training complete!")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    val_accuracy = []
    val_loss = []

    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)

        # Compute loss
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())

        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten()

        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)

    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)

    return val_loss, val_accuracy

In [14]:
set_seed(42)    # Set seed for reproducibility
bert_classifier, optimizer, scheduler = initialize_model(epochs=2)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [15]:
train(bert_classifier, train_dataloader, val_dataloader, epochs=10, evaluation=True)

Start training...

 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------
   1    |   20    |   1.014283   |     -      |     -     |   3.73   
   1    |   40    |   1.115069   |     -      |     -     |   2.25   
   1    |   60    |   1.616656   |     -      |     -     |   2.25   
   1    |   80    |   1.147929   |     -      |     -     |   2.26   
   1    |   100   |   1.194802   |     -      |     -     |   2.25   
   1    |   120   |   1.298855   |     -      |     -     |   2.25   
   1    |   140   |   1.307164   |     -      |     -     |   2.26   
   1    |   160   |   1.192954   |     -      |     -     |   2.26   
   1    |   180   |   1.169830   |     -      |     -     |   2.26   
   1    |   200   |   1.112374   |     -      |     -     |   2.26   
   1    |   220   |   1.166562   |     -      |     -     |   2.27   
   1    |   240   |   0.882768   |     -      |     -     |   2.26   


   1    |  2320   |   1.324621   |     -      |     -     |   2.26   
   1    |  2340   |   1.180793   |     -      |     -     |   2.26   
   1    |  2360   |   0.974621   |     -      |     -     |   2.26   
   1    |  2380   |   1.489309   |     -      |     -     |   2.26   
   1    |  2400   |   1.472872   |     -      |     -     |   2.27   
   1    |  2420   |   1.079488   |     -      |     -     |   2.26   
   1    |  2440   |   1.097633   |     -      |     -     |   2.26   
   1    |  2460   |   0.865502   |     -      |     -     |   2.26   
   1    |  2480   |   1.215085   |     -      |     -     |   2.26   
   1    |  2499   |   1.377729   |     -      |     -     |   2.12   
----------------------------------------------------------------------
   1    |    -    |   1.222546   |  1.296027  |   60.30   |  300.72  
----------------------------------------------------------------------


 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------

   2    |  2060   |   1.423855   |     -      |     -     |   2.27   
   2    |  2080   |   1.151823   |     -      |     -     |   2.27   
   2    |  2100   |   1.707563   |     -      |     -     |   2.27   
   2    |  2120   |   0.933104   |     -      |     -     |   2.26   
   2    |  2140   |   1.620881   |     -      |     -     |   2.26   
   2    |  2160   |   1.537265   |     -      |     -     |   2.27   
   2    |  2180   |   0.958061   |     -      |     -     |   2.27   
   2    |  2200   |   1.264539   |     -      |     -     |   2.26   
   2    |  2220   |   1.240339   |     -      |     -     |   2.26   
   2    |  2240   |   1.478502   |     -      |     -     |   2.26   
   2    |  2260   |   1.052267   |     -      |     -     |   2.27   
   2    |  2280   |   0.914716   |     -      |     -     |   2.26   
   2    |  2300   |   1.468710   |     -      |     -     |   2.27   
   2    |  2320   |   1.155161   |     -      |     -     |   2.26   
   2    |  2340   | 

   3    |  1860   |   1.165858   |     -      |     -     |   2.26   
   3    |  1880   |   0.907149   |     -      |     -     |   2.27   
   3    |  1900   |   1.205397   |     -      |     -     |   2.27   
   3    |  1920   |   0.843768   |     -      |     -     |   2.28   
   3    |  1940   |   1.242648   |     -      |     -     |   2.28   
   3    |  1960   |   0.959569   |     -      |     -     |   2.28   
   3    |  1980   |   1.452767   |     -      |     -     |   2.28   
   3    |  2000   |   1.073376   |     -      |     -     |   2.27   
   3    |  2020   |   1.600255   |     -      |     -     |   2.27   
   3    |  2040   |   1.045675   |     -      |     -     |   2.28   
   3    |  2060   |   1.170977   |     -      |     -     |   2.28   
   3    |  2080   |   0.919051   |     -      |     -     |   2.28   
   3    |  2100   |   1.077822   |     -      |     -     |   2.27   
   3    |  2120   |   1.206688   |     -      |     -     |   2.26   
   3    |  2140   | 

   4    |  1600   |   0.964459   |     -      |     -     |   2.28   
   4    |  1620   |   1.603114   |     -      |     -     |   2.28   
   4    |  1640   |   1.216750   |     -      |     -     |   2.28   
   4    |  1660   |   1.220849   |     -      |     -     |   2.28   
   4    |  1680   |   1.013798   |     -      |     -     |   2.28   
   4    |  1700   |   1.267804   |     -      |     -     |   2.28   
   4    |  1720   |   1.546869   |     -      |     -     |   2.27   
   4    |  1740   |   1.322367   |     -      |     -     |   2.27   
   4    |  1760   |   1.110236   |     -      |     -     |   2.26   
   4    |  1780   |   1.125551   |     -      |     -     |   2.27   
   4    |  1800   |   0.964874   |     -      |     -     |   2.27   
   4    |  1820   |   1.163463   |     -      |     -     |   2.27   
   4    |  1840   |   1.308799   |     -      |     -     |   2.26   
   4    |  1860   |   1.246781   |     -      |     -     |   2.27   
   4    |  1880   | 

   5    |  1340   |   1.135156   |     -      |     -     |   2.28   
   5    |  1360   |   1.556576   |     -      |     -     |   2.28   
   5    |  1380   |   1.107854   |     -      |     -     |   2.28   
   5    |  1400   |   1.187842   |     -      |     -     |   2.27   
   5    |  1420   |   1.161608   |     -      |     -     |   2.28   
   5    |  1440   |   1.154745   |     -      |     -     |   2.27   
   5    |  1460   |   1.062741   |     -      |     -     |   2.27   
   5    |  1480   |   1.215607   |     -      |     -     |   2.27   
   5    |  1500   |   1.556597   |     -      |     -     |   2.27   
   5    |  1520   |   1.290891   |     -      |     -     |   2.27   
   5    |  1540   |   0.942098   |     -      |     -     |   2.28   
   5    |  1560   |   1.261412   |     -      |     -     |   2.27   
   5    |  1580   |   1.267443   |     -      |     -     |   2.27   
   5    |  1600   |   1.080417   |     -      |     -     |   2.27   
   5    |  1620   | 

   6    |  1080   |   1.416867   |     -      |     -     |   2.27   
   6    |  1100   |   1.017336   |     -      |     -     |   2.27   
   6    |  1120   |   1.167601   |     -      |     -     |   2.27   
   6    |  1140   |   1.343446   |     -      |     -     |   2.27   
   6    |  1160   |   0.884829   |     -      |     -     |   2.27   
   6    |  1180   |   1.333849   |     -      |     -     |   2.26   
   6    |  1200   |   0.973333   |     -      |     -     |   2.26   
   6    |  1220   |   1.372016   |     -      |     -     |   2.27   
   6    |  1240   |   1.276063   |     -      |     -     |   2.28   
   6    |  1260   |   1.491168   |     -      |     -     |   2.27   
   6    |  1280   |   1.199625   |     -      |     -     |   2.27   
   6    |  1300   |   0.802887   |     -      |     -     |   2.29   
   6    |  1320   |   0.756282   |     -      |     -     |   2.28   
   6    |  1340   |   1.179131   |     -      |     -     |   2.27   
   6    |  1360   | 

   7    |   820   |   1.152052   |     -      |     -     |   2.26   
   7    |   840   |   1.032251   |     -      |     -     |   2.27   
   7    |   860   |   1.088742   |     -      |     -     |   2.27   
   7    |   880   |   1.397903   |     -      |     -     |   2.28   
   7    |   900   |   1.272703   |     -      |     -     |   2.28   
   7    |   920   |   1.238581   |     -      |     -     |   2.28   
   7    |   940   |   1.113558   |     -      |     -     |   2.27   
   7    |   960   |   1.663264   |     -      |     -     |   2.28   
   7    |   980   |   1.200361   |     -      |     -     |   2.28   
   7    |  1000   |   1.386536   |     -      |     -     |   2.27   
   7    |  1020   |   1.554065   |     -      |     -     |   2.27   
   7    |  1040   |   1.194261   |     -      |     -     |   2.27   
   7    |  1060   |   0.985493   |     -      |     -     |   2.26   
   7    |  1080   |   0.880409   |     -      |     -     |   2.26   
   7    |  1100   | 

   8    |   560   |   1.298262   |     -      |     -     |   2.26   
   8    |   580   |   1.032891   |     -      |     -     |   2.26   
   8    |   600   |   1.142039   |     -      |     -     |   2.26   
   8    |   620   |   0.965496   |     -      |     -     |   2.27   
   8    |   640   |   1.309241   |     -      |     -     |   2.28   
   8    |   660   |   1.007299   |     -      |     -     |   2.27   
   8    |   680   |   1.283798   |     -      |     -     |   2.28   
   8    |   700   |   1.133599   |     -      |     -     |   2.28   
   8    |   720   |   0.966055   |     -      |     -     |   2.28   
   8    |   740   |   1.219687   |     -      |     -     |   2.26   
   8    |   760   |   1.371940   |     -      |     -     |   2.27   
   8    |   780   |   1.565833   |     -      |     -     |   2.27   
   8    |   800   |   1.071660   |     -      |     -     |   2.27   
   8    |   820   |   1.010322   |     -      |     -     |   2.27   
   8    |   840   | 

   9    |   300   |   1.408080   |     -      |     -     |   2.27   
   9    |   320   |   1.199149   |     -      |     -     |   2.27   
   9    |   340   |   1.483760   |     -      |     -     |   2.27   
   9    |   360   |   1.483297   |     -      |     -     |   2.28   
   9    |   380   |   1.220738   |     -      |     -     |   2.27   
   9    |   400   |   1.170271   |     -      |     -     |   2.28   
   9    |   420   |   1.107427   |     -      |     -     |   2.27   
   9    |   440   |   1.501627   |     -      |     -     |   2.27   
   9    |   460   |   1.143090   |     -      |     -     |   2.26   
   9    |   480   |   1.282559   |     -      |     -     |   2.26   
   9    |   500   |   1.360938   |     -      |     -     |   2.27   
   9    |   520   |   1.278271   |     -      |     -     |   2.27   
   9    |   540   |   1.540562   |     -      |     -     |   2.27   
   9    |   560   |   1.120850   |     -      |     -     |   2.27   
   9    |   580   | 

  10    |   40    |   1.770478   |     -      |     -     |   2.26   
  10    |   60    |   1.244153   |     -      |     -     |   2.27   
  10    |   80    |   0.882368   |     -      |     -     |   2.27   
  10    |   100   |   1.023933   |     -      |     -     |   2.27   
  10    |   120   |   0.913606   |     -      |     -     |   2.26   
  10    |   140   |   1.176484   |     -      |     -     |   2.26   
  10    |   160   |   1.269993   |     -      |     -     |   2.27   
  10    |   180   |   1.133634   |     -      |     -     |   2.27   
  10    |   200   |   1.280986   |     -      |     -     |   2.26   
  10    |   220   |   1.137322   |     -      |     -     |   2.26   
  10    |   240   |   1.297229   |     -      |     -     |   2.26   
  10    |   260   |   1.205506   |     -      |     -     |   2.26   
  10    |   280   |   1.853442   |     -      |     -     |   2.27   
  10    |   300   |   1.104846   |     -      |     -     |   2.28   
  10    |   320   | 

  10    |  2400   |   1.391136   |     -      |     -     |   2.27   
  10    |  2420   |   1.893721   |     -      |     -     |   2.27   
  10    |  2440   |   1.091594   |     -      |     -     |   2.28   
  10    |  2460   |   1.906515   |     -      |     -     |   2.28   
  10    |  2480   |   1.529574   |     -      |     -     |   2.28   
  10    |  2499   |   1.208236   |     -      |     -     |   2.13   
----------------------------------------------------------------------
  10    |    -    |   1.204614   |  1.330489  |   60.30   |  299.89  
----------------------------------------------------------------------


Training complete!


In [16]:
import torch.nn.functional as F

def bert_predict(model, test_dataloader):
    """Perform a forward pass on the trained BERT model to predict probabilities
    on the test set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    all_logits = []

    # For each batch in our test set...
    for batch in test_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask = tuple(t.to(device) for t in batch)[:2]

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
        all_logits.append(logits)
    
    # Concatenate logits from each batch
    all_logits = torch.cat(all_logits, dim=0)

    # Apply softmax to calculate probabilities
    probs = F.softmax(all_logits, dim=1).cpu().numpy()

    return probs

In [17]:
# # Concatenate the train set and the validation set
# full_train_data = torch.utils.data.ConcatDataset([train_data, val_data])
# full_train_sampler = RandomSampler(full_train_data)
# full_train_dataloader = DataLoader(full_train_data, sampler=full_train_sampler, batch_size=32)

# # Train the Bert Classifier on the entire training data
# set_seed(42)
# bert_classifier, optimizer, scheduler = initialize_model(epochs=2)
# train(bert_classifier, full_train_dataloader, epochs=2)

## Final Predictions

In [18]:
# Run `preprocessing_for_bert` on the test set
print('Tokenizing data...')
test_inputs, test_masks = preprocessing_for_bert(df_test['Text'])

test_labels = torch.tensor((df_test['Score']-1).to_numpy())


# Create the DataLoader for our test set
test_dataset = TensorDataset(test_inputs, test_masks, test_labels)
test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size)

Tokenizing data...


In [19]:
# Compute predicted probabilities on the test set
probs = bert_predict(bert_classifier, test_dataloader)

test_loss, test_accuracy = evaluate(bert_classifier, test_dataloader)
print("Test Loss:", test_loss)
print("Test Accuracy", test_accuracy)

Test Loss: 1.3138304194509982
Test Accuracy 61.6
