In [1]:
import os
import pickle

In [2]:
!pip install -qq transformers

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 1.0.61 requires nvidia-ml-py3, which is not installed.[0m


In [3]:
import transformers
from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
from bs4 import BeautifulSoup
import torch.nn.functional as F

In [4]:
from dataloader_classes_func import ReviewDataset, createDataLoader

In [5]:
with open("./data/train_reviews_v2.pkl",'rb') as fp:
    train_reviews= pickle.load(fp)

with open("./data/val_reviews_v2.pkl",'rb') as fp:
    val_reviews= pickle.load(fp)
    
with open("./data/train_labels_v2.pkl",'rb') as fp:
    train_labels= pickle.load(fp)

with open("./data/val_labels_v2.pkl",'rb') as fp:
    val_labels= pickle.load(fp)

In [6]:
MODEL_NAME = 'bert-base-cased'
tokenizer= BertTokenizer.from_pretrained(MODEL_NAME)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=213450.0, style=ProgressStyle(descripti…




In [7]:
train_dataset= {'data': train_reviews, 'labels': train_labels}
train_dataloader= createDataLoader(train_dataset, tokenizer)

In [8]:
val_dataset= {'data': val_reviews, 'labels': val_labels}
val_dataloader= createDataLoader(val_dataset, tokenizer)

In [9]:
class SentimentClassifier(torch.nn.Module):
    
    def __init__(self, n_class=2):
        super(SentimentClassifier, self).__init__()
        self.n_class= n_class
        self.bert_model= BertModel.from_pretrained('bert-base-cased')
        self.drop = torch.nn.Dropout(p=0.3)
        self.out= torch.nn.Linear(self.bert_model.config.hidden_size, self.n_class)
        
    def forward(self, input_ids, attention_mask):
        bert_output= self.bert_model(input_ids= input_ids, attention_mask= attention_mask)
        dropout_output= self.drop(bert_output.pooler_output)
        linear_output= self.out(dropout_output)
        output= F.softmax(linear_output, dim= 1)
        return output

In [10]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = torch.device("cpu")

In [11]:
model= SentimentClassifier()
model= model.to(device)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=435779157.0, style=ProgressStyle(descri…




In [12]:
dl_item= next(iter(train_dataloader))
ip_id_tensor= dl_item['encoding']['input_ids'].squeeze().to(device)
attention_mask_tensor= dl_item['encoding']['attention_mask'].squeeze().to(device)

In [13]:
r= model(ip_id_tensor, attention_mask_tensor)

In [14]:
r

tensor([[0.5453, 0.4547],
        [0.6109, 0.3891],
        [0.5654, 0.4346],
        [0.5718, 0.4282],
        [0.6912, 0.3088],
        [0.8144, 0.1856],
        [0.7984, 0.2016],
        [0.5609, 0.4391],
        [0.7568, 0.2432],
        [0.7072, 0.2928],
        [0.7341, 0.2659],
        [0.5610, 0.4390],
        [0.5962, 0.4038],
        [0.6420, 0.3580],
        [0.6776, 0.3224],
        [0.7962, 0.2038],
        [0.7285, 0.2715],
        [0.6484, 0.3516],
        [0.5426, 0.4574],
        [0.7178, 0.2822],
        [0.6497, 0.3503],
        [0.7618, 0.2382],
        [0.6332, 0.3668],
        [0.6574, 0.3426],
        [0.7631, 0.2369],
        [0.6415, 0.3585],
        [0.5754, 0.4246],
        [0.5476, 0.4524],
        [0.3960, 0.6040],
        [0.6964, 0.3036],
        [0.5876, 0.4124],
        [0.7349, 0.2651]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [15]:
r.shape

torch.Size([32, 2])

In [12]:
def forward_nd_back_prop(model, input_data, criterion, optimizer ):
    ip_id_tensor= input_data['encoding']['input_ids'].squeeze().to(device)
    attention_mask_tensor= input_data['encoding']['attention_mask'].squeeze().to(device)
    labels= input_data['label'].to(device)
    
    model.train()
    optimizer.zero_grad()
    fwd_out= model(ip_id_tensor, attention_mask_tensor)
    model_output= torch.argmax(fwd_out, dim= 1)
    
    correct_preds= model_output.eq(labels).sum().item()
    acc= correct_preds/ len(labels)
    loss= criterion(fwd_out, labels)
    
    loss.backward()
    optimizer.step()
    
    return loss.item(), acc
    
    

In [13]:
def val_loss_and_acc(model, input_data, criterion):
    ip_id_tensor= input_data['encoding']['input_ids'].squeeze().to(device)
    attention_mask_tensor= input_data['encoding']['attention_mask'].squeeze().to(device)
    labels= input_data['label'].to(device)
    
    model.eval()
    #optimizer.zero_grad()
    fwd_out= model(ip_id_tensor, attention_mask_tensor)
    model_output= torch.argmax(fwd_out, dim= 1)
    
    correct_preds= model_output.eq(labels).sum().item()
    acc= correct_preds/ len(labels)
    loss= criterion(fwd_out, labels)
    
    #loss.backward()
    #optimizer.step()
    
    return loss.item(), acc

In [18]:
import datetime

In [14]:
def train(model, 
          train_dataloader, 
          val_dataloader,
          criterion,
          optimizer,
          n_epoch= 10,
          save_dir="./Saved_models"):
    
    epoch_train_loss=[]
    epoch_val_loss= []
    
    os.makedirs(save_dir, exist_ok= True)
    
    for epoch in range(1, n_epoch+1):
        train_losses= []
        train_acc= []
        val_losses= []
        val_acc= []
        
        
        for batch_i, batch_data in enumerate(train_dataloader, 1):
            train_batch_loss, train_batch_accuracy= forward_nd_back_prop(model, 
                                                                         batch_data, criterion, optimizer )
            val_batch_loss, val_batch_accuracy= val_loss_and_acc(model, batch_data, criterion)
            
            train_losses.append(train_batch_loss)
            train_acc.append(train_batch_accuracy)
            val_losses.append(val_batch_loss)
            val_acc.append(val_batch_accuracy)
            
        print("Epoch ({} / {}) Train_loss: {} , Train_accuracy: {} , Val_loss: {}, Val_acc: {}".
              format(epoch, n_epoch, np.mean(train_losses), np.mean(train_acc), 
                     np.mean(val_losses), np.mean(val_acc)))
        
        curr_train_loss= np.mean(train_losses)
        curr_val_loss= np.mean(val_losses)
        curr_train_acc= np.mean(train_acc)
        curr_val_acc= np.mean(val_acc)
        
        try:
            if min(epoch_val_loss) > curr_val_loss:
                epoch_val_loss.append(curr_val_loss)
                print("Saving the model")
                model_name= "M_20210206.pt"
                torch.save(model.state_dict(), model_name)
        except:
            print("Saving the model")
            model_name= "M_20210206.pt"
            save_path= os.path.join(save_dir, model_name)
            torch.save(model.state_dict(), save_path)
        
        epoch_val_loss.append(curr_val_loss)
        epoch_train_loss.append(curr_train_loss)
        

In [15]:
optimizer= AdamW(model.parameters(), lr=2e-5, correct_bias=False)
loss_fn = torch.nn.CrossEntropyLoss().to(device)

In [16]:
model.load_state_dict(torch.load("./Saved_models/M_2021-02-04_16:18:49.502614train_0.918_val0.935.pt"))

<All keys matched successfully>

In [17]:
train(model, train_dataloader, val_dataloader, loss_fn, optimizer, n_epoch= 7)

Epoch (1 / 7) Train_loss: 0.42025277452468873 , Train_accuracy: 0.88925 , Val_loss: 0.3992387859344482, Val_acc: 0.91275
Saving the model
Epoch (2 / 7) Train_loss: 0.3810523626804352 , Train_accuracy: 0.93025 , Val_loss: 0.36811351761817934, Val_acc: 0.9452
Saving the model
Epoch (3 / 7) Train_loss: 0.3735261456489563 , Train_accuracy: 0.93855 , Val_loss: 0.36200039138793944, Val_acc: 0.95095
Saving the model
Epoch (4 / 7) Train_loss: 0.3657917757034302 , Train_accuracy: 0.94655 , Val_loss: 0.35700319595336916, Val_acc: 0.95595
Saving the model
Epoch (5 / 7) Train_loss: 0.362313111782074 , Train_accuracy: 0.94985 , Val_loss: 0.352675622177124, Val_acc: 0.9603
Saving the model
Epoch (6 / 7) Train_loss: 0.3614415914058685 , Train_accuracy: 0.95085 , Val_loss: 0.35208485717773436, Val_acc: 0.96075
Saving the model
Epoch (7 / 7) Train_loss: 0.35306149158477784 , Train_accuracy: 0.95935 , Val_loss: 0.3467588393688202, Val_acc: 0.96625
Saving the model


In [18]:
torch.save(optimizer.state_dict(),"optimizer_state.pt")

In [11]:
!nvidia-smi

Wed Feb  3 10:36:24 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           On   | 00000000:00:1E.0 Off |                    0 |
| N/A   28C    P8    31W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------