<a href="https://colab.research.google.com/github/plehman2000/TwitterProject/blob/Optimization/DataModelling/getSentiment/getSentiment_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Optimizing Deep Learning Model for Sentiment Analysis

## Analyzing the current model

Installing dependencies.

In [None]:
!pip install transformers
!pip install torchviz

In [None]:
import os
import numpy as np
from pprint import pprint
from termcolor import colored, cprint
from transformers import BertModel
from graphviz import Source
import transformers
import torch
import torch.nn as nn
import torchviz

Debugger.

In [None]:
from IPython.core.debugger import Tracer

Data.

In [None]:
# Small hand-written set of example sentences used as toy inputs for the model.
# Stored as a 1-D NumPy array of strings, one sentence per element.
samples = np.asarray([
    "This movie is so painful to watch.",
    "I cannot even fathom how gruesome the accident was.",
    "Springtime is such a refreshing season to go out and play.",
    "I love the sweet flavors of icecream.",
])

Model.

In [None]:
class BertClassifier(nn.Module):
    """Pretrained BERT encoder followed by a small feed-forward scoring head.

    The head is Dropout -> Linear -> ReLU -> Linear -> ReLU and is applied to
    the second element returned by the encoder (the pooled output).

    Args:
        dropout: Dropout probability applied to the pooled encoder output.
        pretrained_name: Name or path passed to ``BertModel.from_pretrained``.
        hidden_dim: Width of the intermediate linear layer of the head.
        num_classes: Number of scores produced per input sequence.

    The attribute names ``bert`` and ``layer1`` and the layer order are kept
    as they were so existing ``state_dict`` keys still match.
    """

    def __init__(self, dropout=0.5, pretrained_name='bert-base-cased',
                 hidden_dim=100, num_classes=3):
        super().__init__()
        self.bert = BertModel.from_pretrained(pretrained_name)
        # Take the encoder width from the checkpoint's config instead of
        # hard-coding 768, so other BERT sizes work without edits.
        encoder_dim = self.bert.config.hidden_size
        self.layer1 = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(encoder_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
            # NOTE(review): this final ReLU clamps every negative score to 0.
            # It is kept because any previously trained weights were presumably
            # fitted with it in place -- confirm before removing.
            nn.ReLU(),
        )

    def forward(self, input_id, mask):
        """Score a batch of token-id sequences.

        Args:
            input_id: Token ids, forwarded to the encoder as ``input_ids``.
            mask: Attention mask, forwarded to the encoder as ``attention_mask``.

        Returns:
            The output of the scoring head; its last dimension has
            ``num_classes`` entries, all non-negative because of the final ReLU.
        """
        # With return_dict=False the encoder returns a tuple; only the second
        # element (the pooled output) is used here.
        _, pooled_output = self.bert(
            input_ids=input_id, attention_mask=mask, return_dict=False
        )
        return self.layer1(pooled_output)

In [None]:
# Pick the device once, before anything is moved to it, and fall back to the
# CPU so the notebook still runs on a runtime without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loading of the trained weights is currently disabled (kept for reference).
# NOTE(review): the path below is a GitHub "tree" web URL; torch.load expects a
# local file path or file-like object, so the checkpoint presumably has to be
# downloaded first -- confirm before re-enabling.
# GITHUB_REPO_BRANCH = 'https://www.github.com/plehman2000/TwitterProject/tree/main'
# MODELPATH = GITHUB_REPO_BRANCH +'/DataModelling/getSentiment/sentimentModel/'
# stat_dict = torch.load(MODELPATH + 'model.pkl', map_location=device)

# Because load_state_dict is commented out, only the BERT encoder carries
# pretrained weights; the classification head is freshly initialised.
model = BertClassifier()
# model.load_state_dict(stat_dict)
model = model.to(device)

Code for the `getSentiment` function.

In [None]:
# Runs every sample sentence through the model one at a time, saves a torchviz
# graph of the forward pass per sample, and collects per-class probabilities.
#
# Changes from the earlier version of this cell:
#   * The forward pass runs with autograd enabled. Under torch.no_grad() the
#     output has no grad_fn, so torchviz could only draw a single output node.
#   * Scores are passed through softmax before being reported as percentages.
#   * The deprecated `pad_to_max_length=True` is replaced by
#     `padding='max_length'`, with explicit truncation to `max_length`.
#   * The unused Tracer instance and the no-op `model.zero_grad()` are removed
#     (no backward pass is ever run here, so there are no gradients to clear).
disp = True

model.eval()  # evaluation mode, so dropout is inactive
tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-cased')
all_probs = []
for batch_index, sample in enumerate(samples):
    inputs = tokenizer.encode_plus(
        text=sample,
        text_pair=None,
        add_special_tokens=True,
        max_length=512,
        padding='max_length',
        truncation=True,
    )

    # debugging
    print(f'inputs dictionary with length: {len(inputs)}')
    print(inputs)

    # The model is called with a leading batch dimension, hence unsqueeze(0).
    ids = torch.tensor(inputs["input_ids"], dtype=torch.long, device=device).unsqueeze(0)
    # token_type_ids is only printed below; it is not passed to the model.
    token_type_ids = torch.tensor(inputs["token_type_ids"], dtype=torch.long, device=device)
    mask = torch.tensor(inputs['attention_mask'], dtype=torch.long, device=device).unsqueeze(0)

    # debugging
    cprint(text=f'ids with shape {ids.shape}:\n{ids}\n', color='blue')
    cprint(text=f'token_type_ids with shape {token_type_ids.shape}:\n{token_type_ids}\n', color='yellow')
    cprint(text=f'mask with shape {mask.shape}:\n{mask}\n', color='green')

    # Forward pass with graph recording on, so torchviz has something to draw.
    with torch.enable_grad():
        logits = model(input_id=ids, mask=mask)

    # saving graphviz of model per sample; files are named after the
    # zero-based sample index ("0", "0.png", "1", "1.png", ...)
    torchviz.make_dot(var=logits, params=dict(model.named_parameters())).render(filename=str(batch_index), format='png')

    # debugging
    print(f'logits:\n{logits}')

    # Normalise the scores over the class dimension so they sum to 1, then
    # drop the batch dimension.
    probabilities = torch.softmax(logits.detach(), dim=1).cpu().numpy()[0]
    all_probs.append(probabilities)

if disp:
    for sample, percentages in zip(samples, all_probs):
        print(f'Text: \"{sample}\"')
        ##order is Negative, Neutral, Positive
        print(f'{100*percentages[0]:.2f}% Negative, {100*percentages[1]:.2f}% Neutral, {100*percentages[2]:.2f}% Positive')


In [None]:
from ipywidgets import SelectionSlider

# The rendered graph files are named after the zero-based sample index
# (render(filename=str(batch_index))), so the slider *value* must be zero-based
# too. The label shown to the user stays one-based for readability.
graph_selected = SelectionSlider(
    options=[(f'Sample {position + 1}', position) for position in range(samples.size)]
)
graph_selected

In [None]:
# Load the Graphviz source file whose name is the slider's current value and
# display it as the cell output.
selected_graph_file = str(graph_selected.value)
Source.from_file(filename=selected_graph_file)

## TensorRT Optimization Strategies

In [None]:
!pip install torch_tensorrt
!pip install tensorrt
!pip install pycuda

In [None]:
!git clone https://github.com/NVIDIA-AI-IOT/torch2trt
!python ./torch2trt/setup.py install --plugins

In [None]:
# Print the NVIDIA driver's report for this runtime, as a check that a GPU is
# attached before the TensorRT steps below.
!nvidia-smi

In [None]:
import torch_tensorrt

# Compiles BertClassifier with Torch-TensorRT and saves the resulting
# TorchScript module to "trt_ts_module.ts".
#
# NOTE(review): this cell could not be executed as part of the review; verify
# it against the installed Torch-TensorRT / TensorRT versions.
#
# Fixes relative to the previous version of this cell:
#   * `torch_tensorrt.compile` takes the module as its first argument
#     (`module`); it has no `model=` keyword.
#   * `enabled_precisions` selects kernel precisions (float32 / half), not
#     integer tensor dtypes.
#   * The model's forward takes two 2-D inputs (token ids and attention mask),
#     so two (batch, sequence) input specs are needed instead of one 1-D spec.
#   * The model is moved to the GPU before compilation.
#   * The sentences are tokenised as a batch. Passing the list to
#     `encode_plus` treats it as a single pre-tokenised sequence.
#   * Token ids are no longer cast to float16: they are embedding indices and
#     must stay integers. Reduced precision is requested via
#     `enabled_precisions` instead.

# Sequence-length range the engine is built for (min / opt / max).
MIN_SEQ_LEN, OPT_SEQ_LEN, MAX_SEQ_LEN = 64, 128, 512

model = BertClassifier().eval().to('cuda')

# Tokenise all samples at once, padded to the "opt" length so the example
# inputs fall inside the shape range declared below.
encoded = tokenizer(
    samples.tolist(),
    padding='max_length',
    truncation=True,
    max_length=OPT_SEQ_LEN,
    return_tensors='pt',
)
ids = encoded['input_ids'].to('cuda')
mask = encoded['attention_mask'].to('cuda')
batch_size = ids.shape[0]

# One spec per forward() argument, in order: input_id, mask.
inputs = [
    torch_tensorrt.Input(
        min_shape=[batch_size, MIN_SEQ_LEN],
        opt_shape=[batch_size, OPT_SEQ_LEN],
        max_shape=[batch_size, MAX_SEQ_LEN],
        dtype=torch.int32,
    )
    for _ in ('input_id', 'mask')
]

enabled_precisions = {torch.float32, torch.half}

# Trace first: the TorchScript frontend would otherwise try to script the
# module. NOTE(review): presumably the Hugging Face encoder does not script
# cleanly -- confirm.
with torch.no_grad():
    traced_model = torch.jit.trace(model, (ids, mask))

trt_ts_module = torch_tensorrt.compile(
    traced_model,
    ir='ts',  # request the TorchScript path so torch.jit.save can serialise the result
    inputs=inputs,
    enabled_precisions=enabled_precisions,
    truncate_long_and_double=True,  # the traced graph contains int64 tensors
)

torch.jit.save(trt_ts_module, "trt_ts_module.ts")

cprint(text=f'ids:\n{ids}', color='white')