In [1]:
# Mount Google Drive at /content/drive so files stored there (the saved model
# weights used later in this notebook) are reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from transformers import BertTokenizer , BertModel

import torch
from torch import nn
# Device used for the checkpoint's map_location and for the input tensors (CPU here).
device = torch.device('cpu')
# Human-readable rating labels; a predicted class index is looked up in this list.
class_names = ['1-Star', '2-Star', '3-Star', '4-Star', '5-Star']

In [3]:
# Load the pretrained tokenizer for 'bert-base-uncased' (the same checkpoint name
# the model's BERT encoder is loaded from).
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [4]:
# RatingEstimator is a subclass of nn.Module,
# which makes it a PyTorch neural-network module.
# It therefore has to implement the forward pass.

class RatingEstimator(nn.Module):
    """BERT encoder followed by a small MLP head that scores each rating class.

    Args:
        n_classes: Number of output classes (size of the final linear layer).
    """

    def __init__(self, n_classes):
        # Initialise nn.Module so sub-modules and parameters get registered.
        super().__init__()

        # Pre-trained BERT-base encoder. return_dict=False makes it return a
        # plain tuple of tensors instead of a dict-like output object.
        self.bert = BertModel.from_pretrained('bert-base-uncased', return_dict=False)

        # Take the encoder width from its config rather than hard-coding 768,
        # so the head always matches the loaded encoder.
        hidden_size = self.bert.config.hidden_size

        # Classification head: dropout, then two ReLU-activated linear layers,
        # then a final linear layer mapping to n_classes logits.
        # The layer order (and therefore the Sequential indices) must stay as
        # is, otherwise previously saved state_dict keys would no longer match.
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.3),
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, n_classes),
        )

    def forward(self, input_ids, attention_mask):
        """Return the classifier logits for a batch of tokenised inputs.

        Args:
            input_ids: Token-id tensor passed to BERT as ``input_ids``.
            attention_mask: Mask tensor passed to BERT as ``attention_mask``.

        Returns:
            Output of the classification head applied to BERT's pooled output.
        """
        bert_outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )

        # Element 1 of the tuple is the pooled output. Indexing (rather than
        # unpacking exactly two values) keeps working if the encoder is
        # configured to return extra tuple elements.
        pooled_output = bert_outputs[1]

        # Dropout + MLP head produce one logit per class.
        return self.classifier(pooled_output)

In [None]:
# Load the best fine-tuned weights into a fresh RatingEstimator.
path = '/content/drive/MyDrive/FineTune_Bert_Model_state_save/best_model_parameter.bin'
model = RatingEstimator(len(class_names))

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
state_dict = torch.load(path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)

# Keep the model on the same device the inputs are moved to in later cells.
model.to(device)

# Switch to inference mode (disables dropout).
model.eval()

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

RatingEstimator(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_a

In [6]:
# Predicting Rating on Raw text data
def get_rating(sample_review):
    """Predict a star rating for a raw review string and print the result.

    Uses the notebook-level ``tokenizer``, ``model``, ``device`` and
    ``class_names``.

    Args:
        sample_review: Review text to classify. Inputs longer than 512 tokens
            are truncated.

    Returns:
        None. The review text and the predicted label are printed.
    """
    # Calling the tokenizer directly is the current API for what
    # encode_plus() did; the arguments are unchanged.
    encoded_review = tokenizer(
        sample_review,
        max_length=512,
        truncation=True,
        padding=True,
        add_special_tokens=True,
        return_token_type_ids=False,
        return_attention_mask=True,
        return_tensors='pt'
    )

    input_ids = encoded_review['input_ids'].to(device)
    attention_mask = encoded_review['attention_mask'].to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        output = model(input_ids, attention_mask)

    # Index of the highest logit, converted to a plain int so it can be used
    # to index the label list without relying on tensor-to-index coercion.
    prediction = torch.argmax(output, dim=1).item()

    print(f'Review text : {sample_review}')
    print(f'Rating : {class_names[prediction]}')

# Try the predictor on a longer review that mixes praise and criticism.
sample_text = 'This movie is a whimsical escape into nostalgia, where Woody Allens romantic portrayal of Paris shines, yet its narrative depth struggles to match the enchanting backdrop.'

get_rating(sample_text)

Review text : This movie is a whimsical escape into nostalgia, where Woody Allens romantic portrayal of Paris shines, yet its narrative depth struggles to match the enchanting backdrop.
Rating : 3-Star


In [7]:
# Short positive review.
s = 'It is a good movie '
get_rating(s)

Review text : It is a good movie 
Rating : 4-Star


In [8]:
# Short negative review.
s = 'It was one of the worst movies'
get_rating(s)

Review text : It was one of the worst movies
Rating : 1-Star
