__BERT Sentiment Analysis Model Demonstration__

This Python notebook demonstrates the BERT sentiment analysis model on user-supplied text reviews. Run all code cells in order, then enter a customer review when prompted by the last code cell.

Imports

In [1]:
import pandas as pd
import numpy as np
import os
import random
from pathlib import Path
import json
import re
import string
from collections import Counter, defaultdict
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
import warnings
# Silence every Python warning for the rest of the session. This also hides
# deprecation notices raised by the libraries used in later cells.
warnings.filterwarnings('ignore')
import nltk
# Fetch the NLTK stopword corpus (reported as already up-to-date when it is
# present locally).
nltk.download('stopwords')
from nltk.corpus import stopwords
# English stopwords collected into a set.
# NOTE(review): not referenced by any cell shown in this notebook - confirm it is still needed.
stopWords_nltk = set(stopwords.words('english'))
import torch
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using {}.".format(device_name))
from tqdm.notebook import tqdm
from transformers import BertTokenizer
from torch.utils.data import TensorDataset
from transformers import BertForSequenceClassification

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\marti\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Using cpu.


Initialize Configuration Settings

In [2]:
class Config:
    """Central place for the settings shared by the cells of this notebook.

    All values are plain class attributes, so they can be read either from the
    class itself or from the module-level ``config`` instance created below.
    """

    # --- reproducibility and hardware ---
    seed_val = 17  # seed passed to the Python, NumPy and PyTorch RNGs
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # --- optimisation settings ---
    # NOTE(review): not referenced by the cells shown here; presumably kept
    # from the training notebook - confirm.
    epochs = 5
    batch_size = 6
    seq_length = 512  # used as the tokenizer's max_length at inference time
    lr = 2e-5
    eps = 1e-8

    # --- model checkpoint ---
    pretrained_model = 'bert-base-uncased'

    # --- data split settings ---
    # NOTE(review): not referenced by the cells shown here - confirm.
    test_size = 0.15
    random_state = 42

    # --- tokenizer options ---
    add_special_tokens = True
    return_attention_mask = True
    pad_to_max_length = True
    # NOTE(review): the tokenizer is constructed with a literal
    # do_lower_case=True rather than this value - confirm which is intended.
    do_lower_case = False
    return_tensors = 'pt'


config = Config()

Set Random Seed and Initialize Device Configuration

In [3]:
import random

# Tensors in the inference cell are moved to this device.
device = config.device

# Seed every random number generator from the single configured value:
# Python's `random`, NumPy, PyTorch on CPU, and PyTorch on all CUDA devices.
for _seed_rng in (random.seed,
                  np.random.seed,
                  torch.manual_seed,
                  torch.cuda.manual_seed_all):
    _seed_rng(config.seed_val)

Initialize and Load Model

In [4]:
# Build the BERT classifier with a 3-class head (negative / neutral / positive).
# The classification head is newly initialised at this point; its weights are
# replaced by the fine-tuned checkpoint loaded at the end of this cell.
model = BertForSequenceClassification.from_pretrained(config.pretrained_model,
                                                      num_labels=3,
                                                      output_attentions=False,
                                                      output_hidden_states=False)

from transformers import BertTokenizer
# Use the same checkpoint name as the model so tokenizer and model cannot drift apart.
tokenizer = BertTokenizer.from_pretrained(config.pretrained_model, do_lower_case=True)

# Put the model on the device the inference cell sends its inputs to.
# Without this, a CUDA machine fails with a device-mismatch error because the
# inputs live on the GPU while the model stays on the CPU.
model.to(config.device)
# Inference only: make sure dropout is disabled.
model.eval()

# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# Kept as the last expression so the cell still displays the key-matching result.
model.load_state_dict(torch.load('_BERT_epoch_2.model', map_location=config.device))

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<All keys matched successfully>

Model Demonstration

Three sample text reviews taken from the "unique_data.csv" test dataset are listed as comments at the top of the cell below to showcase the sentiment analysis model. Feel free to input your own custom text reviews to test the model!

In [5]:
# Sample reviews from the test set that can be pasted at the prompt:
# negative test example (1 star): Fail...'nuff said.
# neutral test example (3 stars): Coffee was fine, but nothing spectacular.
# positive test example (5 stars): Great food. Everything we ordered was delicious. Exceptional service. Will definitely return next time we're in the area.

# Enter a review or piece of text to be analyzed.
sample_review = input("Please enter a sample text review: ")

# Tokenize the single review as a batch of one.
# `padding=` replaces the deprecated `pad_to_max_length=` keyword; the config
# flag still decides whether the sequence is padded out to `max_length`.
encoded_data_test_single = tokenizer.batch_encode_plus(
    [sample_review],
    add_special_tokens=config.add_special_tokens,
    return_attention_mask=config.return_attention_mask,
    padding='max_length' if config.pad_to_max_length else False,
    max_length=config.seq_length,
    truncation=True,
    return_tensors=config.return_tensors
)
input_ids_test = encoded_data_test_single['input_ids']
attention_masks_test = encoded_data_test_single['attention_mask']

inputs = {'input_ids': input_ids_test.to(device),
          'attention_mask': attention_masks_test.to(device),
         }

# No gradients are needed for a forward pass used only for prediction.
with torch.no_grad():
    outputs = model(**inputs)

# outputs[0] is treated as the per-class scores with one row per input text;
# pick the highest-scoring class for the only row in this batch.
prediction = int(outputs[0].argmax(dim=1)[0])

# Print the review and its predicted sentiment (class index -> label).
print(f"Review: {sample_review}")
print("Predicted Sentiment: " + ["Negative", "Neutral", "Positive"][prediction])

Review: Coffee was fine, but nothing spectacular.
Predicted Sentiment: Neutral
