In [24]:
# from google.colab import drive
# drive.mount('/content/drive')
# %cd /content/drive/MyDrive/Sentiment/Inference

from collections import Counter
from string import punctuation

import numpy as np
import pandas as pd
import torch

import models

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Sentiment/Inference


In [25]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [26]:
def load_checkpoint(model, file_name, device=None, optimizer=None):
    """Restore model weights, and optionally optimizer state, from a checkpoint.

    The checkpoint is read with ``torch.load`` and is expected to be a mapping
    holding the model state under ``'model_weights'`` and, when an optimizer is
    supplied, the optimizer state under ``'optimizer_state'``.

    Args:
        model: object whose ``load_state_dict`` receives ``ckpt['model_weights']``.
        file_name: checkpoint location handed to ``torch.load``.
        device: ``map_location`` for ``torch.load``. When falsy (the default),
            CUDA is used if available, otherwise the CPU.
        optimizer: optional optimizer; when given, its ``load_state_dict``
            receives ``ckpt['optimizer_state']``.

    Raises:
        KeyError: if a required key is missing from the checkpoint.
    """
    if not device:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # map_location lets a checkpoint saved on GPU be opened on a CPU-only host.
    ckpt = torch.load(file_name, map_location=device)

    model.load_state_dict(ckpt['model_weights'])
    print("Model's pretrained weights loaded!")

    if optimizer:
        optimizer.load_state_dict(ckpt['optimizer_state'])
        # Report the optimizer as restored only when it actually was; this
        # message used to be printed even when no optimizer was passed.
        print("Optimizer's state loaded!")

In [27]:
# Load the raw IMDB reviews from the working directory
df = pd.read_csv("./IMDB Dataset.csv")

# Lower-case every review (the 'review' column is overwritten)
df['review'] = df['review'].map(lambda text: text.lower())

# Drop every character listed in string.punctuation
_strip_punctuation = str.maketrans('', '', punctuation)
df['clean_text'] = df['review'].map(lambda text: text.translate(_strip_punctuation))

# Plain Python list of the cleaned reviews, one string per row
review_list = df['clean_text'].to_list()

In [28]:
def get_vocab_int_dict(review_list):
    """Map every distinct word in the reviews to a positive integer id.

    The reviews are joined with single spaces and split on whitespace. Ids
    start at 1 and follow the order in which words are first encountered, so
    0 is never assigned to a word.
    """
    corpus = ' '.join(review_list)
    # Counter preserves first-seen order of its keys; only that order is used.
    word_counts = Counter(corpus.split())

    vocab_to_int = {}
    for index, word in enumerate(word_counts, start=1):
        vocab_to_int[word] = index
    return vocab_to_int

def pad_features(reviews_int, seq_length):
    """Build a (len(reviews_int), seq_length) integer matrix from encoded reviews.

    Reviews shorter than ``seq_length`` are left-padded with 0; longer reviews
    keep only their first ``seq_length`` entries.
    """
    features = np.zeros((len(reviews_int), seq_length), dtype=int)

    for row, tokens in enumerate(reviews_int):
        n_tokens = len(tokens)
        if n_tokens > seq_length:
            # Too long: keep the leading seq_length ids.
            fitted = tokens[:seq_length]
        else:
            # Short enough: zeros go in front so the ids sit at the end.
            fitted = [0] * (seq_length - n_tokens) + tokens
        features[row, :] = np.array(fitted)

    return features

def preprocess(review, vocab_to_int):
    """Encode one raw review as a single-row batch of vocabulary ids.

    The text gets the same normalisation that is applied to the reviews the
    vocabulary is built from: it is lower-cased and every character in
    ``string.punctuation`` is removed. Without the punctuation step, tokens
    such as ``"good."`` never match a vocabulary key and are silently lost.

    Args:
        review: the review text.
        vocab_to_int: mapping from word to integer id.

    Returns:
        A list holding one list of ids, in word order. Words that are not in
        ``vocab_to_int`` are skipped.
    """
    normalised = ''.join(ch for ch in review.lower() if ch not in punctuation)
    encoded = [
        vocab_to_int[word]
        for word in normalised.split()
        if word in vocab_to_int
    ]
    # Wrapped in an outer list so the result is a batch of one review.
    return [encoded]

def predict(model, test_review, vocab_to_int, sequence_length=500):
    """Print whether a review is predicted to be positive or negative.

    Args:
        model: network exposing ``init_hidden(batch_size)``, ``eval()`` and a
            forward call ``model(features, hidden)`` that returns
            ``(output, hidden)``.
        test_review: raw review text.
        vocab_to_int: mapping from word to integer id.
        sequence_length: length every encoded review is padded or truncated to.

    Prints ``['Positive']`` when the rounded output equals 1, otherwise
    ``['Negative']``. Returns nothing.
    """
    int_rev = preprocess(test_review, vocab_to_int)
    # Use the function's own argument here; this previously read a notebook
    # global named `seq_length`, which fails on a fresh kernel and ignores the
    # value the caller passed.
    features = pad_features(int_rev, seq_length=sequence_length)

    features = torch.from_numpy(features)

    model.eval()
    val_h = model.init_hidden(1)  # hidden state for a batch of one review
    val_h = tuple([each.data for each in val_h])

    if torch.cuda.is_available():
        features = features.cuda()
        model = model.cuda()
        val_h = tuple([each.cuda() for each in val_h])

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output, val_h = model(features, val_h)

    # .item() below requires the output to hold exactly one value.
    pred = torch.round(output)
    output = ["Positive" if pred.item() == 1 else "Negative"]

    print(output)


In [29]:
# Rebuild the word -> id mapping from the cleaned reviews.
vocab_to_int = get_vocab_int_dict(review_list)

# Word ids start at 1 and pad_features pads with 0, so +1 makes room for id 0.
vocab_size = len(vocab_to_int) + 1
# NOTE(review): the values below presumably have to match the configuration the
# checkpoint was trained with, or load_state_dict will reject it - confirm.
output_size = 1
embedding_dim = 400
hidden_dim = 256
n_layers = 2
num_heads = 8
drop_prob_1 = 0.5
drop_prob_2 = 0.3

# Arguments are positional; their meaning is defined by models.SentimentAttentionLSTM.
model = models.SentimentAttentionLSTM(vocab_size, output_size, embedding_dim, hidden_dim, n_layers, num_heads, drop_prob_1, drop_prob_2)

# No optimizer is passed, so only the model weights are restored.
load_checkpoint(model, "LSTM-2_ckpt_epch_3.pth")

Model's pretrained weights loaded!
Optimizer's state loaded!


In [30]:
# Sample reviews: one clearly positive, one clearly negative
test_review_pos = 'This movie had the best acting and the dialogue was so good. I loved it.'
test_review_neg = 'The worst movie I have seen; acting was terrible and I want my money back. This movie had bad acting and the dialogue was slow.'

# Echo each review, then print the model's verdict for it
seq_length = 500
for sample_review in (test_review_pos, test_review_neg):
    print(sample_review)
    predict(model, sample_review, vocab_to_int, seq_length)

This movie had the best acting and the dialogue was so good. I loved it.
['Positive']
The worst movie I have seen; acting was terrible and I want my money back. This movie had bad acting and the dialogue was slow.
['Negative']
