In [51]:
import torch
import torchtext
import torch.nn as nn
import numpy as np
import spacy
import json
from string import punctuation

# Set these to the locations where the trained model and the vocabulary are saved.
# MODEL_PATH is a directory prefix: the model file name is appended to it by plain
# string concatenation further down, so it must end with a path separator.
# VOCAB_PATH is the JSON vocabulary file read by load().
MODEL_PATH = "/content/drive/My Drive/Colab Notebooks/movieReccomendation/"
VOCAB_PATH = "/content/drive/My Drive/Colab Notebooks/movieReccomendation/vocab/vocab.json"

# spaCy English pipeline; in the code shown it is only used to obtain the
# default stop-word set.
sp = spacy.load('en_core_web_sm')
all_stopwords = sp.Defaults.stop_words

def load(file_path):
    """Read a JSON vocabulary file and return its ``'word2id'`` entry.

    Parameters
    ----------
    file_path : str
        Path to a UTF-8 encoded JSON file whose top-level object has a
        ``'word2id'`` key.

    Returns
    -------
    object
        Whatever is stored under ``'word2id'`` in the file.

    Raises
    ------
    KeyError
        If the JSON object has no ``'word2id'`` key.
    """
    with open(file_path, 'r', encoding='utf8') as vocab_file:
        return json.load(vocab_file)['word2id']
# Vocabulary mapping loaded from VOCAB_PATH; predict_sentiment looks up each
# review token in it to obtain the integer ids fed to the model.
dictionary = load(VOCAB_PATH)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SentimentClassification(nn.Module):
    """Bidirectional LSTM sentiment classifier.

    Token ids are embedded, fed to a (multi-layer) bidirectional LSTM as a
    packed sequence, and the last layer's final forward and backward hidden
    states are concatenated, passed through dropout and projected to
    ``output_dim`` raw scores. ``forward`` does not apply a sigmoid; callers
    that want probabilities apply it themselves.

    Parameters
    ----------
    input_dim : int
        Number of rows in the embedding table (vocabulary size).
    embedding_dim : int
        Size of each embedding vector.
    hidden_dim : int
        Hidden size of the LSTM, per direction.
    output_dim : int
        Number of output scores.
    n_layers : int
        Number of stacked LSTM layers.
    pad_idx : int
        Id of the padding token; passed to ``nn.Embedding`` as ``padding_idx``.
    dropout : float
        Dropout probability, used both between LSTM layers and on the
        concatenated final hidden state.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, n_layers, pad_idx, dropout):
        super().__init__()

        self.output_dim = output_dim
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # The embedding layer has to exist before its weight can be frozen;
        # without this line the constructor raised AttributeError.
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx=pad_idx)
        # Embedding weights are kept frozen (not updated by the optimizer).
        self.embedding.weight.requires_grad = False

        # NOTE: forward() always passes a PackedSequence to the LSTM, and
        # batch_first is not consulted for packed input, so the flag below
        # does not affect the (seq_len, batch) layout used in forward().
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=True,
                            dropout=dropout, batch_first=True)

        self.dropout = nn.Dropout(dropout)

        # Forward and backward final states are concatenated -> hidden_dim * 2.
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

        # Not used by forward(); kept as an attribute for existing users.
        self.sig = nn.Sigmoid()

    def forward(self, text, text_lengths):
        """Compute raw (pre-sigmoid) scores for a batch of padded sequences.

        Parameters
        ----------
        text : torch.LongTensor
            Token ids laid out as ``(seq_len, batch_size)``.
        text_lengths : torch.Tensor or list of int
            Unpadded length of every sequence in the batch.

        Returns
        -------
        torch.Tensor
            Scores of shape ``(batch_size, output_dim)``.
        """
        # pack_padded_sequence requires the lengths on the CPU.
        if torch.is_tensor(text_lengths):
            text_lengths = text_lengths.cpu()

        # embeds = (seq_len, batch_size, embed_dim)
        embeds = self.embedding(text)

        packed_embedded = nn.utils.rnn.pack_padded_sequence(
            embeds, text_lengths, enforce_sorted=False)

        # hidden = (num_layers * num_directions, batch_size, hid_dim);
        # the per-step outputs and the cell state are not needed.
        _, (hidden, _) = self.lstm(packed_embedded)

        # Concatenate the last layer's final forward (hidden[-2]) and
        # backward (hidden[-1]) states, then apply dropout.
        # hidden = (batch_size, hid_dim * num_directions)
        hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
        return self.fc(hidden)

    def init_hidden(self, batch_size):
        """Return zero-initialised ``(hidden, cell)`` states for the LSTM.

        Both tensors have shape ``(n_layers * 2, batch_size, hidden_dim)``
        (the factor 2 accounts for the two directions of the LSTM) and are
        created with the dtype and on the device of the model's parameters.
        """
        weight = next(self.parameters())
        num_directions = 2  # the LSTM is always built with bidirectional=True
        shape = (self.n_layers * num_directions, batch_size, self.hidden_dim)
        # new_zeros allocates on the same device as `weight`, so no explicit
        # .cuda() call is needed (and a CPU model stays on the CPU).
        return (weight.new_zeros(shape), weight.new_zeros(shape))


# Second spaCy pipeline, used by spacy_preprocess for tokenisation and
# lemmatisation, loaded with the listed components disabled.
# NOTE(review): '...' is not a spaCy component name - presumably a leftover
# placeholder; confirm the installed spaCy version accepts it.
nlp = spacy.load('en_core_web_sm', disable = ['ner', 'parser', 'textcat', 'tagger', '...'])
# Load the saved model object and map its tensors onto `device`.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source. Presumably the file holds a whole pickled
# SentimentClassification instance (eval() is called on the result), which
# needs the class definition to be available at load time - confirm.
model_ = torch.load(MODEL_PATH+'SentimentClf_model3.pt', map_location=device)
# Put the model in evaluation mode.
model_.eval()


def token_filter(token):
    """Decide whether a token should be kept during preprocessing.

    A token is rejected when any of its ``is_punct``, ``is_space``,
    ``is_digit``, ``like_num`` or ``is_stop`` flags is set.

    Parameters
    ----------
    token : spacy.Token
        Token handed over by the preprocessing function.

    Returns
    -------
    bool
        True when none of the flags above is set, otherwise False.

    """
    rejected = (
        token.is_punct
        or token.is_space
        or token.is_digit
        or token.like_num
        or token.is_stop
    )
    return not rejected

def spacy_preprocess(review):
    """Lower-case, tokenise, filter and lemmatise a review.

    The lower-cased review and the list of tokens that survive
    ``token_filter`` are both printed as a trace.

    Parameters
    ----------
    review : str
        Raw review text.

    Returns
    -------
    list of str
        Lemmas of the kept tokens, excluding the ``'-PRON-'`` lemma.
    """
    lowered = review.lower()
    print(lowered)
    # Drop stop words, whitespace, punctuation and numbers in one pass.
    kept = [tok for tok in nlp(lowered) if token_filter(tok)]
    print(kept)
    # Lemmatise, skipping the '-PRON-' lemma.
    return [tok.lemma_ for tok in kept if tok.lemma_ != '-PRON-']

def predict_sentiment(model_, sentence):
    """Return the sigmoid of the model's score for a single review.

    The review is preprocessed with ``spacy_preprocess``, remaining stop
    words are removed, the tokens are converted to ids through the
    module-level ``dictionary`` and the resulting one-sequence batch is run
    through ``model_``.

    Parameters
    ----------
    model_ : torch.nn.Module
        Model called as ``model_(tensor, lengths)`` with ``tensor`` shaped
        ``(seq_len, 1)``.
    sentence : str
        Raw review text.

    Returns
    -------
    float
        Sigmoid of the model output (``inference`` treats values >= 0.5 as
        positive).

    Raises
    ------
    ValueError
        If no token of the review is left after preprocessing and
        vocabulary lookup, so there is nothing to feed to the model.
    """
    model_.eval()
    tokenized = spacy_preprocess(sentence)
    print(tokenized)
    # Lemmas can themselves be stop words, so filter once more.
    tokens_without_sw = [word for word in tokenized if word not in all_stopwords]

    # A plain dictionary[t] lookup raised KeyError for any word missing from
    # the vocabulary. Fall back to the '<unk>' id when the vocabulary has
    # one, otherwise skip the word.
    # NOTE(review): '<unk>' as the unknown-token key is an assumption about
    # the vocabulary file - confirm.
    unk_id = dictionary.get('<unk>')
    candidate_ids = (dictionary.get(token, unk_id) for token in tokens_without_sw)
    indexed = [token_id for token_id in candidate_ids if token_id is not None]
    if not indexed:
        raise ValueError(
            "review contains no known tokens after preprocessing; "
            "cannot predict sentiment"
        )

    # Shape (seq_len, 1): a batch holding a single sequence.
    tensor = torch.LongTensor(indexed).to(device).unsqueeze(1)
    # Lengths stay on the CPU, as required for packing inside the model.
    length_tensor = torch.LongTensor([len(indexed)])
    # Inference only: no need to build the autograd graph.
    with torch.no_grad():
        prediction = torch.sigmoid(model_(tensor, length_tensor))
    return prediction.item()

def inference(review:str):
    """Classify a review as ``'positive'`` or ``'negative'``.

    Runs ``predict_sentiment`` with the module-level ``model_``, prints the
    resulting probability and thresholds it at 0.5.

    Parameters
    ----------
    review : str
        Raw review text.

    Returns
    -------
    str
        ``'positive'`` when the probability is at least 0.5, otherwise
        ``'negative'``.
    """
    probability = predict_sentiment(model_, review)
    print(probability)
    return 'positive' if probability >= 0.5 else 'negative'

# Read a review from the user, classify it and print the predicted label.
review = input("ENTER THE REVIEW HERE: ")
# Hard-coded sample review, handy for non-interactive runs:
#review = "it is worst movie with really bad characters."
sentiment = inference(review)
print(sentiment)

ENTER THE REVIEW HERE: it is worst movie with really bad characters.
it is worst movie with really bad characters.
[worst, movie, bad, characters]
['wrong', 'movie', 'bad', 'character']
0.006410098634660244
negative
