# 4. CNN Sentiment Analysis for IMDB - Movie Review

In [3]:
# Copy both saved CNN checkpoint files (full model and state dict) from the Drive folder into /content.
# NOTE(review): presumably Google Drive must already be mounted at /content/drive — confirm before running.
!cp '/content/drive/My Drive/EVA4/phase2/s9_neuralWordEmbeddings/sentiment_analysis_model_cnn2.pt' '/content'
!cp '/content/drive/My Drive/EVA4/phase2/s9_neuralWordEmbeddings/sentiment_analysis_model_cnn2_st_dct.pt' '/content'


In [4]:
# Copy the pickled TEXT field file from the same Drive folder into /content.
!cp '/content/drive/My Drive/EVA4/phase2/s9_neuralWordEmbeddings/TEXT_fields_cnn2.pkl' '/content'

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import dill
import spacy
from torchtext import data
#from torchtext import datasets
import random #,pickle
import numpy as np

In [2]:
# Display the installed spaCy version.
spacy.__version__

'2.2.4'

# Load the model state-dict checkpoint (.pt file)

In [13]:
import io

# Read the complete state-dict checkpoint from disk into an in-memory byte
# buffer; torch.load consumes this buffer in the following cell.
with open('/content/sentiment_analysis_model_cnn2_st_dct.pt', mode='rb') as checkpoint_file:
    bytestream_model = io.BytesIO(checkpoint_file.read())

In [14]:
# Deserialize the checkpoint from the in-memory buffer, remapping every stored
# tensor onto the CPU so the load works without a GPU.
model_ckpnt = torch.load(bytestream_model, map_location='cpu')

# Load the pickled TEXT field (vocabulary) file

In [15]:
# Read the pickled TEXT field file into an in-memory byte buffer for torch.load.
with open('/content/TEXT_fields_cnn2.pkl', mode='rb') as fields_file:
    bytestream_text = io.BytesIO(fields_file.read())

In [16]:
# Restore the TEXT field object from the byte buffer, using dill as the pickle backend.
# NOTE(review): unpickling can execute arbitrary code, so only load field files from a trusted source.
TEXT = torch.load(bytestream_text, pickle_module=dill)

In [17]:
# Inspect the vocabulary object carried by the restored TEXT field.
TEXT.vocab

<torchtext.vocab.Vocab at 0x7f497c718630>

In [18]:
# One shared seed for every random number generator used by the notebook.
SEED = 1234

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
# Ask cuDNN to pick deterministic kernels.
torch.backends.cudnn.deterministic = True

# TEXT is restored from the pickled field file in an earlier cell, so only the
# label field is constructed here.
LABEL = data.LabelField(dtype=torch.float)

In [19]:
class CNN2(nn.Module):
    """Convolutional text classifier with several parallel kernel heights.

    Token ids are embedded, passed through one Conv2d per entry of
    ``filter_sizes`` (each kernel spans the full embedding width), ReLU-activated,
    max-pooled over the sequence axis, concatenated, passed through dropout and
    finally projected by a linear layer to ``output_dim`` values.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the kernel width).
        n_filters: Output channels produced by every convolution.
        filter_sizes: Iterable of kernel heights, i.e. how many consecutive
            tokens each convolution looks at.
        output_dim: Size of the final linear layer's output.
        dropout: Dropout probability applied to the concatenated features.
        pad_idx: Vocabulary index passed to the embedding as ``padding_idx``.
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim,
                 dropout, pad_idx):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        # One convolution per kernel height, built in the order given.
        conv_layers = []
        for window in filter_sizes:
            conv_layers.append(
                nn.Conv2d(in_channels=1,
                          out_channels=n_filters,
                          kernel_size=(window, embedding_dim))
            )
        self.convs = nn.ModuleList(conv_layers)

        self.fc = nn.Linear(n_filters * len(filter_sizes), output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Compute raw (pre-sigmoid) outputs for a batch of token-id sequences.

        Args:
            text: Tensor of token ids, [batch size, sent len].

        Returns:
            Tensor of shape [batch size, output_dim].
        """
        # [batch size, sent len] -> [batch size, 1, sent len, emb dim]
        channelled = self.embedding(text).unsqueeze(1)

        pooled = []
        for conv in self.convs:
            # [batch size, n_filters, sent len - kernel height + 1]
            activated = F.relu(conv(channelled)).squeeze(3)
            # Max over the whole remaining sequence axis -> [batch size, n_filters]
            pooled.append(F.max_pool1d(activated, activated.shape[2]).squeeze(2))

        # [batch size, n_filters * len(filter_sizes)]
        features = self.dropout(torch.cat(pooled, dim=1))

        return self.fc(features)

In [20]:
# Architecture hyperparameters. They have to agree with the tensor shapes stored
# in the checkpoint, otherwise load_state_dict in the next cell fails.
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 100
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 1
DROPOUT = 0.5
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

model = CNN2(
    vocab_size=INPUT_DIM,
    embedding_dim=EMBEDDING_DIM,
    n_filters=N_FILTERS,
    filter_sizes=FILTER_SIZES,
    output_dim=OUTPUT_DIM,
    dropout=DROPOUT,
    pad_idx=PAD_IDX,
)

# Use the GPU when one is visible, otherwise stay on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'
print(f'### Device is : {device}')
model = model.to(device)

### Device is : cpu


In [21]:
# Copy the tensors from the loaded checkpoint into the freshly built model.
# The call's return value is echoed as the cell output and reports whether
# every key matched.
model.load_state_dict(model_ckpnt)

<All keys matched successfully>

In [22]:
# Put the model in evaluation mode (this turns off its dropout layer); the
# module summary is echoed as the cell output.
model.eval()

CNN2(
  (embedding): Embedding(25002, 100, padding_idx=1)
  (convs): ModuleList(
    (0): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
    (1): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
    (2): Conv2d(1, 100, kernel_size=(5, 100), stride=(1, 1))
  )
  (fc): Linear(in_features=300, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [23]:
import spacy
# Load the English spaCy pipeline; predict_sentiment below only uses its tokenizer.
# NOTE(review): the 'en' shortcut name is a spaCy 2.x feature (2.2.4 is reported
# earlier in this notebook). spaCy 3 removed shortcut links and needs the full
# package name — confirm before upgrading.
nlp = spacy.load('en')

def predict_sentiment(model, sentence, min_len = 5):
    """Score a single sentence with the sentiment model.

    The sentence is tokenized with the notebook's spaCy tokenizer (``nlp``),
    right-padded with the TEXT field's pad token up to ``min_len`` tokens,
    mapped to vocabulary indices through ``TEXT.vocab.stoi`` and run through
    ``model`` as a batch of one.

    Args:
        model: Module that maps a [1, sent len] tensor of token ids to a
            single raw score.
        sentence: Raw text to classify.
        min_len: Minimum number of tokens fed to the model. The convolutions
            use no padding, so this must be at least the largest kernel height
            (5 for FILTER_SIZES = [3,4,5]).

    Returns:
        float: Sigmoid of the model output. The notebook reads values below
        0.5 as negative sentiment.
    """
    model.eval()

    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]

    # Pad short inputs so every convolution kernel fits at least once. Use the
    # field's own pad token, the same one PAD_IDX is derived from.
    shortfall = min_len - len(tokenized)
    if shortfall > 0:
        tokenized += [TEXT.pad_token] * shortfall

    indexed = [TEXT.vocab.stoi[t] for t in tokenized]

    # Build the batch on the device the model actually lives on rather than
    # trusting a notebook-level global to still be in sync with it.
    model_device = next(model.parameters()).device
    tensor = torch.tensor(indexed, dtype=torch.long, device=model_device).unsqueeze(0)

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        prediction = torch.sigmoid(model(tensor))
    return prediction.item()

In [24]:
# Try the restored model on a short negative review; the returned score is the
# sigmoid of the model output.
predict_sentiment(model, "This film is terrible")

0.1281609684228897

In [25]:
# Turn the sigmoid score into a label: below 0.5 is negative, otherwise positive.
'Negative' if predict_sentiment(model, "This film is terrible")<0.5 else 'Positive'

'Negative'