In [1]:
import pickle

# 4. CNN Sentiment Analysis for IMDB - Movie Review

In [2]:
import torchtext
torchtext.__version__

'0.3.1'

In [4]:
# Stage two CNN checkpoint files from the Drive project folder into /content.
# NOTE(review): neither file name is referenced by the cells visible below
# (the load cell reads 'tut4-model.pt') -- confirm which checkpoint is intended.
!cp '/content/drive/My Drive/EVA4/phase2/s9_neuralWordEmbeddings/sentiment_analysis_model_cnn2.pt' '/content'
!cp '/content/drive/My Drive/EVA4/phase2/s9_neuralWordEmbeddings/sentiment_analysis_model_cnn2_st_dct.pt' '/content'


In [5]:
# Stage the pickled TEXT field file from the Drive project folder into /content.
# NOTE(review): 'TEXT_fields_cnn2.pkl' is not opened by any cell visible below
# (they read 'text_vocab_1.pkl' and 'TEXT_fields.pkl') -- confirm it is still needed.
!cp '/content/drive/My Drive/EVA4/phase2/s9_neuralWordEmbeddings/TEXT_fields_cnn2.pkl' '/content'

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import dill
import spacy
from torchtext import data
#from torchtext import datasets
import random,pickle
import numpy as np

In [7]:
spacy.__version__

'2.2.4'

In [8]:
# Single seed shared by every random number generator seeded below.
SEED = 1234

# Seed Python's, NumPy's and PyTorch's generators with the same value.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Request deterministic cuDNN kernels (relevant only when running on a GPU).
torch.backends.cudnn.deterministic = True

# torchtext fields: TEXT tokenizes with spaCy and produces batch-first tensors;
# LABEL is declared with a float dtype.
# NOTE(review): LABEL is not used by any cell visible in this notebook section.
TEXT = data.Field(tokenize = 'spacy', batch_first = True)
LABEL = data.LabelField(dtype = torch.float)

In [None]:
class CNN(nn.Module):
    """Multi-window convolutional classifier over embedded token sequences.

    Each entry of ``filter_sizes`` gets its own ``Conv2d`` branch whose kernel
    covers that many consecutive tokens across the full embedding width. Every
    branch is ReLU-activated and max-pooled over the sequence axis; the pooled
    features of all branches are concatenated, passed through dropout and
    projected to ``output_dim`` by a linear layer.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: width of each embedding vector.
        n_filters: output channels of every convolution branch.
        filter_sizes: sized iterable of window heights, one branch per entry.
        output_dim: number of values produced per input sequence.
        dropout: dropout probability applied to the concatenated features.
        pad_idx: embedding index registered as the padding entry.
    """

    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim,
                 dropout, pad_idx):
        super().__init__()

        # Token id -> dense vector lookup; `pad_idx` marks the padding row.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)

        # One convolution per window height, built in the order given.
        branches = []
        for window in filter_sizes:
            branches.append(
                nn.Conv2d(in_channels=1,
                          out_channels=n_filters,
                          kernel_size=(window, embedding_dim))
            )
        self.convs = nn.ModuleList(branches)

        # Classifier head over the concatenated pooled features of all branches.
        self.fc = nn.Linear(n_filters * len(filter_sizes), output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        """Map a batch of token ids to ``output_dim`` raw scores per sequence.

        Args:
            text: integer tensor of token ids, [batch size, sent len].

        Returns:
            Tensor of shape [batch size, output_dim] (no activation applied).
        """
        # [batch size, sent len] -> [batch size, 1, sent len, emb dim]
        vectors = self.embedding(text).unsqueeze(1)

        pooled = []
        for conv in self.convs:
            # [batch size, n_filters, sent len - window + 1]
            feature_map = F.relu(conv(vectors)).squeeze(3)
            # Max over the whole remaining sequence axis -> [batch size, n_filters]
            pooled.append(F.max_pool1d(feature_map, feature_map.shape[2]).squeeze(2))

        # [batch size, n_filters * len(filter_sizes)]
        features = torch.cat(pooled, dim=1)

        return self.fc(self.dropout(features))

In [None]:
# Load the pickled vocabulary object used below for its length and `stoi` lookup.
# NOTE(review): no visible cell copies 'text_vocab_1.pkl' into /content -- confirm
# where this file comes from before relying on Restart & Run All.
# SECURITY: pickle.load executes arbitrary code from the file; only load trusted files.
with open('/content/text_vocab_1.pkl','rb') as p:
    txt_vocab = pickle.load(p)   

In [None]:
# Hyperparameters passed to CNN(...) below.
# NOTE(review): these presumably must match the values used when the checkpoint
# was trained, otherwise load_state_dict will report shape mismatches -- confirm.
INPUT_DIM = len(txt_vocab) #len(TEXT.vocab)
EMBEDDING_DIM = 100
N_FILTERS = 100
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 1
DROPOUT = 0.5
# Index of the padding token, looked up in the loaded vocabulary.
PAD_IDX = txt_vocab.stoi[TEXT.pad_token] #TEXT.vocab.stoi[TEXT.pad_token]

model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)

# Use the GPU when one is available; `device` is also read by predict_sentiment.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'### Device is : {device}')
model = model.to(device)

In [None]:
# Restore trained weights into the model; map_location keeps the loaded tensors
# on CPU so the checkpoint can be read on machines without a GPU.
# NOTE(review): 'tut4-model.pt' is not one of the files copied into /content by
# the cells above -- confirm the intended checkpoint path.
model.load_state_dict(torch.load('tut4-model.pt',map_location=torch.device('cpu')))

In [None]:
# Put the model in evaluation mode (disables the nn.Dropout layer) before inference.
model.eval()

In [None]:

# spaCy pipeline whose tokenizer is used by predict_sentiment.
# NOTE(review): the 'en' shortcut name relies on the spaCy 2.x install shown
# earlier in this notebook ('2.2.4'); spaCy 3 no longer accepts shortcut names.
nlp = spacy.load('en')

def predict_sentiment(model, sentence, min_len = 5):
    """Score a single sentence with the sentiment model.

    The sentence is tokenized with the module-level spaCy pipeline ``nlp``,
    right-padded with ``'<pad>'`` up to ``min_len`` tokens, converted to
    indices through ``txt_vocab.stoi`` and fed to ``model`` as a batch of one.

    Args:
        model: module that maps a [1, sent len] LongTensor of token ids to a
            single raw score.
        sentence: raw text to classify.
        min_len: minimum number of tokens sent to the model. The default of 5
            equals the largest window in FILTER_SIZES, so every convolution
            has at least one valid position.

    Returns:
        float: sigmoid of the model output, between 0 and 1. The notebook
        treats values below 0.5 as negative sentiment.
    """
    model.eval()
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]

    # Short inputs are padded so the widest convolution kernel still fits.
    if len(tokenized) < min_len:
        tokenized += ['<pad>'] * (min_len - len(tokenized))

    # NOTE(review): presumably `stoi` maps out-of-vocabulary tokens to the
    # unknown-token index rather than raising -- confirm for the loaded vocab.
    indexed = [txt_vocab.stoi[t] for t in tokenized]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(0)  # add the batch dimension: [1, sent len]

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        prediction = torch.sigmoid(model(tensor))
    return prediction.item()

In [None]:
# Smoke test: display the raw sigmoid score for an obviously negative review.
predict_sentiment(model, "This film is terrible")

In [None]:
# Turn the score into a label: below 0.5 is negative, otherwise positive.
'Negative' if predict_sentiment(model, "This film is terrible") < 0.5 else 'Positive'

In [None]:
# Serialize the TEXT field to disk, using dill as torch.save's pickle module.
# NOTE(review): dill is already imported in the main import cell; this repeat
# import is redundant.
import dill
torch.save(TEXT, "TEXT_fields.pkl", pickle_module=dill)

In [None]:
# Read the field back from "TEXT_fields.pkl" with dill; this rebinds the global TEXT.
# SECURITY: unpickling executes code from the file; only load trusted files.
TEXT = torch.load("TEXT_fields.pkl", pickle_module=dill)

In [None]:
# Alternative serialization path: write TEXT straight to a file with dill.dump.
with open("TEXT.Field","wb")as f:
     dill.dump(TEXT,f)


In [None]:
# Read the dill dump back into a separate name (TEXT2) so TEXT itself is left untouched.
# SECURITY: dill.load executes code from the file; only load trusted files.
with open("TEXT.Field","rb")as f:
     TEXT2=dill.load(f)

In [None]:
# Copy 'TEXT_fields.pkl' from the Drive project folder into the working directory.
# NOTE(review): this overwrites any local 'TEXT_fields.pkl' written by the
# torch.save cell above -- confirm that is intended.
!cp '/content/drive/My Drive/EVA4/phase2/s9_neuralWordEmbeddings/TEXT_fields.pkl' .
