# CNN Sentiment Analysis Model
This implementation is based on the following paper: https://arxiv.org/pdf/1408.5882.pdf

In [1]:
import torch
from gensim.models import KeyedVectors
from torch import nn
from torch import functional as F

In [2]:
# Location of the pretrained word2vec file; it is read in binary mode below.
embedding_file_path = 'data/pretrained_word_embedding/GoogleNews-vectors-negative300.bin'

# Load the pretrained vectors into a gensim KeyedVectors object.
word2vec = KeyedVectors.load_word2vec_format(
    embedding_file_path,
    binary=True,
)

In [3]:
# Build the word <-> integer-id lookup tables for the pretrained vectors.
#
# The ids are taken from the index-ordered word list rather than from
# iterating the vocab dict, so that every id is by construction the word's
# row in ``word2vec.vectors`` (which is what the embedding layer is built
# from). gensim >= 4.0 exposes that list as ``index_to_key`` and no longer
# provides ``vocab``; older releases call the list ``index2word``.
_index_to_word = getattr(word2vec, 'index_to_key', None)
if _index_to_word is None:
    _index_to_word = word2vec.index2word

int_to_word = dict(enumerate(_index_to_word))
word_to_int = {word: index for index, word in int_to_word.items()}

In [4]:
class SentimentAnalysisCNN(nn.Module):
    """Convolutional sentence classifier on top of pretrained word vectors.

    The input word ids are embedded, passed through several parallel
    convolutions with different window heights, max-pooled over the token
    axis, concatenated, and fed to a small fully connected head that ends
    in a sigmoid.

    The embedding table is built from the module-level ``word2vec`` object,
    which must exist before the model is instantiated.
    """

    def __init__(self):
        super().__init__()

        ### Hyperparameters ###
        self.filter_sizes = [2, 3, 5]
        self.num_filters = 32
        #######################

        # Embed the input. ``from_pretrained`` freezes the weights by default,
        # so the pretrained vectors are not updated during training.
        self.embedding = nn.Embedding.from_pretrained(torch.FloatTensor(word2vec.vectors))

        # Time to convolve! Every filter spans the full width of a word
        # vector; the width is read from the embedding table instead of being
        # hard-coded so the model follows whatever vectors were loaded.
        embedding_dim = self.embedding.embedding_dim
        self.convs = nn.ModuleList([
            nn.Conv2d(1, self.num_filters, kernel_size=(h, embedding_dim))
            for h in self.filter_sizes
        ])

        # Feed forward: one pooled value per filter per window height.
        fc_input = self.num_filters * len(self.filter_sizes)
        self.fc_layers = nn.Sequential(
            nn.Linear(fc_input, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Score a batch of sentences.

        Args:
            x: integer tensor of word ids, shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, 1)`` holding the sigmoid output of the
            final linear layer.
        """
        x = self.embedding(x)
        # Add the single input channel expected by ``nn.Conv2d``.
        x = x.unsqueeze(1)
        conv_outputs = [self.convolute_and_pool(x, c) for c in self.convs]
        x = torch.cat(conv_outputs, 1)
        x = self.fc_layers(x)
        return x

    def convolute_and_pool(self, x, conv):
        """Apply one convolution, ReLU, and max-pool over the token axis.

        Args:
            x: embedded batch of shape ``(batch, 1, seq_len, embedding_dim)``.
            conv: an ``nn.Conv2d`` whose kernel width equals ``embedding_dim``.

        Returns:
            Tensor of shape ``(batch, out_channels)`` with the maximum
            activation of every filter.
        """
        # A window taller than the sentence would make the convolution raise,
        # so right-pad the token axis with zero vectors up to the window
        # height. Inputs that are already long enough are left untouched.
        shortfall = conv.kernel_size[0] - x.size(2)
        if shortfall > 0:
            x = nn.functional.pad(x, (0, 0, 0, shortfall))

        x = torch.relu(conv(x))
        # The kernel covers the whole embedding width, leaving a size-1 axis.
        x = x.squeeze(3)
        # Pool over every window position at once (max-over-time pooling).
        return nn.functional.max_pool1d(x, x.size(2)).squeeze(2)

In [5]:
# Smoke test: push two sentences through a freshly initialised model.
# Both sentences have five tokens, so their id tensors can be stacked
# without padding. Every word must be present in ``word_to_int``.
a = 'Time will break the world'
b = 'I was impressed that I'
a = torch.tensor([word_to_int[token] for token in a.split(' ')])
b = torch.tensor([word_to_int[token] for token in b.split(' ')])
features = torch.stack((a, b))

model = SentimentAnalysisCNN()
# Call the module itself rather than ``.forward`` so that any registered
# hooks run, as recommended for ``nn.Module``.
model(features)

tensor([[0.5053],
        [0.5072]], grad_fn=<SigmoidBackward>)