In [1]:
import re
import pandas as pd
import numpy as np

## Pytorch Imports
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.utils.data
import torch.nn.functional as F
import torch.optim as optim

## NLP Libraries
import spacy
from sklearn.model_selection import train_test_split
from nltk import download
import gensim
from nltk.corpus import stopwords

spacy_en = spacy.load('en')
download('stopwords')

Using TensorFlow backend.
  return f(*args, **kwds)


[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed
[nltk_data]     (_ssl.c:777)>


False

## 2. Dataset
- more information here: https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews
- 0 - negative    
- 1 - positive  

In [2]:
train = pd.read_pickle('train.pkl')
train.drop(labels='phrase_preprocessed', inplace=True,axis=1)
train.head()

Unnamed: 0,phrase,sentiment,sentiment_simple
0,A series of escapades demonstrating the adage ...,1,NEG
1,"This quiet , introspective and entertaining in...",4,POS
2,"Even fans of Ismail Merchant 's work , I suspe...",1,NEG
3,A positively thrilling combination of ethnogra...,3,POS
4,Aggressive self-glorification and a manipulati...,1,NEG


In [3]:
test = pd.read_pickle('test.pkl')
test.drop(labels='phrase_preprocessed', inplace=True,axis=1)
test.head()

Unnamed: 0,phrase
0,An intermittently pleasing but mostly routine ...
1,Kidman is really the only thing that 's worth ...
2,Once you get into its rhythm ... the movie bec...
3,I kept wishing I was watching a documentary ab...
4,"Kinnear does n't aim for our sympathy , but ra..."


## 3. Preprocessing

In [4]:
def transformText(text, do_stop=False, do_stem=False):
    
    stops = set(stopwords.words("english"))
    
    # Convert text to lower
    text = text.lower()
    
    # Removing non ASCII chars    
    text = re.sub(r'[^\x00-\x7f]',r' ',text)
    
    # Strip multiple whitespaces
    text = gensim.corpora.textcorpus.strip_multiple_whitespaces(text)
    
    # Removing all the stopwords
    
    if (do_stop==True):
        filtered_words = [word for word in text.split() if word not in stops]
    else:
        filtered_words = [word for word in text.split()]

    # Removing all the tokens with lesser than 3 characters
    filtered_words = gensim.corpora.textcorpus.remove_short(filtered_words, minsize=2)
    
    # Preprocessed text after stop words removal
    text = " ".join(filtered_words)
    
    # Remove the punctuation
    text = gensim.parsing.preprocessing.strip_punctuation2(text)
    
    # Strip all the numerics
    text = gensim.parsing.preprocessing.strip_numeric(text)
    
    # Strip multiple whitespaces
    text = gensim.corpora.textcorpus.strip_multiple_whitespaces(text)
    
    if (do_stem==True):
        # Stemming
        text = gensim.parsing.preprocessing.stem_text(text)
    return text

In [5]:
train['phrase_preprocessed']=train['phrase'].apply(lambda x: transformText(x,do_stop=False, do_stem=False))
train.head()

Unnamed: 0,phrase,sentiment,sentiment_simple,phrase_preprocessed
0,A series of escapades demonstrating the adage ...,1,NEG,series of escapades demonstrating the adage th...
1,"This quiet , introspective and entertaining in...",4,POS,this quiet introspective and entertaining inde...
2,"Even fans of Ismail Merchant 's work , I suspe...",1,NEG,even fans of ismail merchant s work suspect wo...
3,A positively thrilling combination of ethnogra...,3,POS,positively thrilling combination of ethnograph...
4,Aggressive self-glorification and a manipulati...,1,NEG,aggressive self glorification and manipulative...


In [9]:
a=train['phrase_preprocessed'].isnull()
type(a)

pandas.core.series.Series

In [11]:
len(train[train['sentiment_simple']=='NEG'])

4927

In [12]:
len(train[train['sentiment_simple']=='POS'])

3602

In [None]:
= 

In [7]:
a[a==True]

Series([], Name: phrase_preprocessed, dtype: bool)

In [8]:
test['phrase_preprocessed']=test['phrase'].apply(lambda x: transformText(x,do_stop=False, do_stem=False))
test.head()

Unnamed: 0,phrase,phrase_preprocessed
0,An intermittently pleasing but mostly routine ...,an intermittently pleasing but mostly routine ...
1,Kidman is really the only thing that 's worth ...,kidman is really the only thing that s worth w...
2,Once you get into its rhythm ... the movie bec...,once you get into its rhythm the movie becomes...
3,I kept wishing I was watching a documentary ab...,kept wishing was watching documentary about th...
4,"Kinnear does n't aim for our sympathy , but ra...",kinnear does n t aim for our sympathy but rath...


## 4. Train/Test split, Vocab

In [9]:
test_size = 0.2

In [10]:
x_train, x_valid, y_train, y_valid = train_test_split(train['phrase_preprocessed'],
                                                      train['sentiment_simple'], 
                                                      test_size=0.2)

In [11]:
x_test=test['phrase_preprocessed']
x_test[0:5]

0    an intermittently pleasing but mostly routine ...
1    kidman is really the only thing that s worth w...
2    once you get into its rhythm the movie becomes...
3    kept wishing was watching documentary about th...
4    kinnear does n t aim for our sympathy but rath...
Name: phrase_preprocessed, dtype: object

In [12]:
## Build Vocabulary
word_to_ix = {}
for sent in list(x_train) + list(x_valid) + list(x_test):
    for word in sent.split():
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)

In [13]:
word_to_ix

{'feeble': 0,
 'comedy': 1,
 'no': 2,
 'doubt': 3,
 'the': 4,
 'star': 5,
 'and': 6,
 'everyone': 7,
 'else': 8,
 'involved': 9,
 'had': 10,
 'their': 11,
 'hearts': 12,
 'in': 13,
 'right': 14,
 'place': 15,
 'you': 16,
 'can': 17,
 'drive': 18,
 'by': 19,
 'it': 20,
 'without': 21,
 'noticing': 22,
 'anything': 23,
 'special': 24,
 'save': 25,
 'for': 26,
 'few': 27,
 'comic': 28,
 'turns': 29,
 'intended': 30,
 'otherwise': 31,
 'an': 32,
 'instance': 33,
 'of': 34,
 'old': 35,
 'dog': 36,
 'not': 37,
 'only': 38,
 'learning': 39,
 'but': 40,
 'inventing': 41,
 'remarkable': 42,
 'new': 43,
 'trick': 44,
 'although': 45,
 'shot': 46,
 'with': 47,
 'little': 48,
 'style': 49,
 'skins': 50,
 'is': 51,
 'heartfelt': 52,
 'achingly': 53,
 'real': 54,
 'fantastic': 55,
 'premise': 56,
 'anchors': 57,
 'this': 58,
 'movie': 59,
 'what': 60,
 'needs': 61,
 'either': 62,
 'more': 63,
 'rigid': 64,
 'blair': 65,
 'witch': 66,
 'commitment': 67,
 'to': 68,
 'its': 69,
 'mockumentary': 70,
 'f

In [14]:
print("Vocab size = {}".format(len(word_to_ix)))

Vocab size = 17512


In [15]:
label_to_ix = { "NEG": 0, "POS": 1 }

In [16]:
VOCAB_SIZE = len(word_to_ix)
VOCAB_SIZE

17512

In [17]:
NUM_LABELS = len(label_to_ix)
NUM_LABELS

2

## 5. Making dataset iterable

In [18]:
batch_size = 10
n_iters = 1000
num_epochs = n_iters/(len(x_train) / batch_size)
num_epochs = int(num_epochs)
num_epochs

1

In [19]:
## iterable datasets
train_data=list(zip(x_train,y_train))
train_data[0:5]

[('feeble comedy', 'NEG'),
 ('no doubt the star and everyone else involved had their hearts in the right place',
  'POS'),
 ('you can drive right by it without noticing anything special save for few comic turns intended and otherwise',
  'NEG'),
 ('an instance of an old dog not only learning but inventing remarkable new trick',
  'POS'),
 ('although shot with little style skins is heartfelt and achingly real',
  'POS')]

In [20]:
valid_data=list(zip(x_valid,y_valid))
valid_data[0:5]

[('the shackles', 'NEG'),
 ('it never plays as dramatic even when dramatic things happen to people',
  'NEG'),
 ('an engaging formulaic sports drama that carries charge of genuine excitement',
  'POS'),
 ('smart sassy interpretation of the oscar wilde play', 'POS'),
 ('ca n t remember the last time saw an audience laugh so much during movie but there s only one problem it s supposed to be drama',
  'NEG')]

## 7. Model - LSTM Classifier

In [21]:
EMBEDDING_DIM = 30
HIDDEN_DIM = 50
NUM_LAYERS = 1
DROPOUT = 0

In [22]:
class LSTMClassifier(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, num_layers, vocab_size, label_size, dropout):
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, dropout=dropout, batch_first=False)
        self.hidden2label = nn.Linear(hidden_dim, label_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # the first is the hidden h
        # the second is the cell  c
        return (Variable(torch.zeros(self.num_layers, 1, self.hidden_dim)),
                Variable(torch.zeros(self.num_layers, 1, self.hidden_dim)))

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        x = embeds.view(len(sentence), 1, -1)
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y  = self.hidden2label(lstm_out[-1])
        log_probs = F.log_softmax(y)
        return log_probs

In [23]:
model = LSTMClassifier(embedding_dim=EMBEDDING_DIM,
                            hidden_dim=HIDDEN_DIM,
                            num_layers=NUM_LAYERS,
                            vocab_size=VOCAB_SIZE,
                            label_size=NUM_LABELS,
                            dropout=DROPOUT)

In [24]:
model

LSTMClassifier (
  (word_embeddings): Embedding(17512, 30)
  (lstm): LSTM(30, 50)
  (hidden2label): Linear (50 -> 2)
)

In [25]:
def make_context_vector(seq, to_ix):
    idxs = [to_ix[w] for w in seq.split()]
    tensor = torch.LongTensor(idxs)
    return tensor

In [26]:
def make_target(label, label_to_idx):
    return torch.LongTensor([label_to_idx[label]])

## 8. Loss Function and Optimizer

In [27]:
loss_function = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = optim.Adam(model.parameters(),lr = learning_rate)

In [28]:
sample=train_data[2][0]
sample

'you can drive right by it without noticing anything special save for few comic turns intended and otherwise'

In [29]:
make_context_vector(train_data[10][0],word_to_ix)


  20
 129
 130
 131
 132
 133
  68
 134
   4
 135
  68
 136
  97
 137
[torch.LongTensor of size 14]

In [30]:
sample_context=Variable(make_context_vector(sample,word_to_ix))
sample_context

Variable containing:
 16
 17
 18
 14
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
  6
 31
[torch.LongTensor of size 18]

In [31]:
out=model(sample_context)
out

Variable containing:
-0.7626 -0.6282
[torch.FloatTensor of size 1x2]

In [32]:
iter = 0
for epoch in range(num_epochs):
    for (sent,label) in train_data:
        # Step 1 - clear the gradients
        model.zero_grad()
        optimizer.zero_grad()
        model.hidden = model.init_hidden()
    
        ## Avoid breaking for empty input
        try:
            ## Step 2- Prepare input and label
            context_vec = Variable(make_context_vector(sent, word_to_ix))
            target = Variable(make_target(label, label_to_ix)) 
            # Step 3 - Run forward pass
            output = model(context_vec)  
            # Step 4 - Compute loss, gradients, update parameters
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()
        except:
            pass
        iter+=1      
        ## Calculate final accuracy
        if iter % 500 ==0:
            correct = 0
            total = 0
            for (sent,label) in valid_data:
                context_vec = Variable(make_context_vector(sent, word_to_ix))
                target = Variable(make_target(label, label_to_ix))
                output = model(context_vec)
                _,predicted = torch.max(output.data,1)
                total += target.size(0)
                correct += (predicted == make_target(label, label_to_ix)).sum()
            accuracy = 100 * correct/total
            print('Iterations: {}. Loss: {}. Accuracy: {}'.format(iter,loss.data[0],accuracy))

Iterations: 500. Loss: 0.3723790645599365. Accuracy: 54.74794841735053
Iterations: 1000. Loss: 1.08137047290802. Accuracy: 56.15474794841735
Iterations: 1500. Loss: 1.151354432106018. Accuracy: 56.389214536928485
Iterations: 2000. Loss: 0.8430496454238892. Accuracy: 50.70339976553341
Iterations: 2500. Loss: 0.742389976978302. Accuracy: 56.56506447831184
Iterations: 3000. Loss: 0.6001772284507751. Accuracy: 55.74443141852286
Iterations: 3500. Loss: 0.4139164984226227. Accuracy: 56.09613130128957
Iterations: 4000. Loss: 1.0805284976959229. Accuracy: 54.51348182883939
Iterations: 4500. Loss: 0.8681607246398926. Accuracy: 49.941383352872215
Iterations: 5000. Loss: 1.1741716861724854. Accuracy: 54.161781946072686
Iterations: 5500. Loss: 0.5108356475830078. Accuracy: 54.8651817116061
Iterations: 6000. Loss: 0.28323712944984436. Accuracy: 53.458382180539274
Iterations: 6500. Loss: 0.9901161193847656. Accuracy: 56.858147713950764


In [33]:
n=3
bow_vec = Variable(make_context_vector(valid_data[n][0], word_to_ix))
print("-"*20 + " INPUT "+"-"*30)
print("TRUE LABEL = {}".format(valid_data[n][1]))
print("SENTENCE = {}".format(valid_data[n][0]))
print("-"*20 + " PREDICTION "+"-"*30)
log_probs = model(bow_vec)
_,predicted = torch.max(log_probs.data,1)
print("PRED = {}".format(predicted[0]))
print("PRED = {}".format(list(label_to_ix.keys())[list(label_to_ix.values()).index(predicted[0])]))
##print("LOG_PROB = {}".format(log_probs))
print("PROBS = {}".format(F.softmax(log_probs)))

-------------------- INPUT ------------------------------
TRUE LABEL = POS
SENTENCE = smart sassy interpretation of the oscar wilde play
-------------------- PREDICTION ------------------------------
PRED = 0
PRED = NEG
PROBS = Variable containing:
 0.6039  0.3961
[torch.FloatTensor of size 1x2]



## Loading Pre-trained vectors

In [34]:
from gensim.models import KeyedVectors

In [35]:
!ls ../../../vectors

GoogleNews-vectors-negative300.bin wiki-news-300d-1M-subword.vec
glove.42B.300d.txt                 wiki-news-300d-1M.vec
lid.176.ftz


In [36]:
w2v = KeyedVectors.load_word2vec_format('~/repos/vectors/GoogleNews-vectors-negative300.bin', binary = True)

## Build weights initialization matrix

In [37]:
W2V_DIM = 300
## standard deviation to use
sd = 1/np.sqrt(W2V_DIM)
## Random initialization
weights = np.random.normal(0, scale=sd, size=[VOCAB_SIZE, W2V_DIM])
weights = weights.astype(np.float32)

In [38]:
weights[1][0:50]

array([-0.03308766,  0.02778587, -0.05535707,  0.01274751, -0.01436458,
       -0.00800677, -0.00284676,  0.08017665,  0.06320183, -0.08630445,
        0.02757211,  0.02853842, -0.06298611, -0.03727243, -0.04372744,
        0.01708458,  0.03430955,  0.0535158 ,  0.01962308, -0.04440845,
       -0.06047032,  0.08077332,  0.01939448,  0.08300257, -0.03951006,
        0.03059672,  0.12212086,  0.02846081, -0.01342562, -0.04188478,
        0.16933852, -0.0456421 , -0.10265084,  0.00022967, -0.0214686 ,
       -0.02636538, -0.07641384, -0.00283593, -0.01508848, -0.07175078,
       -0.00822397,  0.04603168,  0.01206001, -0.03778317,  0.04014652,
        0.0456126 ,  0.01643336, -0.01612621,  0.15607595,  0.01549839], dtype=float32)

In [39]:
for word in word_to_ix:
    id = word_to_ix.get(word,None)
    if id is not None:
        try:
            weights[id]=w2v.wv.word_vec(word)
        except:
            weights[id]=np.random.normal(0, scale=sd, size=[1, W2V_DIM]) ## If word not present, initialize randomly

In [40]:
word_to_ix

{'feeble': 0,
 'comedy': 1,
 'no': 2,
 'doubt': 3,
 'the': 4,
 'star': 5,
 'and': 6,
 'everyone': 7,
 'else': 8,
 'involved': 9,
 'had': 10,
 'their': 11,
 'hearts': 12,
 'in': 13,
 'right': 14,
 'place': 15,
 'you': 16,
 'can': 17,
 'drive': 18,
 'by': 19,
 'it': 20,
 'without': 21,
 'noticing': 22,
 'anything': 23,
 'special': 24,
 'save': 25,
 'for': 26,
 'few': 27,
 'comic': 28,
 'turns': 29,
 'intended': 30,
 'otherwise': 31,
 'an': 32,
 'instance': 33,
 'of': 34,
 'old': 35,
 'dog': 36,
 'not': 37,
 'only': 38,
 'learning': 39,
 'but': 40,
 'inventing': 41,
 'remarkable': 42,
 'new': 43,
 'trick': 44,
 'although': 45,
 'shot': 46,
 'with': 47,
 'little': 48,
 'style': 49,
 'skins': 50,
 'is': 51,
 'heartfelt': 52,
 'achingly': 53,
 'real': 54,
 'fantastic': 55,
 'premise': 56,
 'anchors': 57,
 'this': 58,
 'movie': 59,
 'what': 60,
 'needs': 61,
 'either': 62,
 'more': 63,
 'rigid': 64,
 'blair': 65,
 'witch': 66,
 'commitment': 67,
 'to': 68,
 'its': 69,
 'mockumentary': 70,
 'f

In [41]:
weights[5][0:50]

array([ 0.1640625 ,  0.18847656,  0.14160156, -0.02941895,  0.02087402,
        0.13769531, -0.0168457 , -0.32617188,  0.07519531, -0.05200195,
        0.11816406,  0.09179688,  0.06689453, -0.04614258, -0.04321289,
        0.38476562,  0.0213623 , -0.09423828,  0.05712891,  0.18066406,
       -0.08740234,  0.3359375 , -0.078125  , -0.07861328, -0.02111816,
       -0.28320312,  0.08740234,  0.1796875 ,  0.11083984,  0.0480957 ,
       -0.00469971,  0.03857422,  0.01940918,  0.15332031,  0.07714844,
        0.01574707,  0.21875   ,  0.16113281, -0.14257812,  0.12695312,
        0.04736328, -0.48242188,  0.10302734,  0.11816406,  0.24316406,
       -0.00631714, -0.04858398,  0.05395508,  0.31835938,  0.16113281], dtype=float32)

In [42]:
w2v.wv.word_vec("star")[0:50]

array([ 0.1640625 ,  0.18847656,  0.14160156, -0.02941895,  0.02087402,
        0.13769531, -0.0168457 , -0.32617188,  0.07519531, -0.05200195,
        0.11816406,  0.09179688,  0.06689453, -0.04614258, -0.04321289,
        0.38476562,  0.0213623 , -0.09423828,  0.05712891,  0.18066406,
       -0.08740234,  0.3359375 , -0.078125  , -0.07861328, -0.02111816,
       -0.28320312,  0.08740234,  0.1796875 ,  0.11083984,  0.0480957 ,
       -0.00469971,  0.03857422,  0.01940918,  0.15332031,  0.07714844,
        0.01574707,  0.21875   ,  0.16113281, -0.14257812,  0.12695312,
        0.04736328, -0.48242188,  0.10302734,  0.11816406,  0.24316406,
       -0.00631714, -0.04858398,  0.05395508,  0.31835938,  0.16113281], dtype=float32)

In [43]:
w2v.wv.word_vec("want")[0:50]

array([  1.36718750e-01,   1.48437500e-01,   1.14746094e-01,
         6.98242188e-02,  -1.66992188e-01,  -6.28662109e-03,
         3.51562500e-01,  -7.91015625e-02,  -8.44726562e-02,
         2.19726562e-02,  -9.47265625e-02,  -9.47265625e-02,
        -1.95312500e-03,  -4.12597656e-02,  -1.39648438e-01,
         1.77734375e-01,   6.34765625e-02,   2.27539062e-01,
         8.20312500e-02,  -8.83789062e-02,  -1.88476562e-01,
        -3.36914062e-02,   9.57031250e-02,   8.78906250e-02,
         1.09863281e-01,   9.71679688e-02,   7.96318054e-05,
         7.56835938e-02,   7.32421875e-03,  -1.23535156e-01,
         1.03027344e-01,   1.65039062e-01,  -3.83300781e-02,
        -2.24609375e-01,   8.98437500e-02,   6.93359375e-02,
         1.96289062e-01,   7.76367188e-02,  -3.97949219e-02,
         9.96093750e-02,   4.17480469e-02,  -1.53320312e-01,
         1.66015625e-01,   1.56250000e-02,  -1.99218750e-01,
        -1.76757812e-01,   8.39843750e-02,   2.46582031e-02,
        -8.00781250e-02,

In [44]:
word_to_ix['want']

736

In [45]:
weights[736][0:50]

array([  1.36718750e-01,   1.48437500e-01,   1.14746094e-01,
         6.98242188e-02,  -1.66992188e-01,  -6.28662109e-03,
         3.51562500e-01,  -7.91015625e-02,  -8.44726562e-02,
         2.19726562e-02,  -9.47265625e-02,  -9.47265625e-02,
        -1.95312500e-03,  -4.12597656e-02,  -1.39648438e-01,
         1.77734375e-01,   6.34765625e-02,   2.27539062e-01,
         8.20312500e-02,  -8.83789062e-02,  -1.88476562e-01,
        -3.36914062e-02,   9.57031250e-02,   8.78906250e-02,
         1.09863281e-01,   9.71679688e-02,   7.96318054e-05,
         7.56835938e-02,   7.32421875e-03,  -1.23535156e-01,
         1.03027344e-01,   1.65039062e-01,  -3.83300781e-02,
        -2.24609375e-01,   8.98437500e-02,   6.93359375e-02,
         1.96289062e-01,   7.76367188e-02,  -3.97949219e-02,
         9.96093750e-02,   4.17480469e-02,  -1.53320312e-01,
         1.66015625e-01,   1.56250000e-02,  -1.99218750e-01,
        -1.76757812e-01,   8.39843750e-02,   2.46582031e-02,
        -8.00781250e-02,

## LSTM Classifier with Word2vec

In [46]:
W2V_DIM = 300
HIDDEN_DIM = 50
NUM_LAYERS = 1
DROPOUT = 0

In [47]:
class LSTMClassifierW2vec(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, num_layers, vocab_size, label_size, pre_trained_weights, dropout):
        super(LSTMClassifierW2vec, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        #print(pre_trained_weights[5][0:10])
        self.word_embeddings.weight.data=torch.Tensor(pre_trained_weights)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=False,dropout=dropout,bidirectional=False)
        self.hidden2label = nn.Linear(hidden_dim, label_size)
        self.hidden = self.init_hidden()

    def init_hidden(self):
        # the first is the hidden h
        # the second is the cell  c
        return (Variable(torch.zeros(self.num_layers, 1, self.hidden_dim)),
                Variable(torch.zeros(self.num_layers, 1, self.hidden_dim)))

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        x = embeds.view(len(sentence), 1, -1)
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y  = self.hidden2label(lstm_out[-1])
        log_probs = F.log_softmax(y)
        return log_probs

In [48]:
model_w2v = LSTMClassifierW2vec(embedding_dim=W2V_DIM,
                            hidden_dim=HIDDEN_DIM,
                            num_layers=NUM_LAYERS,
                            vocab_size=VOCAB_SIZE,
                            label_size=NUM_LABELS,
                            pre_trained_weights = weights,
                            dropout = DROPOUT)

In [49]:
model_w2v

LSTMClassifierW2vec (
  (word_embeddings): Embedding(17512, 300)
  (lstm): LSTM(300, 50)
  (hidden2label): Linear (50 -> 2)
)

In [50]:
## Testing if weights were copies
msg = "star"
sample_context=Variable(make_context_vector(msg,word_to_ix))
sample_context

Variable containing:
 5
[torch.LongTensor of size 1]

In [51]:
w2v.wv.word_vec("star")[0:50]

array([ 0.1640625 ,  0.18847656,  0.14160156, -0.02941895,  0.02087402,
        0.13769531, -0.0168457 , -0.32617188,  0.07519531, -0.05200195,
        0.11816406,  0.09179688,  0.06689453, -0.04614258, -0.04321289,
        0.38476562,  0.0213623 , -0.09423828,  0.05712891,  0.18066406,
       -0.08740234,  0.3359375 , -0.078125  , -0.07861328, -0.02111816,
       -0.28320312,  0.08740234,  0.1796875 ,  0.11083984,  0.0480957 ,
       -0.00469971,  0.03857422,  0.01940918,  0.15332031,  0.07714844,
        0.01574707,  0.21875   ,  0.16113281, -0.14257812,  0.12695312,
        0.04736328, -0.48242188,  0.10302734,  0.11816406,  0.24316406,
       -0.00631714, -0.04858398,  0.05395508,  0.31835938,  0.16113281], dtype=float32)

In [52]:
loss_function_w2v = nn.CrossEntropyLoss()
learning_rate_w2v = 0.001
optimizer_w2v = optim.Adam(model_w2v.parameters(),lr = learning_rate_w2v)

In [53]:
sample=train_data[2][0]
sample

'you can drive right by it without noticing anything special save for few comic turns intended and otherwise'

In [54]:
sample_context=Variable(make_context_vector(sample,word_to_ix))
sample_context

Variable containing:
 16
 17
 18
 14
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
  6
 31
[torch.LongTensor of size 18]

In [55]:
out=model_w2v(sample_context)
out

Variable containing:
-0.6449 -0.7439
[torch.FloatTensor of size 1x2]

In [None]:
iter = 0
for epoch in range(num_epochs):
    for (sent,label) in train_data:
        # Step 1 - clear the gradients
        model_w2v.zero_grad()
        optimizer_w2v.zero_grad()
        model_w2v.hidden = model_w2v.init_hidden()
    
        ## Avoid breaking for empty input
        try:
            ## Step 2- Prepare input and label
            context_vec = Variable(make_context_vector(sent, word_to_ix))
            target = Variable(make_target(label, label_to_ix)) 
            # Step 3 - Run forward pass
            output = model_w2v(context_vec)  
            # Step 4 - Compute loss, gradients, update parameters
            loss = loss_function_w2v(output, target)
            loss.backward()
            optimizer_w2v.step()
        except:
            pass
        iter+=1      
        ## Calculate final accuracy
        if iter % 500 ==0:
            correct = 0
            total = 0
            for (sent,label) in valid_data:
                context_vec = Variable(make_context_vector(sent, word_to_ix))
                target = Variable(make_target(label, label_to_ix))
                output = model_w2v(context_vec)
                _,predicted = torch.max(output.data,1)
                total += target.size(0)
                correct += (predicted == make_target(label, label_to_ix)).sum()
            accuracy = 100 * correct/total
            print('Iterations: {}. Loss: {}. Accuracy: {}'.format(iter,loss.data[0],accuracy))

Iterations: 500. Loss: 1.5224510431289673. Accuracy: 67.87807737397421
Iterations: 1000. Loss: 0.5993427038192749. Accuracy: 70.1641266119578
Iterations: 1500. Loss: 2.0562186241149902. Accuracy: 67.81946072684643
Iterations: 2000. Loss: 0.1832445114850998. Accuracy: 70.80890973036342
Iterations: 2500. Loss: 0.1929742395877838. Accuracy: 71.16060961313013
Iterations: 3000. Loss: 0.3004419803619385. Accuracy: 69.69519343493552
Iterations: 3500. Loss: 0.7613804340362549. Accuracy: 70.63305978898008
Iterations: 4000. Loss: 0.22596986591815948. Accuracy: 69.34349355216881
Iterations: 4500. Loss: 0.275987446308136. Accuracy: 64.88862837045721
Iterations: 5000. Loss: 0.12038353085517883. Accuracy: 67.99531066822978
Iterations: 5500. Loss: 0.21046432852745056. Accuracy: 70.69167643610785
Iterations: 6000. Loss: 0.0739918053150177. Accuracy: 70.22274325908558


In [None]:
n=3
context_vec = Variable(make_context_vector(valid_data[n][0], word_to_ix))
print("-"*20 + " INPUT "+"-"*30)
print("TRUE LABEL = {}".format(valid_data[n][1]))
print("SENTENCE = {}".format(valid_data[n][0]))
print("-"*20 + " PREDICTION "+"-"*30)
log_probs = model_w2v(context_vec)
_,predicted = torch.max(log_probs.data,1)
print("PRED = {}".format(predicted[0]))
print("PRED = {}".format(list(label_to_ix.keys())[list(label_to_ix.values()).index(predicted[0])]))
##print("LOG_PROB = {}".format(log_probs))
print("PROBS = {}".format(F.softmax(log_probs)))