# Sentiment Classification


## Loading the dataset

In [1]:
from keras.datasets import imdb

vocab_size = 10000 #vocab size

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size) # vocab_size is no.of words to consider from the dataset, ordering based on frequency.

Using TensorFlow backend.


In [2]:
from keras.preprocessing.sequence import pad_sequences
vocab_size = 10000 #vocab size
maxlen = 20  #number of word used from each review

## Train test split

In [3]:
#load dataset as a list of ints
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)
#make all sequences of the same length
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test =  pad_sequences(x_test, maxlen=maxlen)

In [4]:
x_train.shape

(25000, 20)

In [5]:
x_test.shape

(25000, 20)

In [6]:
word_to_id= imdb.get_word_index()

In [7]:
id_to_word={value:key for key,value in word_to_id.items()}

In [8]:
print(id_to_word[1])

the


## Build Keras Embedding Layer Model
We can think of the Embedding layer as a dicionary that maps a index assigned to a word to a word vector. This layer is very flexible and can be used in a few ways:

* The embedding layer can be used at the start of a larger deep learning model. 
* Also we could load pre-train word embeddings into the embedding layer when we create our model.
* Use the embedding layer to train our own word2vec models.

The keras embedding layer doesn't require us to onehot encode our words, instead we have to give each word a unqiue intger number as an id. For the imdb dataset we've loaded this has already been done, but if this wasn't the case we could use sklearn [LabelEncoder](http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html).

In [9]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D

embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(vocab_size, embed_dim,input_length = x_train.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2,activation='softmax'))
model.compile(loss = 'sparse_categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 128)           1280000   
_________________________________________________________________
spatial_dropout1d (SpatialDr (None, 20, 128)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 196)               254800    
_________________________________________________________________
dense (Dense)                (None, 2)                 394       
Total params: 1,535,194
Trainable params: 1,535,194
Non-trainable params: 0
_________________________________________________________________
None


In [10]:
batch_size = 32
model.fit(x_train, y_train, epochs = 10, batch_size=batch_size, verbose = 2)

Train on 25000 samples
Epoch 1/10
25000/25000 - 29s - loss: 0.5357 - accuracy: 0.7212
Epoch 2/10
25000/25000 - 29s - loss: 0.4218 - accuracy: 0.8056
Epoch 3/10
25000/25000 - 28s - loss: 0.3541 - accuracy: 0.8411
Epoch 4/10
25000/25000 - 30s - loss: 0.3024 - accuracy: 0.8693
Epoch 5/10
25000/25000 - 29s - loss: 0.2536 - accuracy: 0.8928
Epoch 6/10
25000/25000 - 28s - loss: 0.2130 - accuracy: 0.9114
Epoch 7/10
25000/25000 - 28s - loss: 0.1793 - accuracy: 0.9276
Epoch 8/10
25000/25000 - 28s - loss: 0.1479 - accuracy: 0.9418
Epoch 9/10
25000/25000 - 28s - loss: 0.1255 - accuracy: 0.9517
Epoch 10/10
25000/25000 - 28s - loss: 0.1097 - accuracy: 0.9583


<tensorflow.python.keras.callbacks.History at 0x1fb1d838808>

In [11]:
score,acc = model.evaluate(x_test, y_test, verbose = 2, batch_size = batch_size)
print("score: %.2f" % (score))
print("acc: %.2f" % (acc))

25000/25000 - 4s - loss: 0.9483 - accuracy: 0.7367
score: 0.95
acc: 0.74


In [12]:
x_test[3]

array([  75,   26,    2,  574,   19,    4, 1729,   23,    4,  268,   38,
         95,  138,    4,  609,  191,   75,   28,  314, 1772])

## Retrive the output of each layer in keras for a given single test sample from the trained model you built

In [13]:
import keras.backend as K
import numpy as np
features = np.random.rand(1,maxlen)
get_all_layer_outputs = K.function([model.layers[0].input],
                                  [l.output for l in model.layers[1:]])

layer_output = get_all_layer_outputs([features])

In [14]:
print(layer_output)

[array([[[ 0.06500909, -0.00077358,  0.01465192, ...,  0.055968  ,
         -0.05338809,  0.03880438],
        [ 0.06500909, -0.00077358,  0.01465192, ...,  0.055968  ,
         -0.05338809,  0.03880438],
        [ 0.06500909, -0.00077358,  0.01465192, ...,  0.055968  ,
         -0.05338809,  0.03880438],
        ...,
        [ 0.06500909, -0.00077358,  0.01465192, ...,  0.055968  ,
         -0.05338809,  0.03880438],
        [ 0.06500909, -0.00077358,  0.01465192, ...,  0.055968  ,
         -0.05338809,  0.03880438],
        [ 0.06500909, -0.00077358,  0.01465192, ...,  0.055968  ,
         -0.05338809,  0.03880438]]], dtype=float32), array([[ 0.01151941, -0.02985353, -0.04485181, -0.02542052,  0.00611752,
         0.00113654,  0.02062319, -0.04196103, -0.01966707, -0.1609039 ,
        -0.08177948,  0.02433589, -0.06721227, -0.07411642,  0.00405345,
        -0.08459633,  0.07390124,  0.0308803 ,  0.10148957, -0.0060621 ,
        -0.04277287, -0.02356807, -0.3693281 ,  0.01572105,  0.0

In [None]:
from tensorflow.keras import backend as K
layer_names = [layer.name for layer in model.layers]
inp = model.input                                           # input 
outputs = [layer.output for layer in model.layers]          # all layer outputs
functors = [K.function([inp], [out]) for out in outputs]    # evaluation functions

# Testing
test = x_test[3]
count = 0
for func in functors:
  print('\n')
  print("Layer Name: ",layer_names[count])
  print('\n')
  print(func([test]))
  count+=1