<b>Building a Neural Network for Sentiment Analysis of IMDb Data</b>

In [42]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]="3" #solved the problem of future warning of floating variable from keras
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

import numpy as np
from keras.utils import to_categorical
from keras import models
from keras import layers
from keras.models import Sequential
from keras.layers import Dense, Activation

In [29]:
# Load the IMDB review dataset through Keras (vocabulary limited by num_words = 10000)
# and merge the loader's train/test splits into one data array and one label array,
# so that the notebook can carve out its own split further down.
from keras.datasets import imdb
(training_data, training_targets), (testing_data, testing_targets) = imdb.load_data(num_words=10000)
data = np.concatenate([training_data, testing_data], axis=0)
targets = np.concatenate([training_targets, testing_targets], axis=0)

In [30]:
# Sanity-check the merged dataset: which label values occur, and how many
# distinct word indices appear once all reviews are flattened together.
label_values = np.unique(targets)
print("Categories:", label_values)
all_word_ids = np.hstack(data)
print("Number of unique words:", len(np.unique(all_word_ids)))

Categories: [0 1]
Number of unique words: 9998


In [31]:
# `data` holds one entry per review (50,000 in total, see the output below); each
# entry is a sequence of word indices, and the sequences differ in length.
# Summarise how long the reviews are.
length = list(map(len, data))
print("Total Dataset:", len(length))
print("Average Review length:", np.mean(length))
review_length_std = np.std(length)
print("Standard Deviation:", round(review_length_std))

Total Dataset: 50000
Average Review length: 234.75892
Standard Deviation: 173.0


In [32]:
# Inspect a single review: every entry in `data` has a matching entry in
# `targets` (0 or 1) giving its sentiment label.
sample_idx = 4999
print("Label:", targets[sample_idx])
print(len(data[sample_idx]))

Label: 0
171


In [33]:
# Fetch the word -> index dictionary and invert it (index -> word) so that an
# encoded review can be turned back into readable text.
index = imdb.get_word_index()
reverse_index = {word_id: word for word, word_id in index.items()}
# Indices with no entry in the inverted dictionary are rendered as "#".
# NOTE(review): the "- 3" shift presumably compensates for reserved indices that
# the Keras loader places ahead of real words - confirm against the Keras docs.
first_review_words = (reverse_index.get(word_id - 3, "#") for word_id in data[0])
decoded = " ".join(first_review_words)
print(decoded)

# this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert # is an amazing actor and now the same being director # father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for # and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also # to the two little boy's that played the # of norman and paul they were just brilliant children are often left out of the # list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you thi

In [35]:
def vectorize(sequences, dimension = 10000):
    """Multi-hot encode integer sequences into a fixed-width 2-D array.

    Args:
        sequences: sized iterable of integer index sequences (one per sample).
            Every index must be a valid column position for `dimension`.
        dimension: number of columns of the encoded array.

    Returns:
        A NumPy float array of shape (len(sequences), dimension) in which
        entry [r, c] is 1 when index c occurs in the r-th sequence and 0
        otherwise. Repeated indices within a sequence still yield a single 1.
    """
    n_samples = len(sequences)
    encoded = np.zeros(shape=(n_samples, dimension))
    for row, word_ids in zip(range(n_samples), sequences):
        # Fancy-index assignment switches on every listed column of this row at once.
        encoded[row, word_ids] = 1
    return encoded
 
# Encode every review as a fixed-width 0/1 vector so that all samples share the
# same length regardless of how long the original review was, and store the
# labels as float32.
# Note: this rebinds `data` to its encoded form, so the cell must not be re-run
# without first re-running the loading cell.
data = vectorize(data)
targets = np.array(targets, dtype="float32")

In [36]:
# Hold out the first 10,000 samples for testing; the remaining 40,000 are used
# for training.
test_x, train_x = data[:10000], data[10000:]
test_y, train_y = targets[:10000], targets[10000:]

In [64]:
# Fully connected binary classifier over the 10,000-wide encoded reviews:
# three 50-unit Dense layers (dropout after the first two) feeding one sigmoid unit.
# Alternative first layer kept for reference:
#   layers.Dense(50, activation = "tanh", input_shape=(10000, ))

# Initializer settings shared by the three 50-unit layers.
init_kwargs = dict(kernel_initializer='random_uniform', bias_initializer='zeros')

model = Sequential([
    # Input layer: input_shape declares the width of each encoded review.
    layers.Dense(50, activation="relu", input_shape=(10000, ), **init_kwargs),
    # Hidden layers, each preceded by dropout.
    layers.Dropout(0.3),
    layers.Dense(50, activation="tanh", **init_kwargs),
    layers.Dropout(0.2),
    layers.Dense(50, activation="relu", **init_kwargs),
    # Output layer: a single sigmoid unit.
    layers.Dense(1, activation="sigmoid"),
])

# Print the layer-by-layer summary of the model just built.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_29 (Dense)             (None, 50)                500050    
_________________________________________________________________
dropout_14 (Dropout)         (None, 50)                0         
_________________________________________________________________
dense_30 (Dense)             (None, 50)                2550      
_________________________________________________________________
dropout_15 (Dropout)         (None, 50)                0         
_________________________________________________________________
dense_31 (Dense)             (None, 50)                2550      
_________________________________________________________________
dense_32 (Dense)             (None, 1)                 51        
Total params: 505,201
Trainable params: 505,201
Non-trainable params: 0
_________________________________________________________________


In [66]:
# Configure training: RMSprop optimizer, binary cross-entropy loss (the network
# ends in one sigmoid unit and the labels are 0/1), and accuracy as the tracked metric.
model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

In [67]:
# Train the model. The held-out split is passed as validation data, so the
# validation metrics recorded in `results` are measured on test_x / test_y.
fit_kwargs = dict(epochs=2, batch_size=32, validation_data=(test_x, test_y))
results = model.fit(train_x, train_y, **fit_kwargs)


Train on 40000 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2


In [68]:
# Report the held-out accuracy of the trained model, i.e. the validation accuracy
# recorded after the final epoch. Averaging the per-epoch values would blend in
# scores from earlier, less-trained states of the model and would not correspond
# to the accuracy obtained by predicting with the final model.
# The history key is "val_acc" on older Keras releases and "val_accuracy" on
# newer ones, so accept whichever is present.
history = results.history
val_acc_key = "val_accuracy" if "val_accuracy" in history else "val_acc"
print("Test-Accuracy:", history[val_acc_key][-1])

Test-Accuracy: 0.8996


In [79]:
# Recompute the test accuracy by hand: round each predicted probability to 0/1,
# count how many rounded predictions disagree with the true label, and report
# the fraction that agree.
ynew = model.predict(test_x)
count = sum(
    1
    for predicted_row, expected in zip(ynew, test_y)
    if np.round(predicted_row)[0] != expected
)
print("Test Accuray:",1-count/len(ynew))
    
    

Test Accuray: 0.8996
