In [1]:
#Loading the IMDB dataset
from keras.datasets import imdb

# Unpack the (data, labels) pairs for the train and test splits.
# num_words=10000 caps the vocabulary; per the Keras docs this keeps only the
# most frequent words - TODO confirm against the installed Keras version.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 10000)

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Inspect the first training sample.
train_data[0]

In [None]:
# Inspect the label that belongs to the first training sample.
train_labels[0]

In [None]:
# Largest index that occurs anywhere in the training sequences.
max(map(max, train_data))

In [None]:
# Mapping word -> integer index, as provided by the dataset module.
word_index = imdb.get_word_index()

# Invert it so an integer index can be turned back into its word.
reverse_word_index = {index: word for word, index in word_index.items()}

# Indices are shifted by 3 before the lookup (per the Keras IMDB docs the
# lowest indices are reserved - TODO confirm); anything not found decodes as '?'.
decoded_review = " ".join(
    reverse_word_index.get(token - 3, '?') for token in train_data[0]
)

In [None]:
# Display the text reconstructed from the first training sample.
decoded_review

In [None]:
#Encoding the integer sequence into binary matrix

import numpy as np

def vectorize_sequences(sequences, dimension=10000, dtype=float):
    """Multi-hot encode integer sequences into a 2-D matrix.

    Parameters
    ----------
    sequences : sequence of sequences of int
        One inner sequence per output row; each value is a column index to
        switch on in that row. Every index must be smaller than ``dimension``.
    dimension : int, default 10000
        Number of columns in the result.
    dtype : data-type, default float
        Element type of the result. The default (float64) matches what
        ``np.zeros`` produces when no dtype is given, so existing callers see
        no change; pass e.g. ``'float32'`` to halve the memory footprint of
        large matrices.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sequences), dimension)`` holding 1 at
        ``[i, j]`` when ``j`` occurs in ``sequences[i]`` and 0 elsewhere.

    Raises
    ------
    IndexError
        If a sequence contains an index outside the column range.
    """
    results = np.zeros((len(sequences), dimension), dtype=dtype)
    for i, sequence in enumerate(sequences):
        # Fancy indexing sets every listed column of row i in one step;
        # repeated indices simply write the same cell again.
        results[i, sequence] = 1
    return results

# Encode the training split first, then the test split, both with the
# function's default dimension.
x_train, x_test = (
    vectorize_sequences(split) for split in (train_data, test_data)
)

In [None]:
# Inspect the first row of the encoded training matrix.
x_train[0]

In [None]:
# Convert both label collections to float32 arrays (training first, test second).
y_train, y_test = (
    np.asarray(labels).astype('float32')
    for labels in (train_labels, test_labels)
)

In [None]:
#Model Definition

from keras import models
from keras import layers

# Stack of three fully connected layers: two 16-unit ReLU layers fed by
# 10000-dimensional inputs, followed by a single sigmoid output unit.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

Activation functions add non-linearity to the data transformation into a higher-dimensional space, which gives the network access to a much richer hypothesis space that benefits from deep representations.

In [None]:
#Compiling the Model

# Optimizer, loss and metric are all selected by their string identifiers.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['acc'],
)

In [None]:
#Configuring the Optimizers and Using Custom losses and metrics

from keras import optimizers
from keras import losses
from keras import metrics

# Pass the optimizer as an object so its learning rate is set explicitly, and
# hand over the loss and the metric as callables instead of string names.
model.compile(
    optimizer=optimizers.RMSprop(learning_rate=0.001),
    loss=losses.binary_crossentropy,
    metrics=[metrics.binary_accuracy],
)

In [None]:
# The first 10000 rows are held out for validation; the rest is kept for training.
x_val, partial_x_train = x_train[:10000], x_train[10000:]

In [None]:
# Split the labels at the same position (10000) as the inputs so rows stay aligned.
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [None]:
# Train for 20 epochs in mini-batches of 512 while evaluating on the held-out
# validation rows; the returned object exposes the recorded curves via `.history`.
trained_model = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=20,
    batch_size=512,
    validation_data=(x_val, y_val),
)

In [None]:
# Grab the recorded training history; the plotting cells below index it by
# metric name ('loss', 'val_loss', 'binary_accuracy', 'val_binary_accuracy').
trained_model_dict = trained_model.history
# Show which metric names were recorded.
trained_model_dict.keys()

In [None]:
import matplotlib.pyplot as plt

# Recorded training and validation loss curves.
loss_value = trained_model_dict['loss']
val_loss_value = trained_model_dict['val_loss']

# Number the epochs from 1 for the x-axis.
epochs = range(1, 1 + len(loss_value))

# Training loss as blue dots, validation loss as a solid blue line.
fig, ax = plt.subplots()
ax.plot(epochs, loss_value, 'bo', label='Training Loss')
ax.plot(epochs, val_loss_value, 'b', label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

In [None]:
# Recorded training and validation accuracy curves.
acc_values = trained_model_dict['binary_accuracy']
val_acc_values = trained_model_dict['val_binary_accuracy']

# Training accuracy as blue dots, validation accuracy as a solid blue line;
# the x-axis reuses the `epochs` range built for the loss plot.
fig, ax = plt.subplots()
ax.plot(epochs, acc_values, 'bo', label='Training Accuracy')
ax.plot(epochs, val_acc_values, 'b', label='Validation Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()

plt.show()

We can see that validation loss and validation accuracy are at their best around the fourth epoch. Even though the training loss keeps decreasing and the training accuracy keeps increasing, the model is overfitting the training data after the fourth epoch.

In [None]:
#Retraining the model from scratch, training only up to the fourth epoch to avoid overfitting

# Fresh, untrained network with the same layer stack as the earlier model.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=optimizers.RMSprop(learning_rate=0.001),
    loss=losses.binary_crossentropy,
    metrics=[metrics.binary_accuracy],
)

# Train on the full training set (no validation split) for 4 epochs only,
# then score the model on the test set.
model.fit(x_train, y_train, epochs=4, batch_size=512)
results = model.evaluate(x_test, y_test)