In [1]:
from keras.datasets import imdb

# Load the IMDB train/test splits; num_words=10000 limits the vocabulary so
# that word indices stay below 10000 (matching the encoding dimension used later).
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000
)

In [2]:
# preparing the data
import numpy as np

#One-hot encode your lists to turn them into vectors of 0s and 1s.
def vectorize_sequences(sequences, dimension=10000):
    """Encode integer sequences as fixed-width 0/1 indicator rows.

    Args:
        sequences: Sized iterable of index collections; every index must be
            a valid column position, i.e. smaller than ``dimension``.
        dimension: Width of each encoded row.

    Returns:
        A ``(len(sequences), dimension)`` array (NumPy's default float dtype)
        in which ``out[r, c]`` is 1 when index ``c`` occurs in the r-th
        sequence and 0 otherwise. Repeated indices still yield a single 1.
    """
    n_samples = len(sequences)
    encoded = np.zeros((n_samples, dimension))
    row = 0
    for word_indices in sequences:
        # Fancy indexing switches on every listed column of this row at once.
        encoded[row, word_indices] = 1
        row += 1
    return encoded

# Encode the training split first, then the test split, with the same helper.
x_train, x_test = (
    vectorize_sequences(reviews) for reviews in (train_data, test_data)
)

In [3]:
#vectorize your labels

# Turn the label containers into float32 NumPy arrays.
y_train = np.asarray(train_labels).astype(np.float32)
y_test = np.asarray(test_labels).astype(np.float32)

In [4]:
#validation data
# Hold out the first 10000 training samples for validation and keep the
# remainder for fitting; features and labels are cut at the same position.
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

## Let's try three hidden layers

In [5]:
#building your model
#last time I used 1 hidden layer; let's try using 2 or 3 hidden layers (the model below uses 3)
from keras import models
from keras import layers

# Three 16-unit ReLU hidden layers over the 10000-wide input, followed by a
# single sigmoid output unit.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [6]:
# Optimise binary cross-entropy with RMSprop and track accuracy.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Fit on the reduced training set for 4 epochs with batches of 512 samples,
# evaluating on the held-out validation data after each epoch.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=4,
    batch_size=512,
    validation_data=(x_val, y_val),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [8]:
# Score the trained model on the test split; the displayed list holds the
# loss followed by the accuracy metric requested at compile time.
results = model.evaluate(x_test, y_test)
results



[0.2946679890155792, 0.883679986000061]

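Notice that with our previous approach (1 hidden layer) the accuracy was 0.9976, and this time it is 0.997, so there isn't much difference in the accuracy. (Note: these figures do not match the test accuracy of about 0.884 returned by `model.evaluate` above, so they presumably refer to accuracy on the training data.) However, the validation loss this time is a little higher than last time. Let's plot the accuracy and validation curves.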

In [9]:
# The History object returned by fit() keeps the per-epoch metrics in its
# `history` attribute; list which metric series were recorded.
history_dict = history.history
history_dict.keys()

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

## Let's change the number of hidden units to 32

In [10]:
# Two 32-unit ReLU hidden layers followed by a single sigmoid output unit.
model = models.Sequential([
    layers.Dense(32, activation='relu', input_shape=(10000,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [11]:
# Same training setup as before: RMSprop, binary cross-entropy, accuracy.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [12]:
# Train the 32-unit model for 4 epochs with batches of 512 samples and
# validate on the held-out data after each epoch.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=4,
    batch_size=512,
    validation_data=(x_val, y_val),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [13]:
# Test-set loss and accuracy for the 32-unit model.
results = model.evaluate(x_test, y_test)
results



[0.3002668023109436, 0.8785600066184998]

## Let's use the MSE loss function

In [14]:
# Two 16-unit ReLU hidden layers followed by a single sigmoid output unit.
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [15]:
# Swap the loss to mean squared error; optimizer and metric stay the same.
model.compile(
    optimizer='rmsprop',
    loss='mse',
    metrics=['accuracy'],
)

In [16]:
# Train the MSE-loss model for 4 epochs with batches of 512 samples and
# validate on the held-out data after each epoch.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=4,
    batch_size=512,
    validation_data=(x_val, y_val),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [17]:
# Test-set loss (here an MSE value) and accuracy for this model.
results = model.evaluate(x_test, y_test)
results



[0.08931127190589905, 0.8838800191879272]

## Let's use tanh activation

In [20]:
# Two 16-unit tanh hidden layers followed by a single sigmoid output unit.
model = models.Sequential([
    layers.Dense(16, activation='tanh', input_shape=(10000,)),
    layers.Dense(16, activation='tanh'),
    layers.Dense(1, activation='sigmoid'),
])

In [21]:
# Back to binary cross-entropy with RMSprop, tracking accuracy.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [22]:
# Train the tanh model for 4 epochs with batches of 512 samples and
# validate on the held-out data after each epoch.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=4,
    batch_size=512,
    validation_data=(x_val, y_val),
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [23]:
# Test-set loss and accuracy for the tanh model.
results = model.evaluate(x_test, y_test)
results



[0.2914726734161377, 0.8798800110816956]

Comparing the model evaluation scores of all the different changes that we tried:

**Three hidden layers:**    [0.2946679890155792, 0.883679986000061]<br>
**No. of hidden units 32:** [0.3002668023109436, 0.8785600066184998]<br>
**MSE loss function:**      [0.08931127190589905, 0.8838800191879272]<br>
**tanh activation:**        [0.2914726734161377, 0.8798800110816956]<br>

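We can see that the highest test accuracy (0.8839) came from using the MSE loss function instead of binary cross-entropy, although it is only marginally above the first model (0.8837). Note that the loss values themselves are not comparable across runs that use different loss functions: the MSE value of about 0.089 is on a different scale from the binary cross-entropy values of about 0.29–0.30, so only the accuracy figures can be used to compare the MSE run with the others.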