# Notes from *Deep Learning with Python* (using Keras)
Book by François Chollet.

In [139]:
import sys
import tensorflow as tf
import numpy as np

from keras import models
from keras import layers

# report the interpreter and TensorFlow versions, one per line
print(sys.version, tf.__version__, sep='\n')

3.5.4 |Anaconda, Inc.| (default, Nov  8 2017, 18:11:28) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]
1.4.1


## MNIST dataset

In [None]:
from keras.datasets import mnist
from keras.utils import to_categorical

In [None]:
# import the mnist data
# load_data() returns two (images, labels) pairs -- the training split first,
# then the test split -- which are unpacked into four arrays here
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

In [None]:
# network architecture using Sequential class (kept for reference, not executed)
# network = models.Sequential()
# network.add(layers.Dense(512, activation='relu', input_shape=(28 * 28,)))
# network.add(layers.Dense(10, activation='softmax'))

# same layer layout (one hidden relu layer + a 10-way softmax output) built
# with the functional API, which gives more control over network architecture
# NOTE: the hidden layer here has 32 units, not the 512 of the Sequential
# version above, so the two networks are not identical in size
input_tensor = layers.Input(shape=(28 * 28,))
x = layers.Dense(32, activation='relu')(input_tensor)
# softmax (not relu) on the output layer: it normalises the 10 scores into
# class probabilities that sum to 1, which is what the categorical_crossentropy
# loss used at compile time expects; relu would give unbounded scores instead
output_tensor = layers.Dense(10, activation='softmax')(x)

network = models.Model(inputs=input_tensor, outputs=output_tensor)

In [None]:
# compilation step: choose the loss to minimise, the optimizer that
# minimises it, and the metric reported while training / evaluating
network.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
# prepare image data: flatten every 28x28 image into a 784-long vector and
# divide by 255 (assuming 8-bit pixel values, this maps them into [0, 1])
# The number of samples is read from each array instead of being hard-coded,
# so the cell also works on a subset or a differently sized split.
# NOTE: this cell overwrites its inputs -- re-running it without reloading
# the data would divide by 255 a second time
train_images = train_images.reshape((len(train_images), 28 * 28))
train_images = train_images.astype('float32') / 255

test_images = test_images.reshape((len(test_images), 28 * 28))
test_images = test_images.astype('float32') / 255

In [None]:
# prepare labels: one-hot encode the digit labels
# num_classes is pinned to 10 (the width of the softmax output layer) so the
# train and test matrices always have the same number of columns, instead of
# each call inferring it from the largest label present in its own split
# NOTE: this cell overwrites its inputs -- reload the data before re-running it
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

In [None]:
# fit the model: 5 passes over the training set in mini-batches of 128
network.fit(x=train_images, y=train_labels, batch_size=128, epochs=5)

In [None]:
# evaluate model on the held-out test split; with one metric configured,
# evaluate() yields the loss followed by the accuracy
test_loss, test_acc = network.evaluate(x=test_images, y=test_labels)
print('test_acc: ', test_acc)

## Binary classification example using the IMDB dataset


In [56]:
from keras.datasets import imdb

In [59]:
# num_words=10000 caps the vocabulary, so word indices in the reviews stay below 10000
# NOTE: this rebinds train_labels / test_labels, replacing the MNIST labels
# from the previous section
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

In [128]:
# quick look at the raw training data: container shape, what a single
# sample looks like, the range of word indices, and the number of reviews
print("Shape:", train_data.shape)
print("An example of an element in train_data: type is {} if length: {}".format(
    type(train_data[0]),
    len(train_data[0]),
))
print("Max word index: ", max(map(max, train_data)))
print("Min word index: ", min(map(min, train_data)))
print(len(train_data))

Shape: (25000,)
An example of an element in train_data: type is <class 'list'> if length: 218
Max word index:  9999
Min word index:  1
25000


In [121]:
# decode the first training review back into words
# indices stored in train_data are shifted up by 3 relative to the ones in
# imdb.get_word_index(), hence the `- 3`; anything not found prints as '?'
word_index = imdb.get_word_index()
reverse_word_index = {index: word for word, index in word_index.items()}
print(' '.join(reverse_word_index.get(token - 3, '?') for token in train_data[0]))

? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you thi

In [123]:
# another way to understand how the indexing works in the data:
# the word indices in the dictionary returned by imdb.get_word_index() do not
# line up with the word indices in train_data from imdb.load_data().
# Indices 0, 1, and 2 in train_data are reserved for PADDING, START, and
# UNKNOWN, so every dictionary index is shifted up by 3 and the three reserved
# entries are added before the lookup table is inverted.
word_index = {word: index + 3 for word, index in imdb.get_word_index().items()}
word_index.update({'PADDING': 0, 'START': 1, 'UNKNOWN': 2})

reverse_word_index = {index: word for word, index in word_index.items()}
print(' '.join(reverse_word_index.get(token, '?') for token in train_data[0]))

START this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert UNKNOWN is an amazing actor and now the same being director UNKNOWN father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for UNKNOWN and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also UNKNOWN to the two little boy's that played the UNKNOWN of norman and paul they were just brilliant children are often left out of the UNKNOWN list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be prais

In [130]:
# encode the integer sequences into a binary matrix
def vectorize_sequences(sequences, dimension=10000, dtype=np.float64):
    """Multi-hot encode a collection of integer sequences.

    Parameters
    ----------
    sequences : sized iterable of integer sequences
        One entry per sample (e.g. one review); each entry lists the
        integer indices (e.g. word indices) present in that sample.
    dimension : int, optional
        Number of columns of the result, i.e. the size of the index
        vocabulary. Defaults to 10000 (the top 10,000 words).
    dtype : numpy dtype, optional
        Element type of the result. Defaults to float64, which is what
        ``np.zeros`` produces when no dtype is given; passing
        ``np.float32`` halves the memory of the (potentially very large)
        dense matrix.

    Returns
    -------
    numpy.ndarray of shape ``(len(sequences), dimension)``
        ``result[i, j]`` is 1 if index ``j`` occurs in ``sequences[i]``
        and 0 otherwise. Repeated indices still yield a single 1, so word
        counts and word order are not preserved.

    Raises
    ------
    IndexError
        If a sequence contains an index >= ``dimension``.
    """
    # one row per sequence, one column per possible index
    results = np.zeros((len(sequences), dimension), dtype=dtype)

    for row, sequence in enumerate(sequences):
        # fancy indexing switches on every listed column of this row at once
        results[row, sequence] = 1

    return results

# multi-hot encode both splits; each result has one row per review and
# 10000 columns (the function's default `dimension`)
# NOTE: the matrices are dense float64 -- roughly 2 GB for 25000 reviews
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

In [140]:
# vectorize labels: copy them into float32 arrays
y_train = np.array(train_labels, dtype='float32')
y_test = np.array(test_labels, dtype='float32')

In [141]:
# create validation set: the first 10000 samples are held out for
# validation, the remainder is what the model actually trains on
x_val, partial_x_train = x_train[:10000], x_train[10000:]
y_val, partial_y_train = y_train[:10000], y_train[10000:]

In [142]:
# build network architecture: two 16-unit relu layers on top of the
# 10000-wide input, followed by a single sigmoid output unit
model = models.Sequential([
    layers.Dense(16, activation='relu', input_shape=(10000,)),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [None]:
# compile the model
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# train on the reduced training set and evaluate on the held-out validation
# samples (x_val / y_val) after every epoch, so that `history.history` also
# records the validation loss and accuracy needed to spot overfitting
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(x_val, y_val))