# Tutorial MNIST Sequential

# Data preparation

In [27]:
from __future__ import print_function  
# for Python2 compatibility 

In [28]:
import numpy 
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): only NumPy is seeded here; whether the Keras backend draws from its
# own generator (and so still varies run to run) should be confirmed.
numpy.random.seed(1337)

In [29]:
import keras
from keras.models import Sequential #model
from keras.layers import Dense  #'inside layers'
from keras.optimizers import SGD  #changes model parameters so the model predicts better
from keras.datasets import mnist #dataset

In [30]:
# Training configuration for the dense MNIST classifier.
num_classes = 10  # one class per digit, 0 through 9
epochs = 20       # number of full passes over the training set
batch_size = 128  # samples per gradient update (mini-batches instead of the whole set at once)

In [31]:
# Load MNIST as two (images, labels) pairs: one for training and one for testing.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [32]:
# Shapes in order: train images, train labels, test images, test labels.
# Expect 60000 training and 10000 test samples, each image being 28x28 pixels.
print(*(arr.shape for arr in (x_train, y_train, x_test, y_test)))

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)


In [33]:
# Flatten every 28x28 image into a single 784-element vector for the dense layers.
# The sample count is read from the array and -1 lets NumPy infer the vector
# length, so this also works when the dataset has been subsampled.
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Convert to float32 so the in-place division below can store fractional values.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Normalise: scale pixel values from the 0-255 range down to 0-1.
# NOTE: this happens in place, so re-running only this cell divides by 255 again;
# re-run from the data-loading cell instead.
x_train /= 255
x_test /= 255

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

60000 train samples
10000 test samples


In [34]:
# Each image is now one flat vector (first line); the labels are still plain integers (second line).
print(x_train.shape, y_train.shape, sep='\n')

(60000, 784)
(60000,)


In [35]:
# Peek at the first training label while it is still a plain integer class id.
y_train[0]

5

In [36]:
# One-hot encode the integer labels: each label becomes a vector of length
# num_classes that is all zeros except for a 1 at the label's own index
# (so the digit 5 becomes a 10-element vector with a 1 in position 5).
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [38]:
# One-hot encoding of the first label: every entry is 0 except the one at index 5.
y_train[0]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32)

That's called a one-hot vector. 

# Training / Testing

In [39]:
# Start from an empty Sequential model; the layers are added in the following cell.
model = Sequential()

In [40]:
# Two hidden layers of 512 sigmoid units each; only the first one declares the
# 784-element input shape.
model.add( Dense(512, activation='sigmoid', input_shape=(784,)  ) )  # hidden layer 1
model.add( Dense(512, activation='sigmoid'  ) )  # hidden layer 2
# Output layer: one softmax unit per class.
model.add( Dense(num_classes, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.


In [48]:
# Print a per-layer overview of the model: output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


In [49]:
# Configure training: SGD with its default settings minimises categorical
# cross-entropy between the one-hot labels and the softmax output, and
# accuracy is reported as an additional metric.
model.compile(
    optimizer=SGD(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [50]:
# Train the network and store the returned training history so the results
# can be inspected afterwards.
# The test split is also passed as validation data, so the validation metrics
# shown for each epoch come from the same samples that are evaluated later.
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
    verbose=1,
)

Train on 60000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [51]:
# Evaluate the trained model on the test set.
# score[0] holds the loss and score[1] the accuracy.
score = model.evaluate(x_test, y_test)



See some great results here: http://rodrigob.github.io/are_we_there_yet/build/classification_datasets_results.html#4d4e495354 

In [56]:
score[1]  # accuracy on the test set

0.9038

In [57]:
score[0]  # loss on the test set

0.33501097632050514

# Original Keras Tutorial: Convolutional Network


### Imports

In [77]:
from __future__ import print_function

import datetime
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

### Some variables

In [78]:
# Shorthand for reading the current time; used to time the training runs.
now = datetime.datetime.now

# --- Training hyper-parameters ---
batch_size = 128  # samples fed to the network per gradient update
num_classes = 5   # each task covers five digits (0-4 first, then 5-9)
epochs = 5        # full passes over the training data

# --- Input geometry ---
img_rows = img_cols = 28  # input images are 28x28 pixels

# --- Convolutional architecture ---
filters = 32      # number of convolutional filters per Conv2D layer
kernel_size = 3   # side length of the convolution kernel
pool_size = 2     # side length of the max-pooling window


In [79]:
# The single grey-scale channel goes first or last depending on how the
# Keras backend is configured to lay out image data.
input_shape = (
    (1, img_rows, img_cols)
    if K.image_data_format() == 'channels_first'
    else (img_rows, img_cols, 1)
)

### Data preparation

In [80]:
# Load MNIST, already split into train and test sets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Boolean masks selecting the two digit groups in each split.
train_low, train_high = y_train < 5, y_train >= 5
test_low, test_high = y_test < 5, y_test >= 5

# Task 1: digits 0-4 (labels are already in the range 0..4).
x_train_lt5, y_train_lt5 = x_train[train_low], y_train[train_low]
x_test_lt5, y_test_lt5 = x_test[test_low], y_test[test_low]

# Task 2: digits 5-9, with labels shifted down by 5 so they also span 0..4.
x_train_gte5, y_train_gte5 = x_train[train_high], y_train[train_high] - 5
x_test_gte5, y_test_gte5 = x_test[test_high], y_test[test_high] - 5


### Defining model

In [81]:
# The network is built from two layer groups kept in separate lists so that the
# first group can later be addressed on its own.

# Group 1 - feature extraction: two convolutions, pooling, dropout, flatten.
feature_layers = [
    Conv2D(filters=filters,
           kernel_size=kernel_size,
           padding='valid',
           input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters=filters, kernel_size=kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(rate=0.25),
    Flatten(),
]

# Group 2 - classification: a dense hidden layer followed by a softmax output.
classification_layers = [
    Dense(units=128),
    Activation('relu'),
    Dropout(rate=0.5),
    Dense(units=num_classes),
    Activation('softmax'),
]

# Chain both groups into one Sequential model.
model = Sequential(feature_layers + classification_layers)

### Training & Validation

In [82]:
def train_model(model, train, test, num_classes):
    """Compile, fit and evaluate ``model`` on one classification task.

    Args:
        model: Keras model to train; it is compiled again on every call.
        train: ``(images, labels)`` pair used for fitting.
        test: ``(images, labels)`` pair used as validation data during
            fitting and for the final evaluation.
        num_classes: Length of the one-hot label vectors.

    Reads the notebook-level ``input_shape``, ``batch_size``, ``epochs`` and
    ``now``. Prints the dataset sizes, the training time, and the test loss
    and accuracy. Returns nothing.
    """
    def as_network_input(images):
        # Give each image the shape the model expects, then scale to float32 in 0-1.
        shaped = images.reshape((images.shape[0],) + input_shape)
        return shaped.astype('float32') / 255

    train_inputs = as_network_input(train[0])
    test_inputs = as_network_input(test[0])
    print('x_train shape:', train_inputs.shape)
    print(train_inputs.shape[0], 'train samples')
    print(test_inputs.shape[0], 'test samples')

    # One-hot encode the integer class labels.
    train_targets = keras.utils.to_categorical(train[1], num_classes)
    test_targets = keras.utils.to_categorical(test[1], num_classes)

    model.compile(optimizer='adadelta',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Time only the fitting step.
    started = now()
    model.fit(train_inputs, train_targets,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(test_inputs, test_targets),
              verbose=1)
    print('Training time: %s' % (now() - started))

    # evaluate() yields the loss first, then the accuracy metric.
    loss, accuracy = model.evaluate(test_inputs, test_targets, verbose=0)
    print('Test score:', loss)
    print('Test accuracy:', accuracy)
    
# First task: train the whole network to classify the five low digits [0..4].
train_model(
    model,
    train=(x_train_lt5, y_train_lt5),
    test=(x_test_lt5, y_test_lt5),
    num_classes=num_classes,
)

x_train shape: (30596, 28, 28, 1)
30596 train samples
5139 test samples
Train on 30596 samples, validate on 5139 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:04:55.360156
Test score: 0.006347224395555649
Test accuracy: 0.9976649153531816


### Freeze convolutional layers

In [83]:
# Mark every feature-extraction (convolutional) layer as non-trainable so that
# training on new data does not update them.
# train_model compiles the model again before fitting.
for layer in feature_layers:
    layer.trainable = False

### Transfer: training & validation of the model with new data

In [84]:
# Transfer step: reuse the same model on the new task, the digits [5..9]
# (labels shifted to 0..4), with the feature layers frozen.
train_model(
    model,
    train=(x_train_gte5, y_train_gte5),
    test=(x_test_gte5, y_test_gte5),
    num_classes=num_classes,
)

x_train shape: (29404, 28, 28, 1)
29404 train samples
4861 test samples
Train on 29404 samples, validate on 4861 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training time: 0:02:00.016625
Test score: 0.025710257719178155
Test accuracy: 0.9915655213872779
