In [2]:
%matplotlib inline
from matplotlib import pyplot
from mpl_toolkits.axes_grid1 import ImageGrid


In [23]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Lambda, Reshape, MaxPooling2D, Conv2D
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
import numpy as np    
import tensorflow as tf

### Check backend

In [4]:
import keras
print('Keras version:', keras.__version__)
from keras import backend
backend.backend()

Keras version: 2.0.2


'tensorflow'

### Import data and reshape it

In [5]:
(x_train, y_train), (x_test,y_test) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [6]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [7]:
x_train = np.expand_dims(x_train, 1)
x_test = np.expand_dims(x_test, 1)

y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [8]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((60000, 1, 28, 28), (60000, 10), (10000, 1, 28, 28), (10000, 10))

In [9]:
y_test[0:5]

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.]])

### Constants

In [10]:
batch_size = 64

In [11]:
def seed():
    np.random.seed(42)
    tf.set_random_seed(42)

In [12]:
mean_px = x_train.mean().astype(np.float32)
std_px = x_train.std().astype(np.float32)

def norm(x):
    return (x - mean_px) / std_px

### Linear model

In [13]:
def linear_model_without_normalization():
    model = Sequential([
        Flatten(input_shape=(1,28,28)),
        Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='sgd' , metrics=['accuracy'])
    return model

In [14]:
def linear_model():
    model = Sequential([
        Lambda(norm, input_shape=(1,28,28)),
        Flatten(),
        Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer='sgd' , metrics=['accuracy'])
    return model

### Neural network

In [15]:
def single_layer_network(activation = 'sigmoid', hidden_layer = 25, optimizer = 'sgd'):
    model = Sequential([        
        Lambda(norm, input_shape=(1,28,28)),
        Flatten(),
        Dense(hidden_layer, activation=activation),
        Dense(10, activation='softmax')        
    ])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer , metrics=['accuracy'])
    return model

### Two layered neural network

In [16]:
def two_leyered_network(activation = 'sigmoid', hidden_layer = 25, optimizer = 'sgd'):
    model = Sequential([        
        Lambda(norm, input_shape=(1,28,28)),
        Flatten(),
        Dense(hidden_layer, activation=activation),
        Dense(hidden_layer, activation=activation),
        Dense(10, activation='softmax')        
    ])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer , metrics=['accuracy'])
    return model

### First convnet

In [31]:
def simple_convnet(activation = 'relu', hidden_layer = 25, optimizer = 'adam'):
    data_format='channels_first'
    model = Sequential([
        Lambda(norm, input_shape=(1,28,28)),
        Conv2D(32,3, activation=activation, data_format=data_format),
        Conv2D(32,3, activation=activation, data_format=data_format),
        MaxPooling2D(data_format=data_format),
        Flatten(),
        Dense(hidden_layer, activation=activation),
        Dense(10, activation='softmax')
    ])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer , metrics=['accuracy'])
    return model

### Testing

In [41]:
def fit(model):
    seed()
    return model.fit(x_train, y_train, nb_epoch=3, batch_size = batch_size, verbose=2, validation_data=(x_test, y_test))

def small_fit(model, epochs=3, max_train = 1000, max_test = 1000):
    seed()
    small_x_train = x_train[0:max_train]
    small_y_train = y_train[0:max_train]
    small_x_test = x_test[0:max_test]
    small_y_test = y_test[0:max_test]
    return model.fit(small_x_train, small_y_train, epochs=epochs, batch_size = batch_size, verbose=2, validation_data=(small_x_test, small_y_test))

#### linear models

In [218]:
linear = linear_model_without_normalization()
fit(linear)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
2s - loss: 11.5147 - acc: 0.2853 - val_loss: 11.2015 - val_acc: 0.3049
Epoch 2/3
2s - loss: 11.2182 - acc: 0.3039 - val_loss: 11.1603 - val_acc: 0.3075
Epoch 3/3
2s - loss: 11.1989 - acc: 0.3052 - val_loss: 11.1870 - val_acc: 0.3059


<keras.callbacks.History at 0x7f373e7c3d30>

In [23]:
linear = linear_model()
fit(linear)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
1s - loss: 0.5113 - acc: 0.8505 - val_loss: 0.3486 - val_acc: 0.9023
Epoch 2/3
1s - loss: 0.3453 - acc: 0.9006 - val_loss: 0.3163 - val_acc: 0.9098
Epoch 3/3
1s - loss: 0.3203 - acc: 0.9085 - val_loss: 0.3030 - val_acc: 0.9136


<keras.callbacks.History at 0x7f66926d1a90>

#### neural nets

In [46]:
net = single_layer_network('sigmoid', 25, 'adam')
fit(net)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
2s - loss: 0.6220 - acc: 0.8671 - val_loss: 0.3285 - val_acc: 0.9211
Epoch 2/3
2s - loss: 0.2867 - acc: 0.9242 - val_loss: 0.2496 - val_acc: 0.9318
Epoch 3/3
2s - loss: 0.2298 - acc: 0.9371 - val_loss: 0.2166 - val_acc: 0.9423


<keras.callbacks.History at 0x7f667ab035c0>

In [48]:
net = single_layer_network('sigmoid', 128, 'sgd')
fit(net)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
3s - loss: 1.0252 - acc: 0.7684 - val_loss: 0.5879 - val_acc: 0.8699
Epoch 2/3
3s - loss: 0.5140 - acc: 0.8765 - val_loss: 0.4319 - val_acc: 0.8929
Epoch 3/3
3s - loss: 0.4158 - acc: 0.8921 - val_loss: 0.3719 - val_acc: 0.9016


<keras.callbacks.History at 0x7f667a3b9d30>

In [47]:
net = single_layer_network('sigmoid', 128, 'adam')
fit(net)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
4s - loss: 0.3458 - acc: 0.9067 - val_loss: 0.2026 - val_acc: 0.9417
Epoch 2/3
3s - loss: 0.1725 - acc: 0.9503 - val_loss: 0.1459 - val_acc: 0.9569
Epoch 3/3
3s - loss: 0.1279 - acc: 0.9633 - val_loss: 0.1216 - val_acc: 0.9641


<keras.callbacks.History at 0x7f667a87d898>

In [51]:
net = single_layer_network('relu', 128, 'adam')
fit(net)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
4s - loss: 0.2372 - acc: 0.9289 - val_loss: 0.1310 - val_acc: 0.9590
Epoch 2/3
3s - loss: 0.1060 - acc: 0.9682 - val_loss: 0.0996 - val_acc: 0.9675
Epoch 3/3
4s - loss: 0.0753 - acc: 0.9767 - val_loss: 0.0914 - val_acc: 0.9709


<keras.callbacks.History at 0x7f6678510b38>

In [50]:
net = single_layer_network('sigmoid', 512, 'adam')
fit(net)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
11s - loss: 0.2803 - acc: 0.9177 - val_loss: 0.1616 - val_acc: 0.9533
Epoch 2/3
11s - loss: 0.1258 - acc: 0.9635 - val_loss: 0.1048 - val_acc: 0.9666
Epoch 3/3
10s - loss: 0.0823 - acc: 0.9762 - val_loss: 0.0886 - val_acc: 0.9719


<keras.callbacks.History at 0x7f6679c0d828>

In [52]:
net = single_layer_network('relu', 512, 'adam')
fit(net)

Train on 60000 samples, validate on 10000 samples
Epoch 1/3
11s - loss: 0.1925 - acc: 0.9417 - val_loss: 0.1177 - val_acc: 0.9628
Epoch 2/3
11s - loss: 0.0796 - acc: 0.9759 - val_loss: 0.0849 - val_acc: 0.9724
Epoch 3/3
11s - loss: 0.0546 - acc: 0.9828 - val_loss: 0.0767 - val_acc: 0.9759


<keras.callbacks.History at 0x7f669c53f9b0>

In [32]:
net = two_leyered_network('relu', 128, 'adam')
small_fit(net,5)

Train on 1000 samples, validate on 1000 samples
Epoch 1/5
0s - loss: 1.3540 - acc: 0.5850 - val_loss: 0.8517 - val_acc: 0.7160
Epoch 2/5
0s - loss: 0.4914 - acc: 0.8590 - val_loss: 0.5888 - val_acc: 0.7930
Epoch 3/5
0s - loss: 0.2882 - acc: 0.9140 - val_loss: 0.5112 - val_acc: 0.8230
Epoch 4/5
0s - loss: 0.1910 - acc: 0.9500 - val_loss: 0.4663 - val_acc: 0.8400
Epoch 5/5
0s - loss: 0.1164 - acc: 0.9730 - val_loss: 0.4983 - val_acc: 0.8320


<keras.callbacks.History at 0x7f375c3a1da0>

#### Convnets test

In [44]:
net = simple_convnet()
small_fit(net, 5, 60000, 10000)

Train on 60000 samples, validate on 10000 samples
Epoch 1/5
358s - loss: 0.1670 - acc: 0.9492 - val_loss: 0.0544 - val_acc: 0.9818
Epoch 2/5
360s - loss: 0.0522 - acc: 0.9840 - val_loss: 0.0405 - val_acc: 0.9868
Epoch 3/5
359s - loss: 0.0353 - acc: 0.9889 - val_loss: 0.0400 - val_acc: 0.9870
Epoch 4/5
358s - loss: 0.0271 - acc: 0.9912 - val_loss: 0.0423 - val_acc: 0.9875
Epoch 5/5


KeyboardInterrupt: 

In [37]:
net.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lambda_8 (Lambda)            (None, 1, 28, 28)         0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 32, 26, 26)        320       
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 32, 24, 24)        9248      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 32, 12, 12)        0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 4608)              0         
_________________________________________________________________
dense_11 (Dense)             (None, 25)                115225    
_________________________________________________________________
dense_12 (Dense)             (None, 10)                260       
Total para

In [183]:
def plot(grid, image):
    (c, w, h) = image.shape
    image_rgb = np.zeros([w, h, 3])
    normalized_image = (image - image.min()) / (image.max() - image.min())
    image_rgb[:, :, 0] = normalized_image
    grid.imshow(image_rgb[:,:,0], cmap='gray')

In [207]:
def plot_weights(weights):
    fig = pyplot.figure(figsize=(15,10))
    grid = ImageGrid(fig, 111, nrows_ncols=(4, 8), axes_pad=0.3)

    for w in range(len(weights[0])):
        plot(grid[w], weights[0][w])
        grid[w].axis('off')

    pyplot.show(block=True)

In [222]:
l = net.layers[1]
print(l)
weights = l.get_weights()
print(weights)
#plot_weights(weights)
weights[0][0].shape, len(weights[0])


<keras.layers.convolutional.Convolution2D object at 0x7f374c46beb8>
[array([[[[  8.55829101e-03,   2.33265013e-02,   1.46452278e-01],
         [  1.00396061e-02,   1.03864051e-01,  -1.67869050e-02],
         [ -1.00498572e-01,  -1.44897297e-01,  -2.81897075e-02]]],


       [[[  1.58030540e-01,   7.20363334e-02,  -3.78120020e-02],
         [  1.61624357e-01,   1.10332526e-01,   1.75694078e-01],
         [ -2.39192834e-03,  -4.63606715e-02,   8.19005221e-02]]],


       [[[  1.40042119e-02,  -2.87507102e-02,  -1.15399413e-01],
         [ -4.37535830e-02,   9.51907262e-02,   1.15011394e-01],
         [ -8.39040577e-02,  -7.27592483e-02,  -1.71873998e-02]]],


       [[[  2.22148336e-02,  -1.49293885e-01,  -1.41439304e-01],
         [ -1.42387360e-01,  -8.72359425e-02,   5.23324348e-02],
         [ -1.29353208e-02,  -3.36085185e-02,  -1.30088881e-01]]],


       [[[ -9.27122682e-02,  -1.10739127e-01,  -4.57207598e-02],
         [ -8.58479440e-02,   6.58797547e-02,   6.43698946e-02],
     

((1, 3, 3), 32)