In [21]:
'''Trains a simple fully connected network on a small subset of MNIST.

Only the first 1000 training images are used for fitting; evaluation is
run on the 10000-image test set and on that same training subset.

NOTE(review): this header previously quoted "98.40% test accuracy after
20 epochs" and "2 seconds per epoch on a K520 GPU". Those figures appear
not to correspond to the settings in this cell (1000 training samples,
nb_epoch = 400, MSE loss) and should be re-measured before being quoted.
'''

from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils
from keras.regularizers import l2, activity_l2


# --- Experiment configuration ------------------------------------------------
batch_size = 512
nb_classes = 10
nb_epoch = 400

# Number of leading samples kept from each split. Defined once so that the
# image slices and the label slices below cannot get out of step.
nb_train_samples = 1000
nb_test_samples = 10000

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into one 784-value row; -1 lets NumPy infer the number
# of samples instead of hard-coding the size of each split.
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# Convert to float32 and divide by 255 once, on the full arrays.
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255

# Subsets actually used for fitting and evaluation, taken as independent
# copies of the normalised arrays.
# NOTE: despite the "_10000" suffix the training subset holds
# nb_train_samples rows; the names are kept because later cells use them.
X_train_10000 = X_train[:nb_train_samples].copy()
X_test_10000 = X_test[:nb_test_samples].copy()

print(X_train_10000.shape[0], 'train samples')
print(X_test_10000.shape[0], 'test samples')

# convert class vectors to binary class matrices (same slices as the images)
Y_train = np_utils.to_categorical(y_train[:nb_train_samples], nb_classes)
Y_test = np_utils.to_categorical(y_test[:nb_test_samples], nb_classes)

# Three Dense layers; the first one carries an l2 regularizer whose
# coefficient is 0 for this run.
FC0 = Dense(512, W_regularizer=l2(0), input_shape=(784,))
FC1 = Dense(512)
FC2 = Dense(10)

model = Sequential()
model.add(FC0)
model.add(Activation('relu'))
model.add(FC1)
model.add(Activation('relu'))
model.add(FC2)
model.add(Activation('softmax'))

model.summary()

# NOTE(review): 'mse' on a softmax output is an unusual choice for
# classification ('categorical_crossentropy' is the common one). It is left
# unchanged so results stay comparable with the runs recorded in this notebook.
model.compile(loss='mse',
              optimizer='adadelta',
              metrics=['accuracy'])

history = model.fit(X_train_10000, Y_train,
                    batch_size=batch_size, nb_epoch=nb_epoch,
                    verbose=0, validation_data=(X_test_10000, Y_test))

# score[0] is the loss and score[1] the accuracy metric requested in compile().
score = model.evaluate(X_test_10000, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

score = model.evaluate(X_train_10000, Y_train, verbose=0)
print('Train score:', score[0])
print('Train accuracy:', score[1])

1000 train samples
10000 test samples
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_52 (Dense)                 (None, 512)           401920      dense_input_18[0][0]             
____________________________________________________________________________________________________
activation_52 (Activation)       (None, 512)           0           dense_52[0][0]                   
____________________________________________________________________________________________________
dense_53 (Dense)                 (None, 512)           262656      activation_52[0][0]              
____________________________________________________________________________________________________
activation_53 (Activation)       (None, 512)           0           dense_53[0][0]                   
_____________________________________________________

# Methodology
 1. Train the MLP network with l2 = 0 and record the loss $L$.
 2. Save the weights.
 3. Alter one particular weight.
 4. Load the weights back into the network.
 5. Feed the input with training disabled (training = false).
 6. Get $ \frac{\partial L}{\partial w_{ij}} $.
 7. Calculate $ \lambda = -\frac{1}{2}\frac{1}{\sum_{j}^{nw}\sum_{i}^{nw} W_{ij}} $.
 8. Use this $\lambda$ as the l2 regularization strength and measure the accuracy before and after.

## Recording result

### 3 Layers 512 512 10 l2 on the last layer only
 - epoch=3, l2=0, acc=0.9734,0.9734
 - epoch=3, l2=100, acc=0.9657,0.9657
 
### 2 Layers 512 10 l2 on the first layer
 - epoch=3, l2=100, acc= 0.9631
 - epoch=3, l2=0, acc= 0.9636
 - epoch=3, l2=0.00001, acc= 0.9638
 
### 2 Layers 4096 10 L2 on the first layer
 - ep=3, l2 = 0.00001, Test accuracy: 0.9748
 - ep=3, l2 = 10000, Test accuracy: 0.9597, 0.9597

In [23]:
# Grab the weight lists of the layers at indices 0, 2 and 4 of `model`
# (the Dense layers, assuming `model` is the Sequential stack built in the
# training cell, where activations sit at the odd indices).
l0_w, l2_w, l4_w = (
    model.layers[layer_idx].get_weights() for layer_idx in (0, 2, 4)
)

# First entry of the layer-0 weight list. This is a reference to the same
# object as l0_w[0], not a copy.
l0_ww = l0_w[0]

In [30]:
# Show the layout of the first Dense kernel before editing it.
print(l0_ww.shape)

# Zero every weight in row 0 of the kernel, i.e. all connections leaving
# input unit 0. Assigning a scalar broadcasts across the row, so the layer
# width no longer has to be hard-coded here.
# l0_w[0] and l0_ww are the same array object, so l0_ww reflects this too.
# NOTE(review): the methodology notes talk about altering a single weight,
# whereas this zeroes a whole row -- confirm that is intended.
l0_w[0][0, :] = 0

(784, 512)


In [None]:
# Restore the model's parameters from a weights file on disk.
# NOTE(review): no cell shown in this notebook writes this file (e.g. with
# model.save_weights) -- confirm it exists before running this cell.
weights_file = 'my_model_weights.h5'
model.load_weights(weights_file)

In [33]:
# Rebuild the same three-Dense-layer stack, this time with every Dense layer
# marked trainable=False, then restore the saved (and edited) weight lists.
FC0 = Dense(512, W_regularizer=l2(0), input_shape=(784,), trainable=False)
FC1 = Dense(512, trainable=False)
FC2 = Dense(10, trainable=False)

model = Sequential()
for stacked_layer in (FC0, Activation('relu'),
                      FC1, Activation('relu'),
                      FC2, Activation('softmax')):
    model.add(stacked_layer)

# Dense layers sit at the even indices; activations occupy the odd ones.
for layer_idx, saved_weights in ((0, l0_w), (2, l2_w), (4, l4_w)):
    model.layers[layer_idx].set_weights(saved_weights)

model.summary()

model.compile(metrics=['accuracy'], optimizer='adadelta', loss='mse')

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_58 (Dense)                 (None, 512)           401920      dense_input_20[0][0]             
____________________________________________________________________________________________________
activation_58 (Activation)       (None, 512)           0           dense_58[0][0]                   
____________________________________________________________________________________________________
dense_59 (Dense)                 (None, 512)           262656      activation_58[0][0]              
____________________________________________________________________________________________________
activation_59 (Activation)       (None, 512)           0           dense_59[0][0]                   
___________________________________________________________________________________________

In [34]:
# Run fit() on the current `model` with the same subsets and settings as the
# baseline run, keeping the returned history object.
# NOTE(review): if `model` is the stack built with trainable=False on every
# Dense layer, this call is presumably not expected to change any weights --
# confirm whether the fit is needed before the evaluation below.
history = model.fit(
    X_train_10000, Y_train,
    validation_data=(X_test_10000, Y_test),
    batch_size=batch_size,
    nb_epoch=nb_epoch,
    verbose=0
)

# Report loss (score[0]) and accuracy (score[1]) on the test subset first,
# then on the training subset; `score` ends up holding the training result.
for score_label, accuracy_label, inputs, targets in (
        ('Test score:', 'Test accuracy:', X_test_10000, Y_test),
        ('Train score:', 'Train accuracy:', X_train_10000, Y_train)):
    score = model.evaluate(inputs, targets, verbose=0)
    print(score_label, score[0])
    print(accuracy_label, score[1])

Test score: 0.0179820289901
Test accuracy: 0.8806
Train score: 0.00241962236166
Train accuracy: 0.988
