In [2]:
import numpy as np
from keras import models, optimizers, utils, initializers, losses
from keras.datasets import mnist
from keras.layers import Dense
import keras.backend as K

## New Model for binary classification

### Load Data
Load the MNIST data and turn it into a binary classification problem <br />
Category 0: digits 0 to 4 <br />
Category 1: digits 5 to 9 <br />

In [9]:
# Fetch the MNIST train/test splits; y_* hold the digit classes.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten each image into a 784-element row and divide by 255 in one step.
x_train = x_train.reshape(60000, 784) / 255
x_test = x_test.reshape(10000, 784) / 255

# Binary targets as floats: 1.0 where the digit is greater than 4, otherwise 0.0.
label_train = np.where(y_train > 4, 1.0, 0.0)
label_test = np.where(y_test > 4, 1.0, 0.0)

print(f"Train matrix:  {x_train.shape}")
print(f"Train labels:  {y_train.shape}")
print(f"Test data:  {x_test.shape}")
print(f"Test labels:  {y_test.shape}")

# The shuffle is unseeded, so the ordering is written to disk to keep a record
# of which training rows each later subset contains.
sample = np.random.permutation(60000)
np.save("binary_mnist_samples.npy", sample)

Train matrix:  (60000, 784)
Train labels:  (60000,)
Test data:  (10000, 784)
Test labels:  (10000,)


## Create network
Neural network with 3 hidden layers, each with 600 nodes <br/>
Weights initialized to truncated normal, mean=0, stddev=0.04 (values are truncated at two standard deviations, i.e. between -0.08 and 0.08) <br/>
The biases are initialized to 0.1 for the first layer, 0 for the others <br/>
The activation functions are ReLU, except the last layer, which is a sigmoid <br/>
The loss used is the logistic loss (Keras's built-in `binary_crossentropy`) <br/>
Training is performed for 120 epochs on each training subset

In [10]:
# Fully connected binary classifier: three ReLU layers of 600 units followed by
# a single sigmoid output. Every kernel is drawn from its own truncated normal
# (mean 0, stddev 0.04); only the first layer starts with a non-zero bias (0.1).
mdl = models.Sequential([
    Dense(
        units=600,
        input_dim=784,
        activation='relu',
        kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.04),
        bias_initializer=initializers.Constant(value=0.1),
    ),
    Dense(
        units=600,
        activation='relu',
        kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.04),
        bias_initializer=initializers.Constant(value=0),
    ),
    Dense(
        units=600,
        activation='relu',
        kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.04),
        bias_initializer=initializers.Constant(value=0),
    ),
    Dense(
        units=1,
        activation='sigmoid',
        kernel_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.04),
        bias_initializer=initializers.Constant(value=0),
    ),
])

# SGD takes its two arguments positionally: 0.01 and 0.9
# (learning rate and momentum in the Keras signature).
opt = optimizers.SGD(0.01, 0.9)

mdl.compile(optimizer=opt, loss="binary_crossentropy")

### Model Training and Evaluation
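We train the same model on progressively larger subsets of the shuffled training data (500, 3000, 10000 and 30000 samples). After each stage we evaluate it on the test set and save a checkpoint.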

In [11]:
# Stage 1: train on the first 500 rows selected by the saved permutation.
x_ckpt_1 = x_train[sample[:500]]
label_ckpt_1 = label_train[sample[:500]]
history = mdl.fit(x_ckpt_1, label_ckpt_1,
          batch_size=100, epochs=120,
          verbose=2)

# Evaluate on the test set: flatten the model output to 1-D so it lines up
# with label_test, then round each value to a hard 0/1 prediction.
y_pred = mdl.predict(x_test).flatten()
print("Model Output: ", y_pred)
print("Label: ", label_test)
label_pred = np.round(y_pred)
# Accuracy is the fraction of predictions that MATCH the labels; counting
# mismatches (as this cell previously did) reports the error rate instead.
acc = np.mean(label_pred == label_test)
print("Test accuracy: ", acc)
mdl.save("binary_mnist_500.h5")

Epoch 1/120
 - 0s - loss: 0.6944
Epoch 2/120
 - 0s - loss: 0.6857
Epoch 3/120
 - 0s - loss: 0.6742
Epoch 4/120
 - 0s - loss: 0.6607
Epoch 5/120
 - 0s - loss: 0.6444
Epoch 6/120
 - 0s - loss: 0.6254
Epoch 7/120
 - 0s - loss: 0.5998
Epoch 8/120
 - 0s - loss: 0.5720
Epoch 9/120
 - 0s - loss: 0.5373
Epoch 10/120
 - 0s - loss: 0.4998
Epoch 11/120
 - 0s - loss: 0.4626
Epoch 12/120
 - 0s - loss: 0.4298
Epoch 13/120
 - 0s - loss: 0.3988
Epoch 14/120
 - 0s - loss: 0.3686
Epoch 15/120
 - 0s - loss: 0.3401
Epoch 16/120
 - 0s - loss: 0.3161
Epoch 17/120
 - 0s - loss: 0.2882
Epoch 18/120
 - 0s - loss: 0.2655
Epoch 19/120
 - 0s - loss: 0.2475
Epoch 20/120
 - 0s - loss: 0.2266
Epoch 21/120
 - 0s - loss: 0.2086
Epoch 22/120
 - 0s - loss: 0.1975
Epoch 23/120
 - 0s - loss: 0.1770
Epoch 24/120
 - 0s - loss: 0.1555
Epoch 25/120
 - 0s - loss: 0.1453
Epoch 26/120
 - 0s - loss: 0.1346
Epoch 27/120
 - 0s - loss: 0.1182
Epoch 28/120
 - 0s - loss: 0.1081
Epoch 29/120
 - 0s - loss: 0.0938
Epoch 30/120
 - 0s - lo

In [12]:
# Stage 2: train on the first 3000 rows selected by the saved permutation.
# mdl is not re-created in this cell, so fitting continues from its current weights.
x_ckpt_2 = x_train[sample[:3000]]
label_ckpt_2 = label_train[sample[:3000]]
history = mdl.fit(x_ckpt_2, label_ckpt_2,
          batch_size=100, epochs=120,
          verbose=2)

# Evaluate on the test set: flatten the model output to 1-D so it lines up
# with label_test, then round each value to a hard 0/1 prediction.
y_pred = mdl.predict(x_test).flatten()
print("Model Output: ", y_pred)
print("Label: ", label_test)
label_pred = np.round(y_pred)
# Accuracy is the fraction of predictions that MATCH the labels; counting
# mismatches (as this cell previously did) reports the error rate instead.
acc = np.mean(label_pred == label_test)
print("Test accuracy: ", acc)
mdl.save("binary_mnist_3000.h5")

Epoch 1/120
 - 0s - loss: 0.3504
Epoch 2/120
 - 0s - loss: 0.1478
Epoch 3/120
 - 0s - loss: 0.0915
Epoch 4/120
 - 0s - loss: 0.0699
Epoch 5/120
 - 0s - loss: 0.0501
Epoch 6/120
 - 0s - loss: 0.0382
Epoch 7/120
 - 0s - loss: 0.0284
Epoch 8/120
 - 0s - loss: 0.0220
Epoch 9/120
 - 0s - loss: 0.0188
Epoch 10/120
 - 0s - loss: 0.0129
Epoch 11/120
 - 0s - loss: 0.0098
Epoch 12/120
 - 0s - loss: 0.0085
Epoch 13/120
 - 0s - loss: 0.0067
Epoch 14/120
 - 0s - loss: 0.0051
Epoch 15/120
 - 0s - loss: 0.0046
Epoch 16/120
 - 0s - loss: 0.0035
Epoch 17/120
 - 0s - loss: 0.0031
Epoch 18/120
 - 0s - loss: 0.0027
Epoch 19/120
 - 0s - loss: 0.0025
Epoch 20/120
 - 0s - loss: 0.0023
Epoch 21/120
 - 0s - loss: 0.0022
Epoch 22/120
 - 0s - loss: 0.0019
Epoch 23/120
 - 0s - loss: 0.0017
Epoch 24/120
 - 0s - loss: 0.0016
Epoch 25/120
 - 0s - loss: 0.0015
Epoch 26/120
 - 0s - loss: 0.0014
Epoch 27/120
 - 0s - loss: 0.0013
Epoch 28/120
 - 0s - loss: 0.0012
Epoch 29/120
 - 0s - loss: 0.0011
Epoch 30/120
 - 0s - lo

In [13]:
# Stage 3: train on the first 10000 rows selected by the saved permutation.
# mdl is not re-created in this cell, so fitting continues from its current weights.
x_ckpt_3 = x_train[sample[:10000]]
label_ckpt_3 = label_train[sample[:10000]]
history = mdl.fit(x_ckpt_3, label_ckpt_3,
          batch_size=100, epochs=120,
          verbose=2)

# Evaluate on the test set: flatten the model output to 1-D so it lines up
# with label_test, then round each value to a hard 0/1 prediction.
y_pred = mdl.predict(x_test).flatten()
print("Model Output: ", y_pred)
print("Label: ", label_test)
label_pred = np.round(y_pred)
# Accuracy is the fraction of predictions that MATCH the labels; counting
# mismatches (as this cell previously did) reports the error rate instead.
acc = np.mean(label_pred == label_test)
print("Test accuracy: ", acc)
mdl.save("binary_mnist_10000.h5")

Epoch 1/120
 - 1s - loss: 0.1516
Epoch 2/120
 - 1s - loss: 0.0631
Epoch 3/120
 - 1s - loss: 0.0418
Epoch 4/120
 - 1s - loss: 0.0265
Epoch 5/120
 - 1s - loss: 0.0189
Epoch 6/120
 - 1s - loss: 0.0135
Epoch 7/120
 - 1s - loss: 0.0067
Epoch 8/120
 - 1s - loss: 0.0040
Epoch 9/120
 - 1s - loss: 0.0027
Epoch 10/120
 - 1s - loss: 0.0019
Epoch 11/120
 - 1s - loss: 0.0013
Epoch 12/120
 - 1s - loss: 0.0011
Epoch 13/120
 - 1s - loss: 9.2671e-04
Epoch 14/120
 - 1s - loss: 8.2012e-04
Epoch 15/120
 - 1s - loss: 7.4079e-04
Epoch 16/120
 - 1s - loss: 6.6281e-04
Epoch 17/120
 - 1s - loss: 6.1674e-04
Epoch 18/120
 - 1s - loss: 5.5867e-04
Epoch 19/120
 - 1s - loss: 5.1738e-04
Epoch 20/120
 - 1s - loss: 4.7805e-04
Epoch 21/120
 - 1s - loss: 4.4209e-04
Epoch 22/120
 - 1s - loss: 4.1632e-04
Epoch 23/120
 - 1s - loss: 3.9239e-04
Epoch 24/120
 - 1s - loss: 3.7248e-04
Epoch 25/120
 - 1s - loss: 3.4881e-04
Epoch 26/120
 - 1s - loss: 3.3315e-04
Epoch 27/120
 - 1s - loss: 3.1487e-04
Epoch 28/120
 - 1s - loss: 2.98

In [14]:
# Stage 4: train on the first 30000 rows selected by the saved permutation.
# mdl is not re-created in this cell, so fitting continues from its current weights.
x_ckpt_4 = x_train[sample[:30000]]
label_ckpt_4 = label_train[sample[:30000]]
history = mdl.fit(x_ckpt_4, label_ckpt_4,
          batch_size=100, epochs=120,
          verbose=2)

# Evaluate on the test set: flatten the model output to 1-D so it lines up
# with label_test, then round each value to a hard 0/1 prediction.
y_pred = mdl.predict(x_test).flatten()
print("Model Output: ", y_pred)
print("Label: ", label_test)
label_pred = np.round(y_pred)
# Accuracy is the fraction of predictions that MATCH the labels; counting
# mismatches (as this cell previously did) reports the error rate instead.
acc = np.mean(label_pred == label_test)
print("Test accuracy: ", acc)
mdl.save("binary_mnist_30000.h5")

Epoch 1/120
 - 2s - loss: 0.0894
Epoch 2/120
 - 2s - loss: 0.0404
Epoch 3/120
 - 2s - loss: 0.0272
Epoch 4/120
 - 2s - loss: 0.0184
Epoch 5/120
 - 2s - loss: 0.0124
Epoch 6/120
 - 2s - loss: 0.0097
Epoch 7/120
 - 2s - loss: 0.0053
Epoch 8/120
 - 2s - loss: 0.0032
Epoch 9/120
 - 2s - loss: 0.0016
Epoch 10/120
 - 2s - loss: 6.7085e-04
Epoch 11/120
 - 2s - loss: 4.4166e-04
Epoch 12/120
 - 2s - loss: 3.5444e-04
Epoch 13/120
 - 2s - loss: 2.9696e-04
Epoch 14/120
 - 2s - loss: 2.5984e-04
Epoch 15/120
 - 2s - loss: 2.3256e-04
Epoch 16/120
 - 2s - loss: 2.0936e-04
Epoch 17/120
 - 2s - loss: 1.9185e-04
Epoch 18/120
 - 2s - loss: 1.7604e-04
Epoch 19/120
 - 2s - loss: 1.6223e-04
Epoch 20/120
 - 2s - loss: 1.5153e-04
Epoch 21/120
 - 2s - loss: 1.4122e-04
Epoch 22/120
 - 2s - loss: 1.3335e-04
Epoch 23/120
 - 2s - loss: 1.2483e-04
Epoch 24/120
 - 2s - loss: 1.1790e-04
Epoch 25/120
 - 2s - loss: 1.1188e-04
Epoch 26/120
 - 2s - loss: 1.0650e-04
Epoch 27/120
 - 2s - loss: 1.0117e-04
Epoch 28/120
 - 2s 