In [2]:
import os; os.environ["KERAS_BACKEND"] = "tensorflow";
from keras import backend as K

# hack to make the image ordering from theano still usable
# https://stackoverflow.com/questions/41651628/negative-dimension-size-caused-by-subtracting-3-from-1-for-conv2d
K.set_image_dim_ordering('th')

import utils2; reload(utils2)
from utils2 import *

import tensorflow as tf
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

K.set_session(sess)

# set some config - might be able to just remove later
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

In [3]:
config

gpu_options {
  allow_growth: true
}

In [5]:
# util funtion to get avaiable GPUs from tensorflow point of view
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
get_available_gpus()

[u'/device:GPU:0']

In [6]:
# this placeholder will contain our input digits, as flat vectors
img = tf.placeholder(tf.float32, shape=(None, 784))

In [7]:
from keras.layers import Dense

# Keras layers can be called on TensorFlow tensors:
x = Dense(128, activation='relu')(img)  # fully-connected layer with 128 units and ReLU activation
x = Dense(128, activation='relu')(x)
preds = Dense(10, activation='softmax')(x)  # output layer with 10 units and a softmax activation

In [8]:
labels = tf.placeholder(tf.float32, shape=(None, 10))

from keras.objectives import categorical_crossentropy
loss = tf.reduce_mean(categorical_crossentropy(labels, preds))

In [9]:
from tensorflow.examples.tutorials.mnist import input_data
mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)

train_step = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

sess.run(tf.global_variables_initializer())  # - required for Tensorflow variable initialization

with sess.as_default():
    for i in range(100):
        batch = mnist_data.train.next_batch(50)
        train_step.run(feed_dict={img: batch[0],
                                  labels: batch[1]})

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [10]:
mnist_data = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [11]:
# SETUP 
batch_size = 256 # MNIST data is small, so my computer can surely handle this
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [12]:
# MNIST contains grayscale images, so we need to expand a dim
# so that we got data in shape: (n_samples, n_channels, h, w)
X_test = np.expand_dims(X_test, 1) # second dimension
X_train = np.expand_dims(X_train, 1) # second dimension

In [13]:
X_train.shape, X_test.shape, X_train.dtype

((60000, 1, 28, 28), (10000, 1, 28, 28), dtype('uint8'))

In [14]:
y_train[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

In [15]:
y_train = onehot(y_train)
y_test = onehot(y_test)

In [16]:
# normalize the input
print(X_train.shape)
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)


(60000, 1, 28, 28)


In [17]:
def norm_input(x): return (x - mean_px)/std_px
# make keras' numpyArrayIterator
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)

# new keras params:
# steps_per_epoch: Total number of steps (batches of samples) 
# before declaring one epoch finished and starting the next
# epoch.  similar for validation_steps
steps_per_epoch = int(np.ceil(batches.n/batch_size))
validation_steps = int(np.ceil(test_batches.n/batch_size))

(batches.n, test_batches.n, steps_per_epoch, validation_steps) # show dims

(60000, 10000, 234, 39)

In [18]:
# BATCHNORM + DROPOUT + DATA AUGMENTATION
# Combine all the good stuff so far
def get_model_bn_do():
    """WIll work if we set dim ordering correctly"""
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Conv2D(32, (3, 3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        BatchNormalization(axis=1),
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy',
                 metrics=['accuracy'])
    return model

In [19]:
model_bn_do = get_model_bn_do()

In [20]:
# should ALWAYS use the default learning rates for a couple of epoch
model_bn_do.fit_generator(batches, epochs=1,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)


Epoch 1/1


<keras.callbacks.History at 0x7f3024ffe490>

In [21]:
model_bn_do.optimizer.lr = 0.1 # shift-M to merge in command mode
model_bn_do.fit_generator(batches, epochs=4,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f3024345cd0>

In [22]:
model_bn_do.optimizer.lr = 0.01 # shift-M to merge in command mode
model_bn_do.fit_generator(batches, epochs=8,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)


Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f30241bc110>

In [23]:
model_bn_do.optimizer.lr = 0.001 # shift-M to merge in command mode
model_bn_do.fit_generator(batches, epochs=12,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)


Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7f3024347310>

In [24]:
# ENSEMBLE ACCROSS ALL THESE MODELS
# TODO: MAKE 6 MODELS LIKE THIS and average them
def fit_model():
    model_bn_do = get_model_bn_do()
    model_bn_do.fit_generator(batches, epochs=8,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)
    
    model_bn_do.optimizer.lr = 0.1 
    model_bn_do.fit_generator(batches, epochs=4,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)
    
    model_bn_do.optimizer.lr = 0.01 
    model_bn_do.fit_generator(batches, epochs=8,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)
    
    model_bn_do.optimizer.lr = 0.001 
    model_bn_do.fit_generator(batches, epochs=8,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps, 
                validation_data=test_batches)
    return model_bn_do

In [25]:
models = [fit_model() for i in range(6)]

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8


Epoch 8/8
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8


Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


In [26]:
HOME_DIR = os.getenv('HOME')
path = HOME_DIR + '/data/mnist/'
model_path = path + 'models/'

In [27]:
for i, m in enumerate(models):
    m.save_weights(model_path+'cnn-mnist-'+str(i)+'.pkl')

In [29]:
evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])



In [30]:
evals.mean(axis=0)


array([0.03120511, 0.99301667])

In [31]:
all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])


In [32]:
all_preds.shape
avg_preds = all_preds.mean(axis=0).astype(np.float64)
avg_preds.shape, y_test.shape


((10000, 10), (10000, 10))

In [37]:
# keras metric function takes (y_true, y_pred)
# it returns array of size [n_samples] with interger values in [0, 1]
# need to do eval() and mean() to get the accuracy over the whole set
# need a tensorflow session to make this work:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ensemble_acc = keras.metrics.categorical_accuracy(
        y_test, avg_preds).eval().mean()

In [38]:
print(ensemble_acc.shape, avg_preds.shape, y_test.shape)
print(y_test[0,:], avg_preds[0,:])
print(y_test.dtype, avg_preds.dtype)
print(ensemble_acc)
# https://stackoverflow.com/questions/40768313/tensorflow-eval-without-session-or-move-variable-to-an-other-session

((), (10000, 10), (10000, 10))
(array([0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]), array([1.42073757e-11, 5.86463586e-11, 1.51004972e-11, 6.81085813e-11,
       3.10937075e-11, 8.37415803e-14, 6.46788641e-15, 1.00000000e+00,
       1.32774422e-14, 4.71349765e-11]))
(dtype('float64'), dtype('float64'))
0.9952


In [None]:
# TODO: TRAIN FOR LONGER TIME AND PARAM SEARCH