# CNN transfer learning - Keras+TensorFlow

This is for CNN models transferred from pretrained model, using Keras based on TensorFlow. First, some preparation work.

In [1]:
from keras.layers import Conv2D, MaxPooling2D, Input, Dense, Flatten, Activation, add, Lambda
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import GlobalAveragePooling2D
from keras.optimizers import RMSprop
from keras.backend import tf as ktf
from keras.models import Model, Sequential, load_model
from keras.callbacks import ModelCheckpoint
from keras.applications.resnet50 import ResNet50
from lib.data_utils import get_MNIST_data

Using TensorFlow backend.


Read the MNIST data. Notice that we assume that it's 'kaggle-DigitRecognizer/data/train.csv', and we use helper function to read into a dictionary.

In [2]:
# by default, there would be 41000 training data, 1000 test data and 1000 validation data(within traning set)
data = get_MNIST_data(fit=True)

# see if we get the data correctly
print('image size: ', data['X_train'].shape)

image size:  (41000, 28, 28, 3)


## Freeze-weights transfer

We would use ResNet50 provided in Keras. In this section, the pretrained model would all be freezed, and new output layer would be attatched to the model, and only this output layer would be trained.

In [None]:
# build the model
# preprocess to (28,28,3), then build a resize layer using tf.resize_images() to (224,224,3) as input
inputs = Input(shape=(28,28,3))
inputs_resize = Lambda(lambda img: ktf.image.resize_images(img, (224,224)))(inputs) # resize layer
resnet50 = ResNet50(include_top=False, input_tensor=inputs_resize, input_shape=(224,224,3), pooling='avg')
x = resnet50.output
#x = Dense(units=1024, activation='relu')(x)
predictions = Dense(units=10, activation='softmax')(x)

# connect the model
freezemodel = Model(inputs=inputs, outputs=predictions)
#freezemodel.summary()

# freeze all ResNet50 layers
for layer in resnet50.layers:
    layer.trainable = False

# set the loss and optimizer
freezemodel.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# fit the model
checkpoint = ModelCheckpoint('../models/freezeResNet_{epoch:02d}-{loss:.2f}.h5',
                             monitor='loss',
                             save_best_only=True)
freezemodel.fit(data['X_train'], data['y_train'].reshape(-1,1),
                batch_size=16, epochs=10, callbacks=[checkpoint], initial_epoch=1)

# test the model and see accuracy
score = freezemodel.evaluate(data['X_test'], data['y_test'].reshape(-1, 1), batch_size=32)
print(score)

In [None]:
# save the model: 0.96
freezemodel.save('ResNet50_freeze.h5')

In [None]:
# continue the model training
freezemodel = load_model('../models/ResNet50_freeze.h5', custom_objects={'ktf': ktf})

# set the loss and optimizer
rmsprop = RMSprop(lr=0.0001)
freezemodel.compile(optimizer=rmsprop, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# fit the model
checkpoint = ModelCheckpoint('../models/freezeResNet_{epoch:02d}-{loss:.2f}.h5',
                             monitor='loss',
                             save_best_only=True)
freezemodel.fit(data['X_train'], data['y_train'].reshape(-1, 1),
                batch_size=16, epochs=10, callbacks=[checkpoint], initial_epoch=4)

## Fine-tune transfer

In [3]:
# build the model
# preprocess to (28,28,3), then build a resize layer using tf.resize_images() to (224,224,3) as input
inputs = Input(shape=(28,28,3))
inputs_resize = Lambda(lambda img: ktf.image.resize_images(img, (224,224)))(inputs) # resize layer
resnet50 = ResNet50(include_top=False, input_tensor=inputs_resize, input_shape=(224,224,3), pooling='avg')
x = resnet50.output
#x = Dense(units=1024, activation='relu')(x)
predictions = Dense(units=10, activation='softmax')(x)

# connect the model
tunemodel = Model(inputs=inputs, outputs=predictions)
#freezemodel.summary()

# set the loss and optimizer
rmsprop = RMSprop(lr=0.0001)
tunemodel.compile(optimizer=rmsprop, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# fit the model
checkpoint = ModelCheckpoint('../models/tuneResNet_{epoch:02d}-{loss:.2f}.h5',
                             monitor='loss',
                             save_best_only=True)
tunemodel.fit(data['X_train'], data['y_train'].reshape(-1, 1),
                batch_size=16, epochs=10, callbacks=[checkpoint], initial_epoch=0)

# test the model and see accuracy
score = tunemodel.evaluate(data['X_test'], data['y_test'].reshape(-1, 1), batch_size=32)
print(score)

Epoch 1/10


ResourceExhaustedError: OOM when allocating tensor with shape[64]
	 [[Node: bn2a_branch2a/AssignMovingAvg/sub = Sub[T=DT_FLOAT, _class=["loc:@bn2a_branch2a/moving_mean"], _device="/job:localhost/replica:0/task:0/gpu:0"](bn2a_branch2a/moving_mean/read, bn2a_branch2a/moments/Squeeze)]]
	 [[Node: loss/mul/_2477 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_29065_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'bn2a_branch2a/AssignMovingAvg/sub', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 592, in launch_instance
    app.start()
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 405, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 260, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 212, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 370, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 175, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2902, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3006, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3066, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-fbde49e0578f>", line 5, in <module>
    resnet50 = ResNet50(include_top=False, input_tensor=inputs_resize, input_shape=(224,224,3), pooling='avg')
  File "/usr/local/lib/python3.5/dist-packages/keras/applications/resnet50.py", line 212, in ResNet50
    x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
  File "/usr/local/lib/python3.5/dist-packages/keras/applications/resnet50.py", line 104, in conv_block
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/topology.py", line 602, in __call__
    output = self.call(inputs, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/keras/layers/normalization.py", line 181, in call
    self.momentum),
  File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 924, in moving_average_update
    x, value, momentum, zero_debias=False)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/moving_averages.py", line 72, in assign_moving_average
    update_delta = (variable - value) * decay
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/variables.py", line 705, in _run_op
    return getattr(ops.Tensor, operator)(a._AsTensor(), *args)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/math_ops.py", line 865, in binary_op_wrapper
    return func(x, y, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 2629, in _sub
    result = _op_def_lib.apply_op("Sub", x=x, y=y, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[64]
	 [[Node: bn2a_branch2a/AssignMovingAvg/sub = Sub[T=DT_FLOAT, _class=["loc:@bn2a_branch2a/moving_mean"], _device="/job:localhost/replica:0/task:0/gpu:0"](bn2a_branch2a/moving_mean/read, bn2a_branch2a/moments/Squeeze)]]
	 [[Node: loss/mul/_2477 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_29065_loss/mul", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


# Create submissions

Load the saved trained models and produce predictions for submission on Kaggle.

In [None]:
from lib.data_utils import create_submission
from keras.models import load_model

# for freeze ResNet50 model (3 epochs)
simple_CNN = load_model('../models/freezeResNet_03-0.09.h5', custom_objects={'ktf': ktf})
print('Load model successfully.')
create_submission(simple_CNN, '../data/test.csv', '../submission/submission_freezeResNet_03.csv', 16, fit=True)