In [1]:
# GPU setup: pin this process to GPU 0 and cap its share of GPU memory.
import os

# CUDA_VISIBLE_DEVICES is only honoured if it is set before CUDA is initialised,
# so export it before TensorFlow is imported and the session below is created.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

config = tf.ConfigProto()
# Allow this process to claim at most 40% of the GPU's memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.4
# Index is relative to the devices left visible by CUDA_VISIBLE_DEVICES.
config.gpu_options.visible_device_list = "0"
# Register the configured session as the one used by the Keras backend.
set_session(tf.Session(config=config))

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
import keras
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from .models import AttentionResNetCifar10

In [3]:
# Fetch the CIFAR-10 train/test splits, then one-hot encode both label arrays.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train, y_test = (to_categorical(labels) for labels in (y_train, y_test))

In [4]:
# Training generator: featurewise normalization plus random augmentation
# (rotation, shifts, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
    featurewise_std_normalization=True,
    featurewise_center=True,
)
# Compute the statistics needed for featurewise normalization from the training images.
train_datagen.fit(x_train)

# Validation generator: featurewise normalization only, no augmentation.
val_datagen = ImageDataGenerator(
    featurewise_std_normalization=True,
    featurewise_center=True,
)
# Also fitted on x_train, so validation images are normalized with training-set statistics.
val_datagen.fit(x_train)

In [5]:
# Instantiate the attention ResNet with one output per CIFAR-10 class.
NUM_CLASSES = 10
model = AttentionResNetCifar10(n_classes=NUM_CLASSES)

In [None]:
# Convert the model to a dot graph, render it to SVG with Graphviz `dot`,
# and display the result inline as the cell output.
model_graph = model_to_dot(model)
SVG(model_graph.create(prog='dot', format='svg'))

In [6]:
# Prepare useful callbacks; both watch validation accuracy ('val_acc').
callbacks = [
    # Scale the learning rate by 0.2 when 'val_acc' plateaus (patience of 7 epochs),
    # never going below 1e-6.
    ReduceLROnPlateau(
        monitor='val_acc',
        factor=0.2,
        patience=7,
        min_lr=1e-6,
        epsilon=0.01,
        verbose=1,
    ),
    # Stop training when 'val_acc' has not improved for 15 epochs.
    EarlyStopping(
        monitor='val_acc',
        min_delta=0,
        patience=15,
        verbose=1,
    ),
]
# Keep the individual handles available under their original names.
lr_reducer, early_stopper = callbacks

In [7]:
# Configure training: Adam optimizer (learning rate 1e-4), categorical
# cross-entropy loss for the one-hot labels, and accuracy as the metric.
adam = keras.optimizers.Adam(lr=0.0001)
model.compile(
    adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on augmented batches generated on the fly; the test split, passed
# through the non-augmenting generator, serves as validation data.
batch_size = 32

train_flow = train_datagen.flow(x_train, y_train, batch_size=batch_size)
val_flow = val_datagen.flow(x_test, y_test, batch_size=batch_size)

model.fit_generator(
    train_flow,
    steps_per_epoch=len(x_train) // batch_size,
    epochs=200,
    validation_data=val_flow,
    validation_steps=len(x_test) // batch_size,
    callbacks=callbacks,
    initial_epoch=0,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200

Epoch 00036: ReduceLROnPlateau reducing learning rate to 1.9999999494757503e-05.
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200

Epoch 00047: ReduceLROnPlateau reducing learning rate to 3.999999898951501e-06.
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200

Epoch 00054: ReduceLROnPlateau reducing learning rate to 1e-06.
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
E

<keras.callbacks.History at 0x7f39db525e48>

In [13]:
# Evaluate on the test split using the non-augmenting generator.
# `steps` must be an integer number of batches: use ceiling division so the
# final partial batch is included and every test sample is covered.
eval_batch_size = 32
eval_steps = -(-len(x_test) // eval_batch_size)  # ceil(len(x_test) / eval_batch_size)
model.evaluate_generator(
    val_datagen.flow(x_test, y_test, batch_size=eval_batch_size),
    steps=eval_steps,
    use_multiprocessing=True,
)

[0.3466754733324051, 0.8917]