## Xception model training

### import libs

In [1]:
from keras.models import * 
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
from keras.utils.training_utils import multi_gpu_model
from keras import optimizers
from keras import regularizers
from multiprocessing import cpu_count
import tensorflow as tf

Using TensorFlow backend.


In [2]:
# Global configuration shared by every cell below.
nb_classes = 20  # size of the final softmax layer

# Half of the available cores feed the data generators.  Clamp to at least
# one: cpu_count() // 2 evaluates to 0 on a single-core machine, and Keras
# treats workers=0 as "run the generator on the main thread".
nb_cpus = max(1, cpu_count() // 2)
nb_gpus = 4  # replicas requested from multi_gpu_model when > 1

image_size = (299, 299)            # target_size handed to flow_from_directory
input_shape = image_size + (3,)    # network input: image_size plus 3 channels

train_path = "/home/cnn/Documents/batch6.1/cells_half299_cv2_white/train"
valid_path = "/home/cnn/Documents/batch6.1/cells_half299_cv2_white/valid"

### pretrain model

In [3]:
# Warm-up model: Xception backbone (frozen) plus a new classification head.
# The template model is created under a CPU device scope, which is the setup
# the Keras multi_gpu_model documentation recommends for a model that is
# replicated across GPUs afterwards.
with tf.device('/cpu:0'):
    input_tensor = Input(input_shape)
    # Xception-specific input scaling is baked into the graph, so the data
    # generators can feed raw pixel values.
    x = Lambda(xception.preprocess_input)(input_tensor)

    # weights=None: nothing is downloaded here; weights come from the
    # load_weights call at the end of this block.
    base_model = Xception(input_tensor=x, weights=None, include_top=False)

    # New head: separable conv -> batch norm -> global pooling -> dropout -> softmax.
    m_out = base_model.output
    m_out = SeparableConv2D(4096, kernel_size=3, strides=2)(m_out)
    m_out = BatchNormalization()(m_out)
    p_out = GlobalAveragePooling2D()(m_out)
    p_out = Dropout(0.5)(p_out)
    predictions = Dense(nb_classes, activation='softmax', name="predictions1")(p_out)

    # Freeze every layer of the backbone; only the head added above stays trainable.
    for layer in base_model.layers:
        layer.trainable = False

    model = Model(inputs=base_model.input, outputs=predictions)

    # by_name=True: weights are matched to layers by layer name.
    model.load_weights("weights_batch6_003.hdf5", by_name=True)

# Train through a multi-GPU replica when more than one GPU is configured.
# Otherwise train the template model directly; without this fallback
# `parallel_model` would be undefined and the compile call below would raise
# a NameError.
if nb_gpus > 1:
    parallel_model = multi_gpu_model(model, gpus=nb_gpus)
else:
    parallel_model = model

optimizer = optimizers.SGD(lr=0.01, momentum=0.9, decay=0.0003)
parallel_model.compile(optimizer=optimizer,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 299, 299, 3)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 299, 299, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 149, 149, 32) 864         lambda_1[0][0]                   
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 149, 149, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_con

In [4]:
# Hyper-parameters of the warm-up run (backbone frozen, see the model cell above).
batch_size, epochs = 64, 2

In [5]:
# Augmentation applied on the fly to training images only.
augmentation = dict(
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    brightness_range=[0.8, 1.2],
    horizontal_flip=True,
    vertical_flip=True,
)
# Options shared by the training and validation directory iterators.
flow_options = dict(target_size=image_size, shuffle=True, batch_size=batch_size)

img_gen_t = ImageDataGenerator(**augmentation)
train_generator = img_gen_t.flow_from_directory(train_path, **flow_options)

# Validation images are read without any augmentation.
img_gen_v = ImageDataGenerator()
valid_generator = img_gen_v.flow_from_directory(valid_path, **flow_options)

# One pass over each iterator per epoch: len(generator) is used as the
# number of batches for both training and validation.
parallel_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=len(train_generator),
    epochs=epochs,
    verbose=1,
    validation_data=valid_generator,
    validation_steps=len(valid_generator),
    workers=nb_cpus,
    use_multiprocessing=True,
)

# Persist through the template model (not the multi-GPU wrapper) so the file
# can be loaded back into a freshly built `model`.
model.save_weights("Xception_first_train.h5")

Found 456792 images belonging to 20 classes.
Found 12681 images belonging to 20 classes.
Epoch 1/2
Epoch 2/2


### train model

In [3]:
from keras import optimizers
from keras import regularizers


# Fine-tuning model: same architecture as the warm-up model, but no layer is
# frozen here, so the whole network is trainable.  Built under a CPU device
# scope, the setup the Keras multi_gpu_model documentation recommends for a
# template model.
with tf.device('/cpu:0'):
    input_tensor = Input(input_shape)
    # Xception-specific input scaling lives inside the graph.
    x = Lambda(xception.preprocess_input)(input_tensor)

    base_model = Xception(input_tensor=x, weights=None, include_top=False)

    # Classification head: separable conv -> batch norm -> global pooling
    # -> dropout -> softmax over nb_classes.
    m_out = base_model.output
    m_out = SeparableConv2D(4096, kernel_size=3, strides=2)(m_out)
    m_out = BatchNormalization()(m_out)
    p_out = GlobalAveragePooling2D()(m_out)
    p_out = Dropout(0.5)(p_out)
    predictions = Dense(nb_classes, activation='softmax', name="predictions1")(p_out)

    model = Model(inputs=base_model.input, outputs=predictions)

    # Start from the weights written at the end of the warm-up run.
    model.load_weights('Xception_first_train.h5')

# Train through a multi-GPU replica when more than one GPU is configured.
# Otherwise train the template model directly; without this fallback
# `parallel_model` would be undefined and the compile call below would raise
# a NameError.
if nb_gpus > 1:
    parallel_model = multi_gpu_model(model, gpus=nb_gpus)
else:
    parallel_model = model

# Half the warm-up learning rate (0.005 vs 0.01), same momentum and decay.
optimizer = optimizers.SGD(lr=0.005, momentum=0.9, decay=0.0003)
parallel_model.compile(optimizer=optimizer,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 299, 299, 3)  0                                            
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 299, 299, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 149, 149, 32) 864         lambda_1[0][0]                   
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 149, 149, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_con

In [4]:
# Disabled: 'Xception_first_train.h5' is already loaded inside the
# model-building cell above, so loading it again here is redundant.
# model.load_weights('Xception_first_train.h5')

In [5]:
# Hyper-parameters of the fine-tuning run; `epochs` is the final epoch index
# passed to fit_generator.
batch_size, epochs = 64, 150

In [None]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau, TensorBoard

class TemplateModelCheckpoint(ModelCheckpoint):
    """ModelCheckpoint that always saves a fixed template model.

    fit_generator hands every callback the model being trained, which here
    is the multi_gpu_model wrapper.  The Keras documentation says to save
    through the template model instead, so that the weights can be loaded
    back into a plain `model` (as the model-building cells of this notebook
    do).  This subclass ignores the model passed in by training and keeps
    pointing at the template.

    Args:
        template_model: model whose weights are written at each checkpoint.
        *args, **kwargs: forwarded unchanged to ModelCheckpoint.
    """

    def __init__(self, template_model, *args, **kwargs):
        super(TemplateModelCheckpoint, self).__init__(*args, **kwargs)
        self.template_model = template_model

    def set_model(self, model):
        # Deliberately discard `model` (the wrapper being trained).
        super(TemplateModelCheckpoint, self).set_model(self.template_model)


# Augmentation applied on the fly to training images only.
img_gen_t = ImageDataGenerator(rotation_range=30,
                               width_shift_range=0.1,
                               height_shift_range=0.1,
                               zoom_range=0.1,
                               brightness_range=[0.8, 1.2],
                               horizontal_flip=True,
                               vertical_flip=True)
train_generator = img_gen_t.flow_from_directory(train_path,
                                                target_size=image_size, shuffle=True, batch_size=batch_size)

# Validation images are read without augmentation.
img_gen_v = ImageDataGenerator()
valid_generator = img_gen_v.flow_from_directory(valid_path,
                                                target_size=image_size, shuffle=True, batch_size=batch_size)

# Write the template model's weights after every epoch (save_best_only=False);
# the file name records the epoch number and the validation loss.
checkpoint = TemplateModelCheckpoint(model,
                                     "weights_{epoch:03d}_{val_loss:.4f}.hdf5", monitor='val_loss', verbose=1,
                                     save_best_only=False, save_weights_only=True, mode='min', period=1)

tensorboard = TensorBoard(log_dir="./logs", histogram_freq=0, batch_size=batch_size, write_graph=True, write_images=True)

# Halve the learning rate as soon as val_loss fails to improve for one epoch,
# down to a floor of 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, min_lr=0.000001)

callbacks = [checkpoint, tensorboard, reduce_lr]


# NOTE(review): the warm-up cell trains with epochs=2, yet this run resumes at
# initial_epoch=1 (logged as "Epoch 2/150") -- confirm whether 2 was intended.
parallel_model.fit_generator(generator=train_generator,
                             steps_per_epoch=len(train_generator),
                             epochs=epochs,
                             verbose=1,
                             validation_data=valid_generator,
                             validation_steps=len(valid_generator),
                             callbacks=callbacks,
                             workers=nb_cpus,
                             use_multiprocessing=True,
                             initial_epoch=1
                             )

Found 456792 images belonging to 20 classes.
Found 12681 images belonging to 20 classes.
Epoch 2/150

#### plot training trend

In [None]:
# Plot the accuracy and loss curves of a training run from a CSV export.
#
# 1. Export acc / loss / val_acc / val_loss from TensorBoard first:
#      - in a terminal: tensorboard --logdir ./logs
#      - open the printed link in a browser
#      - download each scalar as CSV and merge the four files into one
#
# 2. Read the merged CSV.  It must provide the columns
#    step, acc, loss, val_acc and val_loss.
import pandas as pd
import matplotlib.pyplot as plt

csv_file = "./info_20181118.csv"

df = pd.read_csv(csv_file)

step = list(df.step)
acc = list(df.acc)
loss = list(df.loss)
val_acc = list(df.val_acc)
val_loss = list(df.val_loss)

# 3. Plot accuracy (left panel) and loss (right panel).  Titles and labels
#    are set on each Axes explicitly instead of relying on pyplot's implicit
#    "current axes".
from matplotlib.patches import Rectangle

fig = plt.figure(1, figsize=(12, 6), dpi=90)

ax_acc = fig.add_subplot(121)
ax_acc.plot(step, acc, label="train")
ax_acc.plot(step, val_acc, label="valid")
ax_acc.legend()
ax_acc.set_title("training accuracy")
ax_acc.set_xlabel("epoch")  # x values come from the CSV's `step` column
ax_acc.set_ylabel("accuracy")

ax_loss = fig.add_subplot(122)
ax_loss.plot(step, loss, label="train")
ax_loss.plot(step, val_loss, label="valid")  # label was misspelled "vaild"
ax_loss.legend()
ax_loss.set_title("training loss")
ax_loss.set_xlabel("epoch")
ax_loss.set_ylabel("loss")