In [None]:
import tensorflow as tf
import keras as kr
from keras import backend as K
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from random import randint
import time
import os
from utility import *

In [2]:
# Run-wide settings: mini-batch size, number of epochs for the shared model
# (`stop`), and an initially empty `course_history` list.
batch_size, stop, course_history = 32, 4, []
# One log directory per notebook session, named after the current time.
# Spaces and colons are replaced so the name is a single path-friendly token.
log_dir = './logs/'+time.ctime().replace(' ', '_').replace(':', '.')
# makedirs (rather than mkdir) also creates the parent ./logs directory on a
# fresh checkout, and exist_ok=True keeps a re-run within the same second
# from raising.
os.makedirs(log_dir, exist_ok=True)
print('To check in on tensorboard, copy and paste following line to cmd')
print('tensorboard --logdir={0}'.format(log_dir))

In [7]:
def step_decay(epoch):
    """Return the learning rate to use for the given epoch.

    The schedule starts at 0.045 and is multiplied by 0.94 once for every
    two completed epochs (epochs 0-1 use 0.045, epochs 2-3 use 0.045 * 0.94,
    and so on).

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The decayed learning rate for that epoch.
    """
    base_rate = 0.045
    # Number of decay steps applied so far: one per full pair of epochs.
    decay_steps = np.floor(epoch / 2)
    return base_rate * 0.94 ** decay_steps

In [4]:
%matplotlib inline
if not os.path.exists('./data'):
    os.mkdir('./data')
if not os.path.exists('./data/models/'):
    os.mkdir('./data/models')

In [5]:
# Load the pre-processed arrays saved in ./data/preped_data_cifar.npz.
# The exact file that is opened is also the file that is checked for, and a
# missing file is reported immediately instead of leaving X, y, ... undefined
# for the cells below.
prepped_data_path = './data/preped_data_cifar.npz'
if not os.path.exists(prepped_data_path):
    raise FileNotFoundError(
        "Pre-processed data not found at '{0}'; generate it before running "
        "this notebook.".format(prepped_data_path))
with np.load(file=prepped_data_path) as big_load:
    # Indexing the archive reads each array into memory, so the values stay
    # usable after the file is closed.
    X = big_load['train_arr']
    x_test = big_load['test_arr']
    y = big_load['y_fine']
    y_test = big_load['y_fine_test']
    y_c = big_load['y_c_train']
    y_c_test = big_load['y_c_test']
    fine2coarse = big_load['fine2coarse']

In [6]:
# Hold out 10% of the samples for validation. Splitting the images and both
# label arrays in one call guarantees all three share the same shuffled
# indices, avoids splitting (and copying) X a second time, and avoids
# assigning to `_`, which IPython uses for the last cell output.
x_train, x_val, y_train, y_val, y_c_train, y_c_val = train_test_split(
    X, y, y_c, test_size=.1, random_state=0)

In [7]:
# Shared ("coarse") network: a stack of convolution / separable-convolution
# blocks with residual shortcuts, ending in a 2-way softmax classifier.
# NOTE(review): the block/layer naming appears to follow the Keras Xception
# reference model — confirm.
img_input = kr.layers.Input(shape=(128, 128, 3), name='image_input')
# Block 1: two plain 3x3 convolutions, each followed by BatchNorm + ReLU.
# Only the first one is strided.
x = kr.layers.Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input)
x = kr.layers.BatchNormalization(name='block1_conv1_bn')(x)
x = kr.layers.Activation('relu', name='block1_conv1_act')(x)
x = kr.layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
x = kr.layers.BatchNormalization(name='block1_conv2_bn')(x)
x = kr.layers.Activation('relu', name='block1_conv2_act')(x)
# Block 2 shortcut: strided 1x1 convolution + BatchNorm so the shortcut has
# 128 channels and is downsampled like the main path's stride-2 pooling.
# These two layers are left unnamed.
residual = kr.layers.Conv2D(128, (1, 1), strides=(2, 2),
                  padding='same', use_bias=False)(x)
residual = kr.layers.BatchNormalization()(residual)
# Block 2 main path: two 128-filter separable convolutions, then max pooling.
# Note the ReLU between them is named 'block2_sepconv2_act'.
x = kr.layers.SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x)
x = kr.layers.BatchNormalization(name='block2_sepconv1_bn')(x)
x = kr.layers.Activation('relu', name='block2_sepconv2_act')(x)
x = kr.layers.SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x)
x = kr.layers.BatchNormalization(name='block2_sepconv2_bn')(x)
x = kr.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x)
x = kr.layers.add([x, residual])
# Block 3 shortcut: same pattern as block 2, now projecting to 256 channels.
residual = kr.layers.Conv2D(256, (1, 1), strides=(2, 2),
                  padding='same', use_bias=False)(x)
residual = kr.layers.BatchNormalization()(residual)
# Block 3 main path: ReLU -> separable conv -> BatchNorm, twice, then pooling.
x = kr.layers.Activation('relu', name='block3_sepconv1_act')(x)
x = kr.layers.SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x)
x = kr.layers.BatchNormalization(name='block3_sepconv1_bn')(x)
x = kr.layers.Activation('relu', name='block3_sepconv2_act')(x)
x = kr.layers.SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x)
x = kr.layers.BatchNormalization(name='block3_sepconv2_bn')(x)
x = kr.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x)
x = kr.layers.add([x, residual])
# Classification head: global average pooling followed by a 2-unit softmax.
x = kr.layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = kr.layers.Dense(2, activation='softmax', name='predictions')(x)
model = kr.Model(img_input, x, name='shared_layers')

In [11]:
def gen_callbacks(log_dir, run_name):
    """Create a fresh log directory for a run and return its Keras callbacks.

    The run directory is ``<log_dir>/<run_name>``. If that path already
    exists, a two-digit suffix is appended (``_00``, ``_01``, ``_02``, ...)
    and incremented until an unused path is found, so earlier runs are never
    overwritten.

    Args:
        log_dir: Parent directory that holds the logs of this session.
        run_name: Name of the run; used as the sub-directory name.

    Returns:
        A list with a TensorBoard callback writing to the new directory, a
        LearningRateScheduler driven by ``step_decay``, and a History
        callback.

    Raises:
        OSError: If the run directory cannot be created.
    """
    base_dir = os.path.join(log_dir, run_name)
    run_dir = base_dir
    suffix = 0
    # Count upwards with a plain integer; every suffix (including _09, _19,
    # ...) is tried in order and the scheme keeps working past _99.
    while os.path.exists(run_dir):
        run_dir = '{0}_{1:02d}'.format(base_dir, suffix)
        suffix += 1
    # makedirs also creates log_dir itself if it is missing.
    os.makedirs(run_dir)
    callbacks = [
        # NOTE(review): TensorBoard is not imported explicitly in this
        # notebook; presumably it comes from `from utility import *` — confirm.
        TensorBoard(log_dir=run_dir),
        kr.callbacks.LearningRateScheduler(step_decay),
        kr.callbacks.History()]
    return callbacks

In [10]:
# Configure training of the shared model: categorical cross-entropy loss,
# SGD with momentum 0.9 and no built-in decay, and three reported metrics.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy', 'top_k_categorical_accuracy', 'MAE'],
    optimizer=kr.optimizers.SGD(lr=0.045, momentum=0.9, decay=0),
)

In [11]:
# Train the shared model on the coarse labels for `stop` epochs, logging to
# the 'base_block' run directory.
model.fit(x_train, y_c_train, batch_size=batch_size, initial_epoch=0,
          validation_data=(x_val, y_c_val), epochs=stop,
          callbacks=gen_callbacks(log_dir, 'base_block'))
# Coarse class probabilities for the training and validation images. The
# configured batch_size is used here instead of a second hard-coded 32.
x_shared_preds = model.predict(x_train, verbose=1, batch_size=batch_size)
x_shared_preds_val = model.predict(x_val, verbose=1, batch_size=batch_size)

Train on 4500 samples, validate on 500 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x7f66e9f63588>

In [26]:
# Rebuild `model` so that it outputs the feature maps that feed the
# classification head. The tensor is looked up through the named 'avg_pool'
# layer rather than by position (layers[-3]): if this cell is run a second
# time, `model` is already truncated and the lookup fails with an error
# instead of silently selecting a different, earlier layer.
shared_feature_tensor = model.get_layer('avg_pool').input
model = kr.Model(inputs=img_input, outputs=shared_feature_tensor)
# Feature maps for the training and validation images.
x_shared_out = model.predict(x_train, verbose=1, batch_size=batch_size)
x_shared_out_val = model.predict(x_val, verbose=1, batch_size=batch_size)

In [34]:
# Persist the shared model (weights plus JSON architecture), then release it.
# The target directory is created first: the visible setup cells only create
# data/models, so saving would otherwise fail on a fresh checkout.
os.makedirs('data/models/tony_net', exist_ok=True)
model.save_weights(filepath='data/models/tony_net/shared_layers_weights')
with open('data/models/tony_net/shared_layers.json', 'w') as json_file:
    json_file.write(model.to_json())
# Drop the model and reset the Keras backend session before the next model
# is built.
del model
kr.backend.clear_session()

In [97]:
def generate_fine(shared_out, shared_preds):
    """Append per-sample predictions to feature maps as extra channels.

    Each sample's prediction vector is repeated at every spatial position of
    that sample's feature map and concatenated along the last (channel) axis.

    Args:
        shared_out: 4-D array indexed (sample, row, column, channel).
        shared_preds: Array whose first axis is the sample axis and whose
            last axis holds the per-class predictions.

    Returns:
        A 4-D array with the same first three dimensions as ``shared_out``
        and ``shared_out.shape[3] + shared_preds.shape[-1]`` channels.
    """
    n_samples = shared_preds.shape[0]
    n_classes = shared_preds.shape[-1]
    # Target shape of the prediction planes: the spatial layout of the feature
    # maps with one channel per class.
    plane_shape = shared_out.shape[:3] + (n_classes,)
    # Broadcasting a (samples, 1, 1, classes) view against an all-ones array
    # copies each prediction vector to every spatial position.
    per_sample = shared_preds.reshape(n_samples, 1, 1, n_classes)
    pred_planes = per_sample * np.ones(shape=plane_shape)
    return np.concatenate((shared_out, pred_planes), axis=3)

In [116]:
# Build the fine-model inputs for the training and validation sets by
# attaching the shared model's predictions to its feature maps.
fine_x, fine_x_val = (
    generate_fine(feature_maps, coarse_preds)
    for feature_maps, coarse_preds in (
        (x_shared_out, x_shared_preds),
        (x_shared_out_val, x_shared_preds_val),
    )
)

4500 2 (4500, 2)
500 2 (500, 2)


In [117]:
# Fine classifier: takes the tensors produced by generate_fine (its input
# shape is read from fine_x) and ends in a 5-way softmax.
fine_input = kr.layers.Input(shape=(fine_x.shape[1:]), name='fine_input')
# Block 4 shortcut: strided 1x1 convolution + BatchNorm projecting the input
# to 728 channels; these two layers are left unnamed.
residual = kr.layers.Conv2D(728, (1, 1), strides=(2, 2),
                  padding='same', use_bias=False)(fine_input)
residual = kr.layers.BatchNormalization()(residual)
# Block 4 main path: ReLU -> separable conv -> BatchNorm, twice, then
# stride-2 max pooling before the shortcut is added.
x = kr.layers.Activation('relu', name='block4_sepconv1_act')(fine_input)
x = kr.layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x)
x = kr.layers.BatchNormalization(name='block4_sepconv1_bn')(x)
x = kr.layers.Activation('relu', name='block4_sepconv2_act')(x)
x = kr.layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x)
x = kr.layers.BatchNormalization(name='block4_sepconv2_bn')(x)
x = kr.layers.MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x)
x = kr.layers.add([x, residual])
# Blocks 5-7: three identical residual blocks. Each applies
# ReLU -> separable conv -> BatchNorm three times and adds the block's own
# input back (identity shortcut, no projection or pooling).
for i in range(3):
    residual = x
    prefix = 'block' + str(i + 5)
    x = kr.layers.Activation('relu', name=prefix + '_sepconv1_act')(x)
    x = kr.layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x)
    x = kr.layers.BatchNormalization(name=prefix + '_sepconv1_bn')(x)
    x = kr.layers.Activation('relu', name=prefix + '_sepconv2_act')(x)
    x = kr.layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x)
    x = kr.layers.BatchNormalization(name=prefix + '_sepconv2_bn')(x)
    x = kr.layers.Activation('relu', name=prefix + '_sepconv3_act')(x)
    x = kr.layers.SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x)
    x = kr.layers.BatchNormalization(name=prefix + '_sepconv3_bn')(x)
    x = kr.layers.add([x, residual])
# Classification head: global average pooling followed by a 5-unit softmax.
x = kr.layers.GlobalAveragePooling2D(name='avg_pool')(x)
x = kr.layers.Dense(5, activation='softmax', name='predictions')(x)
model = kr.Model(fine_input, x, name='fine_model')

In [123]:
# Pick the samples whose label index falls inside the window belonging to
# the chosen category: a run of five consecutive class indices
# [start, end) derived from `course_cat`.
course_cat = 2
start = 5 * (course_cat - 1)
end = 5 * course_cat
# Convert the one-hot rows to integer class indices.
label = y_train.argmax(axis=1)
label_val = y_val.argmax(axis=1)
# Positions of the training / validation samples inside the window.
find_ind = np.flatnonzero((label >= start) & (label < end))
find_ind_val = np.flatnonzero((label_val >= start) & (label_val < end))
# The temporaries are no longer needed; start, end and the index arrays stay.
del course_cat, label, label_val

In [127]:
# Sanity check: shapes of the selected inputs and of the matching label
# columns, for the training set and then the validation set.
(
    fine_x[find_ind].shape,
    y_train[find_ind][:, start:end].shape,
    fine_x_val[find_ind_val].shape,
    y_val[find_ind_val][:, start:end].shape,
)

((2252, 16, 16, 258), (2252, 5), (248, 16, 16, 258), (248, 5))

In [128]:
# Configure training of the fine model with the same settings as the shared
# model: categorical cross-entropy, SGD with momentum 0.9 and no built-in
# decay, and three reported metrics.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy', 'top_k_categorical_accuracy', 'MAE'],
    optimizer=kr.optimizers.SGD(lr=0.045, momentum=0.9, decay=0),
)

In [129]:
# Train the fine classifier on the selected samples, using only the label
# columns start..end-1 as targets.
# The callbacks are built here with gen_callbacks, as for the shared model:
# none of the visible cells defines a global `callbacks`, so referring to one
# only works with leftover kernel state. The run is logged under 'fine_1',
# matching the file names used when this model is saved.
model.fit(
    fine_x[find_ind], y_train[find_ind][:, start:end],
    batch_size=batch_size, initial_epoch=0, epochs=39,
    validation_data=(fine_x_val[find_ind_val], y_val[find_ind_val][:, start:end]),
    callbacks=gen_callbacks(log_dir, 'fine_1'))

Train on 2252 samples, validate on 248 samples
Epoch 1/39
Epoch 2/39
Epoch 3/39
Epoch 4/39
Epoch 5/39
Epoch 6/39
Epoch 7/39
Epoch 8/39
Epoch 9/39
Epoch 10/39
Epoch 11/39
Epoch 12/39
Epoch 13/39
Epoch 14/39
Epoch 15/39
Epoch 16/39
Epoch 17/39
Epoch 18/39
Epoch 19/39
Epoch 20/39
Epoch 21/39
Epoch 22/39
Epoch 23/39
Epoch 24/39
Epoch 25/39
Epoch 26/39
Epoch 27/39
Epoch 28/39
Epoch 29/39
Epoch 30/39
Epoch 31/39
Epoch 32/39
Epoch 33/39
Epoch 34/39
Epoch 35/39
Epoch 36/39
Epoch 37/39
Epoch 38/39
Epoch 39/39


<keras.callbacks.History at 0x7f667c241e48>

In [130]:
# Persist the fine model (weights plus JSON architecture), then release it.
# The target directory is created first: the visible setup cells only create
# data/models, so saving would otherwise fail on a fresh checkout.
os.makedirs('data/models/tony_net', exist_ok=True)
model.save_weights(filepath='data/models/tony_net/fine_1_weights')
with open('data/models/tony_net/fine_1.json', 'w') as json_file:
    json_file.write(model.to_json())
# Drop the model and reset the Keras backend session.
del model
kr.backend.clear_session()