# Common - Package import

In [1]:
# print_function for compatibility with Python 3
from __future__ import print_function
print('print function is ready to serve')

# Matplotlib for visualization
from matplotlib import pyplot as plt

# display plots in the notebook
%matplotlib inline

print function is ready to serve


In [2]:
# NumPy for numerical computing
import numpy as np
np.random.seed(123)
import random
random.seed(123)

# Pandas for DataFrames
import pandas as pd
pd.set_option('display.max_column', 100)

from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau
from keras.optimizers import Adam

import os
import gc
from skimage import io
from skimage.transform import rescale, resize, downscale_local_mean

Using TensorFlow backend.


In [3]:
import sys
sys.path.append('./utils')

from data import Data
from models import Models
from tags import Tags
tags = Tags()

In [4]:
def resolve_planet_root(environ=None, exists=os.path.exists):
    """Pick the directory that holds the Planet Kaggle data.

    Candidates are tried in order and the first one that exists wins:

    1. the ``PLANET_KAGGLE_ROOT`` environment variable, when set and non-empty;
    2. ``/data/planet-data/``.

    When no candidate exists, the laptop path is returned as a last resort
    without being checked. With the environment variable unset this is the
    plain "server path, else laptop path" selection.

    Args:
        environ: mapping the override is read from; defaults to ``os.environ``.
        exists: predicate used to test a candidate path; defaults to
            ``os.path.exists``. Both arguments are injectable so the lookup
            can be exercised without touching the real filesystem.

    Returns:
        The selected root directory as a string.
    """
    if environ is None:
        environ = os.environ
    candidates = [environ.get('PLANET_KAGGLE_ROOT'), '/data/planet-data/']
    for candidate in candidates:
        # Skip an unset/empty override instead of handing None to `exists`.
        if candidate and exists(candidate):
            return candidate
    return '/Users/jiayou/Documents/Kaggle Data/Amazon'


PLANET_KAGGLE_ROOT = resolve_planet_root()

# Number of tag outputs; used below as the column count of prediction arrays
# and as the range of output indices in mask_weights.
N_TAGS = 17
# Dataset sizes. Presumably the image counts of the training set and of the
# two test splits -- TODO confirm against the data directory.
N_TRAIN = 40479
N_TEST_T = 40669
N_TEST_F = 20522
N_TEST = N_TEST_T + N_TEST_F

# Training

In [9]:
def train(val=0, toy=None, d=None):
    """Build a fresh ResNet50 and fit it, checkpointing to 'weights-v10-f<val>.hdf5'.

    Args:
        val: fold identifier. It is forwarded to the data generators on a
            full run and always used in the checkpoint file name.
        toy: ``None`` selects the full run (batches of 32, 1000 steps per
            epoch, 80 validation steps). Any other value selects a tiny
            smoke-test run (batches of 1, 8 steps per epoch, 1 validation
            step). It is also passed to ``Data`` when ``d`` is not supplied.
        d: an already constructed ``Data`` instance; when ``None`` a new
            ``Data(tif=False, toy=toy)`` is created.

    Returns:
        Whatever ``fit_generator`` returns for the run.
    """
    print('')
    print('Training with val = {}'.format(val))
    print('')

    data = d if d is not None else Data(tif=False, toy=toy)
    model = Models.new_resnet50(input_shape=(256, 256, 3), leaky=True)

    if toy is None:
        train_gen = data.gen_train(32, val=val)
        val_gen = data.gen_val(100, val=val)
        steps, val_steps = 1000, 80
        lr_monitor = 'val_loss'
    else:
        # NOTE(review): the toy generators are requested without `val`, so
        # they use whatever default fold Data defines -- confirm intended.
        train_gen = data.gen_train(1)
        val_gen = data.gen_val(2)
        steps, val_steps = 8, 1
        # The toy run lowers the learning rate on training loss instead of
        # validation loss.
        lr_monitor = 'loss'

    checkpoint = ModelCheckpoint('weights-v10-f{}.hdf5'.format(val), save_best_only=True, verbose=1)
    lr_schedule = ReduceLROnPlateau(monitor=lr_monitor, factor=0.2, patience=1, min_lr=1e-7, verbose=1)

    # NOTE(review): `max_q_size` is the queue-size keyword of the Keras
    # release this notebook was written against -- confirm before upgrading.
    return model.fit_generator(
        train_gen,
        steps_per_epoch=steps,
        epochs=30,
        initial_epoch=0,
        validation_data=val_gen,
        validation_steps=val_steps,
        callbacks=[checkpoint, lr_schedule],
        max_q_size=10)

In [10]:
# Ensemble training: fit one model per fold id 1..4 (fold 0 is not trained
# in this cell), building the Data object once and sharing it across runs.
toy = None
results = []
d = Data(tif=False, toy=toy)
for fold in range(1, 5):
    results.append(train(val=fold, d=d, toy=toy))
    # Run a garbage collection pass between folds before the next model is built.
    gc.collect()

Loading data...
Getting 2 training images...
Got 1 images
Done
Loaded fold 0.
Getting 2 training images...
Got 1 images
Done
Loaded fold 1.
Getting 2 training images...
Got 1 images
Done
Loaded fold 2.
Getting 2 training images...
Got 1 images
Done
Loaded fold 3.
Getting 2 training images...
Got 1 images
Done
Loaded fold 4.
Loading done

Training with val = 1

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30


KeyboardInterrupt: 

# Tag fine-tuning

In [1]:
def mask_weights(w, select):
    """Disable, in place, every tag output whose index is not in ``select``.

    For each index in ``range(N_TAGS)`` that is absent from ``select``, the
    matching column of ``w[318]`` is zeroed and the matching entry of
    ``w[319]`` is set to -1e7. Entries for selected indices are left alone.

    Args:
        w: list of weight arrays as returned by ``model.get_weights()``;
            entries 318 and 319 are modified in place.
            NOTE(review): 318/319 are presumably the kernel and bias of the
            final layer of this specific ResNet50 -- confirm if the
            architecture changes.
        select: container of tag indices to keep trainable/active.

    Returns:
        The same list object ``w``.
    """
    kernel_idx, bias_idx = 318, 319
    for tag in range(N_TAGS):
        if tag in select:
            continue
        w[kernel_idx][:, tag] = 0
        # A very negative bias; presumably drives the masked output to ~0
        # after the final activation -- TODO confirm.
        w[bias_idx][tag] = -1e7
    return w

def tune_tag(weights, select=[], val=0, toy=None, d=None):
    """Fine-tune a saved ResNet50 on a subset of tags.

    The model is loaded from ``weights``, its weights are passed through
    ``mask_weights`` with ``select``, and it is recompiled with binary
    cross-entropy and Adam (lr=0.0001). It is then fitted for 10 epochs on
    generators wrapped by ``d.gen_mask(..., select=select)``. The best
    checkpoint is written to 'weights-v9-f<val>-tune.hdf5'.

    Args:
        weights: path handed to ``Models.load_resnet50``.
        select: tag indices to fine-tune. The default list is shared between
            calls but is only read here, never modified.
        val: fold identifier forwarded to the generators and used in the
            checkpoint file name.
        toy: forwarded to ``Data`` only when ``d`` is not supplied.
        d: an already constructed ``Data`` instance; when ``None`` a new
            ``Data(tif=False, toy=toy)`` is created.

    Returns:
        Whatever ``fit_generator`` returns for the run.
    """
    print('')
    print('Training with val = {}'.format(val))
    print('')

    data = d if d is not None else Data(tif=False, toy=toy)

    model = Models.load_resnet50(weights)
    masked = mask_weights(model.get_weights(), select)
    model.set_weights(masked)
    model.compile(metrics=['accuracy'],
                  loss='binary_crossentropy',
                  optimizer=Adam(lr=0.0001))

    train_gen = data.gen_mask(data.gen_train(32, val=val), select=select)
    val_gen = data.gen_mask(data.gen_val(100, val=val), select=select)

    checkpoint = ModelCheckpoint('weights-v9-f{}-tune.hdf5'.format(val), save_best_only=True, verbose=1)
    # patience=0: the learning rate is cut as soon as one epoch fails to
    # improve val_loss.
    lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=0, min_lr=5e-7, verbose=1)

    return model.fit_generator(
        train_gen,
        steps_per_epoch=1000,
        epochs=10,
        initial_epoch=0,
        validation_data=val_gen,
        validation_steps=80,
        callbacks=[checkpoint, lr_schedule],
        max_q_size=10)

In [None]:
# Filled in by predict_val: `pred` holds one consolidated row per image,
# `pred8` holds the eight raw rows per image.
pred = None
pred8 = None

def predict_val(toy=None, batch_size=20, weights='', d=None, val=0):
    """Predict fold ``val`` and store the results in the globals ``pred``/``pred8``.

    Every batch from ``d.gen_val_augmented`` is expected to hold eight
    consecutive rows per image. The raw model outputs are copied into
    ``pred8`` and each group of eight rows is reduced to a single row of
    ``pred`` with ``d.consolidate``.

    Args:
        toy: forwarded to ``Data`` only when ``d`` is not supplied.
        batch_size: forwarded to ``d.gen_val_augmented``.
        weights: path handed to ``Models.load_resnet50``.
        d: an already constructed ``Data`` instance; when ``None`` a new
            ``Data(toy=toy)`` is created.
            NOTE(review): unlike the training helpers this does not pass
            ``tif=False`` -- confirm that is intended.
        val: fold whose labels ``d.y[val]`` size the output arrays and which
            is forwarded to the generator.

    Returns:
        None. Results are left in the module-level ``pred`` and ``pred8``.
    """
    global pred
    global pred8

    model = Models.load_resnet50(weights)
    print('Model weights loaded')

    data = Data(toy=toy) if d is None else d

    done = 0
    n_images = len(data.y[val])
    pred = np.zeros((n_images, N_TAGS))
    pred8 = np.zeros((n_images * 8, N_TAGS))

    print('Start predicting..')
    # The loop ends only when the generator is exhausted.
    for batch in data.gen_val_augmented(batch_size, val=val):
        scores = model.predict_on_batch(batch)
        # Number of whole images in this batch (eight rows each).
        n_new = len(scores) // 8
        pred8[done * 8:(done + n_new) * 8, :] = scores[:, :]
        for offset in range(n_new):
            first_row = 8 * offset
            pred[done + offset, :] = data.consolidate(scores[first_row:first_row + 8, :])
        done += n_new
        print('Predicted {} images'.format(done))
    print('Predicted all {} images'.format(done))

In [19]:
# Tag indices to fine-tune (passed as `select` to tune_tag) and the fold to
# work on. Which tags the indices stand for is not shown here -- TODO confirm.
select = [2, 4, 7, 12]
val = 4
# Build the Data object once; the following cells pass it in as `d`.
d = Data(tif=False)

In [3]:
# Fine-tune the v9 checkpoint of fold `val` on the selected tags; tune_tag
# writes the best result to 'weights-v9-f<val>-tune.hdf5'.
tune_tag('weights-v9-f{}.hdf5'.format(val), select=select, val=val, d=d)

In [4]:
# Predict fold `val` with the fine-tuned checkpoint; the results are stored
# in the globals `pred` and `pred8` rather than returned.
predict_val(d=d, weights='weights-v9-f{}-tune.hdf5'.format(val), val=val)

In [5]:
# Plot the consolidated predictions against `d.y[val]` (presumably the
# fold's ground-truth labels, yielding per-tag ROC curves -- confirm in Tags).
tags.plot_roc(pred, d.y[val], title='Fine tune tags')