In [1]:
import sys
import os
sys.path.insert(1, os.path.join(sys.path[0], '..'))
from utils import *
import vgg16bn
import pandas as pd
import numpy as np
from glob import glob
from keras.layers import Dense, Conv2D, BatchNormalization, Dropout, Flatten, MaxPooling2D
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from sklearn.metrics import log_loss
%matplotlib inline

data_dir = os.path.join(os.getcwd(), 'data', 'whale')
img_dir  = os.path.join(os.getcwd(), 'data', 'whale', 'imgs')
batch_size=64
num_class = 447
img_shape=(3, 224, 224)
target_size = (224,224)

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
Using Theano backend.


In [2]:
data_dir

'/home/ubuntu/courses/deeplearning1/nbs/data/whale'

In [3]:
img_dir

'/home/ubuntu/courses/deeplearning1/nbs/data/whale/imgs'

In [4]:
ImageDataGenerator?

In [4]:
def get_tr_batches(dr = img_dir+ '/train', gen = ImageDataGenerator(), batch_size=batch_size):
    return gen.flow_from_directory(dr, shuffle=True, target_size=target_size, batch_size=64)

def get_val_batches(dr = img_dir+ '/valid', gen = ImageDataGenerator(), batch_size=batch_size):
    return gen.flow_from_directory(dr, shuffle=False, target_size=target_size, batch_size=batch_size)


def get_test_batches(dr = img_dir+ '/test', gen = ImageDataGenerator(), batch_size=batch_size):
    return gen.flow_from_directory(dr, shuffle=False, target_size=target_size, batch_size=batch_size)

tr_features = load_array('vgg_conv_tr.bc')
val_features = load_array('vgg_val_tr.bc')


tr_labels = load_array('whale_tr_classes.bc')
val_labels = load_array('whale_val_classes.bc')


def fit_model(model, epochs, tr_b = get_tr_batches(), val_b = get_val_batches()):
    model.fit_generator(tr_b, tr_b.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, 
                    nb_epoch=epochs)

In [6]:
def get_model():
    vgg = Vgg16BN()
    model = vgg.model
    last_conv = [i for i, v in enumerate(model.layers) if type(v) is Conv2D][-1]
    for layer in model.layers[: last_conv+1]: layer.trainable=False
    model.layers = model.layers[:-1]
    model.add(Dense(num_class, activation='softmax'))
    return model

In [7]:
model = get_model()

Downloading data from http://files.fast.ai/models/vgg16_bn.h5

In [10]:
augmenter = ImageDataGenerator(rotation_range=180, horizontal_flip=True, vertical_flip=True, zoom_range=0.10)

In [11]:
tr_batches = get_tr_batches(gen=augmenter)

Found 3635 images belonging to 447 classes.


In [12]:
val_b = get_val_batches()

Found 909 images belonging to 447 classes.


In [13]:
model.compile(Adam(0.001), 'categorical_crossentropy', metrics=['accuracy'])

In [14]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f274a37d050>

In [15]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=5)

Epoch 1/5
Epoch 2/5
  64/3635 [..............................] - ETA: 98s - loss: 5.8316 - acc: 0.0156

KeyboardInterrupt: 

In [16]:
model.save_weights('vgg_bn_aug__5.9465.h5')

In [17]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=3)

Epoch 1/3
Epoch 2/3
 256/3635 [=>............................] - ETA: 92s - loss: 5.8858 - acc: 0.0078

KeyboardInterrupt: 

In [18]:
model.load_weights('vgg_bn_aug__5.9465.h5')

In [19]:
model.optimizer.lr = 0.00001

In [20]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f274632af50>

In [21]:
model.save_weights('vgg_bn_aug__5.9419.h5')

In [22]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f27458bcbd0>

In [23]:
model.save_weights('vgg_bn_aug__5.9386.h5')

In [24]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f2744e50290>

In [25]:
model.save_weights('vgg_bn_aug__5.9357.h5')

In [26]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=2)

Epoch 1/2
Epoch 2/2
 128/3635 [>.............................] - ETA: 97s - loss: 5.8626 - acc: 0.0156

KeyboardInterrupt: 

In [27]:
model.load_weights('vgg_bn_aug__5.9357.h5')

In [28]:
model.optimizer.lr = 0.000001

In [29]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f2744e50750>

In [30]:
model.save_weights('vgg_bn_aug__5.9350.h5')

In [31]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f27458af550>

In [32]:
model.save_weights('vgg_bn_aug__5.9341.h5')

In [33]:
model.fit_generator(tr_batches, 
                    tr_batches.nb_sample, 
                    validation_data=val_b, 
                    nb_val_samples=val_b.nb_sample, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x7f27458beb90>

In [34]:
model.load_weights('vgg_bn_aug__5.9341.h5')

In [36]:
test_b = get_test_batches()

Found 6925 images belonging to 1 classes.


In [37]:
preds = model.predict_generator(test_b, test_b.nb_sample)

In [38]:
from create_whale_sub import create_sub

In [39]:
preds.shape

(6925, 447)

In [40]:
sub = create_sub(preds)

Found 3635 images belonging to 447 classes.
Found 6925 images belonging to 1 classes.


In [41]:
sub.head()

Unnamed: 0,Image,whale_00195,whale_00442,whale_02411,whale_02608,whale_02839,whale_03103,whale_03227,whale_03623,whale_03728,...,whale_98618,whale_98633,whale_98645,whale_98746,whale_98939,whale_98996,whale_99243,whale_99326,whale_99558,whale_99573
0,w_8037.jpg,0.00330140558071,0.00107163877692,0.00298443879001,0.00180055375677,0.00145936675835,0.000481946190121,0.00291534373537,0.00207554502413,0.00229167821817,...,0.0015491831582,0.00206484901719,0.00330859259702,0.00464711943641,0.00218376261182,0.0014458252117,0.00242781057023,0.00167237408459,0.00449525006115,0.00259288446978
1,w_4494.jpg,0.00330140558071,0.00107163877692,0.00298443879001,0.00180055375677,0.00145936675835,0.000481946190121,0.00291534373537,0.00207554502413,0.00229167821817,...,0.0015491831582,0.00206484901719,0.00330859259702,0.00464711943641,0.00218376261182,0.0014458252117,0.00242781057023,0.00167237408459,0.00449525006115,0.00259288446978
2,w_4673.jpg,0.00286285975017,0.00133441668004,0.00309766852297,0.00168319640215,0.00149236188736,0.000955631083343,0.00430827727541,0.00166171591263,0.00155158585403,...,0.0018753928598,0.002270426834,0.00298910285346,0.00349396490492,0.00135786493775,0.00148632435594,0.00244975904934,0.0020736171864,0.00255226390436,0.00230655097403
3,w_8273.jpg,0.00253723887727,0.00100558856502,0.00309851090424,0.00193557445891,0.000445468933322,0.00104380666744,0.00475372979417,0.00142451631837,0.00141202867962,...,0.00210296176374,0.00166398868896,0.00333113502711,0.00266497768462,0.00190908589866,0.00169698696118,0.00077131355647,0.00215010205284,0.00303134019487,0.00270436680876
4,w_7611.jpg,0.00286286580376,0.00133442285005,0.003097636858,0.00168321689125,0.001492371317,0.000955640512984,0.00430819019675,0.00166173430625,0.00155161705334,...,0.00187540159095,0.0022704184521,0.00298909749836,0.00349395861849,0.00135790521745,0.0014863446122,0.00244975276291,0.00207360880449,0.00255230534822,0.0023065651767


In [42]:
sub.to_csv('vgg_bn_aug.csv', index=False)

In [43]:
from IPython.display import FileLink
FileLink('vgg_bn_aug.csv')

scores 5.94492