In [2]:
import os
import operator
import numpy as np
from glob import glob
from shutil import copyfile, move
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
# Show the kernel's working directory; the path-configuration cell further down
# reads the same value through os.getcwd().
%pwd

'/home/patrick/Dev/Kaggle/ncfm'

In [4]:
# Tag for this experiment; it is interpolated into the submission file name
# ('sub_%s.csv' % MODEL_NAME) when the predictions are written out.
MODEL_NAME = "baseline"

In [5]:
# When True, DATA_DIR is pointed at the "data/sample" directory instead of "data".
USE_SAMPLE = False

In [6]:
# Anchor every path on the kernel's current working directory.
HOME_DIR = current_dir = os.getcwd()

# USE_SAMPLE selects the sample subset; otherwise the full data directory is used.
DATA_DIR = current_dir + ("/data/sample" if USE_SAMPLE else "/data")

In [7]:
# Count the training images per class using absolute paths.
# Deliberately no `%cd`: changing the kernel's working directory would make a
# later re-run of the path-configuration cell (which reads os.getcwd()) resolve
# DATA_DIR to the wrong location.
train_dir = os.path.join(DATA_DIR, 'train')

n_train_total = 0
for class_dir in glob(os.path.join(train_dir, '*')):
    # One '*.jpg' pattern feeds both the per-class count and the total,
    # so the two figures cannot disagree.
    n_images = len(glob(os.path.join(class_dir, '*.jpg')))
    print(os.path.basename(class_dir), n_images)
    n_train_total += n_images

print('\nSum:', n_train_total)

/home/patrick/Dev/Kaggle/ncfm/data/train
SHARK 141
NoF 372
YFT 588
DOL 94
ALB 1376
BET 160
LAG 54
OTHER 240

Sum: 3025


In [8]:
# Count the validation images per class using absolute paths.
# Deliberately no `%cd`: changing the kernel's working directory would make a
# later re-run of the path-configuration cell (which reads os.getcwd()) resolve
# DATA_DIR to the wrong location.
valid_dir = os.path.join(DATA_DIR, 'valid')

n_valid_total = 0
for class_dir in glob(os.path.join(valid_dir, '*')):
    # One '*.jpg' pattern feeds both the per-class count and the total,
    # so the two figures cannot disagree.
    n_images = len(glob(os.path.join(class_dir, '*.jpg')))
    print(os.path.basename(class_dir), n_images)
    n_valid_total += n_images

print('\nSum:', n_valid_total)

/home/patrick/Dev/Kaggle/ncfm/data/valid
SHARK 35
NoF 93
YFT 146
DOL 23
ALB 343
BET 40
LAG 13
OTHER 59

Sum: 752


# Model

In [9]:
from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from keras.models import Model
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [10]:
# VGG16 convolutional base; include_top=False leaves off its original classifier.
base_model = VGG16(include_top=False)

# New head: global average pooling over the feature maps, then an 8-way softmax
# (one output per class directory).
pooled = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(8, activation='softmax')(pooled)

model = Model(input=base_model.input, output=predictions)

# Freeze every layer of the VGG16 base so training only updates the new head.
for base_layer in base_model.layers:
    base_layer.trainable = False

model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
block1_conv1 (Convolution2D)     (None, None, None, 64 1792        input_1[0][0]                    
____________________________________________________________________________________________________
block1_conv2 (Convolution2D)     (None, None, None, 64 36928       block1_conv1[0][0]               
____________________________________________________________________________________________________
block1_pool (MaxPooling2D)       (None, None, None, 64 0           block1_conv2[0][0]               
___________________________________________________________________________________________

In [11]:
# Adam optimizer with categorical cross-entropy, the loss that pairs with the
# softmax output layer of the model.
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [12]:
N_EPOCH = 5
BATCH_SIZE = 64

# Multiply every pixel value by 1/255. The float literal keeps this a true
# division even under Python 2, where `1/255` evaluates to 0 and would zero
# out every image.
# NOTE(review): the VGG16 ImageNet weights are normally paired with
# keras.applications.vgg16.preprocess_input rather than plain 1/255 scaling —
# confirm this choice is intentional.
idg = ImageDataGenerator(rescale=1. / 255)

train_flow = idg.flow_from_directory(DATA_DIR + '/train', shuffle=True, batch_size=BATCH_SIZE)
valid_flow = idg.flow_from_directory(DATA_DIR + '/valid', shuffle=True, batch_size=BATCH_SIZE)
# shuffle=False keeps the batches in the order of test_flow.filenames, which is
# what the submission cell uses to label the prediction rows.
test_flow = idg.flow_from_directory(DATA_DIR + '/test', shuffle=False, batch_size=BATCH_SIZE)

Found 3025 images belonging to 8 classes.
Found 752 images belonging to 8 classes.
Found 1000 images belonging to 1 classes.


In [13]:
# Each epoch draws as many samples as the training set holds, then evaluates on
# as many samples as the validation set holds.
model.fit_generator(
    generator=train_flow,
    samples_per_epoch=train_flow.nb_sample,
    nb_epoch=N_EPOCH,
    validation_data=valid_flow,
    nb_val_samples=valid_flow.nb_sample,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f7d43a10da0>

In [14]:
# Predict class probabilities for every test image (one pass over test_flow).
pred = model.predict_generator(
    generator=test_flow,
    val_samples=test_flow.nb_sample,
)

In [15]:
# Label each prediction row with the bare image file name. os.path.basename
# works regardless of path separator or how deeply the test images are nested,
# unlike splitting on '/' and taking a fixed position.
files = [os.path.basename(f) for f in test_flow.filenames]

# class_indices maps class name -> output column index; ordering the names by
# that index lines the DataFrame columns up with the columns of `pred`.
class_indices = train_flow.class_indices
columns = sorted(class_indices, key=class_indices.get)

submission = pd.DataFrame(data=pred, index=files, columns=columns)
submission.index.name = 'image'

submission.head()

Unnamed: 0_level_0,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT
image,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
img_04814.jpg,0.275289,0.096118,0.076924,0.012023,0.049107,0.036156,0.022162,0.432221
img_03896.jpg,0.459509,0.026976,0.038521,0.018558,0.201569,0.059464,0.070386,0.125017
img_00030.jpg,0.463398,0.04125,0.042653,0.015507,0.156172,0.060276,0.064627,0.156118
img_03355.jpg,0.347095,0.0364,0.049939,0.018496,0.118339,0.092485,0.110336,0.22691
img_03853.jpg,0.440261,0.024843,0.042234,0.014844,0.202385,0.055593,0.066802,0.153038


In [16]:
# Write the submission to <DATA_DIR>/result/sub_<MODEL_NAME>.csv, creating the
# result directory first so a fresh checkout does not fail on a missing folder.
result_dir = os.path.join(DATA_DIR, 'result')
os.makedirs(result_dir, exist_ok=True)
submission.to_csv(os.path.join(result_dir, 'sub_%s.csv' % MODEL_NAME))