In [7]:
import glob
import os

import numpy as np
import pandas as pd
from IPython.display import FileLink

from keras import optimizers
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Input, Activation, Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.models import Sequential, Model
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator

In [8]:
# Class labels '0' .. '23' as strings; this list is later passed as the
# `classes=` argument of flow_from_directory for both generators.
classes = [str(label) for label in range(24)]

In [9]:
# Training configuration shared by the cells below.
batch_size = 32
nb_classes = len(classes)

# Spatial size and channel count of the images fed to the network.
img_rows, img_cols = 150, 150
channels = 3

# Dataset locations, relative to the notebook's working directory.
train_data_dir = 'input/processed/train'
valid_data_dir = 'input/processed/valid'
test_data_dir = 'input/given/test'

# Hard-coded sample counts: 15 * number of classes is set aside for
# validation (presumably 15 images per class) and subtracted from 12399.
# NOTE(review): the training generator reports "Found 11799 images", which
# does not match the 12039 computed here - confirm the real dataset size.
nb_val_samples = 15 * nb_classes
nb_train_samples = 12399 - nb_val_samples
nb_epoch = 30

# Output directory for saved weights. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of exists() + mkdir().
result_dir = 'results'
os.makedirs(result_dir, exist_ok=True)

In [10]:
# Input tensor for the pre-trained model that is loaded in the next cell.
# The fully-connected (FC) top layers are not needed there, hence
# include_top=False when the model is built.
# The shape uses the `channels` constant from the configuration cell instead
# of repeating the literal 3.
input_tensor = Input(shape=(img_rows, img_cols, channels))

In [19]:
# Pre-trained InceptionV3 backbone: ImageNet weights, no classification top,
# attached to the input tensor created in the previous cell.
base_model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=False)

# New classification head stacked on the backbone output:
# global average pooling -> 1024-unit ReLU layer -> softmax over nb_classes.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(1024, activation='relu')(pooled)
predictions = Dense(nb_classes, activation='softmax')(hidden)

# The model that gets trained: backbone input through to the new head.
model = Model(inputs=base_model.input, outputs=predictions)

# Mark every InceptionV3 layer as non-trainable so that, at this stage, only
# the randomly initialised head layers remain trainable.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False

In [20]:
# Compile only after the backbone layers have been set to non-trainable,
# so the compiled model reflects those flags.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Training-time preprocessing: rescale pixel values by 1/255 and apply
# random shear, random zoom and horizontal flips.
augmentation = dict(
    rescale=1.0 / 255.0,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(**augmentation)

# Shuffled RGB batches read from the training directory, resized to the
# network input size, with one-hot ('categorical') labels for `classes`.
train_flow_options = dict(
    target_size=(img_rows, img_cols),
    color_mode='rgb',
    classes=classes,
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
)
train_generator = train_datagen.flow_from_directory(train_data_dir, **train_flow_options)

Found 11799 images belonging to 24 classes.


In [14]:
# Validation images are only rescaled by 1/255; no augmentation is applied.
valid_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# RGB batches read from the validation directory with the same target size,
# class list, label encoding and batch size as the training generator.
valid_flow_options = dict(
    target_size=(img_rows, img_cols),
    color_mode='rgb',
    classes=classes,
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
)
validation_generator = valid_datagen.flow_from_directory(valid_data_dir, **valid_flow_options)

Found 360 images belonging to 24 classes.


In [None]:
# Print layer indices and names of the backbone to see where the inception
# blocks ("mixedN" layers) start and end.
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# Fine-tune the top two inception blocks: freeze everything up to and
# including `mixed8` and unfreeze the layers after it (the last two blocks
# plus the new head). The cut is derived from the layer name rather than a
# hard-coded index: the previous value, 172, matches the Keras 1 layer
# layout, whereas the listing printed above has `mixed3` at index 100
# (Keras 2 layout), where 172 should fall inside an inception block.
# NOTE(review): confirm against the full layer listing that `mixed8` is the
# intended boundary.
layer_names = [layer.name for layer in model.layers]
fine_tune_from = layer_names.index('mixed8') + 1
for layer in model.layers[:fine_tune_from]:
    layer.trainable = False
for layer in model.layers[fine_tune_from:]:
    layer.trainable = True

# NOTE(review): no visible cell trains the head after the first (rmsprop)
# compile, so fine-tuning appears to start from a randomly initialised head.
# Consider fitting the frozen-backbone model first.

# Recompile so the changed trainable flags take effect; SGD with a low
# learning rate is used for fine-tuning.
model.compile(optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Keras 2 counts *batches* per epoch, not samples. The legacy arguments
# `samples_per_epoch` / `nb_val_samples` are renamed to `steps_per_epoch` /
# `validation_steps` without being divided by the batch size, so passing
# sample counts would make every epoch ~batch_size times too long.
# Ceil-divide the number of images each generator actually found.
steps_per_epoch = (train_generator.samples + batch_size - 1) // batch_size
validation_steps = (validation_generator.samples + batch_size - 1) // batch_size

# Train again, this time updating the unfrozen inception blocks together
# with the Dense head.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=nb_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps)

0 input_3
1 conv2d_283
2 batch_normalization_283
3 activation_283
4 conv2d_284
5 batch_normalization_284
6 activation_284
7 conv2d_285
8 batch_normalization_285
9 activation_285
10 max_pooling2d_13
11 conv2d_286
12 batch_normalization_286
13 activation_286
14 conv2d_287
15 batch_normalization_287
16 activation_287
17 max_pooling2d_14
18 conv2d_291
19 batch_normalization_291
20 activation_291
21 conv2d_289
22 conv2d_292
23 batch_normalization_289
24 batch_normalization_292
25 activation_289
26 activation_292
27 average_pooling2d_28
28 conv2d_288
29 conv2d_290
30 conv2d_293
31 conv2d_294
32 batch_normalization_288
33 batch_normalization_290
34 batch_normalization_293
35 batch_normalization_294
36 activation_288
37 activation_290
38 activation_293
39 activation_294
40 mixed0
41 conv2d_298
42 batch_normalization_298
43 activation_298
44 conv2d_296
45 conv2d_299
46 batch_normalization_296
47 batch_normalization_299
48 activation_296
49 activation_299
50 average_pooling2d_29
51 conv2d_295
52



 batch_normalization_302
79 batch_normalization_304
80 batch_normalization_307
81 batch_normalization_308
82 activation_302
83 activation_304
84 activation_307
85 activation_308
86 mixed2
87 conv2d_310
88 batch_normalization_310
89 activation_310
90 conv2d_311
91 batch_normalization_311
92 activation_311
93 conv2d_309
94 conv2d_312
95 batch_normalization_309
96 batch_normalization_312
97 activation_309
98 activation_312
99 max_pooling2d_15
100 mixed3
101 conv2d_317
102 batch_normalization_317
103 activation_317
104 conv2d_318
105 batch_normalization_318
106 activation_318
107 conv2d_314
108 conv2d_319
109 batch_normalization_314
110 batch_normalization_319
111 activation_314
112 activation_319
113 conv2d_315
114 conv2d_320
115 batch_normalization_315
116 batch_normalization_320
117 activation_315
118 activation_320
119 average_pooling2d_31
120 conv2d_313
121 conv2d_316
122 conv2d_321
123 conv2d_322
124 batch_normalization_313
125 batch_normalization_316
126 batch_normalization_321
127 

In [None]:
# Write the model weights to results/finetuning3.h5.
weights_path = os.path.join(result_dir, 'finetuning3.h5')
model.save_weights(weights_path)

In [None]:
# load test data and make prediction
path = os.path.join('input', 'processed', 'test', '*.jpg')
files = sorted(glob.glob(path))

In [None]:
# Parallel lists: file base names and the model's output vector per file.
test_id = []
test = []

# Score the test images one at a time.
for img_path in files:
    # Load at the network input size, convert to an array, add a leading
    # batch axis and apply the same 1/255 rescale used by the generators.
    pil_img = image.load_img(img_path, target_size=(img_rows, img_cols))
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0) / 255.0

    test_id.append(os.path.basename(img_path))
    # predict() returns one row per batch item; keep the single row.
    test.append(model.predict(batch)[0])

In [None]:
# Table of raw model outputs: one row per test file (indexed by file name),
# one column per output position; saved as sub1.csv.
df1 = pd.DataFrame.from_records(data=test, index=test_id)
df1.to_csv('sub1.csv')

In [None]:
# For every test file, take the column label of its highest score.
# DataFrame.idxmax(axis=1) does this row-wise in a single call and replaces
# the transpose + `.ix` loop (`.ix` is deprecated and was removed in
# pandas 1.0).
label_list = df1.idxmax(axis=1).tolist()

In [None]:
# Predicted label per test file, sharing df1's file-name index; saved as
# sub2.csv.
df2 = pd.Series(data=label_list, index=df1.index)
df2.to_csv('sub2.csv')

In [None]:
# Display a link to sub1.csv (the per-file score table written earlier).
# FileLink is imported with the other imports at the top of the notebook.
FileLink('sub1.csv')

In [None]:
# Display a link to sub2.csv (the per-file predicted labels written earlier).
FileLink('sub2.csv')