In [1]:
import os
import h5py
import numpy as np
from sklearn.utils import shuffle
import pandas as pd

import keras
from keras.preprocessing.image import *
from keras.optimizers import *
from keras.callbacks import *

# Seed NumPy's global random number generator with a fixed value.
np.random.seed(2017)

Using TensorFlow backend.


This notebook builds on:
Creating the pre-trained models (premodels) with ResNet50, Xception, InceptionV3, VGG16, and VGG19

In [2]:
# Root folder of the dataset; gen_kaggle_csv lists test images under dir + "test/".
# NOTE(review): this name shadows the built-in dir(); it is kept because later code reads it.
dir = "/ext/Data/distracted_driver_detection/"
# Sub-folder of "models" from which the bottleneck feature files are read.
tag = "finetune"

def one_hot_encode(y, num_classes=10):
    """Convert integer class labels into a one-hot float matrix.

    Args:
        y: One-dimensional iterable of integer class indices.
        num_classes: Width of every one-hot row. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        A float64 ``np.ndarray`` of shape ``(len(y), num_classes)``. Row ``i``
        holds 1.0 in column ``y[i]`` and 0.0 everywhere else. Empty input
        yields an array of shape ``(0, num_classes)``.

    Raises:
        ValueError: If ``y`` is not one-dimensional.
        TypeError: If the labels are not integers.
        IndexError: If a label does not fit into ``num_classes`` columns.
    """
    # list() first so that generators and other plain iterables are accepted.
    labels = np.asarray(list(y))
    if labels.size == 0:
        return np.zeros((0, num_classes))
    if labels.ndim != 1:
        raise ValueError("y must be a 1-D sequence of class indices")
    if not np.issubdtype(labels.dtype, np.integer):
        raise TypeError("class indices must be integers")

    encoded = np.zeros((labels.shape[0], num_classes))
    # One vectorised assignment sets a single 1.0 per row.
    encoded[np.arange(labels.shape[0]), labels] = 1.
    return encoded
    
from keras.models import *
from keras.layers import *

def make_model(input_shape):
    """Build the classifier head used on top of the bottleneck features.

    The input tensor created from `input_shape` goes through Dropout(0.5) and
    then a Dense layer with 10 softmax units. The returned `Model` maps the
    input tensor to that softmax output.
    """
    inputs = Input(input_shape)
    dropped = Dropout(0.5)(inputs)
    outputs = Dense(10, activation='softmax')(dropped)
    return Model(inputs, outputs)

print("Done")

Done


Training parameters

In [3]:
# Mini-batch size passed to model_mix.fit in the training cell.
batch_size = 128
# NOTE(review): the training cell passes epochs=10 as a literal, so this value is not read there.
epochs = 20

# Combine multiple models

In [4]:
import h5py
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
# Re-seed NumPy's global RNG. shuffle() below is called without random_state,
# so it presumably draws from this global state (see sklearn.utils.shuffle).
np.random.seed(2017)

premodels = [
    "bottleneck_ResNet50.h5",
    "bottleneck_Xception.h5",
    "bottleneck_InceptionV3.h5",
]

# Per-file feature blocks; they are joined along axis 1 after the loop.
train_blocks = []
valid_blocks = []
y_train = None
y_valid = None

for filename in premodels:
    print('------------------'+filename)
    with h5py.File(os.path.join("models", tag, filename), 'r') as h:
        train_blocks.append(np.array(h['train']))
        valid_blocks.append(np.array(h['valid']))
        file_y_train = np.array(h['label'])
        file_y_valid = np.array(h['valid_label'])

    # Every file has to carry the same labels in the same order. Otherwise the
    # concatenated feature rows would be paired with the wrong targets.
    if y_train is None:
        y_train, y_valid = file_y_train, file_y_valid
    elif not (np.array_equal(y_train, file_y_train)
              and np.array_equal(y_valid, file_y_valid)):
        raise ValueError(
            "labels stored in %s differ from the previously loaded bottleneck files" % filename
        )

X_train = np.concatenate(train_blocks, axis=1)
X_valid = np.concatenate(valid_blocks, axis=1)

# Shuffle features and labels together, then expand the labels to one-hot rows.
# The train shuffle runs before the validation shuffle, as before.
X_train, y_train = shuffle(X_train, y_train)
y_train = one_hot_encode(y_train)
X_valid, y_valid = shuffle(X_valid, y_valid)
y_valid = one_hot_encode(y_valid)

print(X_train.shape)
print(y_train.shape)

------------------bottleneck_ResNet50.h5
------------------bottleneck_Xception.h5
------------------bottleneck_InceptionV3.h5
(22424, 6144)
(22424, 10)


In [5]:
# Build the classifier head for the concatenated feature vector
# (the model input shape is every axis of X_train except the first).
model_mix = make_model(X_train.shape[1:])
tensorboard_callback = keras.callbacks.TensorBoard()

print("Adam")
model_mix.compile(
    optimizer=Adam(lr=1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_mix.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=10,
    validation_data=(X_valid, y_valid),
    callbacks=[tensorboard_callback],
)

# Disabled alternative: the same training run with RMSprop.
# print("RMSprop")
# model_mix.compile(optimizer=RMSprop(lr=1*0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
# model_mix.fit(X_train, y_train, batch_size=batch_size, epochs=20, validation_data=(X_valid,y_valid))

# Persist the trained model to disk.
model_mix.save("models/mixed-model.h5")
print("model save successed")

Adam
Train on 22424 samples, validate on 641 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
model save successed


In [6]:
def gen_kaggle_csv(model, X_test,  model_image_size, csv_name):
    """Predict class probabilities for the test features and write a submission CSV.

    The CSV has an ``img`` column (image file name) followed by one ``c<k>``
    column per predicted class. Rows are sorted by ``img``.

    Args:
        model: Object whose ``predict(X, verbose=...)`` returns a 2-D array of
            per-class probabilities, one row per sample.
        X_test: Features handed to ``model.predict``. Rows are assumed to follow
            the order of an unshuffled ``flow_from_directory`` over
            ``dir + "test/"`` -- TODO confirm against the code that produced
            the bottleneck files.
        model_image_size: Side length used as the generator's target size. The
            generator is only asked for its file list here.
        csv_name: Path of the CSV file to write.

    Raises:
        ValueError: If the number of prediction rows differs from the number
            of test images the generator found.
    """
    y_pred = model.predict(X_test, verbose=1)
    print(y_pred[:3])
    # Keep every probability inside [0.005, 0.995]; presumably this bounds the
    # penalty a confident wrong answer gets under a log-loss metric.
    y_pred = y_pred.clip(min=0.005, max=0.995)
    print()
    print(y_pred[:3])

    gen = ImageDataGenerator()
    test_generator = gen.flow_from_directory(dir + "test/", (model_image_size, model_image_size), shuffle=False,
                                             batch_size=16, class_mode=None)

    filenames = test_generator.filenames
    if len(filenames) != len(y_pred):
        raise ValueError(
            "got %d predictions for %d test images" % (len(y_pred), len(filenames))
        )

    # Strip the class sub-directory so only the bare file name is written.
    names = [os.path.basename(fname) for fname in filenames]

    # Build the frame from the numeric array directly. Routing the rows through
    # a mixed str/float np.array would turn the probabilities into strings, and
    # to_csv applies float_format to floating-point columns only.
    class_columns = ['c%d' % i for i in range(y_pred.shape[1])]
    df = pd.DataFrame(y_pred, columns=class_columns)
    df.insert(0, 'img', names)
    df = df.sort_values(by='img')
    df.to_csv(csv_name, index=False, float_format='%.3f')

print("done")

done


In [7]:
test_premodels = [
    "bottleneck_ResNet50_test.h5",
    "bottleneck_Xception_test.h5",
    "bottleneck_InceptionV3_test.h5",
]

# Read the 'test' dataset out of each bottleneck file, in list order.
test_blocks = []
for filename in test_premodels:
    print('------------------'+filename)
    bottleneck_path = os.path.join("models", tag, filename)
    with h5py.File(bottleneck_path, 'r') as h:
        test_blocks.append(np.array(h['test']))

# Join the per-file blocks along axis 1, then write the submission file.
X_test = np.concatenate(test_blocks, axis=1)
gen_kaggle_csv(model_mix, X_test,  320, 'csv/mixed-pred.csv')

------------------bottleneck_ResNet50_test.h5
------------------bottleneck_Xception_test.h5
------------------bottleneck_InceptionV3_test.h5
[[ 0.11352451  0.10771599  0.06836841  0.09339184  0.0998333   0.20207632
   0.10419136  0.04569352  0.05489507  0.11030971]
 [ 0.14326157  0.1051169   0.09165204  0.1596227   0.15439957  0.04909711
   0.05507321  0.0661435   0.1162128   0.05942065]
 [ 0.19164048  0.04446024  0.12464575  0.13919875  0.19029716  0.09489754
   0.08738002  0.04626432  0.03568156  0.04553423]]

[[ 0.11352451  0.10771599  0.06836841  0.09339184  0.0998333   0.20207632
   0.10419136  0.04569352  0.05489507  0.11030971]
 [ 0.14326157  0.1051169   0.09165204  0.1596227   0.15439957  0.04909711
   0.05507321  0.0661435   0.1162128   0.05942065]
 [ 0.19164048  0.04446024  0.12464575  0.13919875  0.19029716  0.09489754
   0.08738002  0.04626432  0.03568156  0.04553423]]
Found 79726 images belonging to 1 classes.
