In [1]:
import os
import h5py
import numpy as np
from sklearn.utils import shuffle
import pandas as pd

from keras.preprocessing.image import *
from keras.optimizers import *

np.random.seed(2017)

Using TensorFlow backend.


the ipynb is refer: 
Create the premodel by ResNet50,Xception, InceptionV3, VGG16, VGG19

In [2]:
tag = "noscale"

def one_hot_encode(y):
    l = list()
    for item in y:
        c = [0. for i in range(10)]
        c[item] = 1.
        l.append(c)
    return np.array(l)

def load_data(pretrain_data_file):
    print('load data: ' + pretrain_data_file)
    with h5py.File(os.path.join("models", tag, pretrain_data_file), 'r') as h:
        X_train = np.array(h['train'])
        X_valid = np.array(h['valid'])
        y_train = np.array(h['label'])
        y_valid = np.array(h['valid_label'])
        X_train, y_train = shuffle(X_train, y_train)
        y_train = one_hot_encode(y_train)
        X_valid, y_valid = shuffle(X_valid, y_valid)
        y_valid = one_hot_encode(y_valid)
        return X_train, y_train, X_valid, y_valid
    
def load_test_data(data_file):
    print('load test data: ' + data_file)
    with h5py.File(os.path.join("models",  tag, data_file), 'r') as h:
        X_test = np.array(h['test'])
        return X_test
    
from keras.models import *
from keras.layers import *

def make_model(input_shape):

    input_tensor = Input(input_shape)
    x = input_tensor
    x = Dropout(0.5)(x)
    x = Dense(10, activation='softmax')(x)
    model = Model(input_tensor, x)
    #model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    #model.compile(optimizer=RMSprop(lr=0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
    
#     x = Dense(10)(x)
#     x = Activation('softmax')(x)
#     model = Model(input_tensor, x)
#     model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#     #model.compile(Adam(lr=1e-3), loss='categorical_crossentropy')
    
    return model

print("Done")

Done


In [3]:
def gen_kaggle_csv(model, X_test,  model_image_size, csv_name):
    y_pred = model.predict(X_test, verbose=1)
    y_pred = y_pred.clip(min=0.005, max=0.995)
    print()
    print(y_pred[:3])

    dir = "/ext/Data/distracted_driver_detection/"

    gen = ImageDataGenerator()
    test_generator = gen.flow_from_directory(dir + "test/", (model_image_size, model_image_size), shuffle=False, 
                                             batch_size=16, class_mode=None)

    l = list()
    for i, fname in enumerate(test_generator.filenames):
        name = fname[fname.rfind('/')+1:]
        l.append( [name, *y_pred[i]] )

    l = np.array(l)
    data = {'img': l[:,0]}
    for i in range(10):
        data["c%d"%i] = l[:,i+1]
    df = pd.DataFrame(data, columns=['img'] + ['c%d'%i for i in range(10)])
    df.head(10)
    df = df.sort_values(by='img')
    df.to_csv(csv_name, index=None, float_format='%.3f')

print("done")

done


parameters

In [4]:
batch_size = 128
epochs = 20

## ResNet50

In [5]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_ResNet50.h5")
model_resnet = make_model(X_train.shape[1:])
model_resnet.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_resnet.save("models/resnet50-model.h5")
print("model save successed")

load data: bottleneck_ResNet50.h5
Train on 22424 samples, validate on 641 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
model save successed


In [6]:
X_test = load_test_data("bottleneck_ResNet50_test.h5")
gen_kaggle_csv(model_resnet,  X_test,  224, 'csv/resnet50-pred.csv')

load test data: bottleneck_ResNet50_test.h5
[[ 0.02477641  0.005       0.005       0.005       0.005       0.91277683
   0.005       0.005       0.005       0.05214309]
 [ 0.005       0.01832871  0.005       0.30352515  0.06699577  0.0108735
   0.005       0.085459    0.02234364  0.48766503]
 [ 0.09130081  0.49960566  0.005       0.1234154   0.01765093  0.11166939
   0.005       0.005       0.00843587  0.13943082]]
Found 79726 images belonging to 1 classes.


## Xception

In [7]:
X_train, y_train, X_vaiid, y_valid = load_data("bottleneck_Xception.h5")
model_xception = make_model(X_train.shape[1:])
model_xception.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_xception.save("models/xception-model.h5")
print("model save successed")

load data: bottleneck_Xception.h5
Train on 22424 samples, validate on 641 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
model save successed


In [8]:
X_test = load_test_data("bottleneck_Xception_test.h5")
gen_kaggle_csv(model_xception,  X_test,  299, 'csv/xception-pred.csv')

load test data: bottleneck_Xception_test.h5
[[ 0.20189193  0.08369905  0.04985116  0.02106302  0.03565321  0.26526636
   0.01911807  0.0128439   0.11833371  0.19227962]
 [ 0.05660634  0.02038704  0.005       0.4209429   0.38368106  0.04477174
   0.005       0.02331951  0.0067282   0.03842681]
 [ 0.005       0.005       0.005       0.005       0.98631674  0.005       0.005
   0.005       0.005       0.005     ]]
Found 79726 images belonging to 1 classes.


## InceptionV3

In [9]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_InceptionV3.h5")
model_inceptionV3 = make_model(X_train.shape[1:])
model_inceptionV3.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_inceptionV3.save("models/inceptionV3-model.h5")
print("model save successed")

load data: bottleneck_InceptionV3.h5
Train on 22424 samples, validate on 641 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
model save successed


In [10]:
X_test = load_test_data("bottleneck_InceptionV3_test.h5")
gen_kaggle_csv(model_inceptionV3,  X_test,  299, 'csv/inceptionV3-pred.csv')

load test data: bottleneck_InceptionV3_test.h5
[[ 0.04222022  0.005       0.01107503  0.005       0.03452218  0.82440066
   0.00634712  0.005       0.02833127  0.04564087]
 [ 0.00641995  0.00837948  0.0053475   0.03800186  0.82393926  0.01966328
   0.02214672  0.00929285  0.01777785  0.0490313 ]
 [ 0.005       0.005       0.005       0.00839503  0.97672141  0.005       0.005
   0.005       0.005       0.005     ]]
Found 79726 images belonging to 1 classes.


## VGG16

In [11]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_VGG16.h5")
model_vgg16 = make_model(X_train.shape[1:])
model_vgg16.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_vgg16.save("models/vgg16-model.h5")
print("model save successed")

load data: bottleneck_VGG16.h5
Train on 22424 samples, validate on 641 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
model save successed


In [12]:
X_test = load_test_data("bottleneck_VGG16_test.h5")
gen_kaggle_csv(model_vgg16,  X_test,  224, 'csv/vgg16-pred.csv')

load test data: bottleneck_VGG16_test.h5
[[ 0.01312078  0.0050959   0.005       0.02666136  0.16162024  0.62280899
   0.01529993  0.005       0.005       0.14873438]
 [ 0.005       0.04371846  0.02568576  0.04443453  0.7887668   0.005
   0.01020154  0.0267454   0.02863828  0.02889333]
 [ 0.00962569  0.00544351  0.00935891  0.0105534   0.24869126  0.03912386
   0.00736173  0.00695201  0.08751232  0.57537729]]
Found 79726 images belonging to 1 classes.


## VGG19

In [13]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_VGG19.h5")
model_vgg19 = make_model(X_train.shape[1:])
model_vgg19.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_vgg19.save("models/vgg19-model.h5")
print("model save successed")

load data: bottleneck_VGG19.h5
Train on 22424 samples, validate on 641 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
model save successed


In [14]:
X_test = load_test_data("bottleneck_VGG19_test.h5")
gen_kaggle_csv(model_vgg16,  X_test,  224, 'csv/vgg19-pred.csv')

load test data: bottleneck_VGG19_test.h5
[[ 0.005       0.005       0.005       0.005       0.005       0.03686684
   0.005       0.00732123  0.005       0.94932932]
 [ 0.005       0.005       0.01058658  0.005       0.005       0.005       0.005
   0.93765777  0.01671658  0.03228797]
 [ 0.005       0.005       0.005       0.005       0.005       0.005       0.005
   0.005       0.04655677  0.94061124]]
Found 79726 images belonging to 1 classes.


# Combine multi-modles

In [15]:
import h5py
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
np.random.seed(2017)

X_train = []
X_valid = []

premodels = ["bottleneck_ResNet50.h5", "bottleneck_Xception.h5", "bottleneck_InceptionV3.h5", "bottleneck_VGG16.h5", "bottleneck_VGG19.h5"]
#premodels = ["bottleneck_ResNet50.h5", "bottleneck_InceptionV3.h5",  "bottleneck_VGG19.h5"]
#premodels = ["bottleneck_Xception.h5",  "bottleneck_InceptionV3.h5", ]
for filename in premodels:
    print('------------------'+filename)
    with h5py.File(os.path.join("models", tag, filename), 'r') as h:
        X_train.append(np.array(h['train']))
        X_valid.append(np.array(h['valid']))
        y_train = np.array(h['label'])
        y_valid = np.array(h['valid_label'])

X_train = np.concatenate(X_train, axis=1)
X_valid = np.concatenate(X_valid, axis=1)

X_train, y_train = shuffle(X_train, y_train)
y_train = one_hot_encode(y_train)
X_valid, y_valid = shuffle(X_valid, y_valid)
y_valid = one_hot_encode(y_valid)

------------------bottleneck_ResNet50.h5
------------------bottleneck_Xception.h5
------------------bottleneck_InceptionV3.h5
------------------bottleneck_VGG16.h5
------------------bottleneck_VGG19.h5


In [16]:
model_mix = make_model(X_train.shape[1:])
model_mix.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_mix.save("models/mixed-model.h5")
print("model save successed")

Train on 22424 samples, validate on 641 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
model save successed


In [17]:
test_premodels = ["bottleneck_ResNet50_test.h5", "bottleneck_Xception_test.h5", "bottleneck_InceptionV3_test.h5", 
            "bottleneck_VGG16_test.h5", "bottleneck_VGG19_test.h5"]
#test_premodels = ["bottleneck_ResNet50_test.h5", "bottleneck_InceptionV3_test.h5", "bottleneck_VGG19_test.h5"]
#test_premodels = ["bottleneck_Xception_test.h5", "bottleneck_InceptionV3_test.h5",  ]

X_test = []
for filename in test_premodels:
    print('------------------'+filename)
    with h5py.File(os.path.join("models", tag, filename), 'r') as h:
        X_test.append(np.array(h['test']))
        
X_test = np.concatenate(X_test, axis=1)
gen_kaggle_csv(model_mix, X_test,  299, 'csv/mixed-pred.csv')

------------------bottleneck_ResNet50_test.h5
------------------bottleneck_Xception_test.h5
------------------bottleneck_InceptionV3_test.h5
------------------bottleneck_VGG16_test.h5
------------------bottleneck_VGG19_test.h5
[[ 0.005       0.005       0.005       0.005       0.005       0.9832992
   0.005       0.005       0.005       0.01651365]
 [ 0.005       0.005       0.005       0.01044999  0.98336464  0.005       0.005
   0.005       0.005       0.005     ]
 [ 0.005       0.005       0.005       0.005       0.58331138  0.005       0.005
   0.005       0.01881646  0.39675233]]
Found 79726 images belonging to 1 classes.
