In [1]:
import h5py
import numpy as np
from sklearn.utils import shuffle
import pandas as pd
from keras.preprocessing.image import *

np.random.seed(2017)


Using TensorFlow backend.


the ipynb is refer: 
Create the premodel by ResNet50,Xception, InceptionV3, VGG16, VGG19

In [2]:
def one_hot_encode(y):
    l = list()
    for item in y:
        c = [0. for i in range(10)]
        c[item] = 1.
        l.append(c)
    return np.array(l)

def load_data(pretrain_data_file):
    print('load data: ' + pretrain_data_file)
    with h5py.File("models/new/" + pretrain_data_file, 'r') as h:
        X_train = np.array(h['train'])
        X_valid = np.array(h['valid'])
        y_train = np.array(h['label'])
        y_valid = np.array(h['valid_label'])
        X_train, y_train = shuffle(X_train, y_train)
        y_train = one_hot_encode(y_train)
        X_valid, y_valid = shuffle(X_valid, y_valid)
        y_valid = one_hot_encode(y_valid)
        return X_train, y_train, X_valid, y_valid
    
def load_test_data(data_file):
    print('load test data: ' + data_file)
    with h5py.File("models/" + data_file, 'r') as h:
        X_test = np.array(h['test'])
        return X_test
    
from keras.models import *
from keras.layers import *

def make_model(input_shape):

    input_tensor = Input(input_shape)
    x = input_tensor
    x = Dropout(0.5)(x)
    x = Dense(10, activation='softmax')(x)
    model = Model(input_tensor, x)
    #model.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    
#     x = Dense(10)(x)
#     x = Activation('softmax')(x)
#     model = Model(input_tensor, x)
#     model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
#     #model.compile(Adam(lr=1e-3), loss='categorical_crossentropy')
    
    return model
print("Done")

Done


In [3]:
def gen_kaggle_csv(model, X_test,  model_image_size, csv_name):
    y_pred = model.predict(X_test, verbose=1)
    y_pred = y_pred.clip(min=0.005, max=0.995)
    print()
    print(y_pred[:3])

    dir = "/ext/Data/distracted_driver_detection/"

    gen = ImageDataGenerator()
    test_generator = gen.flow_from_directory(dir + "test/", (model_image_size, model_image_size), shuffle=False, 
                                             batch_size=16, class_mode=None)

    l = list()
    for i, fname in enumerate(test_generator.filenames):
        name = fname[fname.rfind('/')+1:]
        l.append( [name, *y_pred[i]] )

    l = np.array(l)
    data = {'img': l[:,0]}
    for i in range(10):
        data["c%d"%i] = l[:,i+1]
    df = pd.DataFrame(data, columns=['img'] + ['c%d'%i for i in range(10)])
    df.head(10)
    df = df.sort_values(by='img')
    df.to_csv(csv_name, index=None, float_format='%.3f')

print("done")

done


parameters

In [4]:
batch_size = 128
epochs = 30

## ResNet50

In [5]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_ResNet50.h5")
model_resnet = make_model(X_train.shape[1:])
model_resnet.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_resnet.save("models/resnet50-model.h5")
print("model save successed")

load data: bottleneck_ResNet50.h5
Train on 20787 samples, validate on 1637 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
model save successed


In [6]:
X_test = load_test_data("bottleneck_ResNet50_test.h5")
gen_kaggle_csv(model_resnet,  X_test,  224, 'csv/resnet50-pred.csv')

load test data: bottleneck_ResNet50_test.h5
[[ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995]
 [ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995]
 [ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995]]
Found 79726 images belonging to 1 classes.


## Xception

In [7]:
X_train, y_train, X_vaiid, y_valid = load_data("bottleneck_Xception.h5")
model_xception = make_model(X_train.shape[1:])
model_xception.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_xception.save("models/xception-model.h5")
print("model save successed")

load data: bottleneck_Xception.h5
Train on 20787 samples, validate on 1637 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
model save successed


In [8]:
X_test = load_test_data("bottleneck_Xception_test.h5")
gen_kaggle_csv(model_xception,  X_test,  299, 'csv/xception-pred.csv')

load test data: bottleneck_Xception_test.h5
[[ 0.02256569  0.005       0.005       0.005       0.005       0.7136004
   0.005       0.005       0.005       0.24994245]
 [ 0.03228534  0.005       0.005       0.27329922  0.59254557  0.03330377
   0.005       0.005       0.0168118   0.03815226]
 [ 0.005       0.005       0.005       0.01680736  0.97728217  0.005       0.005
   0.005       0.005       0.005     ]]
Found 79726 images belonging to 1 classes.


## InceptionV3

In [9]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_InceptionV3.h5")
model_inceptionV3 = make_model(X_train.shape[1:])
model_inceptionV3.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_inceptionV3.save("models/inceptionV3-model.h5")
print("model save successed")

load data: bottleneck_InceptionV3.h5
Train on 20787 samples, validate on 1637 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
model save successed


In [10]:
X_test = load_test_data("bottleneck_InceptionV3_test.h5")
gen_kaggle_csv(model_inceptionV3,  X_test,  299, 'csv/inceptionV3-pred.csv')

load test data: bottleneck_InceptionV3_test.h5
[[ 0.005       0.005       0.005       0.005       0.005       0.995       0.005
   0.005       0.005       0.005     ]
 [ 0.04984068  0.18285815  0.005       0.14780131  0.35908973  0.02937607
   0.04085194  0.00946527  0.13645333  0.04022862]
 [ 0.03460037  0.07086907  0.005       0.07650265  0.5651921   0.01638084
   0.00663228  0.005       0.04057407  0.18895791]]
Found 79726 images belonging to 1 classes.


## VGG16

In [11]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_VGG16.h5")
model_vgg16 = make_model(X_train.shape[1:])
model_vgg16.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_vgg16.save("models/vgg16-model.h5")
print("model save successed")

load data: bottleneck_VGG16.h5
Train on 20787 samples, validate on 1637 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
model save successed


In [12]:
X_test = load_test_data("bottleneck_VGG16_test.h5")
gen_kaggle_csv(model_vgg16,  X_test,  224, 'csv/vgg16-pred.csv')

load test data: bottleneck_VGG16_test.h5
[[ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995]
 [ 0.005  0.005  0.005  0.005  0.995  0.005  0.005  0.005  0.005  0.005]
 [ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995]]
Found 79726 images belonging to 1 classes.


## VGG19

In [13]:
X_train, y_train, X_valid, y_valid = load_data("bottleneck_VGG19.h5")
model_vgg19 = make_model(X_train.shape[1:])
model_vgg19.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_vgg19.save("models/vgg19-model.h5")
print("model save successed")

load data: bottleneck_VGG19.h5
Train on 20787 samples, validate on 1637 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
model save successed


In [14]:
X_test = load_test_data("bottleneck_VGG19_test.h5")
gen_kaggle_csv(model_vgg16,  X_test,  224, 'csv/vgg19-pred.csv')

load test data: bottleneck_VGG19_test.h5
[[ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995]
 [ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995  0.005  0.005]
 [ 0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.005  0.995]]
Found 79726 images belonging to 1 classes.


# Combine multi-modles

In [15]:
import h5py
import numpy as np
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
np.random.seed(2017)

X_train = []
X_valid = []

#premodels = ["bottleneck_ResNet50.h5", "bottleneck_Xception.h5", "bottleneck_InceptionV3.h5", "bottleneck_VGG16.h5", "bottleneck_VGG19.h5"]
#premodels = ["bottleneck_ResNet50.h5", "bottleneck_InceptionV3.h5",  "bottleneck_VGG19.h5"]
premodels = ["bottleneck_Xception.h5",  "bottleneck_InceptionV3.h5", ]
for filename in premodels:
    print('------------------'+filename)
    with h5py.File("models/" + filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_valid.append(np.array(h['valid']))
        y_train = np.array(h['label'])
        y_valid = np.array(h['valid_label'])

X_train = np.concatenate(X_train, axis=1)
X_valid = np.concatenate(X_valid, axis=1)

X_train, y_train = shuffle(X_train, y_train)
y_train = one_hot_encode(y_train)
X_valid, y_valid = shuffle(X_valid, y_valid)
y_valid = one_hot_encode(y_valid)

------------------bottleneck_Xception.h5
------------------bottleneck_InceptionV3.h5


In [16]:
model_mix = make_model(X_train.shape[1:])
model_mix.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_valid,y_valid))
model_mix.save("models/mixed-model.h5")
print("model save successed")

Train on 20787 samples, validate on 1637 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
model save successed


In [17]:
#test_premodels = ["bottleneck_ResNet50_test.h5", "bottleneck_Xception_test.h5", "bottleneck_InceptionV3_test.h5", 
#             "bottleneck_VGG16_test.h5", "bottleneck_VGG19_test.h5"]
#test_premodels = ["bottleneck_ResNet50_test.h5", "bottleneck_InceptionV3_test.h5", "bottleneck_VGG19_test.h5"]
test_premodels = ["bottleneck_Xception_test.h5", "bottleneck_InceptionV3_test.h5",  ]

X_test = []
for filename in test_premodels:
    print('------------------'+filename)
    with h5py.File("models/" + filename, 'r') as h:
        X_test.append(np.array(h['test']))
        
X_test = np.concatenate(X_test, axis=1)
gen_kaggle_csv(model_mix, X_test,  299, 'csv/mixed-pred.csv')

------------------bottleneck_Xception_test.h5
------------------bottleneck_InceptionV3_test.h5
[[ 0.005       0.005       0.005       0.005       0.005       0.995       0.005
   0.005       0.005       0.005     ]
 [ 0.02359737  0.00751092  0.005       0.19098055  0.46178627  0.00814887
   0.0095683   0.005       0.26811218  0.02800828]
 [ 0.03848533  0.005       0.005       0.01209726  0.55914986  0.005       0.005
   0.005       0.0402375   0.34321329]]
Found 79726 images belonging to 1 classes.
