In [49]:
import numpy as np
from tqdm import tqdm
import cv2
import pandas as pd
import h5py
import os
# import shutil

# config global variables

In [50]:
# Passed as `verbose` to model.predict() during feature extraction.
debugMode = True
# Only referenced by commented-out code (the override block below and the
# quick-fit block after extract_feature), so it currently has no effect.
testWorkflow = True
numberData = 25000   # number of training images loaded (cats and dogs interleaved)
numberTest = 12500   # number of test images loaded / rows written to a submission
batchSize = 32       # batch size used when fitting the classifier heads
epochs = 20          # epochs used when fitting the classifier heads

# HDF5 files holding the extracted features, one per backbone.
# NOTE: 'Inceptoin' is misspelled, but the same variable is used for both
# writing and reading, so the name is kept to stay compatible with files
# that were already generated.
filenameInceptionResNetV2 = 'featuresInceptoinResNetV2'
filenameInceptionV3 = 'featuresInceptionV3'
filenameResNet50 = 'featuresResNet50'

# Reduced settings for a quick end-to-end smoke test (disabled).
# if testWorkflow:
#     numberData = 1000
#     numberTest = 50
#     batchSize = 4
#     epochs = 2

In [None]:
def extract_feature(MODELTYPE, inputSize, convert2RGB, preprocessInput, fileName):
    """Extract globally-pooled CNN features for all train/test images and save them.

    Images are read from 'train/cat.<i>.jpg', 'train/dog.<i>.jpg' and
    'test/<i>.jpg', resized to (inputSize, inputSize), pushed through
    ``MODELTYPE`` (ImageNet weights, no top) followed by global average
    pooling, and the resulting feature matrices are written to ``fileName``.

    Args:
        MODELTYPE: Keras application constructor (called with ``input_tensor``,
            ``weights`` and ``include_top``).
        inputSize: Side length, in pixels, of the square network input.
        convert2RGB: If true, reverse the channel axis of every image after
            loading (cv2.imread returns BGR, so this yields RGB).
        preprocessInput: Preprocessing function wrapped in a ``Lambda`` layer
            in front of the base model.
        fileName: Path of the HDF5 file to create. An existing file is
            overwritten.

    Output file datasets:
        "train": features of the training images.
        "test":  features of the test images.
        "label": uint8 array of shape (numberData, 1); 0 for cat rows (even
            indices), 1 for dog rows (odd indices).

    Raises:
        IOError: If an image file is missing or cannot be decoded.
    """
    def _load_image(path):
        """Read one image, resize it to the network input size, fix channel order."""
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None;
            # fail here with the path instead of an opaque cv2.resize error.
            raise IOError('cannot read image: %s' % path)
        image = cv2.resize(image, (inputSize, inputSize))
        if convert2RGB:
            image = image[:, :, ::-1]
        return image

    #read data
    x = np.zeros((numberData, inputSize, inputSize, 3), dtype=np.uint8)
    test = np.zeros((numberTest, inputSize, inputSize, 3), dtype=np.uint8)
    y = np.zeros((numberData, 1), dtype=np.uint8)

    # Cats go to even rows, dogs to odd rows, so the classes stay interleaved.
    for index in tqdm(range(0, numberData, 2)):
        imageNumber = index // 2
        x[index] = _load_image('train/cat.%d.jpg' % imageNumber)
        x[index + 1] = _load_image('train/dog.%d.jpg' % imageNumber)
        y[index + 1] = 1

    # Test images are numbered from 1.
    for index in tqdm(range(numberTest)):
        test[index] = _load_image('test/%d.jpg' % (index + 1))

    #construct model
    inputTensor = Input((inputSize, inputSize, 3))
    modelInput = Lambda(preprocessInput)(inputTensor)
    baseModel = MODELTYPE(input_tensor=modelInput, weights='imagenet', include_top=False)
    outputFeature = GlobalAveragePooling2D()(baseModel.output)
    model = Model(baseModel.input, outputFeature)

    #predict
    featuresTrain = model.predict(x, verbose=debugMode)
    featuresTest = model.predict(test, verbose=debugMode)

    #save to file
    # Mode 'w' creates the file and truncates any previous one. The mode must be
    # explicit: h5py >= 3 opens read-only by default, which fails for a new file.
    with h5py.File(fileName, 'w') as h:
        h.create_dataset("train", data=featuresTrain)
        h.create_dataset("test",  data=featuresTest)
        h.create_dataset("label", data=y)
    
#     #for test
#     if testWorkflow:
#         inputTensorTest = Input(featuresTrain.shape[1:])
#         modelInputTest = inputTensorTest
#         modelInputTest = Dropout(0.5)(modelInputTest)
#         modelInputTest = Dense(1, activation='sigmoid')(modelInputTest)
#         modelTest = Model(inputTensorTest, modelInputTest)

#         modelTest.compile(optimizer='adam',
#                       loss='binary_crossentropy',
#                       metrics=['accuracy'])
        
#         modelTest.fit(featuresTrain, y, batch_size=batchSize, epochs=epochs, validation_split=0.2)

## extract features with ResNet50

In [None]:
import keras
from keras.layers import GlobalAveragePooling2D, Dense, Input, Dropout, Lambda
from keras.models import Model

from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input as piResNet50

# cv2.imread returns BGR images, while Keras' ResNet50 preprocess_input expects
# RGB (it performs the RGB->BGR flip itself), so convert to RGB before it runs.
extract_feature(ResNet50, 224, True, piResNet50, filenameResNet50)

100%|██████████| 12500/12500 [01:42<00:00, 122.00it/s]
100%|██████████| 12500/12500 [01:04<00:00, 194.71it/s]


  864/25000 [>.............................] - ETA: 1:26:30

## extract features with InceptionV3

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input as piInceptionV3

# InceptionV3 features: 299x299 inputs, channel order reversed after loading
# (convert2RGB=True); results are written to filenameInceptionV3.
extract_feature(InceptionV3, 299, True, piInceptionV3, filenameInceptionV3)

## extract features with InceptionResNetV2

In [None]:
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.inception_resnet_v2 import preprocess_input as piInceptionResNetV2

# InceptionResNetV2 features: 299x299 inputs, channel order reversed after
# loading (convert2RGB=True); results are written to filenameInceptionResNetV2.
extract_feature(InceptionResNetV2, 299, True, piInceptionResNetV2, filenameInceptionResNetV2)

## predict and write submission

In [None]:
def writeSubmission(pred, predNumber, filename):
    """Write predictions into a copy of sample_submission.csv.

    Reads "sample_submission.csv" from the working directory, replaces the
    "label" value of the first ``predNumber`` rows with the corresponding
    predictions and saves the result to ``filename`` without an index column.

    Args:
        pred: Predictions, either 1-D or of shape (N, 1) as returned by
            ``model.predict`` for a single sigmoid output.
        predNumber: Number of leading rows to fill.
        filename: Path of the CSV file to write.

    Raises:
        ValueError: If ``pred`` is not 1-D / (N, 1), or if ``predNumber``
            exceeds the number of predictions or of template rows.
    """
    submission = pd.read_csv("sample_submission.csv")

    values = np.asarray(pred, dtype=float)
    if values.ndim == 2 and values.shape[1] == 1:
        # Flatten the (N, 1) output so every cell receives a plain scalar.
        values = values[:, 0]
    elif values.ndim != 1:
        raise ValueError("pred must be 1-D or of shape (N, 1), got %s" % (values.shape,))

    if predNumber > len(values) or predNumber > len(submission):
        raise ValueError(
            "predNumber=%d exceeds predictions (%d) or template rows (%d)"
            % (predNumber, len(values), len(submission)))

    # Single vectorized write instead of one .at assignment per row.
    labels = submission["label"].to_numpy(dtype=float, copy=True)
    labels[:predNumber] = values[:predNumber]
    submission["label"] = labels

    submission.to_csv(filename, index=None)

In [None]:
from sklearn.utils import shuffle

# Per-backbone feature matrices, collected here for the merged-feature model.
xTrainMerge = []
xTestMerge = []

# Fit one small classifier head per backbone; submissions are numbered from 1
# in the order the feature files are listed.
for index, featureFilename in enumerate(
        [filenameResNet50, filenameInceptionV3, filenameInceptionResNetV2], start=1):
    with h5py.File(featureFilename, 'r') as h:
        # Pull all three datasets into memory as numpy arrays.
        xTrain = np.array(h['train'])
        xTest = np.array(h['test'])
        yTrain = np.array(h['label'])

        xTrainMerge.append(xTrain)
        xTestMerge.append(xTest)

        # Classifier head: dropout followed by a single sigmoid unit.
        inputTensor = Input(xTrain.shape[1:])
        droppedFeatures = Dropout(0.25)(inputTensor)
        modelInput = Dense(1, activation='sigmoid')(droppedFeatures)
        model = Model(inputTensor, modelInput)

        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        model.fit(
            xTrain,
            yTrain,
            batch_size=batchSize,
            epochs=epochs,
            validation_split=0.2,
        )

        # Clip the probabilities away from 0 and 1 before writing them out.
        pred = model.predict(xTest, verbose=True).clip(min=0.005, max=0.995)

        writeSubmission(pred, numberTest, 'submission%d.csv' % index)
        

## merge features

In [None]:
# Concatenate the per-backbone features along the feature axis. The results go
# into new names so the xTrainMerge / xTestMerge lists and yTrain are left
# untouched and this cell can be re-run without re-running the previous one.
xTrainMerged = np.concatenate(xTrainMerge, axis=1)
xTestMerged = np.concatenate(xTestMerge, axis=1)

# Shuffle features and labels together; validation_split below takes the last
# 20% of the rows as they are passed in.
xTrainShuffled, yTrainShuffled = shuffle(xTrainMerged, yTrain)

# Classifier head on the merged features: dropout followed by one sigmoid unit.
inputTensor = Input(xTrainShuffled.shape[1:])
modelInput = inputTensor
modelInput = Dropout(0.25)(modelInput)
modelInput = Dense(1, activation='sigmoid')(modelInput)
model = Model(inputTensor, modelInput)

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(xTrainShuffled, yTrainShuffled, batch_size=batchSize, epochs=epochs, validation_split=0.2)

pred = model.predict(xTestMerged, verbose=True)
# Clip the probabilities away from 0 and 1 before writing them out.
pred = pred.clip(min=0.005, max=0.995)

writeSubmission(pred, numberTest, 'submissionMergeFeature.csv')

## merge predictions

In [None]:
submission1 = pd.read_csv('submission1.csv')
submission2 = pd.read_csv('submission2.csv')
submission3 = pd.read_csv('submission3.csv')
submissionMergePredict  = pd.read_csv('sample_submission.csv')

# Only merge rows that exist in every file (and at most numberTest of them).
rowCount = min(numberTest, len(submission1), len(submission2), len(submission3),
               len(submissionMergePredict))

# One column per model, one row per test image.
stackedLabels = np.column_stack([
    frame['label'].to_numpy(dtype=float)[:rowCount]
    for frame in (submission1, submission2, submission3)
])
lowestLabel = stackedLabels.min(axis=1)
highestLabel = stackedLabels.max(axis=1)

# Combination rule per row:
#   all models above 0.5 -> take the highest value,
#   all models below 0.5 -> take the lowest value,
#   otherwise            -> take the mean of the three.
mergedLabels = np.where(
    lowestLabel > 0.5,
    highestLabel,
    np.where(highestLabel < 0.5, lowestLabel, stackedLabels.mean(axis=1)),
)

# Vectorized write; the former per-row print (one line per test image) was
# debugging output and has been dropped.
finalLabels = submissionMergePredict['label'].to_numpy(dtype=float, copy=True)
finalLabels[:rowCount] = mergedLabels
submissionMergePredict['label'] = finalLabels

submissionMergePredict.to_csv('submissionMergePredict.csv', index=None)