In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from os.path import join
import sys
sys.path.append("../data_preparation/")

In [3]:
# import csv
import json
import pickle
# from matplotlib import pyplot as plt
import numpy as np
import pandas as pd

In [4]:
import cPickle
import gzip
from tensorflow.python.lib.io import file_io

  from ._conv import register_converters as _register_converters


In [5]:
# import keras
# from keras.applications import Xception, VGG16, VGG19, ResNet50, InceptionV3
from keras.layers import Dense, concatenate#,GlobalAveragePooling2D, Dropout, Input
from keras.models import Model, load_model
# from keras.utils.np_utils import to_categorical

Using TensorFlow backend.


In [6]:
from sklearn.metrics import recall_score, precision_score, f1_score

In [7]:
from batch_generator import BatchGenerator, BatchSequence

# Multiple Batch Generator

In [33]:
def multiple_batch_generator(generator_num, **kwargs):
    r"""Yield batches shaped for a Keras model with several input layers.

    A model assembled with :func:`keras.layers.concatenate` over several
    sub-models expects a *list* of input arrays, one per input layer.
    :class:`batch_generator.BatchGenerator` yields a single
    ``(features, labels)`` tuple, which does not fit such a model. This
    wrapper creates one ``BatchGenerator`` per input and bundles their
    feature arrays into the list the model needs.

    Arguments:
        generator_num {int} -- number of generators to create (one per
            model input); must be at least 1
        \**kwargs -- forwarded unchanged to every
            :class:`batch_generator.BatchGenerator`

    Yields:
        ([ndarray,...,ndarray], ndarray) -- a tuple whose first item is the
            list of feature arrays (one per generator) and whose second item
            is the label array taken from the first generator

    Raises:
        ValueError -- if ``generator_num`` is smaller than 1 (raised when the
            first batch is requested, because this is a generator function)
    """
    if generator_num < 1:
        raise ValueError(
            'generator_num must be at least 1, got {!r}'.format(generator_num))

    generators_list = [BatchGenerator(**kwargs) for _ in range(generator_num)]

    while True:
        # Built-in next() works on Python 2 and 3 alike; the ``.next()``
        # method only exists on Python 2 iterators.
        Xy_list = [next(gen) for gen in generators_list]
        # NOTE(review): labels are taken from the first generator only, which
        # assumes all generators yield samples in the same order -- confirm
        # that BatchGenerator does not shuffle independently per instance.
        yield [Xy[0] for Xy in Xy_list], Xy_list[0][1]

## Model Initializations

### VGG16

In [10]:
# Load the saved VGG16 network (architecture and weights) from disk.
model_vgg16 = load_model('Pretrained-networks/vgg16/vgg16_3000.h5')

In [11]:
# Freeze every VGG16 layer and append a "_vgg16" suffix to its name
# (presumably so layer names stay unique once the networks are combined).
for layer in model_vgg16.layers:
    layer.trainable = False
    layer.name = '{}_{}'.format(layer.name, 'vgg16')

In [12]:
# Output tensor of the VGG16 network; it is one of the tensors joined in the
# concatenation step.
vgg16_out = model_vgg16.output

### VGG19

In [13]:
# Load the saved VGG19 network (architecture and weights) from disk.
model_vgg19 = load_model('Pretrained-networks/vgg19/VGG19.h5')

In [14]:
# Freeze every VGG19 layer and append a "_vgg19" suffix to its name
# (presumably so layer names stay unique once the networks are combined).
for layer in model_vgg19.layers:
    layer.trainable = False
    layer.name = '{}_{}'.format(layer.name, 'vgg19')

In [15]:
# Output tensor of the VGG19 network; it is one of the tensors joined in the
# concatenation step.
vgg19_out = model_vgg19.output

### Xception

In [15]:
# Load the saved Xception network (architecture and weights) from disk.
model_xception = load_model('Pretrained-networks/Xception/xception.h5')

In [16]:
# Freeze every Xception layer and append a "_xception" suffix to its name
# (presumably so layer names stay unique once the networks are combined).
for layer in model_xception.layers:
    layer.trainable = False
    layer.name = '{}_{}'.format(layer.name, 'xception')

In [17]:
# Output tensor of the Xception network.
# NOTE(review): not referenced by the concatenation cell visible in this
# notebook -- confirm whether Xception is meant to be part of the ensemble.
xception_out = model_xception.output

### InceptionV3

In [18]:
# Load the saved InceptionV3 network (architecture and weights) from disk.
model_inception = load_model('Pretrained-networks/inception/inceptionV3.h5')

In [19]:
# Freeze every InceptionV3 layer and append an "_inception" suffix to its name
# (presumably so layer names stay unique once the networks are combined).
for layer in model_inception.layers:
    layer.trainable = False
    layer.name = '{}_{}'.format(layer.name, 'inception')

In [20]:
# Output tensor of the InceptionV3 network.
# NOTE(review): not referenced by the concatenation cell visible in this
# notebook -- confirm whether InceptionV3 is meant to be part of the ensemble.
inception_out = model_inception.output

### ResNet50

In [16]:
# Load the saved ResNet50 network (architecture and weights) from disk.
model_resnet50 = load_model('Pretrained-networks/ResNet50/ResNet50.h5')

In [17]:
# Freeze every ResNet50 layer and append a "_resnet50" suffix to its name
# (presumably so layer names stay unique once the networks are combined).
for layer in model_resnet50.layers:
    layer.trainable = False
    layer.name = '{}_{}'.format(layer.name, 'resnet50')

In [18]:
# Output tensor of the ResNet50 network; it is one of the tensors joined in
# the concatenation step.
resnet50_out = model_resnet50.output

## Concatenation

In [19]:
# Join the output tensors of VGG16, VGG19 and ResNet50 into a single tensor.
# NOTE(review): the Xception and InceptionV3 outputs are not included here --
# confirm this is intended.
merge_0 = concatenate([vgg16_out, vgg19_out, resnet50_out])

In [20]:
# Final layer of the ensemble: 228 sigmoid units on top of the merged tensor
# (presumably one unit per label -- confirm against the label set).
output = Dense(228, activation='sigmoid', name='main_output')(merge_0)

In [21]:
# Assemble the ensemble: one input per sub-network, a single shared output.
model = Model(
    inputs=[
        model_vgg16.input,
        model_vgg19.input,
        model_resnet50.input,
    ],
    outputs=output,
)

# Fit

In [22]:
# Root directory of the dataset, resolved to an absolute path from the
# notebook's current working directory.
input_path = os.path.abspath('../../mlipdata/')

In [23]:
# Directory that holds the training images.
images_path_train = os.path.join(input_path, 'files/train/')

# Restore the pickled binarizer object.
# NOTE(review): pickle.load can execute arbitrary code -- only load files from
# a trusted source.
# NOTE(review): `binarizer` is only referenced from commented-out lines in the
# cells visible here.
with open('pickles/binarizer.pickle', 'rb') as pickle_file:
    binarizer = pickle.load(pickle_file)

In [24]:
# Read the gzip-compressed, pickled training label rows.
# NOTE(review): unpickling can execute arbitrary code -- only load files from
# a trusted source.
with file_io.FileIO('../../mlipdata/server_train.pickle', mode='rb') as fp:
    # Give the GzipFile its own context manager so the decompressor is closed
    # as soon as the data is loaded (it was previously left open).
    with gzip.GzipFile(fileobj=fp) as data:
        y_train = cPickle.load(data)


In [25]:
# Drop the first element of every row and stack the rest into one array
# (presumably the first element is an image id and the rest are the label
# indicators -- confirm against the pickle's layout).
# NOTE(review): this cell overwrites `y_train` with a function of itself, so
# running it twice strips a second column; reload the pickle before re-running.
y_train = np.array([j[1:] for j in y_train])


In [26]:
# Load the training metadata (image URLs and annotations) from train.json.
# NOTE(review): every frame built in this cell is deleted again at the end and
# the label lines are commented out, so none of the loaded data is kept.
with open(os.path.join(input_path, 'train.json')) as json_data:
    train = json.load(json_data)

train_img_url = pd.DataFrame(train['images'])
train_ann = pd.DataFrame(train['annotations'])

# One row per image that has both a URL entry and an annotation entry.
train = pd.merge(train_img_url, train_ann, on='imageId', how='inner')
train['imageId'] = train['imageId'].astype(np.uint32)

#y_train = np.array(train.labelId)
#y_train_bin = binarizer.transform(y_train)

del train_img_url, train_ann, train
#del y_train

#### fit_generator with multiple inputs

In [27]:
# Training hyper-parameters.
batch_size = 64
epochs = 1
# Number of whole batches that fit into the first 1000 label rows; a partial
# trailing batch is dropped by the floor division.
steps = len(y_train[:1000]) // batch_size

In [28]:
# Binary cross-entropy penalizes each output unit independently, which a
# categorical loss would not do.
model.compile(loss='binary_crossentropy', optimizer='adam')

In [34]:
# Number of input layers of the ensemble; one batch generator is created per
# input layer.
input_num = len(model.input_layers)

In [35]:
# Training generator that feeds every model input, built from the first 1000
# label rows.
train_gen_multi = multiple_batch_generator(
    generator_num=input_num,
    input_dir=images_path_train,
    y=y_train[:1000],
    batch_size=batch_size,
)

In [None]:
# Train the ensemble: `epochs` epochs of `steps` generator batches each.
model.fit_generator(train_gen_multi, steps_per_epoch=steps, epochs=epochs)

Epoch 1/1

### Save the model

In [None]:
# Persist the trained ensemble next to the notebook.
model.save('./ensemble_model_1000.h5')

### Load the ensemble model

In [None]:
# Reload the saved ensemble; this rebinds `model` to the version stored on disk.
model = load_model('./ensemble_model_1000.h5')

# Validation

In [None]:
# Root directory of the dataset, resolved to an absolute path from the
# notebook's current working directory.
input_path = os.path.abspath('../../mlipdata/')

# Restore the pickled binarizer object.
# NOTE(review): pickle.load can execute arbitrary code -- only load files from
# a trusted source.
with open('pickles/binarizer.pickle', 'rb') as pickle_file:
    binarizer = pickle.load(pickle_file)

In [None]:
# Directory that holds the validation images.
images_path_validation = os.path.join(input_path, 'files/validation/')

In [None]:
# Read the gzip-compressed, pickled validation label rows.
# NOTE(review): unpickling can execute arbitrary code -- only load files from
# a trusted source.
with file_io.FileIO('../../mlipdata/server_validation.pickle', mode='rb') as fp:
    # Give the GzipFile its own context manager so the decompressor is closed
    # as soon as the data is loaded (it was previously left open).
    with gzip.GzipFile(fileobj=fp) as data:
        y_validation = cPickle.load(data)
# Drop the first element of every row and stack the rest into one array
# (presumably the first element is an image id -- confirm against the pickle).
y_validation = np.array([j[1:] for j in y_validation])

In [None]:
# Load the validation metadata (image URLs and annotations) from
# validation.json.
# NOTE(review): every frame built in this cell is deleted again at the end and
# the label lines are commented out, so none of the loaded data is kept.
with open(os.path.join(input_path, 'validation.json')) as json_data:
    validation = json.load(json_data)

validation_img_url = pd.DataFrame(validation['images'])
validation_ann = pd.DataFrame(validation['annotations'])

# One row per image that has both a URL entry and an annotation entry.
validation = pd.merge(validation_img_url, validation_ann, on='imageId', how='inner')
validation['imageId'] = validation['imageId'].astype(np.uint32)

#y_validation = np.array(validation.labelId)
#y_validation_bin = binarizer.transform(y_validation)

del validation_img_url, validation_ann, validation

#### predict_generator with multiple inputs

In [None]:
# Batch size used for prediction (this rebinds the training `batch_size`).
batch_size = 50
# Number of whole batches that fit into the first 500 validation rows.
val_steps = len(y_validation[:500]) // batch_size

In [None]:
# Number of input layers of the (reloaded) ensemble; one batch generator is
# created per input layer.
input_num = len(model.input_layers)

In [None]:
# Validation generator that feeds every model input, built from the first 500
# validation label rows.
val_generator_multi = multiple_batch_generator(
    generator_num=input_num,
    input_dir=images_path_validation,
    y=y_validation[:500],
    batch_size=batch_size,
)

In [None]:
# Run the ensemble over `val_steps` batches drawn from the validation generator.
# NOTE(review): the scores are later compared row-by-row with
# y_validation[:500], which assumes the generator yields samples in label
# order -- confirm BatchGenerator does not shuffle.
predictions = model.predict_generator(val_generator_multi, steps=val_steps, verbose=1)

In [None]:
# Sanity check: number of prediction rows produced.
len(predictions)

In [None]:
# Ground-truth rows matching the predictions. `val_steps` is floored, so
# `predictions` can hold fewer rows than the 500-row slice whenever 500 is not
# a multiple of the batch size; trimming keeps both arrays the same length so
# the sklearn metrics do not fail on inconsistent sample counts.
y_true = y_validation[:500][:len(predictions)]
# Threshold the sigmoid scores at 0.5 to obtain binary label decisions.
y_pred = (predictions > 0.5).astype(int)

In [None]:
# Micro-averaged precision, recall and F1 over all label decisions.
pr, rc, f1 = [
    metric(y_true, y_pred, average='micro')
    for metric in (precision_score, recall_score, f1_score)
]

print("Precision: {} Recall: {} F1: {}".format(pr, rc, f1))