# Making prediction of models for ensemble
--Sophia Andaloro, Shixiao Liang, Hongyi Liu (Apr 2020)

Prepared by Shixiao Liang

This notebook loads trained models from ./models, makes predictions on the validation set and the test set, and saves them for the ensemble. We ran different cells of this notebook for different models, because we reset the GPU and loaded the next model by restarting the Python kernel. As a result, this notebook cannot produce all of the files within a single run.

We loaded 4 models and made predictions on the validation-set and test-set images. Predictions were made with both the original images and augmented images (zoom range (0.9, 1.1) and random horizontal flip).

## Import packages and read info of validation set and test set

In [21]:
import os

# Restrict this process to a single GPU: devices are enumerated in PCI bus
# order and only the device with index "0" is made visible.
# These variables are set before any Keras import further down the notebook.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [22]:
import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np
# from scipy.misc import imresize
import matplotlib.image as mpimg
%matplotlib inline

import os
from os import listdir
from os.path import isfile, join
import shutil
import stat
import collections
from collections import defaultdict

from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets

import h5py
from sklearn.model_selection import train_test_split
from sklearn import model_selection
from keras.utils.np_utils import to_categorical
from keras.applications.inception_v3 import preprocess_input
from keras.models import load_model
from keras.callbacks import ModelCheckpoint

In [23]:
# Ignore  the warnings
import warnings
# warnings.filterwarnings('always')
# warnings.filterwarnings('ignore')

# data visualisation and manipulation
import numpy as np
import pandas as pd
# from matplotlib import style
# import seaborn as sns

# style.use('fivethirtyeight')
# sns.set(style='whitegrid',color_codes=True)

import os                    
from PIL import Image
from sklearn.utils import shuffle
import sklearn as sklearn

In [24]:
import keras
from keras.models import Sequential
from keras.layers import Dense
import keras.layers as layers
import keras.preprocessing as pp

In [26]:
# from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions


In [27]:
# Number of label classes in the data set (the generators report 251 classes).
n_classes = 251

In [28]:
# Image folders for the labelled splits.
train_dir = './input/train_set/'
val_dir = './input/val_set/'

# Label tables: one row per image with its file name and class label.
train_df = pd.read_csv('./input/train_labels.csv')
val_df = pd.read_csv('./input/val_labels.csv')

# Full path of every image, built from its folder and file name.
train_df['path'] = [os.path.join(train_dir, img_name) for img_name in train_df['img_name']]
val_df['path'] = [os.path.join(val_dir, img_name) for img_name in val_df['img_name']]

# Labels are stored as strings because the generators below are created with
# class_mode="categorical" on the label column.
train_df['label'] = train_df['label'].astype(str)
val_df['label'] = val_df['label'].astype(str)

In [None]:
# Validation frame without the helper 'path' column, ordered by file name so
# that its row order is the order in which the (unshuffled) generator yields
# images and therefore the row order of the saved predictions.
val = val_df.drop(columns='path').sort_values('img_name')

# Consecutive integer index with one entry per validation image.
new_val_index = np.arange(len(val_df))

val.head()

In [31]:
test_dir = './input/test_set/'

# test_info.csv is a header-less, single-column list of image file names.
# Reading it with header=None keeps the first file name as data; the previous
# approach let pandas consume it as a header and then re-inserted it through a
# hard-coded file name, which would silently break if the file ever changed.
test_df = pd.read_csv('./input/test_info.csv', header=None, names=['img_name'])

# Sort by file name so the row order is a fixed, reproducible order that the
# unshuffled test generator (and hence the saved predictions) will follow.
test = test_df.sort_values('img_name')
test.head()

Unnamed: 0,img_name
4308,test_000000.jpg
4309,test_000001.jpg
4310,test_000002.jpg
4311,test_000003.jpg
4312,test_000004.jpg


In [32]:
from keras.preprocessing.image import ImageDataGenerator


## Making prediction of 256_irv2 model
This model takes a different input shape (256, 256) from the other IRv2 models. As a result, its predictions are produced separately, with their own generators.

In [33]:
# Pre-processing shared by the validation and test generators of the 256x256
# model. The two augmentation options are random and no seed is passed, so
# augmented predictions can differ from run to run.
test_datagen = ImageDataGenerator(
    horizontal_flip=True,  # augmentation; disable to predict on the original image
    zoom_range=0.1,        # augmentation; disable to predict on the original image
    rescale=1/255.,
)

# Validation images, read in the order of the sorted `val` frame (no shuffling)
# together with their one-hot labels.
val_generator = test_datagen.flow_from_dataframe(
    dataframe=val,
    directory="./input/val_set/",
    x_col="img_name",
    y_col="label",
    class_mode="categorical",
    target_size=(256, 256),
    batch_size=32,
    shuffle=False,
)

# Test images, read in the order of the sorted `test` frame (no shuffling).
# There is no label column, hence class_mode=None and no y_col.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test,
    directory="./input/test_set/",
    x_col="img_name",
    class_mode=None,
    target_size=(256, 256),
    batch_size=32,
    shuffle=False,
)

Found 11994 validated image filenames belonging to 251 classes.
Found 118475 validated image filenames belonging to 251 classes.
Found 28377 validated image filenames.


In [14]:
# Load the trained 256x256 Inception-ResNet-v2 checkpoint used for this section.
model_test = load_model(
    filepath='./models/256_irv2_419__reg5.0e-04_lr5.0e-03_do0.40-11-0.69.hdf5'
)

In [15]:
# Build the lookup between original labels and the generator's output columns:
# key_list holds the label strings, val_list the matching column indices.
#
# The lookup is taken from `val_generator`, which is created in this notebook.
# The previous code read it from `train_generator`, which is never defined
# here and raised NameError on a fresh kernel. flow_from_dataframe derives
# class_indices from the set of labels it sees (see the Keras docs), so the
# mapping matches the one used in training only if every class is present in
# the validation set; the assertion guards that assumption.
assert len(val_generator.class_indices) == n_classes, (
    "validation set does not contain every class; "
    "class_indices would not match the training mapping"
)
key_list = list(val_generator.class_indices.keys())
val_list = list(val_generator.class_indices.values())

In [16]:
# Class scores for every validation image; rows follow the unshuffled
# generator order, columns follow the generator's class indices.
irv256_val_pred = model_test.predict_generator(
    val_generator,
    workers=1,
    use_multiprocessing=False,
    verbose=1,
)



In [17]:
# Re-order the prediction columns so that column i holds the score of original
# label i (the generator's column order follows its own class_indices instead).
# The loop variables are deliberately NOT named `val`: that name is the sorted
# validation DataFrame used by the generator cells, and overwriting it made
# those cells fail when re-run in the same kernel.
irv256_val_pred_mapped = np.zeros(irv256_val_pred.shape)
for class_label, generator_col in zip(key_list, val_list):
    irv256_val_pred_mapped[:, int(class_label)] = irv256_val_pred[:, generator_col]

# np.save('./predictions/irv256_val_pred_mapped_aug', irv256_val_pred_mapped)

In [18]:
# Class scores for every test image; rows follow the unshuffled generator
# order, columns follow the generator's class indices.
irv256_test_pred = model_test.predict_generator(
    test_generator,
    workers=1,
    use_multiprocessing=False,
    verbose=1,
)



In [19]:
# Re-order the prediction columns so that column i holds the score of original
# label i (the generator's column order follows its own class_indices instead).
# The loop variables are deliberately NOT named `val`: that name is the sorted
# validation DataFrame used by the generator cells, and overwriting it made
# those cells fail when re-run in the same kernel.
irv256_test_pred_mapped = np.zeros(irv256_test_pred.shape)
for class_label, generator_col in zip(key_list, val_list):
    irv256_test_pred_mapped[:, int(class_label)] = irv256_test_pred[:, generator_col]

# np.save('./predictions/irv256_test_pred_mapped_aug', irv256_test_pred_mapped)

## Making prediction of other IRv2 models
These models have the same input shape (299,299) and can therefore share the same generators.

In [34]:
# Pre-processing shared by the validation and test generators of the 299x299
# models: pixel rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1/255.)

# Validation images, read in the order of the sorted `val` frame (no shuffling)
# together with their one-hot labels.
val_generator = test_datagen.flow_from_dataframe(
    dataframe=val,
    directory="./input/val_set/",
    x_col="img_name",
    y_col="label",
    class_mode="categorical",
    target_size=(299, 299),
    batch_size=32,
    shuffle=False,
)

# Test images, read in the order of the sorted `test` frame (no shuffling).
# There is no label column, hence class_mode=None and no y_col.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test,
    directory="./input/test_set/",
    x_col="img_name",
    class_mode=None,
    target_size=(299, 299),
    batch_size=32,
    shuffle=False,
)

Found 11994 validated image filenames belonging to 251 classes.
Found 118475 validated image filenames belonging to 251 classes.
Found 28377 validated image filenames.


In [14]:
# Load one of the 299x299 checkpoints. To predict with another model, restart
# the kernel and substitute one of the other two checkpoint files:
#   299_irv2_419__reg5.0e-04_lr3.0e-03_do0.40-09-0.71.hdf5
#   423_irv28by8_quiet_reg1.0e-05_lr4.0e-03_do0.40-14-0.69.hdf5
model_test = load_model(
    filepath='./models/424_lasttry_2_reg5.0e-04_lr1.0e-03_do0.30-101-0.71.hdf5'
)

In [35]:
# Build the lookup between original labels and the generator's output columns:
# key_list holds the label strings, val_list the matching column indices.
#
# The lookup is taken from `val_generator`, which is created in this notebook.
# The previous code read it from `train_generator`, which is never defined
# here and raised NameError on a fresh kernel. flow_from_dataframe derives
# class_indices from the set of labels it sees (see the Keras docs), so the
# mapping matches the one used in training only if every class is present in
# the validation set; the assertion guards that assumption.
assert len(val_generator.class_indices) == n_classes, (
    "validation set does not contain every class; "
    "class_indices would not match the training mapping"
)
key_list = list(val_generator.class_indices.keys())
val_list = list(val_generator.class_indices.values())

In [36]:
# Class scores for every validation image; rows follow the unshuffled
# generator order, columns follow the generator's class indices.
irv424_val_pred = model_test.predict_generator(
    val_generator,
    workers=1,
    use_multiprocessing=False,
    verbose=1,
)



In [37]:
# Re-order the prediction columns so that column i holds the score of original
# label i (the generator's column order follows its own class_indices instead).
# The loop variables are deliberately NOT named `val`: that name is the sorted
# validation DataFrame used by the generator cells, and overwriting it made
# those cells fail when re-run in the same kernel.
irv424_val_pred_mapped = np.zeros(irv424_val_pred.shape)
for class_label, generator_col in zip(key_list, val_list):
    irv424_val_pred_mapped[:, int(class_label)] = irv424_val_pred[:, generator_col]

# np.save('./irv424_val_pred_mapped', irv424_val_pred_mapped)

In [38]:
# Class scores for every test image; rows follow the unshuffled generator
# order, columns follow the generator's class indices.
irv424_test_pred = model_test.predict_generator(
    test_generator,
    workers=1,
    use_multiprocessing=False,
    verbose=1,
)



In [39]:
# Re-order the prediction columns so that column i holds the score of original
# label i (the generator's column order follows its own class_indices instead).
# The loop variables are deliberately NOT named `val`: that name is the sorted
# validation DataFrame used by the generator cells, and overwriting it made
# those cells fail when re-run in the same kernel.
irv424_test_pred_mapped = np.zeros(irv424_test_pred.shape)
for class_label, generator_col in zip(key_list, val_list):
    irv424_test_pred_mapped[:, int(class_label)] = irv424_test_pred[:, generator_col]

# np.save('./irv424_test_pred_mapped', irv424_test_pred_mapped)