# Example Pretrained Network

In [1]:
from os.path import join

from keras.applications import VGG16, InceptionV3
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
from keras.models import Model, load_model
from keras.utils.np_utils import to_categorical

import pandas as pd
import csv
import os
import numpy as np
import json

from matplotlib import pyplot as plt
import sys
sys.path.append("../data_preparation/")

from batch_generator import BatchGenerator, BatchSequence

from sklearn.metrics import recall_score, precision_score, f1_score

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## data folder

In [2]:
# Root folder of the dataset, resolved against the notebook's working directory.
input_path = os.path.abspath(os.path.join(os.pardir, os.pardir, 'mlipdata'))

In [3]:
def _read_split(filename):
    """Parse the JSON file ``filename`` located in ``input_path`` and return its content."""
    with open(os.path.join(input_path, filename)) as json_data:
        return json.load(json_data)


def _images_with_labels(split):
    """Join a split's 'images' and 'annotations' records on 'imageId'.

    An inner join is used, so only images that have a matching annotation
    entry end up in the returned DataFrame.
    """
    return pd.merge(pd.DataFrame(split['images']),
                    pd.DataFrame(split['annotations']),
                    on='imageId', how='inner')


# The raw JSON content gets its own names so that `train`, `test` and
# `validation` only ever refer to DataFrames.
train_json = _read_split('train.json')
test_json = _read_split('test.json')
validation_json = _read_split('validation.json')

print('Train No. of images: %d'%(len(train_json['images'])))
print('Test No. of images: %d'%(len(test_json['images'])))
print('Validation No. of images: %d'%(len(validation_json['images'])))

# JSON TO PANDAS DATAFRAME
train = _images_with_labels(train_json)
test = pd.DataFrame(test_json['images'])  # only the image list of the test split is read
validation = _images_with_labels(validation_json)

# The parsed JSON is no longer needed once the frames exist; release it.
del train_json, test_json, validation_json

# Store the image ids as unsigned 32-bit integers in all three frames.
datas = {'Train': train, 'Test': test, 'Validation': validation}
for data in datas.values():
    data['imageId'] = data['imageId'].astype(np.uint32)

Train No. of images: 1014544
Test No. of images: 39706
Validation No. of images: 9897


In [4]:
# Folders that hold the training and validation images.
images_path_train = os.path.abspath(
    os.path.join(os.pardir, os.pardir, 'mlipdata', 'files', 'train'))
images_path_validation = os.path.abspath(
    os.path.join(os.pardir, os.pardir, 'mlipdata', 'files', 'validation'))

# One-hot (multi-label) encoding

# creating binarizer

In [5]:
from sklearn.preprocessing import MultiLabelBinarizer
# Encoder that maps each sample's collection of labels to a fixed-length 0/1 vector.
mlb = MultiLabelBinarizer()

In [6]:
# Pull the per-image label column out of each annotated frame as a NumPy array.
y_train = np.array(train['labelId'])
y_validation = np.array(validation['labelId'])

In [7]:
# Stack training and validation labels end to end (first axis) so the
# binarizer sees every label that occurs in either split.
y_sum = np.concatenate([y_train, y_validation])

In [8]:
# Learn the set of label classes from the combined train + validation labels.
mlb.fit(y_sum)

MultiLabelBinarizer(classes=None, sparse_output=False)

### write to pickle

In [9]:
import pickle

In [10]:
# Persist the fitted binarizer to 'binarizer.pickle' in the current working directory.
with open('binarizer.pickle', 'wb') as pickle_file:
    pickle.dump(mlb, pickle_file)

### load binarizer

In [11]:
# Reload the binarizer from disk.
# NOTE: pickle.load can execute arbitrary code contained in the file, so only
# load pickle files that you created yourself.
with open('binarizer.pickle', 'rb') as pickle_file:
    binarizer = pickle.load(pickle_file)

In [31]:
# Convert the label collections of both splits into binary indicator matrices
# using the reloaded binarizer.
y_train_bin = binarizer.transform(y_train)
y_val_bin = binarizer.transform(y_validation)

In [32]:
# Sizes of the small subsets used for quick experiments.
N_TRAIN_SUBSET = 1000
N_VAL_SUBSET = 500

# Keep only the leading rows of each binarized label matrix.
y_train_bin1000 = y_train_bin[:N_TRAIN_SUBSET]
y_val_bin500 = y_val_bin[:N_VAL_SUBSET]

In [33]:
# Sanity check: number of rows in the validation subset.
len(y_val_bin500)

500

# Network

In [8]:
# Create the training batch generator: it is pointed at the training image
# folder, given the 1000-row label subset and a batch size of 64.
training_gen = BatchGenerator(input_dir=images_path_train, y=y_train_bin1000, batch_size=64)

In [9]:
# InceptionV3 with ImageNet weights, without its classification top,
# configured for 290x290 inputs with 3 channels.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(290,290,3))

In [10]:
# Add a new classification head on top of the pretrained convolutional base.
x = base_model.output
x = GlobalAveragePooling2D()(x) # global average pooling (flatten)
x = Dense(1024, activation='relu')(x) # should be rather large with 228 output labels
#x = Dropout(0.5)(x)
# Multi-label output: sigmoid gives every label its own independent probability,
# which is what the binary cross-entropy loss and the 0.5 decision threshold
# used at evaluation time assume. (A softmax here would force the 228 outputs
# to sum to 1, so at most one label per image could ever exceed 0.5.)
y = Dense(228, activation='sigmoid')(x)

In [129]:
# Assemble the full network: input of the pretrained base -> output of the new head.
model = Model(inputs=base_model.input, outputs=y)

In [130]:
# Freeze every layer of the pretrained base so that only the newly added
# layers are updated during training.
for layer in base_model.layers:
    layer.trainable = False

In [133]:
# Use binary cross-entropy instead of categorical cross-entropy so that each
# of the output labels is penalized independently (multi-label setting).
model.compile(optimizer='adam', loss='binary_crossentropy')

In [None]:
# 100 steps per epoch x batch size 64 (set on the generator) = 6,400 images per epoch,
# trained for 10 epochs.
model.fit_generator(training_gen, steps_per_epoch=100, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10

In [None]:
# Save the trained model to an HDF5 file in the current working directory.
model.save('./inceptionV3.h5')

# Test the models

### VGG16

In [34]:
# Build a batch sequence over the validation images for the 500-row label subset,
# load the saved VGG16 model from disk and predict on the whole sequence.
# NOTE(review): batch_size is 100 here but 128 in the other evaluation cells.
# NOTE: `model` is rebound to the loaded network.
predict_gen = BatchSequence(input_dir=images_path_validation, y=y_val_bin500, batch_size=100)
model = load_model('vgg16/vgg16.h5')
predictions = model.predict_generator(predict_gen, verbose=1)



In [35]:
# Ground truth is the validation label subset; a label counts as predicted
# when its score is strictly above 0.5.
y_true = y_val_bin500
y_pred = (predictions > 0.5).astype(int)

# Micro-averaged precision, recall and F1, computed in that order.
pr, rc, f1 = (
    score(y_true, y_pred, average='micro')
    for score in (precision_score, recall_score, f1_score)
)

print("Precision: {} Recall: {} F1: {}".format(pr, rc, f1))

Precision: 0.685393258427 Recall: 0.0315490043962 F1: 0.0603213844252


### VGG19

In [32]:
# Build a batch sequence over the validation images for the 500-row label subset,
# load the saved VGG19 model from disk and predict on the whole sequence.
# NOTE: `model` is rebound to the loaded network.
predict_gen = BatchSequence(input_dir=images_path_validation, y=y_val_bin500, batch_size=128)
model = load_model('vgg19/VGG19.h5')
predictions = model.predict_generator(predict_gen, verbose=1)



In [33]:
# Ground truth is the validation label subset; a label counts as predicted
# when its score is strictly above 0.5.
y_true = y_val_bin500
y_pred = (predictions > 0.5).astype(int)

# Micro-averaged precision, recall and F1, computed in that order.
pr, rc, f1 = (
    score(y_true, y_pred, average='micro')
    for score in (precision_score, recall_score, f1_score)
)

print("Precision: {} Recall: {} F1: {}".format(pr, rc, f1))

Precision: 0.664596273292 Recall: 0.0276700284458 F1: 0.0531281032771


### Xception

In [39]:
# Build a batch sequence over the validation images for the 500-row label subset,
# load the saved Xception model from disk and predict on the whole sequence.
# NOTE: `model` is rebound to the loaded network.
predict_gen = BatchSequence(input_dir=images_path_validation, y=y_val_bin500, batch_size=128)
model = load_model('Xception/xception.h5')
predictions = model.predict_generator(predict_gen, verbose=1)



In [40]:
# Ground truth is the validation label subset; a label counts as predicted
# when its score is strictly above 0.5.
y_true = y_val_bin500
y_pred = (predictions > 0.5).astype(int)

# Micro-averaged precision, recall and F1, computed in that order.
pr, rc, f1 = (
    score(y_true, y_pred, average='micro')
    for score in (precision_score, recall_score, f1_score)
)

print("Precision: {} Recall: {} F1: {}".format(pr, rc, f1))

Precision: 0.394 Recall: 0.0509438841479 F1: 0.0902221204488


### ResNet50

In [41]:
# Build a batch sequence over the validation images for the 500-row label subset,
# load the saved ResNet50 model from disk and predict on the whole sequence.
# NOTE: `model` is rebound to the loaded network.
predict_gen = BatchSequence(input_dir=images_path_validation, y=y_val_bin500, batch_size=128)
model = load_model('ResNet50/ResNet50.h5')
predictions = model.predict_generator(predict_gen, verbose=1)



In [42]:
# Ground truth is the validation label subset; a label counts as predicted
# when its score is strictly above 0.5.
y_true = y_val_bin500
y_pred = (predictions > 0.5).astype(int)

# Micro-averaged precision, recall and F1, computed in that order.
pr, rc, f1 = (
    score(y_true, y_pred, average='micro')
    for score in (precision_score, recall_score, f1_score)
)

print("Precision: {} Recall: {} F1: {}".format(pr, rc, f1))

Precision: 0.42774566474 Recall: 0.0191362813551 F1: 0.0366336633663


### InceptionV3

In [44]:
# Build a batch sequence over the validation images for the 500-row label subset,
# load the saved InceptionV3 model from disk and predict on the whole sequence.
# NOTE(review): the training section saves to './inceptionV3.h5', while this cell
# loads 'inception/inceptionV3.h5' - confirm the file was moved there.
# NOTE: `model` is rebound to the loaded network.
predict_gen = BatchSequence(input_dir=images_path_validation, y=y_val_bin500, batch_size=128)
model = load_model('inception/inceptionV3.h5')
predictions = model.predict_generator(predict_gen, verbose=1)



In [45]:
# Ground truth is the validation label subset; a label counts as predicted
# when its score is strictly above 0.5.
y_true = y_val_bin500
y_pred = (predictions > 0.5).astype(int)

# Micro-averaged precision, recall and F1, computed in that order.
pr, rc, f1 = (
    score(y_true, y_pred, average='micro')
    for score in (precision_score, recall_score, f1_score)
)

print("Precision: {} Recall: {} F1: {}".format(pr, rc, f1))

Precision: 0.260521042084 Recall: 0.0336177915697 F1: 0.0595510765002


### MobileNet

In [34]:
# Placeholder cell: it only prints the integers 0-9; no MobileNet model is
# loaded or evaluated here.
# TODO: replace with the actual MobileNet evaluation or remove this section.
for i in range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9
