# Train multi-class classifier.

We train another set of classifiers that are used for classifications.  
These classifiers are trained using similar images for each target class; similarities between classes are computed in *classifier_similarity.ipynb*.

## Set up

In [None]:
import os
import sys

import numpy as np

import pandas as pd
import glob

In [None]:
# Root directory under which every trained model artifact is stored.
BASE_MODEL_PATH="trained_model"
# Create it with the standard library rather than the `%mkdir -p` shell magic so
# the cell also runs outside IPython and on platforms without `mkdir -p`;
# exist_ok=True keeps re-running the cell harmless.
os.makedirs(BASE_MODEL_PATH, exist_ok=True)

In [None]:
# Directory that receives the files of the multi-class model.
SAVE_MODEL_PATH="{}/multiclass".format(BASE_MODEL_PATH)
# os.makedirs replaces the `%mkdir -p` shell magic: it creates missing parent
# directories as well and, with exist_ok=True, tolerates an existing directory.
os.makedirs(SAVE_MODEL_PATH, exist_ok=True)

## Data preparation

In [None]:
from models.modelutils import dir2filedict, split_fdict

Load category and file path information.

In [None]:
# Scan the "data" directory with the project helper dir2filedict.
# Presumably the result maps category name -> list of image file paths
# (it is used that way further down) — confirm in models.modelutils.
fdict = dir2filedict("data")

In [None]:
# Category names in sorted order (iterating a dict yields its keys).
categories = sorted(fdict)

Split data into {train, validation, test} datasets.

In [None]:
# First split: separate the test portion from everything else.
# NOTE(review): test_size=0.2 is presumably a fraction and random_state a seed,
# as in scikit-learn — confirm in models.modelutils.split_fdict.
trdict, testdict = split_fdict(fdict, test_size=0.2, random_state = 123)

In [None]:
# Second split: carve the validation portion out of the remaining training data.
# trdict is rebound to what is left for training.
trdict, valdict = split_fdict(trdict, test_size=0.2, random_state = 456)

In [None]:
# Peek at the first five validation entries of the 'clouds' category.
valdict['clouds'][:5]

### Copy image files into temporary directories

Keras `ImageDataGenerator.flow_from_directory` expects a directory containing one subdirectory per class, so the images of each dataset are copied into a temporary directory with that structure.

In [None]:
import tempfile
import shutil

In [None]:
# One scratch directory per dataset split, created in train / validation / test
# order. The TemporaryDirectory objects are kept in these names; the path of each
# directory is available through its .name attribute.
tmp_train_dir, tmp_valid_dir, tmp_test_dir = (
    tempfile.TemporaryDirectory() for _ in range(3)
)

In [None]:
def copy_images(tmp_dir, data_dict):
    """Copy every image listed in ``data_dict`` into ``tmp_dir``, one subdirectory per category.

    The resulting layout is ``<tmp_dir.name>/<category>/<image file name>``.

    Args:
        tmp_dir: Object whose ``name`` attribute is the destination root
            directory (for example a ``tempfile.TemporaryDirectory``).
        data_dict: Mapping from category name to an iterable of image file paths.

    A category without images still gets its (empty) subdirectory. Files are
    copied with ``shutil.copy2``, so a destination file with the same name is
    overwritten.
    """
    for cat, img_paths in data_dict.items():
        cat_dir = os.path.join(tmp_dir.name, str(cat))
        os.makedirs(cat_dir, exist_ok=True)
        for img_path in img_paths:
            # os.path.basename understands the platform's path separators,
            # unlike splitting on "/" by hand.
            img_name = os.path.basename(img_path)
            shutil.copy2(img_path, os.path.join(cat_dir, img_name))

In [None]:
%%time
# Copy the training images into their temporary directory tree;
# the %%time cell magic reports how long the copy takes.
copy_images(tmp_train_dir, trdict)

In [None]:
%%time
# Copy the validation images into their temporary directory tree;
# the %%time cell magic reports how long the copy takes.
copy_images(tmp_valid_dir, valdict)

In [None]:
%%time
# Copy the test images into their temporary directory tree;
# the %%time cell magic reports how long the copy takes.
copy_images(tmp_test_dir, testdict)

### Create ImageDataGenerator

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Side length in pixels; the generators below resize every image to
# IMG_SIZE x IMG_SIZE.
IMG_SIZE = 256
# Batch size of the training and validation generators (the test generator
# uses 1); also used to derive the number of steps per epoch during training.
BATCH_SIZE = 32

In [None]:
# Training data: pixel values are multiplied by 1/255; no augmentation is configured.
TRAIN_DATAGEN = ImageDataGenerator(rescale=1. / 255)

# Stream batches from the class-per-subdirectory tree of the training split.
# class_mode='sparse' pairs with the sparse cross-entropy loss used at compile time.
TRAIN_GENERATOR = TRAIN_DATAGEN.flow_from_directory(
    directory=tmp_train_dir.name,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Validation data: same preprocessing as training (pixel values multiplied by 1/255).
VALID_DATAGEN = ImageDataGenerator(rescale=1. / 255)

# Stream batches from the class-per-subdirectory tree of the validation split.
VALID_GENERATOR = VALID_DATAGEN.flow_from_directory(
    directory=tmp_valid_dir.name,
    batch_size=BATCH_SIZE,
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
)

In [None]:
# Test data: same preprocessing as training (pixel values multiplied by 1/255).
TEST_DATAGEN = ImageDataGenerator(rescale=1. / 255)

# Stream the test split one image at a time (batch_size=1), so the number of
# batches equals the number of test images.
# NOTE(review): shuffle is left at the flow_from_directory default — confirm
# that is acceptable if predictions must later be matched to file names.
TEST_GENERATOR = TEST_DATAGEN.flow_from_directory(
    directory=tmp_test_dir.name,
    batch_size=1,
    class_mode='sparse',
    target_size=(IMG_SIZE, IMG_SIZE),
)

## Train multi-class classifier and save it

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.models import Model, model_from_json
from keras.layers import Dense, GlobalAveragePooling2D
from keras import optimizers

In [None]:
# Backbone: InceptionV3 with ImageNet weights and without its classification top.
base_model = InceptionV3(weights='imagenet', include_top=False)

# New head: global average pooling -> 1024-unit ReLU layer -> softmax with one
# output per class found by the training generator.
pooled = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(pooled)
predictions = Dense(TRAIN_GENERATOR.num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Freeze the first len(base_model.layers) layers of `model` (the backbone part)
# and leave every layer after them trainable, so only the new head is trained.
n_base = len(base_model.layers)
for idx, layer in enumerate(model.layers):
    layer.trainable = idx >= n_base

In [None]:
# Adam optimizer configured with lr=0.001 and decay=0.01.
# NOTE(review): `lr` and `decay` are the legacy Keras argument names; newer
# Keras releases expect `learning_rate` — confirm against the installed version.
optimizer = optimizers.Adam(lr=0.001, decay=0.01)

In [None]:
# The sparse variant of categorical cross-entropy is used because the data
# generators are created with class_mode='sparse'; accuracy is tracked as the
# only additional metric.
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=["accuracy"])

Execute training.

In [None]:
# Number of batches needed to visit every sample once. Ceiling division is used
# (as the Keras documentation recommends) so that the final, smaller batch is
# not dropped and a dataset smaller than BATCH_SIZE still yields one step
# instead of zero.
train_steps = (TRAIN_GENERATOR.n + BATCH_SIZE - 1) // BATCH_SIZE
valid_steps = (VALID_GENERATOR.n + BATCH_SIZE - 1) // BATCH_SIZE

# Train the new head for 5 epochs, validating after each epoch.
model.fit_generator(
    generator=TRAIN_GENERATOR,
    steps_per_epoch=train_steps,
    epochs=5,
    verbose=1,
    validation_data=VALID_GENERATOR,
    validation_steps=valid_steps,
)

Save the trained classifier.

In [None]:
import json

In [None]:
# Three files are written under SAVE_MODEL_PATH: the weights, the architecture
# and the class-name -> index mapping of the training generator.
weights_path = '{}/multiclass.h5'.format(SAVE_MODEL_PATH)
architecture_path = "{}/multiclass.json".format(SAVE_MODEL_PATH)
labels_path = "{}/multiclass-labels.json".format(SAVE_MODEL_PATH)

model.save_weights(weights_path)

with open(architecture_path, 'w') as f:
    # model.to_json() returns a JSON *string*; it is parsed first so that
    # json.dump writes the structure itself rather than a quoted string.
    json.dump(json.loads(model.to_json()), f)

with open(labels_path, 'w') as f:
    json.dump(TRAIN_GENERATOR.class_indices, f)

## Evaluate the trained model on a simple classification experiment

We evaluate the trained classifier on plain 16-class classification using the test dataset.  
This evaluation is not related to our paper.

In [None]:
%%time

model.evaluate_generator(
    TEST_GENERATOR
    , steps=TEST_GENERATOR.n
)

The output is `[loss, accuracy]`: the left value is the loss and the right value is the accuracy.