# Train second level classifier

In [1]:
import os
import sys

import numpy as np

import pandas as pd
import glob

In [2]:
# Directory that model files are saved under; passed to ModelCompiler and
# SecondLevelModelTrainer in later cells.
BASE_MODEL_PATH="trained_model"
# Create the directory in pure Python instead of the `%mkdir -p` shell alias so
# the cell behaves the same on every platform; exist_ok makes re-runs harmless.
os.makedirs(BASE_MODEL_PATH, exist_ok=True)

In [3]:
from models.modelutils import ModelCompiler

Using TensorFlow backend.


In [4]:
# ModelCompiler is constructed with the model directory; later cells call its
# generate_compiled_model(path) to obtain the model to train.
compiler = ModelCompiler(BASE_MODEL_PATH)

In [5]:
from models.processor import create_generators

# create_generators() returns a pair, unpacked here as the training and the
# validation generator; both are handed to OneVsAllModelTrainer below.
TRAIN_DATAGEN, VALID_DATAGEN = create_generators()

In [6]:
from models.modelutils import dir2filedict, split_fdict
import random

In [7]:
# Index the "data" directory. Presumably a mapping of category name -> list of
# file paths (its keys are used as the category list below) — TODO confirm
# against models.modelutils.dir2filedict.
fdict = dir2filedict("data")

In [8]:
# Category names in sorted order, so positional indexing (categories[0], ...)
# is stable between runs.
categories = sorted(fdict.keys())

In [9]:
# Seed Python's `random` module immediately before splitting so the split is
# repeatable — presumably split_fdict draws from `random`; TODO confirm.
random.seed(123)
# split_fdict returns a pair, unpacked as the training and validation file dicts.
trdict, valdict = split_fdict(fdict)

In [10]:
from models.one_vs_all import OneVsAllModelTrainer
from models.modelutils import split_files

In [11]:
# One trainer instance shared by every training run in this notebook; each run
# re-targets it with set_model / set_savepath / a dataset setter.
trainer = OneVsAllModelTrainer(TRAIN_DATAGEN, VALID_DATAGEN)

In [12]:
from models.one_vs_all import FilesPair, TrValFiles

In [13]:
class SecondLevelModelTrainer:
    """Train "second level" binary classifiers for one category at a time.

    For a target category the positive examples are the target's own files and
    the negative examples come only from a caller-supplied set of other
    categories (instead of from every other category).  Building the model and
    running the training loop are delegated to the injected ``compiler`` and
    ``trainer`` objects.
    """

    def __init__(self, base_model_name, basedir, trainer, compiler):
        """Store the collaborators and naming settings.

        Args:
            base_model_name: Prefix of every model file name.
            basedir: Directory that model paths are built under.
            trainer: Object that runs the training.  The methods used here are
                ``set_model``, ``set_savepath``, ``set_dataset``,
                ``train_model`` and ``remove_checkpoint``; its ``retrainings``
                attribute is assigned by ``train_second_level``.
            compiler: Object providing ``generate_compiled_model(path)``.
        """
        self.base_model_name = base_model_name
        self.basedir = basedir

        self.compiler = compiler
        self.trainer = trainer

    def setup_filedict(self, train_files_dict, valid_files_dict):
        """Register the per-category file dicts used for training and validation.

        Both arguments are indexed by category key and must yield an iterable
        of file paths for every key that is later trained on.
        """
        self.train_files_dict = train_files_dict
        self.valid_files_dict = valid_files_dict
        # Second reference to the same dict object (an alias, not a copy).
        self.valid_files_dict_org = self.valid_files_dict

    def _model_path(self, target_key):
        """Return ``<basedir>/<base_model_name>_<target_key>``."""
        return os.path.join(self.basedir, "{}_{}".format(self.base_model_name, target_key))

    def _split_by_set(self, target_key, false_keyset, files_dict):
        """Build a FilesPair: files of ``target_key`` vs. files of ``false_keyset``.

        The negative keys are visited in sorted order so the resulting file
        list does not depend on set iteration order (which varies between
        interpreter runs for strings).
        """
        trues = files_dict[target_key]
        falses = [path for key in sorted(false_keyset) for path in files_dict[key]]
        return FilesPair(trues, falses)

    def _split_files(self, targetkey, files_dict):
        """Wrap ``split_files(targetkey, files_dict)`` in a FilesPair.

        Not called by the other methods of this class.
        """
        return FilesPair(*split_files(targetkey, files_dict))

    def train_second_level(self, target_key, highcat_keyset, eachepochs=10, retrainings=1, removecheckpoint=True):
        """Train the second-level model that separates ``target_key`` from its look-alikes.

        Args:
            target_key: Category whose files are the positive examples.
            highcat_keyset: Iterable of category keys considered similar to the
                target.  It may contain ``target_key`` itself; that key is
                removed before the negative set is built.
            eachepochs: Forwarded to ``trainer.train_model``.
            retrainings: Assigned to ``trainer.retrainings`` before training.
            removecheckpoint: When true, ``trainer.remove_checkpoint()`` is
                called after training.

        Raises:
            ValueError: If no category other than the target is left to serve
                as negative examples.
        """
        self.trainer.retrainings = retrainings
        # Remove the target as a single element.  set(target_key) would split the
        # string into characters ("bay" -> {"b", "a", "y"}), leaving the target in
        # the negative set so its own files were used as negatives too.
        falseset = set(highcat_keyset) - {target_key}
        if not falseset:
            raise ValueError(
                "no category other than {!r} to use as negative examples".format(target_key))
        trs = self._split_by_set(target_key, falseset, self.train_files_dict)
        vals = self._split_by_set(target_key, falseset, self.valid_files_dict)
        trvals = TrValFiles(trs, vals)
        # The "sec_" prefix goes into the model key and therefore into the model path.
        self._train_one_core("sec_"+target_key, trvals, eachepochs, removecheckpoint)

    def _train_one_setup(self, model_key, trvals):
        """Create the model for ``model_key`` and hand model, path and data to the trainer."""
        model_save_path = self._model_path(model_key)

        model = self.compiler.generate_compiled_model(model_save_path)
        self.trainer.set_model(model)
        self.trainer.set_savepath(model_save_path)
        self.trainer.set_dataset(trvals)

    def _train_one_core(self, model_key, trvals, eachepochs, removecheckpoint):
        """Set up and run one training; optionally remove the checkpoint afterwards."""
        self._train_one_setup(model_key, trvals)

        self.trainer.train_model(eachepochs=eachepochs)
        if removecheckpoint:
            self.trainer.remove_checkpoint()

    def remove_checkpoint(self, model_key):
        """Remove the checkpoint of ``model_key``.

        Utility for cleaning up after an interrupted run.  ``model_key`` is the
        full key as passed to ``_model_path`` (for a second-level model that
        includes the "sec_" prefix).
        """
        self.trainer.set_savepath(self._model_path(model_key))
        self.trainer.remove_checkpoint()



In [14]:
# "model" is the file-name prefix: second-level models are saved under
# BASE_MODEL_PATH as "model_sec_<category>".
sec_trainer = SecondLevelModelTrainer("model", BASE_MODEL_PATH, trainer, compiler)

In [15]:
# Register the train/validation split produced by split_fdict above.
sec_trainer.setup_filedict(trdict, valdict)

In [16]:
# Category-by-category similarity table; it is indexed as classsim[category]
# and its index labels are treated as category names by the cells below.
# NOTE(review): read_pickle unpickles the file, which can execute arbitrary
# code — only load a file produced by a trusted run of this project.
classsim = pd.read_pickle("results/valid_sim_df.dat")

In [17]:
# Minimum similarity (compared with >=) for a category to be counted as
# similar to the target when building the second-level negative set.
SIM_THRESHOLD = 0.1

In [18]:
def train_seconds(keys, eachepochs=5, sim_threshold=None):
    """Train a second-level classifier for every category in ``keys``.

    For each target the similar categories are the index labels of
    ``classsim[target]`` whose value is at least the threshold.  Targets with
    no similar category other than themselves are skipped with a message.

    Args:
        keys: Iterable of category names (columns of ``classsim``).
        eachepochs: Forwarded to ``sec_trainer.train_second_level``.
        sim_threshold: Similarity cut-off; ``None`` (default) uses the
            notebook-level ``SIM_THRESHOLD``.

    A ``ValueError`` raised while training one category is reported and the
    loop continues with the next category.
    """
    threshold = SIM_THRESHOLD if sim_threshold is None else sim_threshold
    for targetkey in keys:
        sims = classsim[targetkey]
        similarkeyset = set(sims[sims >= threshold].index)
        # Decide on "is there any category besides the target", not on the raw
        # set size: an empty set must be skipped too, and a single entry that is
        # a different category is still something to train against.
        if not similarkeyset - {targetkey}:
            print("no similar category. only first classifier is enough. skip second training.")
            continue
        try:
            sec_trainer.train_second_level(targetkey, similarkeyset, eachepochs=eachepochs)
        except ValueError as e:
            print("ValueError, skip {0}: {1}".format(targetkey, e))

In [19]:
# Smoke run: first category only, 2 epochs.
train_seconds(categories[0:1], eachepochs=2)

Epoch 1/2
10/98 [==>...........................] - ETA: 3:03 - loss: 0.7239 - acc: 0.5250

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))




  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))






Epoch 2/2


### Trial and error

In [15]:
# NOTE(review): train_one_category is not defined or imported anywhere in this
# notebook, so this scratch cell still fails on a fresh kernel — define it or
# remove the cell.
train_one_category(categories[0])

In [21]:
# Display the sorted category list.
categories

['bay',
 'beach',
 'birds',
 'boeing',
 'buildings',
 'city',
 'clouds',
 'f-16',
 'face',
 'helicopter',
 'mountain',
 'ocean',
 'ships',
 'sky',
 'sunrise',
 'sunset']

In [54]:
# Position in `categories` of the category inspected next (index 2 is 'birds'
# in the list displayed above).
catidx = 2

In [55]:
cat = categories[catidx]
# Similarities to `cat` that reach the threshold.  Uses SIM_THRESHOLD rather
# than a literal 0.1 so this check cannot drift from the one in train_seconds.
classsim[cat][classsim[cat] >= SIM_THRESHOLD]

birds    1.0
Name: birds, dtype: float64

In [57]:
targetkey = cat
# Index labels (category names) whose similarity to the target is at least
# SIM_THRESHOLD, as a set.
set(classsim[targetkey][classsim[targetkey] >= SIM_THRESHOLD].index)

{'birds'}

In [58]:
# Same selection as the cell above, kept in a variable for inspection.
simset = set(classsim[targetkey][classsim[targetkey] >= SIM_THRESHOLD].index)

In [59]:
# A size of 1 here means only the category itself passed the threshold.
len(simset)

1

In [17]:
# Display the similarity table (rows and columns are both category names).
classsim

Unnamed: 0,bay,beach,birds,boeing,buildings,city,clouds,f-16,face,helicopter,mountain,ocean,ships,sky,sunrise,sunset
bay,0.852273,0.560345,0.005682,0.0,0.029866,0.226044,0.0,0.0,0.011364,0.006757,0.172159,0.149968,0.013514,0.0,0.034483,0.017045
beach,0.560345,0.873563,0.005747,0.0,0.011494,0.047996,0.015385,0.0,0.0,0.006757,0.067744,0.238384,0.018251,0.014286,0.017241,0.08279
birds,0.005682,0.005747,1.0,0.0,0.0,0.006757,0.007692,0.0,0.011628,0.0,0.0,0.048278,0.0,0.0,0.0,0.006757
boeing,0.0,0.0,0.0,0.952381,0.007937,0.015873,0.0,0.179287,0.0,0.055234,0.0,0.043148,0.037323,0.0,0.0,0.0
buildings,0.029866,0.011494,0.0,0.007937,0.961538,0.564103,0.0,0.0,0.005814,0.013514,0.0125,0.007042,0.027027,0.007143,0.017241,0.013167
city,0.226044,0.047996,0.006757,0.015873,0.564103,0.905405,0.0,0.016393,0.006757,0.013514,0.06402,0.03464,0.033784,0.0,0.054753,0.033784
clouds,0.0,0.015385,0.007692,0.0,0.0,0.0,0.953846,0.0,0.0,0.0,0.050962,0.137703,0.0,0.722527,0.091114,0.092516
f-16,0.0,0.0,0.0,0.179287,0.0,0.016393,0.0,0.967213,0.008197,0.072331,0.00625,0.007042,0.061254,0.0,0.0,0.0
face,0.011364,0.0,0.011628,0.0,0.005814,0.006757,0.0,0.008197,0.976744,0.013514,0.00625,0.032755,0.005814,0.014286,0.014435,0.005814
helicopter,0.006757,0.006757,0.0,0.055234,0.013514,0.013514,0.0,0.072331,0.013514,0.959459,0.031757,0.006757,0.027027,0.007143,0.0,0.006757


In [64]:
# Category used for the manual single-model training run in the cells below.
cat = "bay"

In [65]:
# Build the path with os.path.join, as SecondLevelModelTrainer._model_path does,
# instead of hard-coding "/" as the separator.
model_path = os.path.join(BASE_MODEL_PATH, "model_{}".format(cat))

In [66]:
# Obtain the model for this path from the shared ModelCompiler.
model = compiler.generate_compiled_model(model_path)

In [67]:
# Re-target the shared trainer: the model to train and its save path.
trainer.set_model(model)
trainer.set_savepath(model_path)

In [68]:
# split_files returns a pair per call, unpacked as the "true" and "false" file
# lists for `cat` — presumably files of `cat` vs. files of all other
# categories; confirm against models.modelutils.split_files.
true_train, false_train = split_files(cat, trdict)
true_valid, false_valid = split_files(cat, valdict)

In [69]:
# NOTE(review): SecondLevelModelTrainer passes a TrValFiles object to
# trainer.set_dataset(...) instead; confirm set_dataset_files is still part of
# OneVsAllModelTrainer.
trainer.set_dataset_files(true_train, false_train, true_valid, false_valid)

In [70]:
# Train with the trainer's own default settings (no eachepochs passed here;
# the recorded output shows 5 epochs).
trainer.train_model()

Epoch 1/5

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))




  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))






Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
