In [1]:
import os
import sys

import numpy as np

import pandas as pd
import glob

In [2]:
from keras.applications.inception_v3 import InceptionV3
from keras.models import Model, model_from_json
from keras.layers import Dense, GlobalAveragePooling2D
from keras import optimizers

# Layer count of the base network (presumably the InceptionV3 imported above — TODO confirm).
# NOTE(review): this value is not referenced by any other cell visible in this notebook.
basemodel_layer_num = 311 #corresponding to len(base_model.layers)

Using TensorFlow backend.


In [3]:
# Directory under which every per-category model path ("<dir>/model_<category>") is built.
BASE_MODEL_PATH="trained_model"
# IPython magic: create the directory (and parents) if it does not exist yet.
%mkdir -p $BASE_MODEL_PATH

In [4]:
from models.modelutils import ModelCompiler

In [5]:
# Shared ModelCompiler; train_one_category calls its generate_compiled_model()
# once per category to obtain a fresh compiled model.
compiler = ModelCompiler(BASE_MODEL_PATH)

In [9]:
from models.processor import create_generators

# Image data generators for the training and validation iterators; both are
# handed to OneVsAllModelTrainer further down.
TRAIN_DATAGEN, VALID_DATAGEN = create_generators()

In [48]:
# modelutils' version handles a different file extension, so a local variant is defined here.

def dir2filedict(basedir):
    """Map each category sub-directory of ``basedir`` to the files it contains.

    The expected layout is ``basedir/<category>/<file>``; the category name is
    the name of the file's parent directory.

    Args:
        basedir: Root directory holding one sub-directory per category.

    Returns:
        dict mapping category name -> sorted list of file paths. Categories
        that contain no regular file are absent from the result; a missing
        ``basedir`` yields an empty dict.
    """
    res = {}
    # The pattern contains no "**", so glob's ``recursive`` flag is not needed.
    # Sorting makes the result independent of file-system enumeration order.
    for f in sorted(glob.iglob("{}/*/*".format(basedir))):
        # Nested directories also match "*/*" but are not loadable samples,
        # so only regular files are kept.
        if not os.path.isfile(f):
            continue
        cat = os.path.basename(os.path.dirname(f))
        res.setdefault(cat, []).append(f)
    return res

In [15]:
import random

In [37]:
def split_train_valid(input_paths, ratio=None):
    """Shuffle ``input_paths`` and split them into training and validation lists.

    The paths are sorted before shuffling, so for a given state of the global
    ``random`` generator the split does not depend on the input order.

    Args:
        input_paths: Iterable of file paths.
        ratio: Fraction of the paths assigned to the training list. When
            omitted, the module-level ``TRAIN_VALID_RATIO`` is used.

    Returns:
        Tuple ``(training_paths, validation_paths)``.
    """
    if ratio is None:
        # Resolved at call time (not at definition time) so this cell can be
        # executed before the cell that assigns TRAIN_VALID_RATIO.
        ratio = TRAIN_VALID_RATIO
    paths = sorted(input_paths)
    random.shuffle(paths)
    sep = int(len(paths)*ratio)
    return paths[0:sep], paths[sep:]

In [38]:
def split_fdict(fdict):
    """Split every category's file list into training and validation parts.

    Categories are visited in sorted order, so the sequence of shuffles (and
    therefore the result for a given random seed) does not depend on the
    dict's insertion order.

    Args:
        fdict: dict mapping category name -> list of file paths.

    Returns:
        Tuple ``(train_dict, valid_dict)``; both have the same keys as ``fdict``.
    """
    train_by_cat, valid_by_cat = {}, {}
    for name in sorted(fdict):
        train_by_cat[name], valid_by_cat[name] = split_train_valid(fdict[name])
    return train_by_cat, valid_by_cat

In [14]:
# Fraction of each category's files that goes to the training split (the
# remainder is used for validation); read by split_train_valid.
TRAIN_VALID_RATIO=0.9

In [60]:
# category name (sub-directory of "data") -> list of file paths in that sub-directory
fdict = dir2filedict("data")

In [61]:
# Sorted category names.
# NOTE(review): the name is a typo for "categories"; it is kept because a later
# cell indexes `catecories`.
catecories = sorted(fdict.keys())

In [62]:
# Seed the global `random` generator so the shuffle inside split_train_valid
# (and thus the train/validation split) is reproducible.
random.seed(123)
# Per-category training and validation file lists.
trdict, valdict = split_fdict(fdict)

In [19]:
# Copied from one_vs_all.py for development purposes.

In [20]:
from models.one_vs_all import OneVsAllFilesIterator
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
import json

# Side length of the square images fed to the model; used as the default
# target_size=(SIZE, SIZE) of OneVsAllModelTrainer.train_model.
SIZE = 224

In [21]:
import collections

# File lists for one binary task: `trues` are the positive files, `falses` the negative ones.
FilesPair = collections.namedtuple("FilesPair", ["trues", "falses"])
# One FilesPair for training and one for validation (see set_dataset_files).
TrValFiles = collections.namedtuple('TrValFiles', ['trainings', 'valids'])

In [22]:
from keras.preprocessing.image import Iterator
from keras import backend
from keras.preprocessing.image import img_to_array, load_img
import random
import numpy as np

In [23]:
class OneVsAllFilesIterator(Iterator):
    """Batch iterator over image files labelled 1 (positive) or 0 (negative).

    Positive files come first in ``filenames``, followed by the negative
    files; ``classes`` holds the matching 1.0 / 0.0 labels.
    """

    def __init__(self, true_files, false_files, image_data_generator, target_size=(256, 256), batch_size=32, shuffle=True, seed=None):
        """Record the files and labels, then initialise the base iterator.

        Args:
            true_files: Paths of the positive images (label 1).
            false_files: Paths of the negative images (label 0).
            image_data_generator: Object providing ``random_transform`` and
                ``standardize``; both are applied to every loaded image.
            target_size: ``(height, width)`` every image is resized to.
            batch_size: Forwarded to the base ``Iterator``.
            shuffle: Forwarded to the base ``Iterator``.
            seed: Forwarded to the base ``Iterator``.
        """
        self.image_data_generator = image_data_generator
        self.target_size = tuple(target_size)

        # Only the channels-last layout is supported; fail early otherwise.
        self.data_format = 'channels_last'
        assert backend.image_data_format() == self.data_format
        # Three colour channels are appended to the spatial size.
        self.image_shape = self.target_size + (3,)

        # Positives first, negatives second; the labels follow the same order.
        self.filenames = [*true_files, *false_files]
        self.classes = np.concatenate(
            [np.ones(len(true_files)), np.zeros(len(false_files))]
        )

        self.n = len(self.filenames)

        super().__init__(self.n, batch_size, shuffle, seed)

    def _get_batches_of_transformed_samples(self, index_array):
        """Load, augment and standardise the images selected by ``index_array``.

        Returns:
            Tuple ``(batch_x, batch_y)``: the image array and the labels of
            the selected samples, both in the backend float type.
        """
        float_type = backend.floatx()
        batch_shape = (len(index_array),) + self.image_shape
        batch_x = np.zeros(batch_shape, dtype=float_type)
        for slot, sample_idx in enumerate(index_array):
            image = load_img(self.filenames[sample_idx],
                             grayscale=False,
                             target_size=self.target_size)
            pixels = img_to_array(image, data_format=self.data_format)
            pixels = self.image_data_generator.random_transform(pixels)
            batch_x[slot] = self.image_data_generator.standardize(pixels)
        batch_y = self.classes[index_array].astype(float_type)
        return batch_x, batch_y

    def next(self):
        """Return the next batch (explicit ``next`` method, Python 2 style).

        # Returns
            The next batch.
        """
        # Only the index draw is guarded by the lock; the image loading and
        # transformation below run outside it so they can proceed in parallel.
        with self.lock:
            selected = next(self.index_generator)
        return self._get_batches_of_transformed_samples(selected)

In [24]:
class OneVsAllModelTrainer:
    """Trains one binary (one-vs-all) model and manages its saved files.

    Expected call order: ``set_model`` -> ``set_savepath`` ->
    ``set_dataset`` / ``set_dataset_files`` -> ``train_model``.
    """

    def __init__(self, train_datagen, valid_datagen):
        """Store the image data generators used for the batch iterators.

        Args:
            train_datagen: Generator applied to training images.
            valid_datagen: Generator applied to validation images.
        """
        self.train_datagen = train_datagen
        self.valid_datagen = valid_datagen

    def set_model(self, model):
        """Set the (already compiled) model that ``train_model`` will fit."""
        self.model = model

    def set_savepath(self, model_save_path):
        """Set the path prefix for saved files and build the checkpoint callback.

        Args:
            model_save_path: Prefix; checkpoints are written to
                ``<prefix>-<epoch>-<val_acc>.h5`` and the architecture to
                ``<prefix>.json``.
        """
        self.model_save_path= model_save_path
        self.file_path = self.model_save_path + "-{epoch:02d}-{val_acc:.3f}.h5"
        # save_best_only=False: a checkpoint is written after every epoch.
        self.checkpoint = ModelCheckpoint(
            self.file_path
            , monitor='val_acc'
            , verbose=1
            , save_best_only=False
            , mode='max'
        )
        self.callbacks_list = [self.checkpoint]

    def set_dataset(self, trvals):
        """Set the dataset as a ``TrValFiles`` of two ``FilesPair`` values."""
        self.trvals = trvals

    def set_dataset_files(self, true_trainings, false_trainings, true_valids, false_valids):
        """Build the ``TrValFiles`` structure from four file lists and store it.

        Args:
            true_trainings: Positive training files.
            false_trainings: Negative training files.
            true_valids: Positive validation files.
            false_valids: Negative validation files.
        """
        trs = FilesPair(trues=true_trainings, falses=false_trainings)
        vals = FilesPair(trues=true_valids, falses = false_valids)
        trval = TrValFiles(trs, vals)
        self.set_dataset(trval)

    def validation_generator(self, batch_size, target_size):
        """Return an iterator over all positive and negative validation files."""
        # false_sampled = random.sample(false_valids, len(true_valids))
        # temporary use whole false validation data.
        vals = self.trvals.valids
        false_sampled = vals.falses
        return OneVsAllFilesIterator(vals.trues, false_sampled, self.valid_datagen, target_size=target_size, batch_size=batch_size)

    def save_result(self, history):
        """Save the final weights, tagged with the last epoch's ``val_acc``.

        Args:
            history: Object returned by ``fit_generator``; its
                ``history['val_acc']`` list supplies the accuracy in the name.
        """
        # use epoch 99 as special (last saved model).
        self.model.save_weights("{0}-99-{1:.3f}.h5".format(self.model_save_path, history.history['val_acc'][-1]))

    def list_checkpoints_except_best(self):
        """Return every ``<prefix>-*.h5`` path except the one chosen as best."""
        pat = "{}-*.h5".format(self.model_save_path)
        paths = list(glob.iglob(pat))
        # NOTE(review): choose_best_val_acc_path is neither defined nor imported
        # in the visible cells of this notebook — confirm where it comes from.
        best = choose_best_val_acc_path(paths)
        return [path for path in paths if path != best]

    def remove_checkpoint(self):
        """Delete every saved weight file except the best one."""
        for path in self.list_checkpoints_except_best():
            os.remove(path)

    @staticmethod
    def _steps_for(n_samples, batch_size):
        """Number of batches needed to cover ``n_samples`` once (ceiling division)."""
        return (n_samples + batch_size - 1) // batch_size

    def train_model(self, eachepochs=5, batch_size=16, target_size=(SIZE, SIZE)):
        """Write the model architecture to JSON, fit the model, save final weights.

        The training iterator uses all positive training files plus a random
        sample of negative files of the same size (or every negative file when
        there are fewer negatives than positives).

        Args:
            eachepochs: Number of epochs to train.
            batch_size: Batch size for the training and validation iterators.
            target_size: ``(height, width)`` images are resized to.
        """
        with open("{0}.json".format(self.model_save_path), 'w') as f:
            json.dump(json.loads(self.model.to_json()), f) # model.to_json() is a STRING of json

        trs = self.trvals.trainings

        validgen = self.validation_generator(batch_size, target_size)
        # random.sample raises ValueError when asked for more items than the
        # population holds, so the sample size is capped at the number of negatives.
        n_false = min(len(trs.trues), len(trs.falses))
        false_sampled = random.sample(trs.falses, n_false)
        traingen = OneVsAllFilesIterator(trs.trues, false_sampled, self.train_datagen, target_size=target_size, batch_size=batch_size)

        # Step counts must be integers: a plain "/" yields a float in Python 3.
        # Rounding up keeps the final, partially filled batch.
        history = self.model.fit_generator(
            generator=traingen,
            steps_per_epoch=self._steps_for(traingen.n, batch_size),
            epochs=eachepochs,
            verbose=1,
            validation_data=validgen,
            validation_steps=self._steps_for(validgen.n, batch_size),
            callbacks=self.callbacks_list,
        )

        self.save_result(history)
        
        

In [25]:
from models.category import split_files

In [55]:
# Single trainer instance reused for every category; its model, save path and
# dataset are replaced before each training run.
trainer = OneVsAllModelTrainer(TRAIN_DATAGEN, VALID_DATAGEN)

In [73]:
def train_one_category(cat, epoch=5):
    """Train the one-vs-all model of a single category.

    Uses the notebook-level ``compiler``, ``trainer``, ``trdict`` and
    ``valdict``; files are saved under ``BASE_MODEL_PATH`` with the prefix
    ``model_<cat>``.

    Args:
        cat: Category name (a key of ``trdict`` / ``valdict``).
        epoch: Number of training epochs.
    """
    save_prefix = "{}/model_{}".format(BASE_MODEL_PATH, cat)

    # A fresh compiled model per category, attached to the shared trainer.
    trainer.set_model(compiler.generate_compiled_model(save_prefix))
    trainer.set_savepath(save_prefix)

    # Positive / negative file lists for the training and validation splits.
    pos_train, neg_train = split_files(cat, trdict)
    pos_valid, neg_valid = split_files(cat, valdict)
    trainer.set_dataset_files(pos_train, neg_train, pos_valid, neg_valid)

    trainer.train_model(eachepochs=epoch)

In [None]:
# Train the second category (in sorted order) with the default number of epochs.
train_one_category(catecories[1])

Epoch 1/5

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))




### Trial and error

In [64]:
# Category used for this manual, cell-by-cell run of the steps that
# train_one_category performs in one call.
cat = "bay"

In [65]:
# Path prefix for this category's saved model files.
model_path = "{}/model_{}".format(BASE_MODEL_PATH, cat)

In [66]:
# Obtain a compiled model for this category from the shared compiler.
model = compiler.generate_compiled_model(model_path)

In [67]:
# Attach the model to the trainer and point its checkpoints at model_path.
trainer.set_model(model)
trainer.set_savepath(model_path)

In [68]:
# split_files comes from models.category; presumably it returns the files of
# `cat` and the files of all other categories — TODO confirm.
true_train, false_train = split_files(cat, trdict)
true_valid, false_valid = split_files(cat, valdict)

In [69]:
# Register the positive/negative training and validation files with the trainer.
trainer.set_dataset_files(true_train, false_train, true_valid, false_valid)

In [70]:
# Train with the defaults: 5 epochs, batch size 16, SIZE x SIZE images.
trainer.train_model()

Epoch 1/5

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))




  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))






Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
