# Train the first set of one-vs-rest (OVR) classifiers

We train a set of classifiers that are used both to compute $ClassSim$ and to perform classification.  
The modules used in this notebook are defined in files in the *models* directory.

## Set up

In [None]:
import os
import sys

import numpy as np

import pandas as pd
import glob

In [None]:
# Directory under which every trained per-category model is stored.
BASE_MODEL_PATH = "trained_model"

# Pure-Python replacement for the `%mkdir -p` shell alias: creates the
# directory (and any missing parents) and is a no-op when it already exists.
# Unlike the alias, this behaves the same on every platform and also works
# when the code is run outside IPython.
os.makedirs(BASE_MODEL_PATH, exist_ok=True)

In [None]:
from models.modelutils import ModelCompiler

In [None]:
# Model factory shared by all categories: train_one_category() calls
# compiler.generate_compiled_model() once per category. It is constructed with
# the model base directory (how that path is used is defined in models/modelutils).
compiler = ModelCompiler(BASE_MODEL_PATH)

In [None]:
from models.processor import create_generators

# Training / validation generators that are later handed to OneVsAllModelTrainer.
# NOTE(review): presumably image data generators with their preprocessing /
# augmentation settings -- confirm in models/processor.
TRAIN_DATAGEN, VALID_DATAGEN = create_generators()

Load the category information and all of the image paths.

In [None]:
from models.modelutils import dir2filedict, split_fdict

In [None]:
# Index the files found under the "data" directory. The keys of the result are
# used as category names below.
# NOTE(review): values are presumably lists of image paths per category --
# confirm in models/modelutils.dir2filedict.
fdict = dir2filedict("data")

In [None]:
# Category names in sorted order. Iterating a dict-like mapping yields its
# keys, so this is the sorted list of fdict's keys.
categories = sorted(fdict)

Split the data into {train, validation, test} datasets.

In [None]:
# First split: separate the test set from everything else.
# NOTE(review): test_size=0.2 is presumably the held-out fraction and
# random_state is pinned presumably to make the split reproducible --
# confirm in models/modelutils.split_fdict.
trdict, testdict = split_fdict(fdict, test_size=0.2, random_state = 123)

In [None]:
# Second split: carve the validation set out of the remaining training data.
# CAUTION: this rebinds `trdict` to the result of splitting `trdict` itself, so
# the cell is not idempotent -- re-running it on its own splits the
# already-split training data again. Re-run the previous cell first to reset
# `trdict`.
trdict, valdict = split_fdict(trdict, test_size=0.2, random_state = 456)

In [None]:
# Sanity check: display the first five entries of the 'clouds' category in the
# validation split.
valdict['clouds'][0:5]

## Classifier training

In [None]:
from models.one_vs_all import OneVsAllModelTrainer
from models.modelutils import split_files

In [None]:
# Single trainer instance reused for every category; train_one_category()
# swaps in the model, the save path and the file lists before each training run.
trainer = OneVsAllModelTrainer(TRAIN_DATAGEN, VALID_DATAGEN)

In [None]:
def train_one_category(cat, epoch=5):
    """Train the one-vs-rest classifier for a single category.

    A compiled model is requested from the shared ``compiler`` for the path
    ``<BASE_MODEL_PATH>/model_<cat>``. The shared ``trainer`` is then given
    that model, the same path as its save location, and the training and
    validation files as partitioned by ``split_files``, and is asked to train.

    Args:
        cat: Category name; used to build the model path and passed to
            ``split_files``.
        epoch: Forwarded to ``trainer.train_model`` as ``eachepochs``
            (default 5).
    """
    save_path = "{}/model_{}".format(BASE_MODEL_PATH, cat)

    # Point the shared trainer at a freshly obtained model for this category.
    trainer.set_model(compiler.generate_compiled_model(save_path))
    trainer.set_savepath(save_path)

    # split_files yields a (true, false) pair of file collections for `cat`
    # -- presumably the category's own files versus all the others.
    train_pos, train_neg = split_files(cat, trdict)
    valid_pos, valid_neg = split_files(cat, valdict)
    trainer.set_dataset_files(train_pos, train_neg, valid_pos, valid_neg)

    trainer.train_model(eachepochs=epoch)

In [None]:
# Train one OVR classifier per category, in the order of `categories`.
for category in categories:
    train_one_category(category)