# CHAracter Recognition in Natural Images (CHARIN)

### This notebook is an attempt to walk through the entire code step-by-step, explaining the different blocks, to give an overview of the project. 

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported automatically before code runs and edits to the project's .py
# files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Display the directory the kernel is currently running in (the bare expression is the cell output).
os.getcwd()

'/Users/pradip.gupta/personal-projects/charni/notebook'

In [3]:
import sys, glob, shutil
# Move the working directory one level up, to the parent of the current directory,
# then display it. Later cells use paths relative to this directory
# ('./src/networks', './data/...').
# NOTE: this cell is not idempotent - re-running it climbs one more level each time.
os.chdir(os.path.dirname(os.getcwd()))
os.getcwd()

'/Users/pradip.gupta/personal-projects/charni'

#### Adding the "src/networks" folder to the Python path, so that the network files can be imported in-line using importlib

In [4]:
import os, sys
# './src/networks' is resolved to an absolute path against the current working
# directory at the time this cell runs, then appended to the module search path.
sys.path.append(os.path.abspath('./src/networks'))

In [5]:
#To handle OOM (out-of-memory) errors
import tensorflow as tf
import keras.backend.tensorflow_backend as ktf
from keras import backend as K
def get_session():
    """Build a TensorFlow session with restricted GPU memory usage.

    The GPU options request a per-process memory fraction of 0.9 and enable
    ``allow_growth``.

    Returns:
        A ``tf.Session`` configured with those GPU options.
    """
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.9,
            allow_growth=True,
        )
    )
    return tf.Session(config=session_config)


# Register the configured session with the Keras TensorFlow backend.
ktf.set_session(get_session())

Using TensorFlow backend.


In [6]:
#Standard imports
import pandas as pd
import importlib
import pickle
import numpy as np
from keras.models import load_model
from keras.optimizers import Adam, RMSprop, Nadam, SGD

### Loading all the custom functions that we have written. These make the training script neat and help in debugging in case of errors. 

In [7]:
#Custom imports
import config
from src.training import data_loader
from src.training.data_generator import DataGenerator
from src.training.keras_callbacks import get_callbacks
from src.training.training_modes import training_scratch, training_checkpoint, fine_tune, transfer_learning
from src.training.keras_history import generate_stats
from src.training.plots import save_plots

## Reading Config

In [8]:
# Copy the settings used by the rest of the notebook out of the config module.
base_path = config.base_path
exp_name = config.exp_name

# Constants (passed to the data generators as image dim, number of classes, channels)
size = config.size
classes = config.nclasses
chs = config.chs

# Training params
epochs = config.epochs
learning_rate = config.learning_rate
batch_size = config.batch_size
initial_epoch = config.initial_epoch

# Load the class weights from a pickle file. The context manager closes the
# file handle as soon as loading finishes (previously it was left open).
# NOTE(review): pickle.load can execute arbitrary code - only point
# config.class_weights_path at a trusted file.
with open(config.class_weights_path, 'rb') as class_weights_file:
    class_weights = pickle.load(class_weights_file)

# Training-mode switches
training_frm_scratch = config.training_frm_scratch
training_frm_chkpt = config.training_frm_chkpt
fine_tuning = config.fine_tuning
transfer_lr = config.transfer_lr
trial = config.trial

# Exactly one of the four training modes must be enabled.
if sum((training_frm_scratch, training_frm_chkpt, fine_tuning, transfer_lr)) != 1:
    raise Exception("Conflicting training modes")

## Building data source

In [9]:
# Build the train / validation / test splits from base_path. build_source
# returns six values, unpacked here as one (X, y) pair per split; the X_* and
# y_* values are later passed to DataGenerator as file_paths and labels.
X_train, y_train, X_val, y_val, X_test, y_test = data_loader.build_source(base_path)


Printing unique labels
for train: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62]
for val: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62]
for test: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62]


In [10]:
# Optional trial run: keep only the first `config.samples` entries of every split.
if trial:
    # Message typo fixed ("trail" -> "trial").
    print("Running in trial mode")
    samples = config.samples
    X_train, y_train = X_train[:samples], y_train[:samples]
    X_val, y_val = X_val[:samples], y_val[:samples]
    X_test, y_test = X_test[:samples], y_test[:samples]

## Data Generator

In [11]:
# spe = steps per epoch: the number of whole batches in each split
# (any partial final batch is dropped by the floor).
train_spe, val_spe = (
    int(np.floor(len(split) / batch_size))
    for split in (X_train, X_val)
)
print(train_spe, val_spe)

191 23


In [12]:
# Initialise the training and validation generators. Both use the same batch
# size, image dimensions, channel count, class count and shuffling; only the
# file paths and labels differ.
shared_generator_args = dict(
    batch_size=batch_size,
    dim=(size, size),
    n_channels=chs,
    n_classes=classes,
    shuffle=True,
)

train_generator = DataGenerator(
    base_path, file_paths=X_train, labels=y_train, **shared_generator_args
)

validation_generator = DataGenerator(
    base_path, file_paths=X_val, labels=y_val, **shared_generator_args
)

In [17]:
# Fetch the batch at index 2 from each generator and display the array shapes.
X_t, y_t = train_generator[2]
X_v, y_v = validation_generator[2]
X_t.shape, y_t.shape, X_v.shape, y_v.shape

((32, 64, 64, 1), (32, 62), (32, 64, 64, 1), (32, 62))

### Defining a superset of loss, optimiser and metric functions. The user can select any of these options using the config file

In [18]:
# Short config keys -> Keras loss identifiers.
# Keras string identifiers use underscores; the sparse entry previously
# contained spaces and could not be resolved as a loss name.
loss_class = {'cat_cross': 'categorical_crossentropy',
              'sp_cat_cross': 'sparse_categorical_crossentropy'}

# Short config keys -> Keras metric identifiers.
metric_class = {'acc':'accuracy'}

# Short config keys -> (optimiser class, dict of extra keyword arguments).
# Presumably the training-mode helpers instantiate the class with these
# arguments - confirm in src/training/training_modes.
optimiser_class = dict(
    adam=(Adam, {}),
    nadam=(Nadam, {}),
    rmsprop=(RMSprop, {}),
    sgd=(SGD, dict(decay=1e-6, momentum=0.90, nesterov=True)),
)

## Initialise Model

In [19]:
# Three of the four training modes take the same (optimiser, loss, metric)
# lookup tables; training_checkpoint takes no arguments. Each helper returns
# a (model, gpu_model) pair.
lookup_tables = (optimiser_class, loss_class, metric_class)

if training_frm_scratch:
    model, gpu_model = training_scratch(*lookup_tables)
elif training_frm_chkpt:
    model, gpu_model = training_checkpoint()
elif fine_tuning:
    model, gpu_model = fine_tune(*lookup_tables)
elif transfer_lr:
    model, gpu_model = transfer_learning(*lookup_tables)

Training from scratch


### Print the model params

In [20]:
print("Model training params:")

# Count parameters per weight group; set() removes duplicate weight entries
# before their sizes are summed.
trainable_count, non_trainable_count = (
    int(np.sum([K.count_params(weight) for weight in set(weight_group)]))
    for weight_group in (model.trainable_weights, model.non_trainable_weights)
)
total_count = trainable_count + non_trainable_count
params = (total_count, trainable_count, non_trainable_count)

print('Total params: {:,}'.format(total_count))
print('Trainable params: {:,}'.format(trainable_count))
print('Non-trainable params: {:,}'.format(non_trainable_count))

Model training params:
Total params: 12,727,806
Trainable params: 12,723,966
Non-trainable params: 3,840


### Set the callbacks to be used for training

In [21]:
# Build the list of Keras callbacks for this model via the project helper;
# the list is passed to fit_generator below.
callbacks_list = get_callbacks(model)

## Start/Resume training

In [None]:
# Start/resume training.
# Arguments common to the single-GPU and multi-GPU runs.
fit_args = dict(
    generator=train_generator,
    steps_per_epoch=train_spe,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=val_spe,
    initial_epoch=initial_epoch,
    class_weight=class_weights,
    callbacks=callbacks_list,
)

if config.no_of_gpu > 1:
    # Multi-GPU run: train the GPU model, loading batches with 4 worker processes.
    history = gpu_model.fit_generator(workers=4,
                                      use_multiprocessing=True,
                                      **fit_args)
else:
    history = model.fit_generator(**fit_args)

In [None]:
# Save the final complete model under ./data/<exp_name>/.
# `filename` is reused by the history-saving cell below.
filename = "model_ep_"+str(int(epochs))+"_batch_"+str(int(batch_size))
model_path = "./data/"+exp_name+"/"+filename+".h5"
model.save(model_path)
# Report the path that was actually written; the previous message printed
# "<filename>_model.h5", which did not match the saved file.
print("Saved complete model file at: ", model_path)

In [None]:
# Save the training history as a CSV next to the model file, then save the plots.
history_to_save = generate_stats(history, config)
results_csv_path = "./data/"+exp_name+"/"+filename + "_train_results.csv"
history_frame = pd.DataFrame(history_to_save)
history_frame.to_csv(results_csv_path)
save_plots(history, exp_name)