**Necessary Imports & Inits**

In [None]:
import os
import cv2
import threading
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

# Seed NumPy's global RNG so np.random-based sampling in this notebook is repeatable.
np.random.seed(2)
# Apply seaborn's plot styling (white style, notebook context, deep palette).
sns.set(style='white', context='notebook', palette='deep')

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from fastai.imports import *

from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

# Image size handed to tfms_from_model in the Model section.
sz=180
# Network architecture used for both the transforms and the learner.
arch=resnext50
# Batch size handed to ImageClassifierData.from_paths.
bs=24
# Root folder holding the raw datasets, the label CSVs and the pre-built overlays.
DATASET_DIR = "numta/"
# Folder under which the generated train/valid/test directories are written.
GENERATED_OUTPUT_DIR = os.path.join(DATASET_DIR, "Fastai_gen/")

**Utils**

In [None]:
# Constants used while plotting the data.
# Font sizes; not referenced by the code visible in this notebook section.
FS_AXIS_LABEL=14
FS_TITLE=17
FS_TICKS=12
# Figure width and per-row height passed to plt.figure(figsize=...) in imshow_group.
FIG_WIDTH=20
ROW_HEIGHT=3

def imshow_group(x,y=None,y_pred=None,n_per_row=10):
    '''Plot a group of images in a grid, optionally annotated with labels and predictions.

    Args:
        x: array of images; ``x.shape[1]`` is used as the image size when placing text.
        y: optional per-image label vectors; ``np.argmax`` of each one is shown as the true label.
        y_pred: optional 2-D array of per-image class probabilities; the (up to) three
            most probable classes are printed below each image.
        n_per_row: number of images per row to be plotted

    Returns:
        None. Nothing is drawn when ``x`` is empty.
    '''
    n_sample=len(x)
    if n_sample == 0:
        # An empty group would otherwise request a zero-height figure.
        return
    img_dim=x.shape[1]
    text_spacing = int(img_dim * 0.15)
    # np.ceil returns a float, but plt.subplot needs integer grid dimensions.
    n_rows=int(np.ceil(float(n_sample)/n_per_row))
    plt.figure(figsize=(FIG_WIDTH,ROW_HEIGHT*n_rows))
    for i,img in enumerate(x):
        plt.subplot(n_rows,n_per_row,i+1)
        plt.imshow(img)
        if y is not None:
            plt.title('true label: {}'.format(np.argmax(y[i])))
        if y_pred is not None:
            # Class indices ordered from most to least probable.
            ind_sorted=np.argsort(y_pred[i])[::-1]
            # Show the top 3 predictions, or fewer when there are fewer classes.
            top_n=min(3, len(ind_sorted))
            h=img_dim+text_spacing
            for class_idx in ind_sorted[:top_n]:
                string='pred: {} ({:.0f}%)\n'.format(class_idx,y_pred[i,class_idx]*100)
                plt.text(img_dim/2, h, string, horizontalalignment='center',verticalalignment='center')
                # Each prediction goes one text line further below the image.
                h+= text_spacing
        plt.axis('off')
    plt.show()
    
def shuffle_with_labels(a, b):
    '''Shuffle two equally long sequences together with sklearn.utils.shuffle.

    Args:
        a: first sequence (e.g. the samples)
        b: second sequence (e.g. the labels); must have the same length as ``a``

    Returns:
        Whatever ``sklearn.utils.shuffle(a, b)`` returns for the two inputs.
    '''
    assert len(a) == len(b)
    # Imported lazily, after the length check, exactly as before.
    from sklearn.utils import shuffle as sk_shuffle
    shuffled = sk_shuffle(a, b)
    return shuffled

def resize_image(image, target_size):
    '''Resize an image so that its longer side becomes target_size, keeping the aspect ratio.

    Args:
        image: image array whose first two dimensions are (rows, columns)
        target_size: desired length in pixels of the longer side

    Returns:
        The image resized by cv2.resize with cubic interpolation.
    '''
    n_rows, n_cols = image.shape[0], image.shape[1]
    longest = max(n_rows, n_cols)

    # Scale with floor division instead of a float ratio: (target/longest)*longest
    # can land just below target in floating point and get truncated one pixel short.
    # Clamp to 1 so a very thin image never produces a zero-sized side.
    new_width = max(1, int(n_cols * target_size // longest))
    new_height = max(1, int(n_rows * target_size // longest))

    # cv2.resize takes the destination size as (width, height).
    return cv2.resize(image, (new_width, new_height), interpolation = cv2.INTER_CUBIC)

def load_images(input_files, input_size, invert = False):
    '''Load image files into one uint8 batch of square, zero-padded images.

    Each file is read with cv2.imread, resized with resize_image and placed in the
    top-left corner of an input_size x input_size x 3 canvas filled with zeros.

    Args:
        input_files: sequence of image file paths
        input_size: side length of the square output images
        invert: when true, return ``255 - batch`` instead of the batch itself

    Returns:
        Array of shape (len(input_files), input_size, input_size, 3), dtype uint8.

    Raises:
        IOError: if cv2.imread cannot read one of the files.
    '''
    x_batch = np.zeros([len(input_files), input_size, input_size, 3], dtype=np.uint8)
    for i,file_path in enumerate(input_files):
        image_read = cv2.imread(file_path)
        if image_read is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise IOError('Could not read image file: {}'.format(file_path))
        image_read = resize_image(image_read, input_size)

        # Write straight into the batch; the untouched remainder stays zero (padding).
        x_batch[i, 0:image_read.shape[0], 0:image_read.shape[1], :] = image_read

    if invert:
        x_batch = 255 - x_batch
    return x_batch

**Generation of Overlay Images** (we used a fixed set of 5000 generated overlay images for all training runs; they are attached. Images labelled 4 or 0 were never used as the mirrored layer.)

In [None]:
from tqdm import tqdm
def im_read(file_path, input_size):
    '''Read one image and return it on a square, zero-padded uint8 canvas.

    Args:
        file_path: path of the image file to read
        input_size: side length of the square output image

    Returns:
        Array of shape (input_size, input_size, 3), dtype uint8, with the resized
        image in the top-left corner and zeros elsewhere.

    Raises:
        IOError: if cv2.imread cannot read the file.
    '''
    image_read = cv2.imread(file_path)
    if image_read is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('Could not read image file: {}'.format(file_path))
    image_read = resize_image(image_read, input_size)

    image = np.zeros([input_size, input_size, 3], dtype=np.uint8)
    image[0:image_read.shape[0], 0:image_read.shape[1], :] = image_read
    return image


def overlay_on_image(image1, image2):
    '''Blend a flipped copy of image1 into image2.

    Args:
        image1: image that is flipped (cv2.flip with flip code 1) and weighted 0.2
        image2: image that is weighted 0.8

    Returns:
        The result of cv2.addWeighted over the two images, with no added offset.
    '''
    flipped = cv2.flip(image1, 1)
    blended = cv2.addWeighted(flipped, 0.2, image2, 0.8, 0.0)
    return blended

def get_random_overlay_images(image_files, image_labels, count, input_size=180, skip_labels=(4, 0)):
    '''Build overlay images from randomly chosen pairs of input images.

    For each of ``count`` draws two random indices are picked. The first image is
    overlaid (via overlay_on_image) onto the second, and the result keeps the label
    of the second image. A draw is dropped, not retried, when the label of the
    first image is in ``skip_labels``, so fewer than ``count`` images may be returned.

    Args:
        image_files: sequence of image file paths
        image_labels: labels aligned with ``image_files``
        count: number of random draws to make
        input_size: side length passed to im_read for both images (default 180)
        skip_labels: labels that must not be used as the first (overlaid) image

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays.
    '''
    ov_imgs = []
    labels = []
    n_files = len(image_files)

    for _ in tqdm(range(count)):

        # Both indices are drawn before the skip check so the random sequence
        # consumed per draw does not depend on whether the draw is kept.
        overlay_idx = np.random.randint(n_files, size=1)[0]
        base_idx = np.random.randint(n_files, size=1)[0]

        if image_labels[overlay_idx] in skip_labels:
            continue

        img = overlay_on_image(
            im_read(image_files[overlay_idx], input_size),
            im_read(image_files[base_idx], input_size))

        ov_imgs.append(img)
        labels.append(image_labels[base_idx])
    return np.array(ov_imgs), np.array(labels)

def generate_overlay_files():
    '''Generate overlay images and write them as PNG files grouped by label.

    Calls get_random_overlay_images(X_data, Y_data, 5000) and saves every returned
    image under GENERATED_OVERLAY_DIR/<label>/ with a random 12-letter file name.

    NOTE(review): GENERATED_OVERLAY_DIR, X_data and Y_data are not defined in the
    visible part of this notebook - confirm they exist before calling this.
    '''
    from random import choice
    from string import ascii_uppercase

    ov_images, labels = get_random_overlay_images(X_data, Y_data, 5000)

    mkdir(GENERATED_OVERLAY_DIR)

    for overlay, label in zip(ov_images, labels):
        # One sub-folder per label.
        subdir = os.path.join(GENERATED_OVERLAY_DIR, str(label))
        mkdir(subdir)

        letters = [choice(ascii_uppercase) for _ in range(12)]
        des_file_path = os.path.join(subdir, ''.join(letters) + ".png")
        cv2.imwrite(des_file_path, overlay)
        

**Data Augmentation**

In [None]:
import imgaug as ia
from imgaug import augmenters as iaa

# Training-time augmentation pipeline: for every image, between one and three of
# the four augmenter groups below (affine / noise / dropout / blur-or-intensity)
# are applied, in random order.
# NOTE(review): iaa.ContrastNormalization may be deprecated in newer imgaug
# releases - confirm against the installed version.
train_data_aug = iaa.Sequential(
    [
        iaa.SomeOf((1, 3),
            [
                iaa.Affine(
                    scale=(0.7, 1.1),
                    # "y" was (0.1, 0.1), i.e. a constant +10% shift; made symmetric like "x".
                    translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
                    rotate=(-30, 30),
                    shear=(-30, 30),
                    order=[0, 1],
                    cval=(0, 0),
                ),
                iaa.OneOf([
                    iaa.AdditiveGaussianNoise(
                        loc=0, scale=(0.0, 0.15*255)
                    ),
                    iaa.SaltAndPepper(0.15),
                    iaa.Salt(0.15)
                ]),

                iaa.OneOf([
                    iaa.Dropout((0.01, 0.05)),
                    iaa.CoarseDropout(
                        (0.03, 0.06), size_percent=(0.02, 0.04)
                    ),
                ]),

                iaa.OneOf([
                        iaa.OneOf([
                            iaa.GaussianBlur((3.0, 4.0)),
                            iaa.AverageBlur(k=(5, 7)),
                        ]),

                        iaa.Add((-10, 10), per_channel=0.5),

                        iaa.Multiply((0.5, 1.5), per_channel=0.5),

                        iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
                ]),
            ],
            # apply the selected augmenters (1 to 3 of the above) in random order
            random_order=True
        )
    ],
)

def get_augmentation(images):
    '''Run a batch of images through the module-level train_data_aug pipeline.

    Args:
        images: batch of images accepted by ``train_data_aug.augment_images``

    Returns:
        The augmented batch returned by ``augment_images``.
    '''
    augmented_images = train_data_aug.augment_images(images)
    return augmented_images

**Training Data Generation**

In [None]:
import pandas as pd
import random

from shutil import copyfile
import cv2

def mkdir(output_dir):
    '''Create output_dir (and any missing parents); do nothing if it already exists.

    Args:
        output_dir: path of the directory to create

    Raises:
        FileExistsError: if output_dir exists but is not a directory.
    '''
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(output_dir, exist_ok=True)
        
def move_images(from_path, to_path, invert=False):
    '''Copy every entry of from_path into to_path, optionally inverting the images.

    Despite the name, the source files are left in place.

    Args:
        from_path: directory whose entries are copied
        to_path: existing directory that receives the files under the same names
        invert: when true, each file is read with cv2.imread and ``255 - image`` is
            written with cv2.imwrite; otherwise the file is copied unchanged
    '''
    for file_name in os.listdir(from_path):
        source_file = os.path.join(from_path, file_name)
        destination_file = os.path.join(to_path, file_name)
        if not invert:
            copyfile(source_file, destination_file)
            continue
        inverted = 255 - cv2.imread(source_file)
        cv2.imwrite(destination_file, inverted)
            
def split_in_single_test_dir():
    '''Gather all test datasets into the single GENERATED_OUTPUT_DIR/test folder.

    Datasets named in ``test_datasets`` are copied unchanged; those named in
    ``test_datasets_inv`` are copied with inverted pixel values.
    '''
    test_subdir = os.path.join(GENERATED_OUTPUT_DIR, 'test')
    mkdir(test_subdir)

    def _collect(dataset_names, inverted):
        # Copy each listed dataset folder from DATASET_DIR into the test folder.
        for dataset_name in dataset_names:
            move_images(os.path.join(DATASET_DIR, dataset_name), test_subdir, invert=inverted)

    _collect(test_datasets, False)
    _collect(test_datasets_inv, True)
        
def separate_in_train_dir():
    '''Lay the training images out as GENERATED_OUTPUT_DIR/train/<digit>/<filename>.

    Rows of the module-level ``train`` frame are copied unchanged. Rows of
    ``train_inv`` are written with inverted pixel values (``255 - image``).

    Raises:
        IOError: if an image listed in ``train_inv`` cannot be read by cv2.imread.
    '''
    mkdir(GENERATED_OUTPUT_DIR)
    train_subdir = os.path.join(GENERATED_OUTPUT_DIR, 'train')
    mkdir(train_subdir)
    for _, item in train.iterrows():
        source_file, destination_file = _train_file_paths(train_subdir, item)
        copyfile(source_file, destination_file)
    for _, item in train_inv.iterrows():
        source_file, destination_file = _train_file_paths(train_subdir, item)
        image = cv2.imread(source_file)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise IOError('Could not read image file: {}'.format(source_file))
        # Write only the inverted image; a plain copy first would just be
        # overwritten, or be left behind un-inverted if the read failed.
        cv2.imwrite(destination_file, 255 - image)

def _train_file_paths(train_subdir, item):
    '''Return (source, destination) paths for one label row, creating the digit folder.'''
    save_dir = os.path.join(train_subdir, str(item['digit']))
    mkdir(save_dir)
    source_file = os.path.join(DATASET_DIR, item['database name'], item['filename'])
    destination_file = os.path.join(save_dir, item['filename'])
    return source_file, destination_file
        
def make_valid_set():
    '''Create a minimal validation set under GENERATED_OUTPUT_DIR/valid.

    Every label folder found in the train directory gets a matching folder in
    ``valid`` holding a copy of just the first file listed for that label.
    '''
    train_dir = os.path.join(GENERATED_OUTPUT_DIR, 'train/')
    label_dirs = os.listdir(train_dir)
    valid_subdir = os.path.join(GENERATED_OUTPUT_DIR, 'valid')
    mkdir(valid_subdir)
    for label_dir in label_dirs:
        source_path = os.path.join(train_dir, label_dir)
        dest_path = os.path.join(valid_subdir, label_dir)
        mkdir(dest_path)
        # Kept deliberately tiny (one image per label) because the set is not really needed.
        for image_filename in os.listdir(source_path)[:1]:
            copyfile(os.path.join(source_path, image_filename), os.path.join(dest_path, image_filename))

from tqdm import tqdm
def generate_augmentation(training_dataset_dir):
    '''Write two augmented variants next to every image of a training directory.

    For each file ``name`` in each label sub-folder, ``augmented_name`` and
    ``augmented_1_name`` are produced with get_augmentation. Files whose name already
    starts with ``augmented_`` are skipped, so a re-run regenerates the variants of
    the original images instead of augmenting earlier outputs again.

    Args:
        training_dataset_dir: directory containing one sub-folder per label

    Raises:
        IOError: if an image cannot be read by cv2.imread.
    '''
    output_prefixes = ("augmented_", "augmented_1_")
    for label_dir in os.listdir(training_dataset_dir):
        label_path = os.path.join(training_dataset_dir, label_dir)
        # The listing is taken before any output is written for this label.
        image_files = [f for f in os.listdir(label_path) if not f.startswith("augmented_")]
        for im_file in tqdm(image_files):
            # Decode the source once and reuse it for both variants.
            image = cv2.imread(os.path.join(label_path, im_file))
            if image is None:
                # cv2.imread signals failure by returning None rather than raising.
                raise IOError('Could not read image file: {}'.format(os.path.join(label_path, im_file)))
            for prefix in output_prefixes:
                # np.array([image]) builds a fresh one-image batch, so the decoded
                # source stays untouched even if the augmenter works in place.
                augmented_batch = get_augmentation(np.array([image]))
                # Index the batch instead of np.squeeze, which would also drop a
                # height or width of size 1.
                cv2.imwrite(os.path.join(label_path, prefix + im_file), augmented_batch[0])
            
def copy_overlay_images():
    '''Copy the pre-built overlay images into the generated train directory.

    Every label folder under DATASET_DIR/overlays/ is mirrored into
    GENERATED_OUTPUT_DIR/train/<label>/, and all of its files are copied across.
    '''
    train_subdir = os.path.join(GENERATED_OUTPUT_DIR, 'train')
    mkdir(train_subdir)
    db_dir = os.path.join(DATASET_DIR, 'overlays/')
    for label_dir in os.listdir(db_dir):
        source_path = os.path.join(db_dir, label_dir)
        dest_path = os.path.join(train_subdir, label_dir)
        mkdir(dest_path)
        for image_filename in os.listdir(source_path):
            source_file = os.path.join(source_path, image_filename)
            destination_file = os.path.join(dest_path, image_filename)
            copyfile(source_file, destination_file)


# Label CSVs whose images are copied unchanged into the train directory.
train_set = ["training-a.csv", "training-b.csv", "training-c.csv", "training-d.csv"]
# Label CSVs whose images are written with inverted pixel values.
train_set_inv = ["training-e.csv"]
# Both frames are read as module-level globals by separate_in_train_dir.
train = pd.concat([pd.read_csv(os.path.join(DATASET_DIR, x)) for x in train_set])
train_inv = pd.concat([pd.read_csv(os.path.join(DATASET_DIR, x)) for x in train_set_inv])
print ("Generating training set")
separate_in_train_dir()
print ("Bringing overlay images")
copy_overlay_images()

# Build the validation folder from the train folder as it stands now,
# i.e. before the augmented files are added below.
make_valid_set()

# Dataset folders read as module-level globals by split_in_single_test_dir;
# the second list is copied with inverted pixel values.
test_datasets = ['testing-auga', 'testing-augc', 'testing-a', 'testing-b', 'testing-c', 'testing-d', 'testing-f']
test_datasets_inv = ['testing-e']
print ("Generating test set")
split_in_single_test_dir()

print ("Augmenting train set")
# NOTE: delete previously generated augmentations before re-running this cell,
# so that already-augmented files are not fed through the augmenter again.
generate_augmentation(os.path.join(GENERATED_OUTPUT_DIR, 'train'))

**Model**

In [None]:
# Build the transforms for the chosen architecture and image size.
tfms = tfms_from_model(arch, sz)
# Dataset object over GENERATED_OUTPUT_DIR; 'test' is the folder created by split_in_single_test_dir.
data = ImageClassifierData.from_paths(GENERATED_OUTPUT_DIR, test_name='test', tfms=tfms, bs=bs, num_workers=4)
# Learner for the chosen architecture on that data.
# NOTE(review): ps=0.5 is presumably the dropout probability of the classifier head - confirm against the fastai docs.
learn = ConvLearner.pretrained(arch, data, ps=0.5)

In [None]:
# Base learning rate used by the first training runs.
lr = 1e-3
learn.fit(lr, 1)
# NOTE(review): precompute is not passed to ConvLearner.pretrained in the previous
# cell, so this assignment may only restate the default - confirm.
learn.precompute=False

In [None]:
# Further training at the same scalar learning rate (2 cycles, cycle_len=1).
learn.fit(lr, 2, cycle_len=1)

In [None]:
learn.unfreeze()
# Replace the scalar rate with three rates (lr/9, lr/3, lr); presumably one per layer group - confirm.
# NOTE(review): lr is rebound here, so running this cell twice divides the already-converted array again.
lr=np.array([lr/9,lr/3,lr])

In [None]:
# Train with the three-element learning-rate array set in the previous cell (6 cycles, cycle_len=1).
learn.fit(lr, 6, cycle_len=1)

In [None]:
#learn.save('180_all_50_lrg5')

In [None]:
#learn.load('180_all_50_lrg')