# Import Libraries

In [20]:
import cv2
import numpy as np
import os

from sklearn.model_selection import train_test_split
from skimage import data, io
from matplotlib import pyplot as plt
%matplotlib inline

from skimage import color 
from skimage.feature import canny 


import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from tensorflow.keras import initializers
from keras.layers import LeakyReLU

from keras.utils import np_utils
from keras import optimizers

# Preprocessing

## Specify directories

In [21]:
#create lists to save the labels (the name of the shape)
train_dir = '/home/trojan/Desktop/pattern recognition/PB2/Implementation/data/abcde'
save_dir = '/home/trojan/Desktop/pattern recognition/PB2/Implementation'
character_list = ['a', 'b', 'c', 'd', 'e']

## Helper functions

In [16]:
def plotImages(images_arr):
    fig, axes = plt.subplots(1, 10, figsize=(20,20))
    axes = axes.flatten()
    for img, ax in zip( images_arr, axes):
        ax.imshow(img)
        ax.axis('off')
    plt.tight_layout()
    plt.show()

## Augmentation with keras

In [4]:
gen = ImageDataGenerator(featurewise_center=True, rotation_range=10, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.15, zoom_range=0.1, 
                         channel_shift_range=0., horizontal_flip=True, vertical_flip=True)

In [5]:
def augmentation_keras(dir):
    for character in character_list:
        for file_name in os.listdir(os.path.join(dir,character)):
                PATH = os.path.join(dir,character)
                img = os.path.join(dir,character,file_name)
                image = cv2.imread(img, -1)
                image = np.expand_dims(cv2.imread(img), 0)
                #image = image.squeeze()
                #plt.figure()
                #plt.imshow(image)
                aug_iter = gen.flow(image, save_to_dir=PATH, save_prefix='aug-image-' + file_name, save_format='png')
                aug_images = [next(aug_iter)[0].astype(np.uint8) for i in range(30)]
                #plotImages(aug_images)
                
                

In [None]:
# Run this block to start augmentation
augmentation_keras(train_dir)

In [25]:
#function to preprocess data
def preprocess(images, labels):

    dataDim = np.prod(images[0].shape)
    images = np.array(images)
    images = images.reshape(len(images), dataDim)
    images = images.astype('float32')
    images /=255
    labels = np.array(labels)
    
    return images, labels


# Training

In [26]:
best_model = []
val_accuracies = []
models = []
trained_models = []
names = []

In [35]:
n_classes = 5

    #iterate through each shape
    all_labels, all_images = [],[]def train():

    for character in character_list:
        print('Getting data for: ', character)
        for file_name in os.listdir(os.path.join(train_dir,character)):
            all_images.append(cv2.imread(os.path.join(train_dir,character,file_name), 0))
            #add an integer to the labels list
            all_labels.append(character_list.index(character))

    # train and validation split
    train_images, val_images, train_labels, val_labels = train_test_split(all_images, all_labels, 
                                                                          shuffle=True, stratify=all_labels, 
                                                                          test_size=0.1, random_state=42)

    print('Number of training images: ', len(train_images), '\n')
    

    # Preprocess (your own function)
    train_images, train_labels = preprocess(train_images, train_labels)
    val_images, val_labels = preprocess(val_images, val_labels)
    
    train_labels = np_utils.to_categorical(train_labels, n_classes)
    val_labels = np_utils.to_categorical(val_labels, n_classes)

    opt = optimizers.SGD(learning_rate=0.1)
    model = Sequential()
    
    model.add(Dense(128, input_dim=np.prod(train_images[0].shape), kernel_initializer='he_uniform'))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    
    model.add(Dense(128, kernel_initializer='he_uniform'))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    
    model.add(Dense(128, kernel_initializer='he_uniform'))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    
    model.add(Dense(n_classes, kernel_initializer='he_uniform'))
    model.add(Activation('softmax'))
    
    # compile the keras model
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    # fit the keras model on the dataset
    model.fit(train_images, train_labels, epochs=50, batch_size=10)
    # evaluate the keras model
    _, accuracy = model.evaluate(val_images, val_labels)
    print('Accuracy: %.2f' % (accuracy*100))     
        


In [36]:
if __name__ == '__main__':
    train()

Getting data for:  a
Getting data for:  b
Getting data for:  c
Getting data for:  d
Getting data for:  e
Number of training images:  4178 

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

KeyboardInterrupt: 

In [7]:
# Choose the best model

best_acc = np.max(val_accuracies)
print("Best Validation Accuracy = {}".format(best_acc), '\n')
    
index_best_acc = np.argmax(val_accuracies, axis=None)

best_model = trained_models[index_best_acc]
print(best_model)

best_model_name = names[index_best_acc]   
print("Best Model Is {}".format(best_model_name), '\n')

Best Validation Accuracy = 82.15053763440861 

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=5, verbose=0,
                       warm_start=False)
Best Model Is RF 



## Save the model

In [8]:
import pickle

filename = "final_model"
model_path = os.path.join(save_dir,filename)
pickle.dump(best_model, open(model_path, 'wb'))

## Load the model

In [9]:
'''Model is provided in submission with report and notebook'''

loaded_model = pickle.load(open(model_path, 'rb'))
print(loaded_model)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=5, verbose=0,
                       warm_start=False)


# Testing

In [None]:
    """forTA (Do not erase here)
    test_dir = '../ForTA'
    test_labels, test_images = [], []
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(test_dir,shape)):
            test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))
            #add an integer to the labels list
            test_labels.append(shape_list.index(shape))

    print('Number of test images: ', len(test_images))

    test_images, test_labels = preprocess(test_images, test_labels)
    #pred_labels = model.predict(test_images)
    pred_labels = loaded_model.predict(test_images)   # coz we are using 'loaded_model'
    pred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100
    print("Test Accuracy = {}".format(pred_acc))
    """
