# Import Libraries

In [1]:
import cv2
import numpy as np
import os

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.decomposition import PCA

from sklearn import metrics
from sklearn.model_selection import train_test_split

from skimage.feature import canny 
import skimage
from skimage import data, io
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

from skimage import color 

import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Preprocessing

## Specify directories

In [4]:
# Dataset root (one sub-folder per shape name), the folder the pickled model is
# written to, and the class names. A label is the index of a shape in shape_list.
# NOTE(review): both paths are absolute and machine-specific; adjust them before
# running this notebook on another machine.
train_dir = '/home/trojan/Desktop/pattern recognition/PB1/Implementation/data/shapes'
save_dir = '/home/trojan/Desktop/pattern recognition/PB1/Implementation'
shape_list = ['circle', 'triangle', 'tetragon', 'pentagon', 'other']

## Helper functions

In [5]:
def plotImages(images_arr):
    """Draw the given images side by side in one row of ten panels.

    Images are paired with panels in order; pairing stops as soon as either
    the images or the ten panels run out. Axis decorations are hidden only on
    panels that received an image.
    """
    _, grid = plt.subplots(nrows=1, ncols=10, figsize=(20, 20))
    panels = grid.flatten()
    for picture, panel in zip(images_arr, panels):
        panel.imshow(picture)
        panel.axis('off')
    plt.tight_layout()
    plt.show()

#### Augmentation with keras

In [6]:
# Augmentation pipeline: small random rotations, shifts, shear and zoom plus
# horizontal/vertical flips.
# featurewise_center is deliberately not enabled: it only takes effect after
# gen.fit(...) has computed the dataset mean, and no such call is made in this
# notebook, so enabling it merely produced a warning for every generated batch.
gen = ImageDataGenerator(rotation_range=10, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.15, zoom_range=0.1,
                         channel_shift_range=0., horizontal_flip=True, vertical_flip=True)

In [5]:
def augmentation_keras(dir, n_augmented=100):
    """Write Keras-augmented PNG variants of every image next to its source.

    For each shape folder ``dir/<shape>`` (shapes come from ``shape_list``),
    every readable image is pushed through the module-level generator ``gen``
    and ``n_augmented`` variants are saved into that same folder.

    Parameters
    ----------
    dir : str
        Dataset root containing one sub-folder per entry of ``shape_list``.
    n_augmented : int, optional
        Number of augmented images to generate per source file (default 100).

    Notes
    -----
    Files whose name starts with ``'aug-image-'`` are skipped, so running the
    function again does not augment the images it generated earlier.
    """
    prefix = 'aug-image-'
    for shape in shape_list:
        shape_dir = os.path.join(dir, shape)
        for file_name in os.listdir(shape_dir):
            if file_name.startswith(prefix):
                # Output of a previous run lives in the same folder; skip it.
                continue

            image = cv2.imread(os.path.join(shape_dir, file_name))
            if image is None:
                # cv2.imread signals an unreadable/non-image file with None.
                print('Skipping unreadable file: ', os.path.join(shape_dir, file_name))
                continue

            # gen.flow expects a batch, so add a leading batch axis of size 1.
            batch = np.expand_dims(image, 0)
            aug_iter = gen.flow(batch, save_to_dir=shape_dir,
                                save_prefix=prefix + file_name, save_format='png')
            # Each next() call generates one variant and saves it to disk;
            # the returned arrays themselves are not needed.
            for _ in range(n_augmented):
                next(aug_iter)
                
                

In [6]:
# Run this block to start augmentation.
# The call saves the generated PNG files into the shape folders under train_dir
# (the same folders it reads from), so it changes the dataset on disk.

augmentation_keras(train_dir)



#### Manual augmentation functions

In [3]:
# In the final work I used Keras augmentation; skip these blocks

'''def augmentation(dir, Extension, th1, th2, x, y, deg, deg2, deg3):
    for shape in shape_list:
        for file_name in os.listdir(os.path.join(dir,shape)):
            PATH = os.path.join(dir,shape)
            img = os.path.join(dir,shape,file_name)
            image = cv2.imread(img, 0)
            
            # flipping the image
            image_xaxis_flipped = cv2.flip(image, 0)
            image_yaxis_flipped = cv2.flip(image, 1)
            
            # Canny edge detect
            image_canny = cv2.Canny(image,th1,th2)
    
            # Translation
            rows, cols = image.shape
            M_trans = np.float32([[1, 0, x], [0, 1, y]])
            image_translated = cv2.warpAffine(image, M_trans, (cols, rows))
            
            # Rotation
            rows, cols = image.shape
            M_rot_90 = cv2.getRotationMatrix2D((cols/2,rows/2), deg, 1)
            M_rot_180 = cv2.getRotationMatrix2D((cols/2,rows/2), deg2, 1)
            M_rot_270 = cv2.getRotationMatrix2D((cols/2,rows/2), deg3, 1)
            image_rotated_90 = cv2.warpAffine(image, M_rot_90, (cols, rows))
            image_rotated_180 = cv2.warpAffine(image, M_rot_180, (cols, rows))
            image_rotated_270 = cv2.warpAffine(image, M_rot_270, (cols, rows))
            
            
        
        
            cv2.imwrite(PATH + "/flip-xaxis-" + file_name, image_xaxis_flipped)
            cv2.imwrite(PATH + "/flip-yaxis-" + file_name, image_yaxis_flipped)
            cv2.imwrite(PATH + "/Edge Canny-" + file_name + str(th1) + "*" + str(th2) + Extension, image_canny)
            cv2.imwrite(PATH + "/Translation-" + file_name + str(x) + str(y) + Extension, image_translated)
            cv2.imwrite(PATH + "/Rotate-90-" + file_name + str(deg) + Extension, image_rotated_90)
            cv2.imwrite(PATH + "/Rotate-180-" + file_name + str(deg) + Extension, image_rotated_180)
            cv2.imwrite(PATH + "/Rotate-270-" + file_name + str(deg) + Extension, image_rotated_270)'''



In [4]:
#augmentation(train_dir, Extension='.png', th1=100, th2=200, x=20, y=20, deg=90, deg2=180, deg3=270)

In [7]:
# random_state passed to the scikit-learn estimators created in train().
seed = 5
# n_estimators for the RandomForestClassifier created in train().
num_trees = 100

def preprocess(images, labels):
    """Flatten equally sized images into a float32 matrix and divide by 255.

    Parameters
    ----------
    images : sequence of array-like
        Images that all share one shape (for example grayscale arrays
        returned by ``cv2.imread``).
    labels : sequence
        One label per image.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)`` where ``features`` has shape
        ``(n_images, values_per_image)`` and dtype float32, and ``labels`` is
        ``np.array(labels)``. For empty input ``features`` has shape ``(0, 0)``.

    Raises
    ------
    ValueError
        If an entry of ``images`` is ``None`` (which is how ``cv2.imread``
        reports an unreadable file) or if the images differ in shape.
    """
    labels = np.array(labels)

    if len(images) == 0:
        # Nothing to flatten; hand back an empty matrix instead of failing on images[0].
        return np.empty((0, 0), dtype='float32'), labels

    if any(image is None for image in images):
        raise ValueError("preprocess() received a None image "
                         "(cv2.imread returns None for unreadable files)")

    shapes = {np.shape(image) for image in images}
    if len(shapes) != 1:
        raise ValueError("preprocess() needs images of one common shape, "
                         "got {}".format(sorted(shapes)))

    dataDim = int(np.prod(np.shape(images[0])))
    images = np.array(images).reshape(len(images), dataDim).astype('float32')
    images /= 255

    return images, labels

#function to preprocess data
def preprocess_flatten(images, labels):
    """Flatten every image into a 1-D vector without rescaling.

    The vector length follows the image size (90000 for a 300x300 image)
    instead of being fixed, so images of any size are accepted.

    Parameters
    ----------
    images : iterable of array-like
        Images to flatten.
    labels : sequence
        One label per image.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is a list of 1-D arrays and
        ``labels`` is ``np.array(labels)``.
    """
    features = [np.reshape(image, -1) for image in images]
    labels = np.array(labels)
    return features, labels


def preprocess_canny_with_faltten(images, labels):
    """Run the Canny edge detector on every image and flatten the edge map.

    The flattened length follows the image size (90000 for a 300x300 image)
    instead of being fixed. The misspelt function name is kept as is so that
    existing callers keep working.

    Parameters
    ----------
    images : sequence of 2-D arrays
        Images passed one by one to ``canny``.
    labels : sequence
        One label per image.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is a list of 1-D arrays
        (one flattened edge map per image) and ``labels`` is ``np.array(labels)``.
    """
    features = [np.reshape(canny(image), -1) for image in images]
    labels = np.array(labels)
    return features, labels


def preprocess_PCA_with_flatten(images, labels):
    """Describe each image by its first two per-image PCA score columns.

    A separate ``PCA`` is fitted on every image (the 2-D image array is passed
    directly to ``fit_transform``), the first two columns of the transformed
    array are kept and flattened. The resulting length follows the image
    height (600 for a 300-row image) instead of being fixed.

    Parameters
    ----------
    images : sequence of 2-D arrays
        Images to transform.
    labels : sequence
        One label per image.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is a list of 1-D arrays and
        ``labels`` is ``np.array(labels)``.
    """
    features = []
    for image in images:
        scores = PCA().fit_transform(image)
        # Keep only the first two score columns, then flatten row by row.
        features.append(np.reshape(scores[:, :2], -1))

    labels = np.array(labels)
    return features, labels


def preprocess_canny_and_PCA_with_flatten(images, labels):
    """Describe each image by two PCA score columns of its Canny edge map.

    Every image is passed through ``canny``; a separate ``PCA`` is then fitted
    on the resulting edge map, the first two columns of the transformed array
    are kept and flattened. The resulting length follows the image height
    (600 for a 300-row image) instead of being fixed.

    Parameters
    ----------
    images : sequence of 2-D arrays
        Images to transform.
    labels : sequence
        One label per image.

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is a list of 1-D arrays and
        ``labels`` is ``np.array(labels)``.
    """
    features = []
    for image in images:
        edges = canny(image)
        scores = PCA().fit_transform(edges)
        # Keep only the first two score columns, then flatten row by row.
        features.append(np.reshape(scores[:, :2], -1))

    labels = np.array(labels)
    return features, labels

# function to make classifier
def classify(model, images, labels):
    """Fit ``model`` on the given features and labels and return that same object.

    The return value of ``model.fit`` is ignored; the caller always gets back
    the object it passed in.
    """
    estimator = model
    estimator.fit(images, labels)
    return estimator


# Training

In [4]:
# Module-level accumulators filled by train() and read by the model-selection cell.
best_model = []  # placeholder; rebound to the selected estimator in the model-selection cell
val_accuracies = []  # validation accuracy (percent) of each model, in training order
models = []  # (name, estimator) tuples registered by train()
trained_models = []  # estimators after fitting, same order as `models`
names = []  # model names, same order as `val_accuracies`

In [5]:
def _load_labelled_images(data_dir):
    """Read every readable image under data_dir/<shape> as grayscale, with its integer label."""
    images, labels = [], []
    for shape in shape_list:
        print('Getting data for: ', shape)
        shape_dir = os.path.join(data_dir, shape)
        # The label is the position of the shape name in shape_list.
        label = shape_list.index(shape)
        for file_name in os.listdir(shape_dir):
            file_path = os.path.join(shape_dir, file_name)
            image = cv2.imread(file_path, 0)
            if image is None:
                # cv2.imread reports unreadable/non-image files with None
                # instead of raising; such entries would break preprocess().
                print('Skipping unreadable file: ', file_path)
                continue
            images.append(image)
            labels.append(label)
    return images, labels


def train():
    """Train every candidate classifier and record its validation accuracy.

    Reads all images below ``train_dir``, holds out a stratified 10 % for
    validation, preprocesses both parts with ``preprocess`` and fits seven
    scikit-learn classifiers. Results are stored in the module-level lists
    ``models``, ``trained_models``, ``val_accuracies`` and ``names`` (same
    order in each). Training accuracy, the training confusion matrix and the
    validation accuracy of every model are printed.

    The four lists are emptied in place at the start, so calling ``train()``
    again replaces the previous results instead of appending duplicates.

    NOTE(review): ``augmentation_keras`` saves augmented copies into the same
    folders that are read here, so a random split can put variants of one
    source image into both the training and the validation part; the reported
    validation accuracy may therefore be optimistic.
    """
    # Reset in place (not rebinding) so other cells keep seeing the same lists.
    for accumulator in (models, trained_models, val_accuracies, names):
        accumulator.clear()

    models.append(('KNN', KNeighborsClassifier(n_neighbors=2)))
    models.append(('LR', LogisticRegression(random_state=seed, max_iter=1000)))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('CART', DecisionTreeClassifier(random_state=seed)))
    models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC(random_state=seed)))

    # iterate through each shape
    all_images, all_labels = _load_labelled_images(train_dir)

    # train and validation split
    train_images, val_images, train_labels, val_labels = train_test_split(all_images, all_labels,
                                                                          shuffle=True, stratify=all_labels,
                                                                          test_size=0.1, random_state=42)

    print('Number of training images: ', len(train_images), '\n')

    # Preprocess (your own function)
    train_images, train_labels = preprocess(train_images, train_labels)
    val_images, val_labels = preprocess(val_images, val_labels)

    for name, model in models:
        print(name)

        # Make a classifier (your own function)
        model = classify(model, train_images, train_labels)
        trained_models.append(model)

        # Predict the labels from the model (your own code depending the output of the train function)
        pred_labels = model.predict(train_images)

        # Calculate accuracy (Do not erase or modify here)
        pred_acc = np.sum(pred_labels==train_labels)/len(train_labels)*100
        print("Accuracy = {}".format(pred_acc))

        cm = metrics.confusion_matrix(train_labels, pred_labels)
        print(cm, '\n')

        # Validation
        print('Number of validation images: ', len(val_images))

        pred_val_labels = model.predict(val_images)
        val_acc = np.sum(pred_val_labels==val_labels)/len(val_labels)*100
        print("Val Accuracy = {}".format(val_acc), '\n')

        val_accuracies.append(val_acc)
        names.append(name)
        

    
        
        
        


In [6]:
# Entry point of the training section: calls train(), which prints the
# per-model accuracies and fills the module-level result lists.
if __name__ == '__main__':
    train()

Getting data for:  circle
Getting data for:  triangle
Getting data for:  tetragon
Getting data for:  pentagon
Getting data for:  other
Number of training images:  2262 

KNN
Accuracy = 96.15384615384616
[[451   0   0   0   0]
 [ 20 432   0   0   0]
 [  6   8 439   0   0]
 [  9   2   3 440   0]
 [ 26  10   2   1 413]] 

Number of validation images:  252
Val Accuracy = 90.07936507936508 

LR


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Accuracy = 100.0
[[451   0   0   0   0]
 [  0 452   0   0   0]
 [  0   0 453   0   0]
 [  0   0   0 454   0]
 [  0   0   0   0 452]] 

Number of validation images:  252
Val Accuracy = 47.22222222222222 

LDA
Accuracy = 88.01945181255526
[[355  27  29   3  37]
 [ 30 404  12   0   6]
 [ 23  14 402   1  13]
 [ 20   6   6 420   2]
 [ 17  10  14   1 410]] 

Number of validation images:  252
Val Accuracy = 45.63492063492063 

CART
Accuracy = 100.0
[[451   0   0   0   0]
 [  0 452   0   0   0]
 [  0   0 453   0   0]
 [  0   0   0 454   0]
 [  0   0   0   0 452]] 

Number of validation images:  252
Val Accuracy = 67.06349206349206 

RF
Accuracy = 100.0
[[451   0   0   0   0]
 [  0 452   0   0   0]
 [  0   0 453   0   0]
 [  0   0   0 454   0]
 [  0   0   0   0 452]] 

Number of validation images:  252
Val Accuracy = 85.31746031746032 

NB
Accuracy = 37.97524314765694
[[438   1   1   2   9]
 [200  86   1   9 156]
 [149  51  65  51 137]
 [149  42   7 192  64]
 [362   8   1   3  78]] 

Number of 

In [8]:
# Choose the best model
# Reads the lists filled by train(), so the training cell must have run first.

# Highest validation accuracy (percent) among the trained models.
best_acc = np.max(val_accuracies)
print("Best Validation Accuracy = {}".format(best_acc), '\n')
    
# Position of that accuracy; np.argmax returns the first position on ties.
index_best_acc = np.argmax(val_accuracies, axis=None)

# (name, estimator) tuple as registered in `models`.
best_model_initial = models[index_best_acc]
print(best_model_initial)

# Fitted estimator stored at the same position; this is the object pickled in the save cell.
best_model = trained_models[index_best_acc]
print(best_model)

best_model_name = names[index_best_acc]   
print("Best Model Is {}".format(best_model_name), '\n')

Best Validation Accuracy = 90.07936507936508 

('KNN', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=2, p=2,
                     weights='uniform'))
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=2, p=2,
                     weights='uniform')
Best Model Is KNN 



## Save the model

In [13]:
import pickle

# Persist the selected estimator as <save_dir>/final_model.
filename = "final_model"
model_path = os.path.join(save_dir,filename)
# The context manager closes (and thereby flushes) the file even if pickling
# fails; passing a bare open(...) to pickle.dump left the handle unclosed.
with open(model_path, 'wb') as model_file:
    pickle.dump(best_model, model_file)

## Load the model

In [14]:
'''Model is provided in submission with report and notebook'''

# SECURITY: pickle.load can execute arbitrary code stored in the file, so only
# load model files that come from a trusted source.
# The context manager makes sure the file handle is closed after reading.
with open(model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
print(loaded_model)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=2, p=2,
                     weights='uniform')


# Testing

In [None]:
    """forTA (Do not erase here)
    test_dir = '../ForTA'
    test_labels, test_images = [], []
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(test_dir,shape)):
            test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))
            #add an integer to the labels list
            test_labels.append(shape_list.index(shape))

    print('Number of test images: ', len(test_images))

    test_images, test_labels = preprocess(test_images, test_labels)
    #pred_labels = model.predict(test_images)
    pred_labels = loaded_model.predict(test_images)
    pred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100
    print("Test Accuracy = {}".format(pred_acc))
    """
