# Import Libraries

In [1]:
import cv2
import numpy as np
import os

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.decomposition import PCA

from sklearn import metrics
from sklearn.model_selection import train_test_split

from skimage.feature import canny 
import skimage
from skimage import data, io
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

from skimage import color 

# Preprocessing

In [2]:
#create lists to save the labels (the name of the shape)
train_dir = '/home/trojan/Desktop/pattern recognition/PB1/Implementation/data/shapes'
shape_list = ['circle', 'triangle', 'tetragon', 'pentagon', 'other']

## Helper functions

In [3]:
def augmentation(dir, Extension, th1, th2, x, y, deg, deg2, deg3):
    for shape in shape_list:
        for file_name in os.listdir(os.path.join(dir,shape)):
            PATH = os.path.join(dir,shape)
            img = os.path.join(dir,shape,file_name)
            image = cv2.imread(img, 0)
            
            # flipping the image
            image_xaxis_flipped = cv2.flip(image, 0)
            image_yaxis_flipped = cv2.flip(image, 1)
            
            # Canny edge detect
            image_canny = cv2.Canny(image,th1,th2)
    
            # Translation
            rows, cols = image.shape
            M_trans = np.float32([[1, 0, x], [0, 1, y]])
            image_translated = cv2.warpAffine(image, M_trans, (cols, rows))
            
            # Rotation
            rows, cols = image.shape
            M_rot_90 = cv2.getRotationMatrix2D((cols/2,rows/2), deg, 1)
            M_rot_180 = cv2.getRotationMatrix2D((cols/2,rows/2), deg2, 1)
            M_rot_270 = cv2.getRotationMatrix2D((cols/2,rows/2), deg3, 1)
            image_rotated_90 = cv2.warpAffine(image, M_rot_90, (cols, rows))
            image_rotated_180 = cv2.warpAffine(image, M_rot_180, (cols, rows))
            image_rotated_270 = cv2.warpAffine(image, M_rot_270, (cols, rows))
            
            
        
        
            cv2.imwrite(PATH + "/flip-xaxis-" + file_name, image_xaxis_flipped)
            cv2.imwrite(PATH + "/flip-yaxis-" + file_name, image_yaxis_flipped)
            cv2.imwrite(PATH + "/Edge Canny-" + file_name + str(th1) + "*" + str(th2) + Extension, image_canny)
            cv2.imwrite(PATH + "/Translation-" + file_name + str(x) + str(y) + Extension, image_translated)
            cv2.imwrite(PATH + "/Rotate-90-" + file_name + str(deg) + Extension, image_rotated_90)
            cv2.imwrite(PATH + "/Rotate-180-" + file_name + str(deg) + Extension, image_rotated_180)
            cv2.imwrite(PATH + "/Rotate-270-" + file_name + str(deg) + Extension, image_rotated_270)



In [4]:
augmentation(train_dir, Extension='.png', th1=100, th2=200, x=20, y=20, deg=90, deg2=180, deg3=270)

In [5]:
seed = 5
num_trees = 100

'''def preprocess(images, labels):
    imgs = []
    for image in images:
        
        #dataDim = np.prod(image.shape)
        image = np.array(image)
        #image = image.reshape(image, dataDim)
        image = image.astype('float32')
        image /=255
        imgs.append(image)
    labels = np.array(labels)
    
    return imgs, labels'''

def preprocess(images, labels):

    dataDim = np.prod(images[0].shape)
    images = np.array(images)
    images = images.reshape(len(images), dataDim)
    images = images.astype('float32')
    images /=255
    labels = np.array(labels)
    
    return images, labels

#function to preprocess data
def preprocess_flatten(images, labels):

    features = []
    for image in images:
        feature = np.reshape(image, (300*300))
        features.append(feature)
        
    labels = np.array(labels)
    return features, labels


def preprocess_canny_with_faltten(images, labels):
    features = []
    for i in range(len(images)):
        
        feature = canny((images[i]))
        feature = np.reshape(feature, (300*300))
        features.append(feature)

    labels = np.array(labels)
    return features, labels


def preprocess_PCA_with_flatten(images, labels):
    features = []
    for i in range(len(images)):

        image_pca = PCA().fit_transform(images[i])
        image_selected = image_pca[:,:2]
        image_selected = np.reshape(image_selected, (600))
        features.append(image_selected)

    labels = np.array(labels)
    return features, labels


def preprocess_canny_and_PCA_with_flatten(images, labels):

    features = []
    for i in range(len(images)):
        
        edges = canny((images[i]))
        image_pca = PCA().fit_transform(edges)
        image_selected = image_pca[:,:2]
        image_selected = np.reshape(image_selected, (600))
        features.append(image_selected)

    labels = np.array(labels)
    return features, labels

def augmentation(images, labels):
    #for img, label in zip(images, labels)
    for img in images:
        height, width = img.shape[:2]
        M = np.float32([[1, 0, 100], [0, 1, 50]])
        translated = cv2.warpAffine(img, M, (width, height))
        #flipped = cv2.flip(img, 1)

        #center = (width // 2, height // 2)
        #R = cv2.getRotationMatrix2D(center, 60, 1.0)
        #rotated = cv2.warpAffine(img, R, (width, height))
        #labels.append(label)
        images.append(translated)
        #images.append(flipped)
        #images.append(rotated)
        
    for label in labels:
        labels.append(label)
        
    return images, labels

# function to make classifier
def classify(model, images, labels):

    model.fit(images, labels)
    return model


# Training

In [6]:
def train():
    
    models = []
    names = []
    models.append(('KNN', KNeighborsClassifier(n_neighbors=4)))
    models.append(('LR', LogisticRegression(random_state=seed, max_iter=100)))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('CART', DecisionTreeClassifier(random_state=seed)))
    models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC(random_state=seed)))
    
    #iterate through each shape
    all_labels, all_images = [],[]
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(train_dir,shape)):
            all_images.append(cv2.imread(os.path.join(train_dir,shape,file_name), 0))
            #add an integer to the labels list
            all_labels.append(shape_list.index(shape))

    # train and validation split
    train_images, val_images, train_labels, val_labels = train_test_split(all_images, all_labels, 
                                                                          shuffle=True, stratify=all_labels, 
                                                                          test_size=0.1, random_state=42)

    print('Number of training images: ', len(train_images), '\n')

    # Preprocess (your own function)
    train_images, train_labels = preprocess_PCA_with_flatten(train_images, train_labels)
    val_images, val_labels = preprocess_PCA_with_flatten(val_images, val_labels)

    for name, model in models:
        #train_images, train_labels = preprocess(train_images, train_labels)
        print (name)
        
        # Make a classifier (your own function)
        model = classify(model, train_images, train_labels)

        # Predict the labels from the model (your own code depending the output of the train function)
        pred_labels = model.predict(train_images)

        # Calculate accuracy (Do not erase or modify here)
        pred_acc = np.sum(pred_labels==train_labels)/len(train_labels)*100
        print("Accuracy = {}".format(pred_acc))

        cm = metrics.confusion_matrix(train_labels, pred_labels)
        print(cm, '\n')
        
        # Validation
        print('Number of validation images: ', len(val_images))
    
        pred_val_labels = model.predict(val_images)
        val_acc = np.sum(pred_val_labels==val_labels)/len(val_labels)*100
        print("Val Accuracy = {}".format(val_acc), '\n')


In [7]:
if __name__ == '__main__':
    train()

Getting data for:  circle
Getting data for:  triangle
Getting data for:  tetragon
Getting data for:  pentagon
Getting data for:  other
Number of training images:  180 

KNN
Accuracy = 61.111111111111114
[[28  7  0  0  1]
 [ 7 28  0  0  1]
 [ 7 11 15  2  1]
 [ 8  7  2 15  4]
 [ 5  6  0  1 24]] 

Number of validation images:  20
Val Accuracy = 40.0 

LR
Accuracy = 100.0
[[36  0  0  0  0]
 [ 0 36  0  0  0]
 [ 0  0 36  0  0]
 [ 0  0  0 36  0]
 [ 0  0  0  0 36]] 

Number of validation images:  20
Val Accuracy = 35.0 

LDA


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Accuracy = 96.66666666666667
[[35  1  0  0  0]
 [ 1 33  1  0  1]
 [ 0  1 35  0  0]
 [ 0  1  0 35  0]
 [ 0  0  0  0 36]] 

Number of validation images:  20
Val Accuracy = 25.0 

CART
Accuracy = 100.0
[[36  0  0  0  0]
 [ 0 36  0  0  0]
 [ 0  0 36  0  0]
 [ 0  0  0 36  0]
 [ 0  0  0  0 36]] 

Number of validation images:  20
Val Accuracy = 60.0 

RF
Accuracy = 100.0
[[36  0  0  0  0]
 [ 0 36  0  0  0]
 [ 0  0 36  0  0]
 [ 0  0  0 36  0]
 [ 0  0  0  0 36]] 

Number of validation images:  20
Val Accuracy = 50.0 

NB
Accuracy = 58.333333333333336
[[29  1  1  1  4]
 [ 8 20  0  4  4]
 [ 6  3 18  5  4]
 [ 8  5  1 21  1]
 [12  0  2  5 17]] 

Number of validation images:  20
Val Accuracy = 40.0 

SVM
Accuracy = 73.88888888888889
[[28  3  2  0  3]
 [ 6 27  0  0  3]
 [ 5  2 27  0  2]
 [ 8  3  2 23  0]
 [ 6  0  2  0 28]] 

Number of validation images:  20
Val Accuracy = 50.0 



In [198]:

    """forTA (Do not erase here)
    test_dir = '../ForTA'
    test_labels, test_images = [], []
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(test_dir,shape)):
            test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))
            #add an integer to the labels list
            test_labels.append(shape_list.index(shape))

    print('Number of test images: ', len(test_images))

    test_images, test_labels = preprocess(test_images, test_labels)
    pred_labels = model.predict(test_images)
    pred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100
    print("Test Accuracy = {}".format(pred_acc))
    """


'forTA (Do not erase here)\ntest_dir = \'../ForTA\'\ntest_labels, test_images = [], []\nfor shape in shape_list:\n    print(\'Getting data for: \', shape)\n    for file_name in os.listdir(os.path.join(test_dir,shape)):\n        test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))\n        #add an integer to the labels list\n        test_labels.append(shape_list.index(shape))\n\nprint(\'Number of test images: \', len(test_images))\n\ntest_images, test_labels = preprocess(test_images, test_labels)\npred_labels = model.predict(test_images)\npred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100\nprint("Test Accuracy = {}".format(pred_acc))\n'