# Import Libraries

In [193]:
import cv2
import numpy as np
import os

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn import metrics
from sklearn.model_selection import train_test_split

# Preprocessing

In [194]:
#create lists to save the labels (the name of the shape)
train_dir = '/home/trojan/Desktop/pattern recognition/PB1/Implementation/data/shapes'
shape_list = ['circle', 'triangle', 'tetragon', 'pentagon', 'other']

## Helper functions

In [195]:
seed = 5
num_trees = 100

#function to preprocess data
def preprocess(images, labels):
    """You can make your preprocessing code in this function.
    Here, we just flatten the images, for example.
    In addition, you can split this data into the training set and validation set for robustness to the test(unseen) data.

    :params list images: (Number of images x row x column)
    :params list labels: (Number of images, 1)
    :rtype: array
    :return: preprocessed images and labels
    """
    
    
    dataDim = np.prod(images[0].shape)
    images = np.array(images)
    images = images.reshape(len(images), dataDim)
    images = images.astype('float32')
    images /=255
    labels = np.array(labels)
    
    return images, labels

# function to make classifier
def classify(model, images, labels):
    """You can make your classifier code in this function.
    Here, we use KNN classifier, for example.

    :params array images: (Number of images x row x column)
    :params array labels: (Number of images)
    :return: classifier model
    """
    model.fit(images, labels)
    return model


# Training

In [196]:
def train():
    
    models = []
    names = []
    models.append(('KNN', KNeighborsClassifier(n_neighbors=2)))
    models.append(('LR', LogisticRegression(random_state=seed)))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('CART', DecisionTreeClassifier(random_state=seed)))
    models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC(random_state=seed)))
    
    #iterate through each shape
    all_labels, all_images = [],[]
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(train_dir,shape)):
            all_images.append(cv2.imread(os.path.join(train_dir,shape,file_name), 0))
            #add an integer to the labels list
            all_labels.append(shape_list.index(shape))

    # train and validation split
    train_images, val_images, train_labels, val_labels = train_test_split(all_images, all_labels, 
                                                                          shuffle=True, stratify=all_labels, 
                                                                          test_size=0.4, random_state=42)

    print('Number of training images: ', len(train_images), '\n')

    # Preprocess (your own function)
    train_images, train_labels = preprocess(train_images, train_labels)

    for name, model in models:
        
        print (name)
        
        # Make a classifier (your own function)
        model = classify(model, train_images, train_labels)

        # Predict the labels from the model (your own code depending the output of the train function)
        pred_labels = model.predict(train_images)

        # Calculate accuracy (Do not erase or modify here)
        pred_acc = np.sum(pred_labels==train_labels)/len(train_labels)*100
        print("Accuracy = {}".format(pred_acc))

        cm = metrics.confusion_matrix(train_labels, pred_labels)
        print(cm, '\n')
        
        # Validation
        print('Number of validation images: ', len(val_images))
    
        val_images, val_labels = preprocess(val_images, val_labels)
    
        pred_val_labels = model.predict(val_images)
        val_acc = np.sum(pred_val_labels==val_labels)/len(val_labels)*100
        print("Val Accuracy = {}".format(val_acc), '\n')


In [197]:
if __name__ == '__main__':
    train()

Getting data for:  circle
Getting data for:  triangle
Getting data for:  tetragon
Getting data for:  pentagon
Getting data for:  other
Number of training images:  15 

KNN
Accuracy = 60.0
[[3 0 0 0 0]
 [2 1 0 0 0]
 [1 0 2 0 0]
 [1 0 0 2 0]
 [1 0 1 0 1]] 

Number of validation images:  10
Val Accuracy = 20.0 

LR


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


Accuracy = 100.0
[[3 0 0 0 0]
 [0 3 0 0 0]
 [0 0 3 0 0]
 [0 0 0 3 0]
 [0 0 0 0 3]] 

Number of validation images:  10
Val Accuracy = 20.0 

LDA
Accuracy = 53.333333333333336
[[1 1 0 0 1]
 [0 2 1 0 0]
 [1 0 1 0 1]
 [1 0 0 2 0]
 [0 0 1 0 2]] 

Number of validation images:  10
Val Accuracy = 20.0 

CART
Accuracy = 100.0
[[3 0 0 0 0]
 [0 3 0 0 0]
 [0 0 3 0 0]
 [0 0 0 3 0]
 [0 0 0 0 3]] 

Number of validation images:  10
Val Accuracy = 20.0 

RF
Accuracy = 100.0
[[3 0 0 0 0]
 [0 3 0 0 0]
 [0 0 3 0 0]
 [0 0 0 3 0]
 [0 0 0 0 3]] 

Number of validation images:  10
Val Accuracy = 20.0 

NB
Accuracy = 100.0
[[3 0 0 0 0]
 [0 3 0 0 0]
 [0 0 3 0 0]
 [0 0 0 3 0]
 [0 0 0 0 3]] 

Number of validation images:  10
Val Accuracy = 20.0 

SVM
Accuracy = 93.33333333333333
[[3 0 0 0 0]
 [0 3 0 0 0]
 [0 0 3 0 0]
 [1 0 0 2 0]
 [0 0 0 0 3]] 

Number of validation images:  10
Val Accuracy = 20.0 



In [147]:

    """forTA (Do not erase here)
    test_dir = '../ForTA'
    test_labels, test_images = [], []
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(test_dir,shape)):
            test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))
            #add an integer to the labels list
            test_labels.append(shape_list.index(shape))

    print('Number of test images: ', len(test_images))

    test_images, test_labels = preprocess(test_images, test_labels)
    pred_labels = model.predict(test_images)
    pred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100
    print("Test Accuracy = {}".format(pred_acc))
    """


'forTA (Do not erase here)\ntest_dir = \'../ForTA\'\ntest_labels, test_images = [], []\nfor shape in shape_list:\n    print(\'Getting data for: \', shape)\n    for file_name in os.listdir(os.path.join(test_dir,shape)):\n        test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))\n        #add an integer to the labels list\n        test_labels.append(shape_list.index(shape))\n\nprint(\'Number of test images: \', len(test_images))\n\ntest_images, test_labels = preprocess(test_images, test_labels)\npred_labels = model.predict(test_images)\npred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100\nprint("Test Accuracy = {}".format(pred_acc))\n'