# Import Libraries

In [1]:
import cv2
import numpy as np
import os

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.decomposition import PCA

from sklearn import metrics
from sklearn.model_selection import train_test_split

from skimage.feature import canny 
import skimage
from skimage import data, io
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

from skimage import color 

# Preprocessing

In [2]:
#create lists to save the labels (the name of the shape)
train_dir = '/home/trojan/Desktop/pattern recognition/PB1/Implementation/data/shapes'
shape_list = ['circle', 'triangle', 'tetragon', 'pentagon', 'other']

## Helper functions

In [8]:
seed = 5
num_trees = 100

#function to preprocess data
def preprocess_flatten(images, labels):

    features = []
    for image in images:
        feature = np.reshape(image, (300*300))
        features.append(feature)
        
    labels = np.array(labels)
    return features, labels


def preprocess_canny_with_faltten(images, labels):
    features = []
    for i in range(len(images)):
        
        feature = canny((images[i]))
        feature = np.reshape(feature, (300*300))
        features.append(feature)

    labels = np.array(labels)
    return features, labels


def preprocess_PCA_with_flatten(images, labels):
    features = []
    for i in range(len(images)):

        image_pca = PCA().fit_transform(images[i])
        image_selected = image_pca[:,:2]
        image_selected = np.reshape(image_selected, (600))
        features.append(image_selected)

    labels = np.array(labels)
    return features, labels


def preprocess_canny_and_PCA_with_flatten(images, labels):

    features = []
    for i in range(len(images)):
        
        edges = canny((images[i]))
        image_pca = PCA().fit_transform(edges)
        image_selected = image_pca[:,:2]
        image_selected = np.reshape(image_selected, (600))
        features.append(image_selected)

    labels = np.array(labels)
    return features, labels

def augmentation(images, labels):
    #for img, label in zip(images, labels)
    for img in images:
        height, width = img.shape[:2]
        M = np.float32([[1, 0, 100], [0, 1, 50]])
        translated = cv2.warpAffine(img, M, (width, height))
        #flipped = cv2.flip(img, 1)

        #center = (width // 2, height // 2)
        #R = cv2.getRotationMatrix2D(center, 60, 1.0)
        #rotated = cv2.warpAffine(img, R, (width, height))
        #labels.append(label)
        images.append(translated)
        #images.append(flipped)
        #images.append(rotated)
        
    for label in labels:
        labels.append(label)
        
    return images, labels

# function to make classifier
def classify(model, images, labels):

    model.fit(images, labels)
    return model


In [9]:
all_labels, all_images = [],[]
for shape in shape_list:
    print('Getting data for: ', shape)
    for file_name in os.listdir(os.path.join(train_dir,shape)):
        all_images.append(cv2.imread(os.path.join(train_dir,shape,file_name), 0))
        #add an integer to the labels list
        all_labels.append(shape_list.index(shape))

# train and validation split
train_images, val_images, train_labels, val_labels = train_test_split(all_images, all_labels, 
                                                                          shuffle=True, stratify=all_labels, 
                                                                          test_size=0.2, random_state=42)

print('Number of training images: ', len(train_images), '\n')
print('Number of training labels: ', len(train_labels), '\n')

Getting data for:  circle
Getting data for:  triangle
Getting data for:  tetragon
Getting data for:  pentagon
Getting data for:  other
Number of training images:  20 

Number of training labels:  20 



In [10]:
aug_images = []
aug_labels = []
for i in range (0, 5):
    aug_images.append(train_images[i])
    aug_labels.append(train_labels[i])

print(('Number of aug images: ', len(aug_images), '\n'))
print(('Number of aug labels: ', len(aug_labels), '\n'))


('Number of aug images: ', 5, '\n')
('Number of aug labels: ', 5, '\n')


In [None]:
new_aug_images, new_aug_labels = augmentation (aug_images, aug_labels)

print(('New number of aug images: ', len(aug_images), '\n'))
print(('New number of aug labels: ', len(aug_labels), '\n'))

# Training

In [231]:
def train():
    
    models = []
    names = []
    models.append(('KNN', KNeighborsClassifier(n_neighbors=2)))
    models.append(('LR', LogisticRegression(random_state=seed)))
    models.append(('LDA', LinearDiscriminantAnalysis()))
    models.append(('CART', DecisionTreeClassifier(random_state=seed)))
    models.append(('RF', RandomForestClassifier(n_estimators=num_trees, random_state=seed)))
    models.append(('NB', GaussianNB()))
    models.append(('SVM', SVC(random_state=seed)))
    
    #iterate through each shape
    all_labels, all_images = [],[]
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(train_dir,shape)):
            all_images.append(cv2.imread(os.path.join(train_dir,shape,file_name), 0))
            #add an integer to the labels list
            all_labels.append(shape_list.index(shape))

    # train and validation split
    train_images, val_images, train_labels, val_labels = train_test_split(all_images, all_labels, 
                                                                          shuffle=True, stratify=all_labels, 
                                                                          test_size=0.2, random_state=42)

    print('Number of training images: ', len(train_images), '\n')

    # Preprocess (your own function)
    train_images, train_labels = preprocess_canny_and_PCA_with_flatten(train_images, train_labels)
    val_images, val_labels = preprocess_canny_and_PCA_with_flatten(val_images, val_labels)

    for name, model in models:
        #train_images, train_labels = preprocess(train_images, train_labels)
        print (name)
        
        # Make a classifier (your own function)
        model = classify(model, train_images, train_labels)

        # Predict the labels from the model (your own code depending the output of the train function)
        pred_labels = model.predict(train_images)

        # Calculate accuracy (Do not erase or modify here)
        pred_acc = np.sum(pred_labels==train_labels)/len(train_labels)*100
        print("Accuracy = {}".format(pred_acc))

        cm = metrics.confusion_matrix(train_labels, pred_labels)
        print(cm, '\n')
        
        # Validation
        print('Number of validation images: ', len(val_images))
    
        pred_val_labels = model.predict(val_images)
        val_acc = np.sum(pred_val_labels==val_labels)/len(val_labels)*100
        print("Val Accuracy = {}".format(val_acc), '\n')


In [232]:
if __name__ == '__main__':
    train()

Getting data for:  circle
Getting data for:  triangle
Getting data for:  tetragon
Getting data for:  pentagon
Getting data for:  other
Number of training images:  20 

KNN
Accuracy = 55.00000000000001
[[4 0 0 0 0]
 [0 4 0 0 0]
 [0 2 2 0 0]
 [0 3 0 1 0]
 [1 3 0 0 0]] 

Number of validation images:  5
Val Accuracy = 20.0 

LR
Accuracy = 100.0
[[4 0 0 0 0]
 [0 4 0 0 0]
 [0 0 4 0 0]
 [0 0 0 4 0]
 [0 0 0 0 4]] 

Number of validation images:  5
Val Accuracy = 40.0 

LDA
Accuracy = 65.0
[[2 1 0 0 1]
 [1 3 0 0 0]
 [1 1 2 0 0]
 [0 1 0 3 0]
 [0 0 0 1 3]] 

Number of validation images:  5
Val Accuracy = 40.0 

CART
Accuracy = 100.0
[[4 0 0 0 0]
 [0 4 0 0 0]
 [0 0 4 0 0]
 [0 0 0 4 0]
 [0 0 0 0 4]] 

Number of validation images:  5
Val Accuracy = 20.0 

RF
Accuracy = 100.0
[[4 0 0 0 0]
 [0 4 0 0 0]
 [0 0 4 0 0]
 [0 0 0 4 0]
 [0 0 0 0 4]] 

Number of validation images:  5
Val Accuracy = 40.0 

NB
Accuracy = 100.0
[[4 0 0 0 0]
 [0 4 0 0 0]
 [0 0 4 0 0]
 [0 0 0 4 0]
 [0 0 0 0 4]] 

Number of validatio

In [198]:

    """forTA (Do not erase here)
    test_dir = '../ForTA'
    test_labels, test_images = [], []
    for shape in shape_list:
        print('Getting data for: ', shape)
        for file_name in os.listdir(os.path.join(test_dir,shape)):
            test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))
            #add an integer to the labels list
            test_labels.append(shape_list.index(shape))

    print('Number of test images: ', len(test_images))

    test_images, test_labels = preprocess(test_images, test_labels)
    pred_labels = model.predict(test_images)
    pred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100
    print("Test Accuracy = {}".format(pred_acc))
    """


'forTA (Do not erase here)\ntest_dir = \'../ForTA\'\ntest_labels, test_images = [], []\nfor shape in shape_list:\n    print(\'Getting data for: \', shape)\n    for file_name in os.listdir(os.path.join(test_dir,shape)):\n        test_images.append(cv2.imread(os.path.join(test_dir,shape,file_name), 0))\n        #add an integer to the labels list\n        test_labels.append(shape_list.index(shape))\n\nprint(\'Number of test images: \', len(test_images))\n\ntest_images, test_labels = preprocess(test_images, test_labels)\npred_labels = model.predict(test_images)\npred_acc = np.sum(pred_labels==test_labels)/len(test_labels)*100\nprint("Test Accuracy = {}".format(pred_acc))\n'