In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os, glob
import pickle
import pandas as pd
import cv2 as cv
%matplotlib inline

# classification required packages

from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import classification_report
import joblib

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
%matplotlib inline

In [2]:
def load_data(filedir, filepath, csvfile):
    """Read an array file and the matching class labels from one directory.

    Parameters
    ----------
    filedir : str
        Directory that contains both files.
    filepath : str
        Path of the ``.npy`` file, relative to ``filedir``.
    csvfile : str
        Path of the CSV file, relative to ``filedir``. It must have a
        ``ClassId`` column.

    Returns
    -------
    tuple
        ``(data, labels)``: ``data`` is whatever ``np.load`` returns and
        ``labels`` is the ``ClassId`` column copied into a NumPy array.
    """
    array_path = os.path.join(filedir, filepath)
    csv_path = os.path.join(filedir, csvfile)

    # allow_pickle=True lets np.load unpickle object arrays, so only point
    # this at trusted files.
    data = np.load(array_path, allow_pickle=True)
    class_ids = pd.read_csv(csv_path)['ClassId']

    return data, np.array(class_ids)

def resample(data, labels, n=100, n_classes=43):
    """Keep at most ``n`` samples per class, in order of appearance.

    Classes are visited in the order ``0 .. n_classes - 1`` and, for each one,
    the first ``n`` matching rows are selected. A class with fewer than ``n``
    samples contributes the rows it has instead of breaking the reshape.

    Parameters
    ----------
    data : array-like
        Samples; the first axis is aligned with ``labels``.
    labels : array-like
        Integer class id of every sample.
    n : int, default 100
        Maximum number of samples kept per class.
    n_classes : int, default 43
        Number of class ids to visit.

    Returns
    -------
    tuple of np.ndarray
        ``(samples, sample_labels)``. ``samples`` has one flattened row per
        kept sample; ``sample_labels`` is a column with one row per kept
        sample (shape ``(k, 1)`` for 1-D ``labels``).
    """
    data = np.asarray(data)
    labels = np.asarray(labels)

    per_class = [np.where(labels == class_id)[0][:n] for class_id in range(n_classes)]
    # np.concatenate rejects an empty list, so handle n_classes == 0 explicitly.
    index = np.concatenate(per_class) if per_class else np.array([], dtype=np.intp)

    # Explicit widths (instead of -1) keep the reshape valid when nothing is selected.
    sample_width = int(np.prod(data.shape[1:]))
    label_width = int(np.prod(labels.shape[1:]))

    resample_data = data[index].reshape(len(index), sample_width)
    resample_labels = labels[index].reshape(len(index), label_width)
    return resample_data, resample_labels

def randomize_data(data, labels, seed=None):
    """Shuffle samples and labels with one shared random permutation.

    Parameters
    ----------
    data : np.ndarray
        Samples; indexed along the first axis.
    labels : np.ndarray
        Labels aligned with ``data``; its length sets the permutation size.
    seed : int or None, default None
        When given, the permutation is drawn from a private
        ``np.random.RandomState(seed)`` so the shuffle is reproducible and
        the global NumPy random state is left untouched. When ``None`` the
        global NumPy random state is used.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)``: new arrays holding the rows of ``data`` and ``labels``
        in the same shuffled order. The inputs are not modified.
    """
    order = np.arange(len(labels))
    if seed is None:
        np.random.shuffle(order)
    else:
        np.random.RandomState(seed).shuffle(order)

    X = data[order]
    y = labels[order]

    return X, y

# Processed Image

In [3]:
# Root folder passed to load_data() together with the relative paths below.
# Set the BML_DATA_DIR environment variable to run the notebook from another
# machine; the original author's folder is kept as the fallback value.
data_dir = os.environ.get('BML_DATA_DIR', 'C:\\Users\\nhinp3\\Documents\\BML_Final project')
# Locations of the pre-processed image arrays, relative to data_dir.
processed_train_path = 'Processed/trainProcessed.npy'
processed_test_path = 'Processed/testProcessed.npy'

In [4]:
# Load the pre-processed training array and its ClassId labels, then shuffle
# both with the same random permutation so samples and labels stay paired.
trainProcessedData, trainLabels = load_data(data_dir, processed_train_path, 'Train.csv')
trainProcessedData, trainLabels = randomize_data(trainProcessedData, trainLabels)

In [5]:
# Load the pre-processed test array and its ClassId labels (not shuffled here).
testProcessedData, testLabels = load_data(data_dir, processed_test_path, 'Test.csv')

In [6]:
# Sanity check: display the shape of the training label array.
trainLabels.shape

(39209,)

In [7]:
estimators = [100,200,300,500]
def train_model_processed(estimators, trainData, trainLabels, testData, testLabels):
    """Train (or load) one MLP per hidden-layer width and print its scores.

    For every ``n`` in ``estimators`` a cached model is looked up at
    ``MLP_NoFE/mlp_<n>_processed.pkl``. If the file exists it is unpickled,
    otherwise ``MLPClassifier(hidden_layer_sizes=n)`` (a single hidden layer
    with ``n`` units) is fitted on the training data and pickled to that path.
    The model is then evaluated: a classification report on the test data and
    the accuracy on the training data are printed.

    Parameters
    ----------
    estimators : iterable of int
        Hidden-layer widths to try.
    trainData, testData : np.ndarray
        Samples; each is flattened to ``(len(labels), -1)`` before use.
    trainLabels, testLabels : array-like
        Class labels aligned with the corresponding data.

    Returns
    -------
    None
        Results are only printed.

    Notes
    -----
    A cached model is used whenever its file exists; it is not checked
    against ``trainData``. Delete the file to force retraining.
    """
    model_dir = "MLP_NoFE"

    # Flatten once; the same matrices feed fit, predict and score.
    train_features = trainData.reshape(len(trainLabels), -1)
    test_features = testData.reshape(len(testLabels), -1)

    for n in estimators:
        model_path = f"{model_dir}/mlp_{n}_processed.pkl"

        print(f'MLP WITH ONE HIDDEN LAYER OF {n} UNITS')
        if os.path.isfile(model_path):
            print("[INFO] loading classifier: MLP trained on processed images...")
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load model files written by this notebook.
            with open(model_path, 'rb') as model_file:
                mlp = pickle.load(model_file)
            print("[INFO] Classifer is loaded as instance ::mlp::")
        else:
            print("[INFO] pre-trained classifier not found. \n Training Classifier MLP")
            # Create the cache folder before the slow fit so the save below
            # cannot fail afterwards just because the folder is missing.
            os.makedirs(model_dir, exist_ok=True)
            mlp = MLPClassifier(hidden_layer_sizes=n)
            print(mlp)
            mlp.fit(train_features, trainLabels)
            print("[INFO] Succefully trained the classsifier. \n Saving the classifier for further use")
            # The context manager flushes and closes the file even on error.
            with open(model_path, 'wb') as model_file:
                pickle.dump(mlp, model_file)
            print("[INFO] Classifier Saved")

        predictions = mlp.predict(test_features)

        # show a final classification report demonstrating the accuracy of the classifier
        print(f"EVALUATION ON TESTING DATA FOR {n}-UNIT MLP")
        print(classification_report(testLabels, predictions))

        print("ACCURACY on TRAINING DATA")
        print(mlp.score(train_features, trainLabels))

        print('-------------------------------------------------------')

In [8]:
# Train or load one MLP per entry of `estimators` on the processed images and print the reports.
train_model_processed(estimators, trainProcessedData, trainLabels, testProcessedData, testLabels)

MLP WITH 100 HIDDEN LAYERS
[INFO] loading classifier: MLP trained on ori images...
[INFO] Classifer is loaded as instance ::rf::
EVALUATION ON TESTING DATA FOR100MLP
              precision    recall  f1-score   support

           0       0.31      0.25      0.28        60
           1       0.65      0.91      0.76       720
           2       0.93      0.67      0.78       750
           3       0.67      0.82      0.74       450
           4       0.77      0.83      0.80       660
           5       0.78      0.75      0.76       630
           6       0.86      0.73      0.79       150
           7       0.80      0.66      0.72       450
           8       0.62      0.75      0.68       450
           9       0.93      0.85      0.89       480
          10       0.89      0.92      0.91       660
          11       0.84      0.71      0.77       420
          12       0.82      0.92      0.87       690
          13       0.91      0.90      0.91       720
          14       0.90

0.9997194521665944
-------------------------------------------------------
MLP WITH 500 HIDDEN LAYERS
[INFO] pre-trained classifier not found. 
 Training Classifier MLP
MLPClassifier(hidden_layer_sizes=500)
[INFO] Succefully trained the classsifier. 
 Saving the classifier for further use
[INFO] Classifier Saved
EVALUATION ON TESTING DATA FOR500MLP
              precision    recall  f1-score   support

           0       0.62      0.30      0.40        60
           1       0.87      0.93      0.90       720
           2       0.92      0.92      0.92       750
           3       0.85      0.84      0.84       450
           4       0.93      0.91      0.92       660
           5       0.77      0.90      0.83       630
           6       0.91      0.77      0.83       150
           7       0.89      0.81      0.85       450
           8       0.79      0.88      0.83       450
           9       0.96      0.96      0.96       480
          10       0.95      0.97      0.96       660


# Original Image

In [9]:
def resize_img(images):
    """Convert, resize and rescale a collection of images.

    Every element of ``images`` is passed through ``cv.cvtColor`` with
    ``cv.COLOR_BGR2RGB``, resized to 32x32 with ``cv.resize`` and divided
    by 255.

    Returns
    -------
    np.ndarray
        The converted images stacked in input order.
    """
    return np.array([
        cv.resize(cv.cvtColor(picture, cv.COLOR_BGR2RGB), (32, 32)) / 255
        for picture in images
    ])

In [10]:
# Locations of the original image arrays, relative to data_dir.
ori_train_path = 'Original/trainImages.npy'
ori_test_path = 'Original/testImages.npy'

In [11]:
# Load the original training images with their ClassId labels, then run them
# through resize_img (colour conversion, 32x32 resize, division by 255).
trainOriData, trainOriLabels = load_data(data_dir, ori_train_path, 'Train.csv')
trainOriData = resize_img(trainOriData)
# Display the shape of the resized training array.
trainOriData.shape

(39209, 32, 32, 3)

In [12]:
# Load the original test images with their ClassId labels and apply the same
# resize_img conversion that was used for the training images.
testOriData, testOriLabels = load_data(data_dir, ori_test_path, 'Test.csv')
testOriData = resize_img(testOriData)
# Display the shape of the resized test array.
testOriData.shape

(12630, 32, 32, 3)

In [13]:
# Shuffle the training images and labels with one shared random permutation.
trainOriData, trainOriLabels = randomize_data(trainOriData, trainOriLabels)

In [14]:
# Shuffling only reorders rows, so the shape should match the one shown before.
trainOriData.shape

(39209, 32, 32, 3)

In [28]:
estimators = [100,200,300,500]
def train_model_ori(estimators, trainData, trainLabels, testData, testLabels):
    """Train (or load) one MLP per hidden-layer width on the original images.

    For every ``n`` in ``estimators`` a cached model is looked up at
    ``MLP_NoFE/mlp_<n>_ori.pkl``. If the file exists it is unpickled,
    otherwise ``MLPClassifier(hidden_layer_sizes=n)`` (a single hidden layer
    with ``n`` units) is fitted on the training data and pickled to that path.
    The model is then evaluated: a classification report on the test data and
    the accuracy on the training data are printed.

    Parameters
    ----------
    estimators : iterable of int
        Hidden-layer widths to try.
    trainData, testData : np.ndarray
        Samples; each is flattened to ``(len(labels), -1)`` before use.
    trainLabels, testLabels : array-like
        Class labels aligned with the corresponding data.

    Returns
    -------
    None
        Results are only printed.

    Notes
    -----
    A cached model is used whenever its file exists; it is not checked
    against ``trainData``. Delete the file to force retraining.
    """
    model_dir = "MLP_NoFE"

    # Flatten once; the same matrices feed fit, predict and score.
    train_features = trainData.reshape(len(trainLabels), -1)
    test_features = testData.reshape(len(testLabels), -1)

    for n in estimators:
        model_path = f"{model_dir}/mlp_{n}_ori.pkl"

        print(f'MLP WITH ONE HIDDEN LAYER OF {n} UNITS')
        if os.path.isfile(model_path):
            print("[INFO] loading classifier: MLP trained on ori images...")
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load model files written by this notebook.
            with open(model_path, 'rb') as model_file:
                mlp = pickle.load(model_file)
            print("[INFO] Classifer is loaded as instance ::mlp::")
        else:
            print("[INFO] pre-trained classifier not found. \n Training Classifier MLP")
            # Create the cache folder before the slow fit so the save below
            # cannot fail afterwards just because the folder is missing.
            os.makedirs(model_dir, exist_ok=True)
            mlp = MLPClassifier(hidden_layer_sizes=n)
            print(mlp)
            mlp.fit(train_features, trainLabels)
            print("[INFO] Succefully trained the classsifier. \n Saving the classifier for further use")
            # The context manager flushes and closes the file even on error.
            with open(model_path, 'wb') as model_file:
                pickle.dump(mlp, model_file)
            print("[INFO] Classifier Saved")

        predictions = mlp.predict(test_features)

        # show a final classification report demonstrating the accuracy of the classifier
        print(f"EVALUATION ON TESTING DATA FOR {n}-UNIT MLP")
        print(classification_report(testLabels, predictions))

        print("ACCURACY on TRAINING DATA")
        print(mlp.score(train_features, trainLabels))

        print('-------------------------------------------------------')

In [29]:
# Train or load one MLP per entry of `estimators` on the resized original images and print the reports.
train_model_ori(estimators, trainOriData, trainOriLabels, testOriData, testOriLabels)

MLP WITH 100 HIDDEN LAYERS
[INFO] loading classifier: MLP trained on ori images...
[INFO] Classifer is loaded as instance ::rf::
EVALUATION ON TESTING DATA FOR100MLP
              precision    recall  f1-score   support

           0       0.36      0.35      0.36        60
           1       0.81      0.85      0.83       720
           2       0.94      0.84      0.89       750
           3       0.89      0.65      0.75       450
           4       0.71      0.92      0.80       660
           5       0.66      0.89      0.76       630
           6       0.72      0.71      0.72       150
           7       0.90      0.72      0.80       450
           8       0.82      0.91      0.86       450
           9       0.95      0.92      0.94       480
          10       0.95      0.95      0.95       660
          11       0.91      0.85      0.88       420
          12       0.99      0.94      0.96       690
          13       0.97      0.97      0.97       720
          14       0.99

0.9775561733275524
-------------------------------------------------------
MLP WITH 500 HIDDEN LAYERS
[INFO] loading classifier: MLP trained on ori images...
[INFO] Classifer is loaded as instance ::rf::
EVALUATION ON TESTING DATA FOR500MLP
              precision    recall  f1-score   support

           0       0.27      0.42      0.33        60
           1       0.86      0.75      0.80       720
           2       0.94      0.75      0.84       750
           3       0.66      0.83      0.74       450
           4       0.60      0.95      0.73       660
           5       0.67      0.90      0.77       630
           6       0.78      0.65      0.71       150
           7       0.94      0.71      0.81       450
           8       0.87      0.86      0.86       450
           9       0.94      0.90      0.92       480
          10       0.96      0.91      0.93       660
          11       0.91      0.91      0.91       420
          12       1.00      0.90      0.95       690
  