In [4]:
import numpy as np
import matplotlib.pyplot as plt
import os, glob
import pickle
import pandas as pd
import cv2
%matplotlib inline

# classification required packages
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import classification_report
import pickle

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
%matplotlib inline

In [5]:
def load_data(filedir, filepath, csvfile):
    """Read a saved array and its class labels from ``filedir``.

    Parameters
    ----------
    filedir : str
        Directory that both file names are joined onto.
    filepath : str
        Name of the ``.npy`` file, relative to ``filedir``.
    csvfile : str
        Name of the CSV file, relative to ``filedir``; it must have a
        ``ClassId`` column.

    Returns
    -------
    tuple
        ``(data, labels)``: the object returned by ``np.load`` and the
        ``ClassId`` column converted to a NumPy array.
    """
    npy_path = os.path.join(filedir, filepath)
    csv_path = os.path.join(filedir, csvfile)

    # NOTE: allow_pickle=True lets np.load unpickle object arrays, which can
    # execute arbitrary code -- only point this at files you trust.
    features = np.load(npy_path, allow_pickle=True)
    class_ids = np.array(pd.read_csv(csv_path)['ClassId'])

    return features, class_ids

def resample(data, labels, n=100, n_classes=43):
    """Keep at most ``n`` samples per class, grouped by class id.

    For each class id in ``range(n_classes)`` the first ``n`` matching rows
    (in their original order) are selected; the per-class selections are then
    stacked in ascending class order.

    Unlike a fixed ``reshape(43 * n, -1)``, this also works when some class
    has fewer than ``n`` samples: that class simply contributes fewer rows.

    Parameters
    ----------
    data : numpy.ndarray
        Samples, indexed along the first axis in step with ``labels``.
    labels : numpy.ndarray
        1-D array of integer class ids.
    n : int, optional
        Maximum number of samples kept per class (default 100).
    n_classes : int, optional
        Number of class ids to scan, starting at 0 (default 43).

    Returns
    -------
    tuple
        ``(resample_data, resample_labels)`` with shapes ``(k, features)``
        and ``(k, 1)``, where ``k`` is the number of selected rows and
        ``features`` is the flattened size of one sample.
    """
    per_class_index = [np.where(labels == class_id)[0][:n]
                       for class_id in range(n_classes)]
    if per_class_index:
        index = np.concatenate(per_class_index)
    else:
        # n_classes <= 0: nothing to select, but keep a valid integer index.
        index = np.empty(0, dtype=np.intp)

    # Explicit feature count (instead of -1) so an empty selection still reshapes.
    n_features = int(np.prod(data.shape[1:]))
    n_rows = len(index)
    return (data[index].reshape(n_rows, n_features),
            labels[index].reshape(n_rows, 1))

def randomize_data(data, labels, seed=None):
    """Shuffle ``data`` and ``labels`` with one shared random permutation.

    Parameters
    ----------
    data : array-like supporting integer-array indexing
        Samples; row ``i`` belongs to ``labels[i]``.
    labels : array-like supporting integer-array indexing
        Labels; its length determines the size of the permutation.
    seed : int or None, optional
        When given, the permutation is drawn from a private
        ``np.random.RandomState(seed)`` so the result is reproducible and
        NumPy's global random state is left untouched. When ``None``
        (default) the global ``np.random`` state is used.

    Returns
    -------
    tuple
        ``(X, y)``: ``data`` and ``labels`` reordered by the same permutation.
    """
    order = np.arange(len(labels))
    if seed is None:
        np.random.shuffle(order)
    else:
        np.random.RandomState(seed).shuffle(order)

    return data[order], labels[order]

def resize_img(images, size=(32, 32)):
    """Convert each image with ``cv2.COLOR_BGR2RGB`` and resize it.

    Parameters
    ----------
    images : iterable
        Images accepted by ``cv2.cvtColor`` / ``cv2.resize``.
    size : tuple of int, optional
        Target size handed to ``cv2.resize`` as ``dsize``, which OpenCV
        interprets as ``(width, height)``. Defaults to ``(32, 32)``.

    Returns
    -------
    numpy.ndarray
        The converted, resized images stacked with ``np.array``. An empty
        input yields an empty array.
    """
    target = tuple(size)
    resized = [cv2.resize(cv2.cvtColor(im, cv2.COLOR_BGR2RGB), target)
               for im in images]
    return np.array(resized)

In [6]:
data_dir = './'
trainPath = 'SIFT/trainProcessedSIFT.npy'
testPath = 'SIFT/testProcessedSIFT.npy'

# Seed NumPy's global RNG before shuffling so that re-running this cell
# (or Restart & Run All) always produces the same training order.
SEED = 42
np.random.seed(SEED)

# Training split: load the array and labels, then shuffle both together.
trainData, trainLabels = load_data(data_dir, trainPath, 'Train.csv')
trainData, trainLabels = randomize_data(trainData, trainLabels)
print(trainData.shape)

# Test split: kept in file order, it is only used for evaluation.
testData, testLabels = load_data(data_dir, testPath, 'Test.csv')
print(testData.shape)

(39209, 150)
(12630, 150)


In [7]:
def train_model(estimators, trainData, trainLabels, testData, testLabels, random_state=None):
    """Train (or load) one random forest per estimator count and report on it.

    For every ``n`` in ``estimators`` the function looks for
    ``rf_<n>_processed_sift_balanced.pkl`` in the working directory. If the
    file exists the pickled classifier is loaded; otherwise a
    ``RandomForestClassifier(n_estimators=n, class_weight='balanced')`` is
    fitted on the training data and pickled to that file. It then prints a
    classification report for the test data and the accuracy on the training
    data.

    Parameters
    ----------
    estimators : iterable of int
        Forest sizes to evaluate.
    trainData, testData : numpy.ndarray
        Samples; each is flattened to ``(len(labels), -1)`` before use.
    trainLabels, testLabels : array-like
        Class labels matching the first axis of the corresponding data.
    random_state : int or None, optional
        Forwarded to ``RandomForestClassifier`` when a new model is trained,
        to make training reproducible. ``None`` (default) keeps
        scikit-learn's default behaviour. It has no effect on models loaded
        from disk.

    Returns
    -------
    None
        Results are printed, and newly trained models are written to disk.
    """
    # Flatten once; the same 2-D matrices are used for fit, predict and score
    # so that training and scoring always see identically shaped input.
    X_train = trainData.reshape(len(trainLabels), -1)
    X_test = testData.reshape(len(testLabels), -1)

    for n in estimators:
        model_path = "rf_"+str(n)+"_processed_sift_balanced.pkl"
        print(f'RANDOM FOREST WITH {n} ESTIMATORS')
        if os.path.isfile(model_path):
            print("[INFO] loading classifier: Random Forest trained on ori images...")
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load model files that were produced by this notebook.
            with open(model_path, 'rb') as model_file:
                rf = pickle.load(model_file)
            print("[INFO] Classifer is loaded as instance ::rf::")
        else:
            print("[INFO] pre-trained classifier not found. \n Training Classifier Random Forest")
            rf = RandomForestClassifier(n_estimators=n, class_weight='balanced',
                                        random_state=random_state)
            rf.fit(X_train, trainLabels)
            print("[INFO] Succefully trained the classsifier. \n Saving the classifier for further use")
            # The context manager guarantees the file is flushed and closed.
            with open(model_path, 'wb') as model_file:
                pickle.dump(rf, model_file)
            print("[INFO] Classifier Saved")

        predictions = rf.predict(X_test)

        # show a final classification report demonstrating the accuracy of the classifier
        print("EVALUATION ON TESTING DATA FOR" + str(n) + 'FOREST')
        print(classification_report(testLabels, predictions))

        print('ACCURACY of TRAINING DATA')
        print(rf.score(X_train, trainLabels))
        print('-------------------------------------------------------')

In [8]:
# Forest sizes to evaluate: 50, 100, 150 and 200 trees.
estimators = list(range(50, 201, 50))

In [9]:
# Train or load one forest per size and print its test report and training accuracy.
train_model(
    estimators=estimators,
    trainData=trainData,
    trainLabels=trainLabels,
    testData=testData,
    testLabels=testLabels,
)

RANDOM FOREST WITH 50 ESTIMATORS
[INFO] loading classifier: Random Forest trained on ori images...
[INFO] Classifer is loaded as instance ::rf::
EVALUATION ON TESTING DATA FOR50FOREST
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        60
           1       0.52      0.58      0.55       720
           2       0.53      0.60      0.57       750
           3       0.45      0.36      0.40       450
           4       0.58      0.68      0.62       660
           5       0.47      0.48      0.48       630
           6       0.82      0.59      0.69       150
           7       0.52      0.55      0.54       450
           8       0.50      0.44      0.47       450
           9       0.62      0.67      0.65       480
          10       0.51      0.71      0.59       660
          11       0.31      0.43      0.36       420
          12       0.69      0.72      0.71       690
          13       0.45      0.74      0.56       720
     

  _warn_prf(average, modifier, msg_start, len(result))


0.9999489913030172
-------------------------------------------------------
RANDOM FOREST WITH 150 ESTIMATORS
[INFO] loading classifier: Random Forest trained on ori images...
[INFO] Classifer is loaded as instance ::rf::
EVALUATION ON TESTING DATA FOR150FOREST
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        60
           1       0.56      0.59      0.58       720
           2       0.55      0.61      0.58       750
           3       0.48      0.35      0.40       450
           4       0.58      0.69      0.63       660
           5       0.48      0.49      0.48       630
           6       0.91      0.59      0.72       150
           7       0.58      0.55      0.56       450
           8       0.52      0.47      0.49       450
           9       0.65      0.66      0.65       480
          10       0.52      0.75      0.62       660
          11       0.37      0.45      0.40       420
          12       0.74      0.73   

  _warn_prf(average, modifier, msg_start, len(result))


0.9999489913030172
-------------------------------------------------------
