In [None]:
import numpy as np
import matplotlib.pyplot as plt
import os, glob
import pickle
import pandas as pd
import cv2 as cv
%matplotlib inline

# classification required packages
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import classification_report
import pickle

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
%matplotlib inline

In [None]:
# Base directory for the dataset files; passed to load_data as `filedir`.
# NOTE(review): looks like a Google Drive mount inside Colab — confirm before running elsewhere.
data_dir = '/content/drive/MyDrive/GTSRB'

In [None]:
def load_data(filedir, filepath, csvfile):
    """Load a saved array together with its class labels.

    Args:
        filedir: Base directory that both files live under.
        filepath: Path of the ``.npy`` file, relative to ``filedir``.
        csvfile: Path of the CSV file, relative to ``filedir``; it must
            contain a ``ClassId`` column.

    Returns:
        Tuple ``(data, labels)``: whatever ``np.load`` returns for the array
        file, and a NumPy array built from the CSV's ``ClassId`` column.
    """
    array_path = os.path.join(filedir, filepath)
    info_path = os.path.join(filedir, csvfile)

    # allow_pickle=True lets np.load unpickle object arrays, so only point
    # this at files you trust.
    data = np.load(array_path, allow_pickle=True)
    labels = np.array(pd.read_csv(info_path)['ClassId'])

    return data, labels

def resample(data, labels, n=100, n_classes=43):
    """Take up to ``n`` samples of every class, grouped by class id.

    For each class id ``0 .. n_classes - 1`` the first ``n`` samples (in
    their original order) are selected, and the per-class selections are
    concatenated in ascending class order.

    The selections are concatenated rather than stacked into one rectangular
    array, so a class with fewer than ``n`` samples (or none at all) simply
    contributes the samples it has instead of breaking the final reshape.

    Args:
        data: Array-like whose first axis indexes samples.
        labels: Array-like of integer class ids aligned with ``data``.
        n: Maximum number of samples kept per class.
        n_classes: Number of class ids to scan, starting at 0. Defaults to 43.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(N, features)`` with every sample
        flattened to one row; ``y`` has shape ``(N, 1)``. ``N`` is the total
        number of selected samples, i.e. ``n_classes * n`` when every class
        has at least ``n`` samples.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)

    # Row indices of the first n samples of each class, in class order.
    # The leading empty array keeps np.concatenate valid when n_classes == 0.
    per_class = [np.empty(0, dtype=np.intp)]
    per_class.extend(
        np.where(labels == class_id)[0][:n] for class_id in range(n_classes)
    )
    picked = np.concatenate(per_class)

    selected = data[picked]
    # Spell out the feature count instead of using -1 so that an empty
    # selection can still be reshaped.
    n_features = int(np.prod(selected.shape[1:]))
    X = selected.reshape(len(picked), n_features)
    y = labels[picked].reshape(-1, 1)
    return X, y

def randomize_data(data, labels, seed=None):
    """Shuffle samples and labels with one shared random permutation.

    Args:
        data: NumPy array whose first axis indexes samples.
        labels: NumPy array of labels aligned with ``data``; its length
            determines how many indices are permuted.
        seed: Optional seed (or ``numpy.random.Generator``) for a
            reproducible shuffle. When ``None`` (the default) the permutation
            is drawn from NumPy's global random state.

    Returns:
        Tuple ``(X, y)``: ``data`` and ``labels`` reordered by the same
        permutation, so ``X[i]`` still belongs to ``y[i]``.
    """
    n_samples = len(labels)
    if seed is None:
        # Default path: consume the global RNG via an in-place shuffle.
        order = np.arange(n_samples)
        np.random.shuffle(order)
    else:
        # A dedicated generator makes the shuffle repeatable without
        # touching the global random state.
        order = np.random.default_rng(seed).permutation(n_samples)

    X = data[order]
    y = labels[order]

    return X, y

In [None]:
# Locations of the saved .npy arrays, relative to data_dir
# (load_data joins the base directory and these paths with os.path.join).
processed_train_path = 'Processed/Train/trainProcessed.npy'
processed_test_path = 'Processed/Test/testProcessed.npy'

In [None]:
# Load the processed training set, then shuffle samples and labels together.
trainProcessedData, trainLabels = randomize_data(
    *load_data(data_dir, processed_train_path, 'Train.csv')
)

In [None]:
# Load the processed test set; it is kept in file order (no shuffling).
testProcessedData, testLabels = load_data(
    filedir=data_dir,
    filepath=processed_test_path,
    csvfile='Test.csv',
)

In [None]:
# RF
# Reload the cached 1000-tree random forest if its pickle exists; otherwise
# train it on the flattened training data and cache it for later runs.
RF_MODEL_PATH = "/content/drive/MyDrive/rf_1000_processed_nofe.pkl"

if os.path.isfile(RF_MODEL_PATH):
    print("[INFO] loading classifier: Random Forest trained on processed images...")
    # NOTE: pickle.load can execute arbitrary code; only load pickles you trust.
    # The with-block closes the file handle once loading is done.
    with open(RF_MODEL_PATH, 'rb') as model_file:
        rf = pickle.load(model_file)
    print("[INFO] Classifer is loaded as instance ::rf::")
else:
    print("[INFO] pre-trained classifier not found. \n Training Classifier Random Forest")
    rf = RandomForestClassifier(n_estimators=1000)
    # One row per sample: every training item is flattened to a feature vector.
    rf.fit(trainProcessedData.reshape(len(trainLabels), -1), trainLabels)
    print("[INFO] Succefully trained the classsifier. \n Saving the classifier for further use")
    # Closing the file explicitly guarantees the pickle is flushed to disk.
    with open(RF_MODEL_PATH, 'wb') as model_file:
        pickle.dump(rf, model_file)
    print("[INFO] Classifier Saved")

[INFO] pre-trained classifier not found. 
 Training Classifier Random Forest
[INFO] Succefully trained the classsifier. 
 Saving the classifier for further use


In [31]:
def train_model(estimators, trainData, trainLabels, testData, testLabels,
                model_dir="/content/drive/MyDrive"):
    """Train (or reload) one balanced random forest per size and report on it.

    For every value ``n`` in ``estimators`` the function looks for a cached
    model ``rf_<n>_processed_nofe_balanced.pkl`` inside ``model_dir``. If it
    exists it is unpickled; otherwise a
    ``RandomForestClassifier(n_estimators=n, class_weight='balanced')`` is
    fitted on the training data and pickled to that same path. The existence
    check, the load and the save all use one path, so the cache works
    regardless of the current working directory.

    For each model it prints a classification report on the test data and the
    model's score on the training data.

    Args:
        estimators: Iterable of forest sizes (number of trees) to evaluate.
        trainData: Training samples; reshaped to ``(len(trainLabels), -1)``.
        trainLabels: Labels aligned with ``trainData``.
        testData: Test samples; reshaped to ``(len(testLabels), -1)``.
        testLabels: Labels aligned with ``testData``.
        model_dir: Directory in which the cached model pickles are looked up
            and stored.

    Returns:
        None. Results are printed.
    """
    # Flatten once, one row per sample; these do not change between forests.
    X_train = trainData.reshape(len(trainLabels), -1)
    X_test = testData.reshape(len(testLabels), -1)

    for n in estimators:
        print(f'RANDOM FOREST WITH {n} ESTIMATORS')
        model_path = os.path.join(model_dir, "rf_" + str(n) + "_processed_nofe_balanced.pkl")

        if os.path.isfile(model_path):
            print("[INFO] loading classifier: Random Forest trained on ori images...")
            # NOTE: pickle.load can execute arbitrary code; only load trusted files.
            with open(model_path, 'rb') as model_file:
                rf = pickle.load(model_file)
            print("[INFO] Classifer is loaded as instance ::rf::")
        else:
            print("[INFO] pre-trained classifier not found. \n Training Classifier Random Forest")
            rf = RandomForestClassifier(n_estimators=n, class_weight='balanced')
            rf.fit(X_train, trainLabels)
            print("[INFO] Succefully trained the classsifier. \n Saving the classifier for further use")
            with open(model_path, 'wb') as model_file:
                pickle.dump(rf, model_file)
            print("[INFO] Classifier Saved")

        predictions = rf.predict(X_test)

        # show a final classification report demonstrating the accuracy of the classifier
        print(f"EVALUATION ON TESTING DATA FOR {n} FOREST")
        print(classification_report(testLabels, predictions))

        print('Accuracy of model on training data')
        print(rf.score(X_train, trainLabels))
        print('-------------------------------------------------------')

In [27]:
# Show the current working directory; relative file paths resolve against it.
os.getcwd()

'/content/drive/MyDrive'

In [32]:
# Forest sizes to compare: 50, 100, 150 and 200 trees.
estimators = list(range(50, 201, 50))
train_model(
    estimators,
    trainData=trainProcessedData,
    trainLabels=trainLabels,
    testData=testProcessedData,
    testLabels=testLabels,
)

RANDOM FOREST WITH 50 ESTIMATORS
[INFO] loading classifier: Random Forest trained on ori images...
[INFO] Classifer is loaded as instance ::rf::
EVALUATION ON TESTING DATA FOR50FOREST
              precision    recall  f1-score   support

           0       1.00      0.13      0.24        60
           1       0.83      0.89      0.86       720
           2       0.78      0.89      0.83       750
           3       0.83      0.81      0.82       450
           4       0.86      0.90      0.88       660
           5       0.70      0.82      0.76       630
           6       0.88      0.61      0.72       150
           7       0.85      0.78      0.82       450
           8       0.79      0.79      0.79       450
           9       0.89      0.94      0.92       480
          10       0.95      0.92      0.93       660
          11       0.70      0.90      0.79       420
          12       0.82      0.94      0.88       690
          13       0.89      0.96      0.93       720
     

'/content'