## Required Libraries

In [9]:
import csv
import matplotlib.pyplot as plt
import numpy as np
import os
from skimage import transform,feature,exposure
import pickle

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.svm import SVC,LinearSVC
from sklearn.externals import joblib

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report

## Functions

In [2]:
def readTrafficSigns(rootpath):
    images = [] # images
    labels = [] # corresponding labels
    for c in range(0,43):
        prefix = rootpath + '/' + format(c, '05d') + '/' # subdirectory for class
        gtFile = open(prefix + 'GT-'+ format(c, '05d') + '.csv') # annotations file
        gtReader = csv.reader(gtFile, delimiter=';') # csv parser for annotations file
        gtReader.next() # skip header
        for row in gtReader:
            images.append(plt.imread(prefix + row[0])) # the 1th column is the filename
            labels.append(row[7]) # the 8th column is the label
        gtFile.close()
    return images, labels

def rgb2gray(rgb):

    r, g, b = rgb[:,:,0], rgb[:,:,1], rgb[:,:,2]
    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b
    return gray


def showimg_n_hog(grayimg,hogImage):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 10), sharex=True, sharey=True)
    ax1.axis('off')
    ax1.imshow(grayimg)
    ax1.set_title('Input image')
    ax1.set_adjustable('box-forced')

    ax2.axis('off')
    ax2.imshow(hogImage, cmap=plt.cm.gray)
    ax2.set_title('Histogram of Oriented Gradients')
    ax1.set_adjustable('box-forced')
    plt.show()
    
    
def get_csv(path):
    return [os.path.join(path,f) for f in os.listdir(path) if f.endswith('.csv')]

def loadtestimages_from_path(testpath):
    print("[INFO] reading all test images from directory\n")
    filename = testpath+"/new.csv"
    raw_data = open(filename, 'rt')
    reader = csv.reader(raw_data, delimiter=';')
    reader.next()
    testfiles = list(reader)
    timg = []
    testimg = []
    for row in testfiles:
        fname = os.path.join(testpath,row[0])
        timg.append(fname)
        testimg.append(plt.imread(fname))
    return timg,testimg


## Main Code : 

## Load Train Data:

In [3]:
if os.path.isfile("Image_n_Labels/trainImages.npy") &  os.path.isfile("Image_n_Labels/trainLabels.npy") :
    X = np.load("Image_n_Labels/trainImages.npy")
    y = np.load("Image_n_Labels/trainLabels.npy")
    print("[INFO] Training images and labels are loaded in variables ==> X,y")
    print("[INFO] Number of training Images {} \nNumber of Labels {}".format(len(X), len(y)))
    
else:
    trainImages, trainLabels =readTrafficSigns("/home/rupali/Desktop/project/GTSRB/dataset/GTSRB/Final_Training/Images")
    np.save("Image_n_Labels/trainImages.npy",trainImages)
    np.save("Image_n_Labels/trainLabels.npy",trainLabels)
    print("[INFO] training images and labels are read from the dataset directory")
    print("[INFO] training images saved to Image_n_Labels/trainingImages.npy for further use")
    print("[INFO] training labels saved to Image_n_Labels/trainingLabels.npy for further use")
    X = np.load("Image_n_Labels/trainImages.npy")
    y = np.load("Image_n_Labels/trainLabels.npy")

[INFO] Training images and labels are loaded in variables ==> X,y
[INFO] Number of training Images 39209 
Number of Labels 39209


## Load Test Data

In [4]:
if (os.path.isfile("Image_n_Labels/testimagenames.npy") &  os.path.isfile("Image_n_Labels/testimages.npy")):
    print("[INFO] loading from .npy\n")
    timg = np.load("Image_n_Labels/testimagenames.npy")
    testimg = np.load("Image_n_Labels/testimages.npy")
    print("[INFO] DONE!loaded from .npy\n")
    print("[INFO] Training images and labels are loaded in variables ==> X,y")
    print("[INFO] Number of training Images {} \nNumber of Labels {}".format(len(timg), len(testimg)))
else:
    testpath="/home/rupali/Desktop/project/GTSRB/dataset/GTSRB_test/Final_Test/small"
    timg,testimg = loadtestimages_from_path(testpath)
    np.save("Image_n_Labels/testimagenames.npy",timg)
    np.save("Image_n_Labels/testimages.npy",testimg)
    
    
    

[INFO] loading from .npy

[INFO] DONE!loaded from .npy

[INFO] Training images and labels are loaded in variables ==> X,y
[INFO] Number of training Images 26 
Number of Labels 26


## Extract HoG features over all training images 

In [5]:
if os.path.isfile("HoGFeatures/HoGfeatures.npy") & os.path.isfile("HoGFeatures/HoGvisualize.npy") :
    print("[INFO] loading from file ... ")
    hogfeat = np.load("HoGFeatures/HoGfeatures.npy")
    hogviz = np.load("HoGFeatures/HoGvisualize.npy")
    
    print("HoG features are loaded from HoGfeatures.npy to variable ==> hogfeat")
    print("HoG visualizations are loaded from HoGvisualize.npy to variable ==> hogviz")
    
else:
    Hviz=[]
    Hfeat=[]
    print("[INFO] HoGfeatures.npy does not exist")
    for i in range(len(X)):
        if i > 0 and i % 1000 == 0:
            print("[INFO] processed {}/{}".format(i, len(X)))
        i1 = X[i]
        grayim = rgb2gray(i1)
        gI1 = transform.resize(grayim,(40,40))
        (H, hogImage) = feature.hog(gI1, orientations=9, pixels_per_cell=(4,4),cells_per_block=(2, 2), transform_sqrt=True, visualise=True)
        hogImage = exposure.rescale_intensity(hogImage, out_range=(0, 255)).astype("uint8")

        Hviz.append(hogImage)
        Hfeat.append(H)

    np.save("HoGFeatures/HoGfeatures.npy", Hfeat)
    np.save("HoGFeatures/HoGvisualize.npy", Hviz)
    print("[INFO] HoGfeatures.npy are saved")  
    print("[INFO] HoGvisualize.npy are saved")
    hogfeat = np.load("HoGFeatures/HoGfeatures.npy")
    hogviz = np.load("HoGFeatures/HoGvisualize.npy")
print (hogfeat.shape)

[INFO] loading from file ... 
HoG features are loaded from HoGfeatures.npy to variable ==> hogfeat
HoG visualizations are loaded from HoGvisualize.npy to variable ==> hogviz
(39209, 2916)


## Extract HoG features over all Testing images 

In [6]:
if os.path.isfile("HoGFeatures/HoGfeatures_test.npy") & os.path.isfile("HoGFeatures/HoGvisualize_test.npy") :
    hogfeat_test = np.load("HoGFeatures/HoGfeatures_test.npy")
    hogviz_test = np.load("HoGFeatures/HoGvisualize_test.npy")
    
    print("HoG features are loaded from HoGfeatures_test.npy to variable ==> hogfeat_test")
    print("HoG visualizations are loaded from HoGvisualize_test.npy to variable ==> hogviz_test")
else:
    print("HoGfeatures_test.npy does not found")
    Hviz = []
    Hfeat = []
    for i in range(0,len(testimg)):
        # show an update every 1,000 images
        if i > 0 and i % 10 == 0:
            print("[INFO] processed {}/{}".format(i, len(testimg)))
        I = testimg[i]
        grayim = rgb2gray(I)
        grayim = transform.resize(grayim,(40,40))

        (H_4x4, hogImage) = feature.hog(grayim, orientations=9, pixels_per_cell=(4, 4),
            cells_per_block=(2, 2), transform_sqrt=True, visualise=True)
        hogImage = exposure.rescale_intensity(hogImage, out_range=(0, 255)).astype("uint8")
        Hviz.append(hogImage)
        Hfeat.append(H_4x4)
        # save the features using numpy save with .npy extention 
        # which reduced the storage space by 4times compared to pickle
    np.save("HoGFeatures/HoGfeatures_test.npy", Hfeat)
    np.save("HoGFeatures/HoGvisualize_test.npy", Hviz)
    print("HoGfeatures_test.npy are saved")  
    print("HoGvisualize_test.npy are saved")
    hogfeat_test = np.load("HoGFeatures/HoGfeatures_test.npy")
    hogviz_test = np.load("HoGFeatures/HoGvisualize_test.npy")
print (hogfeat_test.shape)

HoG features are loaded from HoGfeatures_test.npy to variable ==> hogfeat_test
HoG visualizations are loaded from HoGvisualize_test.npy to variable ==> hogviz_test
(26, 2916)


## Create Train and test dataset fromm training dataset

In [7]:
Xhog = np.array(hogfeat).astype("float")
y = y.astype("float")
X_t = np.array(hogfeat_test).astype("float")


features = Xhog
labels = y
Xtest = X_t

# take the  data and construct the training and testing split, using 75% of the
# data for training and 25% for testing
(trainData, testData, trainLabels, testLabels) = train_test_split(features,
    labels, test_size=0.25, random_state=42)
 
# now, let's take 10% of the training data and use that for validation
(trainData, valData, trainLabels, valLabels) = train_test_split(trainData, trainLabels,
    test_size=0.1, random_state=84)
 
# show the sizes of each data split
print("training data points: {}".format(len(trainLabels)))
print("validation data points: {}".format(len(valLabels)))
print("testing data points: {}".format(len(testLabels)))



training data points: 26465
validation data points: 2941
testing data points: 9803


## Multi Layer Perceptron

In [10]:
if os.path.isfile("clf/clf_mlp_hog.pkl"):
    print("[INFO] loading classifier: MLP =200 trained on HoG features...")
    mlp= joblib.load("clf/clf_mlp_hog.pkl")
    print("[INFO] Classifer is loaded as instance ::mlp::")
    
    
else:
    print("[INFO] pre-trained classifier not found. \n Training Classifier \MLP = 200")
#     Single hidden layer with 200 hidden neurons
    mlp = MLPClassifier(hidden_layer_sizes=(200,))
    mlp.fit(trainData,trainLabels)
    print("[INFO] Succefully trained the classsifier. \n Saving the classifier for further use")
    joblib.dump(mlp, 'clf/clf_mlp_hog.pkl') 
    print("[INFO] Classifier Saved")

[INFO] pre-trained classifier not found. 
 Training Classifier \MLP = 200
[INFO] Succefully trained the classsifier. 
 Saving the classifier for further use
[INFO] Classifier Saved


## Calculating Scores 

In [11]:
print("accuracy on training data: {}".format(mlp.score(trainData,trainLabels)))

print("accuracy on test data: {}".format(mlp.score(testData,testLabels)))

print("accuracy on validation data: {}".format(mlp.score(valData,valLabels)))

accuracy on training data: 1.0
accuracy on test data: 0.975925737019
accuracy on validation data: 0.973478408705


## Cross Validation Accuracy

In [12]:
cv_score = cross_val_score(mlp,testData,testLabels,cv=5)
print("mean cross-validation score: {}".format(np.mean(cv_score)))



mean cross-validation score: 0.95856783507


NameError: name 'svc' is not defined

In [13]:
mlp.predict(Xtest)

array([ 1.,  4.,  1.,  2.,  4.,  3.,  2.,  2.,  4.,  1.,  4.,  2.,  2.,
        1.,  4.,  2.,  2.,  2.,  2.,  2.,  4.,  4.,  1.,  1.,  1.,  2.])

## Final Classification Report

In [14]:
predictions = mlp.predict(testData)
 
# show a final classification report demonstrating the accuracy of the classifier
print("EVALUATION ON TESTING DATA")
print(classification_report(testLabels, predictions))

EVALUATION ON TESTING DATA
             precision    recall  f1-score   support

        0.0       0.98      0.93      0.95        44
        1.0       0.94      0.96      0.95       594
        2.0       0.92      0.93      0.93       560
        3.0       0.94      0.95      0.95       348
        4.0       0.98      0.98      0.98       529
        5.0       0.91      0.88      0.89       462
        6.0       1.00      0.99      0.99        81
        7.0       0.94      0.94      0.94       336
        8.0       0.95      0.96      0.95       385
        9.0       1.00      0.99      1.00       359
       10.0       0.99      0.99      0.99       461
       11.0       0.98      1.00      0.99       291
       12.0       0.99      1.00      1.00       548
       13.0       1.00      0.99      1.00       549
       14.0       1.00      0.99      1.00       197
       15.0       0.99      1.00      1.00       152
       16.0       1.00      1.00      1.00       108
       17.0       