In [1]:
import csv
import imutils
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score, train_test_split, KFold
from sklearn.externals import joblib
from sklearn.metrics import classification_report

from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw

In [2]:
def readTrafficSigns(rootpath):
    images = [] # images
    labels = [] # corresponding labels
    for c in range(0,5):
        prefix = rootpath + '/' + format(c, '05d') + '/' # subdirectory for class
        gtFile = open(prefix + 'GT-'+ format(c, '05d') + '.csv') # annotations file
        gtReader = csv.reader(gtFile, delimiter=';') # csv parser for annotations file
        next(gtReader) # skip header
        # loop over all images in current annotations file
        for row in gtReader:
            images.append(plt.imread(prefix + row[0])) # the 1th column is the filename
            labels.append(row[7]) # the 8th column is the label
        gtFile.close()
    return images, labels

In [3]:
def image_to_feature_vector(image, size=(32, 32)):
    # resize the image to a fixed size, then flatten the image into
    # a list of raw pixel intensities
    return cv2.resize(image, size).flatten()

In [4]:
if os.path.isfile("Image_n_Labels/trainImages.npy") &  os.path.isfile("Image_n_Labels/trainLabels.npy") :
    print("Loading from npy files...")
    X = np.load("Image_n_Labels/trainImages.npy")
    y = np.load("Image_n_Labels/trainLabels.npy")
    print("Training images and labels are loaded in variables ==> X,y")
    print("Number of training Images {} \nNumber of Labels {}".format(len(X), len(y)))
else:    
    # training images and labels
    trainImages, trainLabels = readTrafficSigns('/home/sejal1/projects/TrafficSign/GTSRB/Final_Training/Images/')
    np.save("Image_n_Labels/trainImages.npy",trainImages)
    np.save("Image_n_Labels/trainLabels.npy",trainLabels)
    print("training images and labels are read from the dataset directory")
    print("training images saved to Image_n_Labels/trainingImages.npy for further use")
    print("training labels saved to Image_n_Labels/trainingLabels.npy for further use")
    X = np.load("Image_n_Labels/trainImages.npy")
    y = np.load("Image_n_Labels/trainLabels.npy")

Loading from npy files...
Training images and labels are loaded in variables ==> X,y
Number of training Images 39209 
Number of Labels 39209


In [5]:
# initialize the raw pixel intensities matrix, the features matrix,
# and labels list
rawImages = []
labels = y
# loop over the input images
for i in range(0,len(X)):
# for i in range(0,10):
    image = X[i]
    # extract raw pixel intensity "features", followed by a color
    # histogram to characterize the color distribution of the pixels
    # in the image
    pixels = image_to_feature_vector(image)
    # update the raw images, features, and labels matricies,
    # respectively
    rawImages.append(pixels)
    # show an update every 3,000 images
    if i > 0 and i % 3000 == 0:
        print("[INFO] processed {}/{}".format(i, len(X)))

[INFO] processed 3000/39209
[INFO] processed 6000/39209
[INFO] processed 9000/39209
[INFO] processed 12000/39209
[INFO] processed 15000/39209
[INFO] processed 18000/39209
[INFO] processed 21000/39209
[INFO] processed 24000/39209
[INFO] processed 27000/39209
[INFO] processed 30000/39209
[INFO] processed 33000/39209
[INFO] processed 36000/39209
[INFO] processed 39000/39209


In [6]:
# show some information on the memory consumed by the raw images
# matrix and features matrix
rawImages=np.array(rawImages)
labels = np.array(labels)
print("[INFO] pixels matrix: {:.2f}MB".format(rawImages.nbytes / (1024 * 1000.0)))

[INFO] pixels matrix: 117.63MB


In [7]:
# take the  data and construct the training and testing split, using 75% of the
# data for training and 25% for testing
(trainData, testData, trainLabels, testLabels) = train_test_split(rawImages,
    labels, test_size=0.25, random_state=42)
 
# now, let's take 10% of the training data and use that for validation
(trainData, valData, trainLabels, valLabels) = train_test_split(trainData, trainLabels,
    test_size=0.1, random_state=84)
 
# show the sizes of each data split
print("training data points: {}".format(len(trainLabels)))
print("validation data points: {}".format(len(valLabels)))
print("testing data points: {}".format(len(testLabels)))

training data points: 26465
validation data points: 2941
testing data points: 9803


In [8]:
# initialize the values of k for our k-Nearest Neighbor classifier along with the
# list of accuracies for each value of k
kVals = range(1, 30, 2)
accuracies = []
 
# loop over various values of `k` for the k-Nearest Neighbor classifier
for k in range(1, 30, 2):
    # train the k-Nearest Neighbor classifier with the current value of `k`
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(trainData, trainLabels)

    # evaluate the model and update the accuracies list
    score = model.score(valData, valLabels)
    print("k=%d, accuracy=%.2f%%" % (k, score * 100))
    accuracies.append(score)

# find the value of k that has the largest accuracy
i = np.argmax(accuracies)
print("k=%d achieved highest accuracy of %.2f%% on validation data" % (kVals[i],
    accuracies[i] * 100))

k=1, accuracy=92.18%
k=3, accuracy=87.73%
k=5, accuracy=84.77%
k=7, accuracy=81.54%
k=9, accuracy=79.29%
k=11, accuracy=77.52%
k=13, accuracy=76.06%
k=15, accuracy=75.01%
k=17, accuracy=73.92%
k=19, accuracy=72.87%
k=21, accuracy=71.74%
k=23, accuracy=70.83%
k=25, accuracy=70.18%
k=27, accuracy=69.23%
k=29, accuracy=68.65%
k=1 achieved highest accuracy of 92.18% on validation data


In [9]:
# train and evaluate a k-NN classifer on the histogram
# representations
# if os.path.isfile('c'
name = "clf/clf_knn_cv_rawpix.pkl"
if os.path.isfile(name):
    print("[INFO] loading classifier: KNN ={} trained on color histogram features...".format(kVals[i]))
    model= joblib.load(name)
    print("[INFO] Classifer is loaded as instance ::model::")
else:
    print("[INFO] pre-trained classifier not found. \n Training Classifier \KNN = {}".format(kVals[i]))
    model = KNeighborsClassifier(n_neighbors=1,n_jobs=2)
    model.fit(trainData, trainLabels)
    print("[INFO] Succefully trained the classsifier. \n Saving the classifier for further use")
    joblib.dump(model, name) 
    print("[INFO] Classifier Saved")

[INFO] pre-trained classifier not found. 
 Training Classifier \KNN = 1
[INFO] Succefully trained the classsifier. 
 Saving the classifier for further use
[INFO] Classifier Saved


In [10]:
predictions = model.predict(testData)
# show a final classification report demonstrating the accuracy of the classifier

# print("EVALUATION ON TESTING DATA")
print(classification_report(testLabels, predictions))

             precision    recall  f1-score   support

          0       0.80      0.91      0.85        44
          1       0.93      0.92      0.92       594
         10       0.92      0.92      0.92       461
         11       0.94      0.89      0.91       291
         12       0.99      0.93      0.95       548
         13       0.97      0.97      0.97       549
         14       0.99      0.88      0.93       197
         15       0.92      0.91      0.91       152
         16       0.97      1.00      0.99       108
         17       0.99      0.96      0.98       286
         18       0.91      0.94      0.93       288
         19       0.96      0.87      0.91        53
          2       0.89      0.89      0.89       560
         20       0.89      0.90      0.90        90
         21       0.88      0.91      0.90        82
         22       0.92      0.93      0.93       107
         23       0.96      0.94      0.95       138
         24       0.93      0.84      0.88   