# Exploration of the traffic sign dataset

# 1. Import libraries and datasets

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pickle
import os

In [2]:
# Load the pickled training and validation splits from the "dataset" folder.
# NOTE: pickle.load can execute arbitrary code, so only open trusted files.
train_path = os.path.join("dataset", "train.p")
valid_path = os.path.join("dataset", "valid.p")

with open(train_path, "rb") as train_file:
    train = pickle.load(train_file)

with open(valid_path, "rb") as valid_file:
    valid = pickle.load(valid_file)

In [3]:
# Separate each loaded dataset into its inputs and targets.
# 'features' holds the sign images and 'labels' the matching class ids.
X_train = train['features']
y_train = train['labels']
X_valid = valid['features']
y_valid = valid['labels']

# 2. Visualize traffic sign dataset

In [None]:
# Summarise the data: split sizes, shape of one image, and the number of
# distinct labels found in the training split.
n_train = X_train.shape[0]
n_valid = X_valid.shape[0]
image_shape = X_train[0].shape
n_classes = len(np.unique(y_train))

print("Number of training examples: ", n_train)
print("Number of validation examples: ", n_valid)
print("Image data shape =", image_shape)
print("Number of classes =", n_classes)

In [None]:
# Plot a random picture from the training dataset and print its label.
# np.random.randint draws from [low, high): the lower bound must be 0 so the
# first image can be selected too, and the exclusive upper bound len(X_train)
# keeps the index within range.
i = np.random.randint(0, len(X_train))
plt.grid(False)
plt.imshow(X_train[i])
print("Label: ", y_train[i])

In [None]:
# Show a grid_width x grid_height grid of randomly chosen training images,
# each titled with its label.
grid_width, grid_height = 5, 4

fig, axes = plt.subplots(nrows=grid_height, ncols=grid_width, figsize=(10, 10))
axes = axes.ravel()  # flatten the 2-D array of axes for simple iteration

for ax in axes:
    # Draw a fresh random sample for every cell of the grid.
    index = np.random.randint(0, len(X_train))
    ax.imshow(X_train[index])
    ax.set_title(y_train[index], fontsize=15)
    ax.axis('off')

# Leave some vertical room between rows so the titles do not overlap images.
plt.subplots_adjust(hspace=0.3)

In [7]:
# Plotting histograms of the count of each sign
def histogram_plot(dataset: np.ndarray, label: str, num_classes: int = 43) -> None:
    """Plot a bar chart of how many samples each class id has.

    One bar is drawn per class id in ``0 .. num_classes - 1``, centred on the
    integer id. Fixed, unit-wide bins are used instead of bins derived from
    the data's min/max, so a bar's x position always equals its class id and
    the chart stays aligned even when a split lacks some classes.

    Args:
        dataset: Array of integer class labels to count.
        label: Text for the x axis of the plot.
        num_classes: Number of classes (and therefore bars). Labels outside
            ``0 .. num_classes - 1`` are not counted.
    """
    # Bin edges at k - 0.5 place each integer class id in the middle of
    # its own bin.
    edges = np.arange(num_classes + 1) - 0.5
    counts, _ = np.histogram(dataset, bins=edges)
    class_ids = np.arange(num_classes)
    # Bars slightly narrower than the unit bin leave a visible gap.
    plt.bar(class_ids, counts, align='center', width=0.8)
    plt.xlabel(label)
    plt.ylabel("Image count")
    plt.show()

In [None]:
# Draw the label histogram for each split, training first.
for split_labels, axis_label in ((y_train, "Training examples"),
                                 (y_valid, "Validation examples")):
    histogram_plot(split_labels, axis_label)

# A list of all classes:
- 0 = Speed limit (20km/h)
- 1 = Speed limit (30km/h)
- 2 = Speed limit (50km/h)
- 3 = Speed limit (60km/h)
- 4 = Speed limit (70km/h)
- 5 = Speed limit (80km/h)
- 6 = End of speed limit (80km/h)
- 7 = Speed limit (100km/h)
- 8 = Speed limit (120km/h)
- 9 = No passing
- 10 = No passing for vehicles over 3.5 metric tons
- 11 = Right-of-way at the next intersection
- 12 = Priority road
- 13 = Yield
- 14 = Stop
- 15 = No vehicles
- 16 = Vehicles over 3.5 metric tons prohibited
- 17 = No entry
- 18 = General caution
- 19 = Dangerous curve to the left
- 20 = Dangerous curve to the right
- 21 = Double curve
- 22 = Bumpy road
- 23 = Slippery road
- 24 = Road narrows on the right
- 25 = Road work
- 26 = Traffic signals
- 27 = Pedestrians
- 28 = Children crossing
- 29 = Bicycles crossing
- 30 = Beware of ice/snow
- 31 = Wild animals crossing
- 32 = End of all speed and passing limits
- 33 = Turn right ahead
- 34 = Turn left ahead
- 35 = Ahead only
- 36 = Go straight or right
- 37 = Go straight or left
- 38 = Keep right
- 39 = Keep left
- 40 = Roundabout mandatory
- 41 = End of no passing
- 42 = End of no passing by vehicles over 3.5 metric tons