# SAME Roadsigns Demo!

## Fetching roadsigns data

In [1]:
import pickle
import os

In [2]:
os.system("""
curl -s -o signnames.csv \
  https://raw.githubusercontent.com/SAME-Project/SAME-samples/e0eaeb66cb7eac0a52025624bc1f1fe9392cc1b1/03-road-signs/signnames.csv
""")

0

In [3]:
# Convert signnames from csv to json
import csv, json, os

jsonFileName = 'classes.json'
csvFileName = 'signnames.csv'
csvFile = open(csvFileName, 'r')

data = {}

reader = csv.DictReader(csvFile)
for row in reader:
    id = row['ClassId']
    data[id] = row['SignName']

if os.path.exists(jsonFileName):
  os.remove(jsonFileName)

with open(jsonFileName, 'w') as jsonFile:
    jsonFile.write(json.dumps(data, indent = 4))

In [4]:
os.system("""
DATAFILE="https://d17h27t6h515a5.cloudfront.net/topher/2017/February/5898cd6f_traffic-signs-data/traffic-signs-data.zip"
if [ -d "/tmp/traffic-signs-data" ]; then
    echo "Data already downloaded"
else
    echo "Downloading data from $DATAFILE"
    mkdir /tmp/traffic-signs-data
    curl -s -o /tmp/traffic-signs-data/traffic-signs-data.zip $DATAFILE
    (cd /tmp/traffic-signs-data && unzip traffic-signs-data.zip && rm -f traffic-signs-data.zip)
    (mv /tmp/traffic-signs-data/* .)
fi
""")

0

In [5]:
import numpy as np
import pandas as pd
import pickle
import os
import cv2
import random
import skimage.morphology as morp
from skimage.filters import rank
import matplotlib.pyplot as plt
import tensorflow as tf
import logging
logging.getLogger('tensorflow').disabled = True

In [6]:
train = pickle.load(open("train.p","rb"))
valid = pickle.load(open("valid.p","rb"))
test = pickle.load(open("test.p","rb"))

In [7]:
import csv
# Mapping ClassID to traffic sign names
signs = []
with open('signnames.csv', 'r') as csvfile:
    signnames = csv.reader(csvfile, delimiter=',')
    next(signnames,None)
    for row in signnames:
        signs.append(row[1])
    csvfile.close()

In [8]:
X_train, y_train = train['features'], train['labels']
X_valid, y_valid = valid['features'], valid['labels']
X_test, y_test = test['features'], test['labels']

# Number of training examples
n_train = X_train.shape[0]

# Number of testing examples
n_test = X_test.shape[0]

# Number of validation examples.
n_validation = X_valid.shape[0]

# What's the shape of an traffic sign image?
image_shape = X_train[0].shape

# How many unique classes/labels there are in the dataset.
n_classes = len(np.unique(y_train))

print("Number of training examples: ", n_train)
print("Number of testing examples: ", n_test)
print("Number of validation examples: ", n_validation)
print("Image data shape =", image_shape)
print("Number of classes =", n_classes)

Number of training examples:  34799
Number of testing examples:  12630
Number of validation examples:  4410
Image data shape = (32, 32, 3)
Number of classes = 43


In [9]:
X_train[0].shape

(32, 32, 3)

# Preprocess data

In [10]:
# define helper functions
def list_images(dataset, dataset_y, ylabel="", cmap=None):
    """
    Display a list of images in a single figure with matplotlib.
        Parameters:
            images: An np.array compatible with plt.imshow.
            lanel (Default = No label): A string to be used as a label for each image.
            cmap (Default = None): Used to display gray images.
    """
    plt.figure(figsize=(15, 16))
    for i in range(6):
        plt.subplot(1, 6, i+1)
        indx = random.randint(0, len(dataset))
        #Use gray scale color map if there is only one channel
        cmap = 'gray' if len(dataset[indx].shape) == 2 else cmap
        plt.imshow(dataset[indx], cmap = cmap)
        plt.xlabel(signs[dataset_y[indx]])
        plt.ylabel(ylabel)
        plt.xticks([])
        plt.yticks([])
    plt.tight_layout(pad=0, h_pad=0, w_pad=0)
    plt.show()
    
def histogram_plot(dataset, label):
    """
    Plots a histogram of the input data.
        Parameters:
            dataset: Input data to be plotted as a histogram.
            lanel: A string to be used as a label for the histogram.
    """
    hist, bins = np.histogram(dataset, bins=n_classes)
    width = 0.7 * (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    plt.bar(center, hist, align='center', width=width)
    plt.xlabel(label)
    plt.ylabel("Image count")
    plt.show()
    

def preprocess(data):
    """
    Applying the preprocessing steps to the input data.
        Parameters:
            data: An np.array compatible with plt.imshow.
    """

    # define these inline to workaround issue with SAME...
    def gray_scale(image):
        """
        Convert images to gray scale.
            Parameters:
                image: An np.array compatible with plt.imshow.
        """
        # workaround another weird SAME bug
        import cv2
        return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    def local_histo_equalize(image):
        """
        Apply local histogram equalization to grayscale images.
            Parameters:
                image: A grayscale image.
        """
        # workaround another weird SAME bug
        import skimage.morphology as morp
        from skimage.filters import rank

        kernel = morp.disk(30)
        img_local = rank.equalize(image, selem=kernel)
        return img_local

    def image_normalize(image):
        """
        Normalize images to [0, 1] scale.
            Parameters:
                image: An np.array compatible with plt.imshow.
        """
        # workaround another weird SAME bug
        import numpy as np
        image = np.divide(image, 255)
        return image
    
    # workaround another weird SAME bug
    import numpy as np
    gray_images = list(map(gray_scale, data))
    equalized_images = list(map(local_histo_equalize, gray_images))
    n_training = data.shape
    normalized_images = np.zeros((n_training[0], n_training[1], n_training[2]))
    for i, img in enumerate(equalized_images):
        normalized_images[i] = image_normalize(img)
    normalized_images = normalized_images[..., None]
    return normalized_images

In [11]:
X_valid_preprocessed = preprocess(X_valid)
X_test_preprocessed = preprocess(X_test)
X_train_preprocessed = preprocess(X_train)

  img_local = rank.equalize(image, selem=kernel)


# Train & test model

## Convolutional neural net


In [12]:
num_classes=43
conv = tf.keras.models.Sequential()
conv.add(tf.keras.layers.Conv2D(32, kernel_size=(5, 5), strides=(1, 1), activation='relu', input_shape=(32, 32, 1)))
conv.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
conv.add(tf.keras.layers.Conv2D(32, (5, 5), activation='relu'))
conv.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
conv.add(tf.keras.layers.Flatten())
conv.add(tf.keras.layers.Dense(1000, activation='relu'))
conv.add(tf.keras.layers.Dense(units = num_classes, activation='softmax'))

conv.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

es = tf.keras.callbacks.EarlyStopping(monitor='acc')

conv.fit(X_train_preprocessed, y_train,
          epochs=3,
          verbose=1,
          validation_data=(X_valid_preprocessed, y_valid),
          callbacks=[es])

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fd955e401c0>

In [13]:
model = conv

# Save the `SavedModel` model to MLflow


In [14]:
# Fetch the Keras session and save the model
# The signature definition is defined by the input and output tensors,
# and stored with the default serving key
import tempfile

MODEL_DIR = "./model"
version = 1
export_path = os.path.join(MODEL_DIR, str(version))
print('export_path = {}\n'.format(export_path))
if os.path.isdir(export_path):
  print('\nAlready saved a model, cleaning up\n')
  os.system(f"rm -r {export_path}")

tf.saved_model.save(model, export_path)

print('\nSaved model:')
os.system(f"ls -l {export_path}")

export_path = ./model/1


Already saved a model, cleaning up


Saved model:


0

In [15]:
os.system(f"saved_model_cli show --dir {export_path} --all")

0

In [16]:
import shutil

shutil.copyfile("classes.json", "model/classes.json")


'model/classes.json'

In [17]:
# TODO actually save to mlflow