In [None]:
# Note: After you run this cell, the training and test data will be available in
# the file browser. (Click the folder icon on the left to view it)
#
# If you don't see the data after the cell completes, click the refresh button
# in the file browser (folder icon with circular arrow)

# First, let's download and unzip the data
!echo "Downloading files..."
!wget -q https://github.com/lfalin/cse450-course/raw/master/data/roadsigns/training1.zip
!wget -q https://github.com/lfalin/cse450-course/raw/master/data/roadsigns/training2.zip
!wget -q https://github.com/lfalin/cse450-course/raw/master/data/roadsigns/test.zip
!wget -q https://github.com/lfalin/cse450-course/raw/master/data/roadsigns/test_classes.csv

!echo "Unzipping files..."
!unzip -q /content/training1.zip
!unzip -q /content/training2.zip
!unzip -q /content/test.zip

# Combine the two traning directories
!echo "Mergining training data..."
!mkdir /content/training
!mv /content/training1/* /content/training
!mv /content/training2/* /content/training

# Cleanup
!echo "Cleaning up..."
!rmdir /content/training1
!rmdir /content/training2
!rm training1.zip
!rm training2.zip
!rm test.zip

!echo "Data ready."

In [None]:
# Import libraries
import pandas as pd
import tensorflow as tf
from tensorflow import keras


In [None]:
# Create an image training dataset
from tensorflow.keras.preprocessing import image_dataset_from_directory

# We're using keras' image_dataset_from_directory method to load our image data.
# See (https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory) for details
#
# A couple of things to note:
# 1. We're specifying a number for the seed, so we'll always get the same shuffle and split of our images.
# 2. Class names are inferred automatically from the image subdirectory names.
# 3. We're splitting the training data into 80% training, 20% validation. 


training_dir = '/content/training/'
image_size = (100, 100)

# Build the training subset (80% of the images). The validation call below
# uses the same seed and split fraction so the two subsets come from one
# consistent shuffle.
training_data = image_dataset_from_directory(
    training_dir,
    validation_split=.2,
    subset='training',
    seed=42,
    image_size=image_size,
)

# Build the validation subset (the remaining 20% of the images).
validation_data = image_dataset_from_directory(
    training_dir,
    validation_split=.2,
    subset='validation',
    seed=42,
    image_size=image_size,
)

In [None]:
import matplotlib.pyplot as plt

# View up to the first 9 images of the first batch with their class labels
plt.figure(figsize=(10, 10))
class_names = training_data.class_names
for images, labels in training_data.take(1):
  # Guard against a first batch that holds fewer than 9 images
  preview_count = min(9, images.shape[0])
  for i in range(preview_count):
    ax = plt.subplot(3, 3, i + 1)
    # Pixel values arrive as floats; cast to uint8 so imshow renders them as 0-255 colors
    ax.imshow(images[i].numpy().astype("uint8"))
    ax.set_title(class_names[int(labels[i])])
    ax.axis("off")

In [None]:
# Build a model...

## Testing the model
Once you have built and trained your model, the next step is to run the test images through it and see how well your model does at making predictions for images it has never seen before. 

Since loading these images and formatting them for the model can be tricky, you may find the following code useful. This code only uses your model to predict the class label for a given image. You'll still need to compare those predictions to the "ground truth" class labels in `test_classes.csv` to evaluate how well the model does.

    import pathlib
    
    def predict_an_image(model, file_path, target_size=(100, 100)):
        """Predict the class index for a single image file.

        Args:
            model: Trained Keras model used to make the prediction.
            file_path: Path of the image file to classify.
            target_size: (height, width) the image is resized to when it is
                loaded. It should match the image size the model was trained
                on; the default matches the size used earlier in this notebook.

        Returns:
            The index of the highest-scoring class, as a NumPy integer.
        """
        # Load the image, resized to the size the network expects
        img = keras.preprocessing.image.load_img(file_path, target_size=target_size)

        # Get the image into the shape we need for our network: add a leading
        # batch dimension so the single image is passed as a batch of one
        img_array = keras.preprocessing.image.img_to_array(img)
        img_array = tf.expand_dims(img_array, 0)

        # Predict the class. verbose=0 hides the progress bar that would
        # otherwise be printed for every single image.
        predictions = model.predict(img_array, verbose=0)

        # Softmax does not change which class scores highest, so the returned
        # index is the same whether the model outputs logits or probabilities.
        score = tf.nn.softmax(predictions[0])

        # tf.argmax needs only the imports this notebook already has
        # (numpy is never imported as `np`); .numpy() yields a NumPy integer.
        return tf.argmax(score).numpy()
    
    # Loop through all images in our test directory and make
    # a prediction. The paths are sorted so the output order is the same
    # on every run (glob returns files in no particular order).
    testdir = pathlib.Path('/content/test')
    image_paths = sorted(testdir.glob('*.jpg'))
    for image_path in image_paths:
        prediction = predict_an_image(model, str(image_path))
        print(image_path, prediction)
  