In [52]:
import csv
import cv2
import numpy as np
import os
import glob
import shutil
from sklearn.utils import shuffle
from collections import OrderedDict
from shutil import copyfile
import time
import datetime

from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing import image
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model 
from keras.optimizers import SGD
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

In [None]:
# Ensure a GPU is available: print the devices TensorFlow reports for this machine.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
# Check dim ordering: print the image data format the Keras backend is configured with.
print(k.image_data_format())

In [60]:
# Integer seed for any randomness. It is derived from the wall clock, so it
# changes on every run; print it so that a particular run's train/val/test
# split can be reproduced later by hard-coding the printed value.
seed = np.int64(np.floor(time.time()))
print('seed: {}'.format(seed))

## Delete all data in the working directory created by the function below

In [71]:
# !! find ./data/final_sim_data/working_set/ -type f -name '*.png' -delete

# THIS IS REALLY GOING TO REQUIRE SOME DOUBLE CHECKING

In [72]:
def build_data_dir_for_keras(seed):
    """Split every class folder of ``full_set`` into train/val/test folders.

    For each class directory ``<cwd>/data/final_sim_data/full_set/<class>``,
    the ``*.png`` files are shuffled with ``seed`` and copied into
    ``working_set/train/<class>`` (first 70%), ``working_set/val/<class>``
    (next 15%) and ``working_set/test/<class>`` (remainder).

    The destination class folders are created when missing, and any ``*.png``
    files already in them are removed first. Without that, files copied by an
    earlier run with a different seed would remain and the same image could
    end up in more than one split.

    Args:
        seed: Integer passed as ``random_state`` to ``sklearn.utils.shuffle``.

    Returns:
        None. The function only creates, deletes and copies files.
    """
    data_dir = os.path.join(os.getcwd(), 'data', 'final_sim_data')
    full_data_dir = os.path.join(data_dir, 'full_set')
    working_data_dir = os.path.join(data_dir, 'working_set')

    for class_name in sorted(os.listdir(full_data_dir)):
        class_dir = os.path.join(full_data_dir, class_name)
        # Only directories are classes; skip stray files such as notes or .DS_Store.
        if not os.path.isdir(class_dir):
            continue

        # Prepare an empty destination folder per split for this class.
        split_dirs = {}
        for split_name in ('train', 'val', 'test'):
            split_dir = os.path.join(working_data_dir, split_name, class_name)
            if not os.path.isdir(split_dir):
                os.makedirs(split_dir)
            for stale_file in glob.glob(os.path.join(split_dir, '*.png')):
                os.remove(stale_file)
            split_dirs[split_name] = split_dir

        # glob order depends on the filesystem; sort so the seeded shuffle
        # yields the same split for the same seed.
        D_class = sorted(glob.glob(os.path.join(class_dir, '*.png')))
        if not D_class:
            continue
        D_class = shuffle(D_class, random_state=seed)

        train_val_index = int(np.floor(len(D_class) * 0.70))
        val_test_index = int(np.floor(len(D_class) * 0.85))
        split_files = {
            'train': D_class[:train_val_index],
            'val': D_class[train_val_index:val_test_index],
            'test': D_class[val_test_index:],
        }

        for split_name in ('train', 'val', 'test'):
            for file in split_files[split_name]:
                shutil.copy2(
                    file,
                    os.path.join(split_dirs[split_name],
                                 os.path.basename(os.path.normpath(file))))

In [73]:
build_data_dir_for_keras(seed)

In [63]:
# helper stolen 
def get_nb_files(directory):
    """Count the files stored in the sub-directories of ``directory``.

    Every sub-directory found by walking ``directory`` contributes the number
    of non-directory entries it directly contains. Entries that are themselves
    directories are not counted as files (they are visited separately by the
    walk). Files placed directly in ``directory`` itself are not counted, and
    names starting with a dot are not matched by the ``*`` glob pattern.

    Args:
        directory: Path of the root folder to inspect.

    Returns:
        The number of files found, or 0 when ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        return 0
    cnt = 0
    for r, dirs, files in os.walk(directory):
        for dr in dirs:
            entries = glob.glob(os.path.join(r, dr, "*"))
            # glob also returns nested folders; only real files are samples.
            cnt += sum(1 for entry in entries if not os.path.isdir(entry))
    return cnt

In [None]:
from sklearn.utils import class_weight
# In order to calculate the class weight, do the following (y_train is the
# array of integer training labels; it is not defined in this notebook, so the
# call is left as a commented recipe). The result is stored under a different
# name so that it does not shadow the imported `class_weight` module.
#
# class_weights = class_weight.compute_class_weight(
#     class_weight='balanced', classes=np.unique(y_train), y=y_train)

In [88]:
# Single source of truth: (class name, class weight) pairs. The position of a
# pair in this table is the integer index used as the key in weight_dict.
_class_weight_table = [
    ('nolight', 4),
    ('green', 4),
    ('yellow', 4.5),
    ('red', .75),
]

# Class names, in index order.
class_list = [entry[0] for entry in _class_weight_table]

# Class index -> weight, in the same order as class_list.
weight_dict = dict(
    (position, entry[1]) for position, entry in enumerate(_class_weight_table)
)

In [99]:
# Input resolution fed to the network (square images).
IMG_HEIGHT = IMG_WIDTH = 299

# Split folders under the working set.
_WORKING_SET_DIR = "./data/final_sim_data/working_set"
TRAIN_DATA_DIR = _WORKING_SET_DIR + "/train"
VAL_DATA_DIR = _WORKING_SET_DIR + "/val"
TEST_DATA_DIR = _WORKING_SET_DIR + "/test"

# Number of sample files found in each split.
NB_TRAIN_SAMPLES, NB_VAL_SAMPLES, NB_TEST_SAMPLES = [
    get_nb_files(split_dir)
    for split_dir in (TRAIN_DATA_DIR, VAL_DATA_DIR, TEST_DATA_DIR)
]

BATCH_SIZE = 40
EPOCHS_TRANSFER = 5
EPOCHS_FINE_TUNE = 15

# Batches needed to cover each split once (last batch may be partial).
TRAIN_STEPS_PER_EPOCH, VAL_STEPS_PER_EPOCH, TEST_STEPS_PER_EPOCH = [
    int(np.ceil(sample_count / BATCH_SIZE))
    for sample_count in (NB_TRAIN_SAMPLES, NB_VAL_SAMPLES, NB_TEST_SAMPLES)
]

# Layers below this index stay frozen during the fine-tuning phase.
FROZEN_LAYERS = 172

# TODO: MODIFY COLOR, REMOVING YELLOW FROM PICTURES
# TODO: DO I NEED ANY ADDITIONAL MEAN NORMALIZATION?

In [100]:
# Initiate the train, validation and test generators.
# Only the training data is augmented; every split goes through the
# InceptionV3 preprocess_input function.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    width_shift_range = 0.2,
    height_shift_range=0.4,
    rotation_range=20,
    shear_range=0.2,
    zoom_range=0.5,
    fill_mode='nearest',
    horizontal_flip=True,
    vertical_flip=True
)

# Validation images are NOT augmented: val_loss drives ModelCheckpoint and
# EarlyStopping, so it has to be measured on the same, unmodified images every
# epoch and be comparable with the (also un-augmented) test evaluation.
val_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

# classes=class_list fixes the class -> index mapping to the order of class_list.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DATA_DIR,
    target_size = (IMG_HEIGHT, IMG_WIDTH),
    batch_size = BATCH_SIZE,
    classes=class_list)

validation_generator = val_datagen.flow_from_directory(
    VAL_DATA_DIR,
    batch_size = BATCH_SIZE,
    target_size = (IMG_HEIGHT, IMG_WIDTH),
    classes=class_list)

test_generator = test_datagen.flow_from_directory(
    TEST_DATA_DIR,
    batch_size = BATCH_SIZE,
    target_size = (IMG_HEIGHT, IMG_WIDTH),
    classes=class_list)


Found 12361 images belonging to 4 classes.
Found 2649 images belonging to 4 classes.
Found 2651 images belonging to 4 classes.


## Notes
- rescale and fill don't seem to have much impact
- neither does class mode or class weight

In [91]:
# InceptionV3 convolutional base with ImageNet weights and without its top classifier
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape = (IMG_HEIGHT, IMG_WIDTH, 3))

# new head: global spatial average pooling -> FC layer -> 4-way softmax output layer
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# transfer-learning phase: freeze all convolutional layers, train only the new head
for layer in base_model.layers:
    layer.trainable = False

# compile the model (most places, including the inceptv3 paper recommended RMSProp,
#  but I had a lot of premature convergence issues with it. to be fair, I switched to
#  adam experimenting around before I spent anytime optimizing LR, so who knows. converges
#  quick enough for Anand's data as it is)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# callbacks shared by both training phases: save the best model (by val_loss)
# to inceptionv3.h5 and stop early when val_loss stops improving
checkpoint = ModelCheckpoint("inceptionv3.h5", monitor='val_loss', verbose=1, save_best_only=True,
                             save_weights_only=False, mode='auto', period=3)
early = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, mode='auto')


def _run_training_phase(epochs):
    """Train `model` for `epochs` epochs with the settings common to both phases."""
    model.fit_generator(
        train_generator,
        steps_per_epoch = TRAIN_STEPS_PER_EPOCH,
        epochs = epochs,
        validation_data = validation_generator,
        validation_steps = VAL_STEPS_PER_EPOCH,
        callbacks = [early, checkpoint],
        verbose=True,
        class_weight=weight_dict,
        use_multiprocessing=True) #    workers=4


# training for transfer learning
_run_training_phase(EPOCHS_TRANSFER)

# start fine-tuning conv layers in addition to FC:
# the bottom FROZEN_LAYERS layers stay frozen, everything above becomes trainable
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= FROZEN_LAYERS

# re-compile so the new trainable flags take effect, now with a low-LR SGD optimizer
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy', metrics=['accuracy'])

# training for fine-tuning conv layers
_run_training_phase(EPOCHS_FINE_TUNE)


# evaluate on the held-out test split; metrics_names labels the returned values
print(model.metrics_names)
model.evaluate_generator(
    test_generator,
    steps = TEST_STEPS_PER_EPOCH,
    use_multiprocessing=True)

Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2
['loss', 'acc']


[0.18173385175701717, 0.94530365335316269]

In [None]:
# Show the model's parameter count as the cell output.
model.count_params()

In [92]:
# The class-name -> index mapping is an attribute of the directory iterator
# returned by flow_from_directory, not of the Model (which raised AttributeError).
train_generator.class_indices

In [98]:
# Re-run the test-set evaluation. metrics_names is printed first so the values
# returned by evaluate_generator (shown as the cell output) can be labelled.
print(model.metrics_names)
model.evaluate_generator(
    test_generator,
    steps = TEST_STEPS_PER_EPOCH,
    use_multiprocessing=True)

['loss', 'acc']


[0.18173384965477868, 0.94530365784992954]

In [None]:
# a bunch of junk code for pushing around earlier data that I'm not sure I won't need again...

# find . -type f -name '*.png' -delete

# !! ls data/TrafficLightDataset/working_data/

# with open('./data/TrafficLightDataset/labels_num.csv') as labels:
#     c_labels = OrderedDict(csv.reader(labels))

# X_labels = np.array(list(c_labels.keys()))
# Y = np.array(list(c_labels.values()))
# shuffle(X_labels, Y)

# images = []
# for label in X_labels:
#     loc = "/home/dieslow/WORKSPACE/PROJECTS/CARZ/nanodegree/Capstone/ros/src/tl_detector/data/TrafficLightDataset/image_data/" + label + ".png"
#     image = cv2.imread(loc, 1)
# #     image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB) 
#     # May not need to do resizing... BUT SHOULD VIEW IMAGES
# #     resized_image = cv2.resize(image, (256, 256)) 
#     images.append(image)
# X = np.array(images)
# print(X.shape)

# train_val_index = int(np.floor(len(Y) * 0.85))

# D = list(zip(X, Y))
# D_train = D[:train_val_index]
# D_val = D[train_val_index:]
# X_train, Y_train = zip(*D_train)
# print(len(X_train))
# X_val, Y_val = zip(*D_val)
# print(len(X_val))

# images = []
# for x, y in D_train:
#     loc = "/home/dieslow/WORKSPACE/PROJECTS/CARZ/nanodegree/" + \
#           "Capstone/ros/src/tl_detector/data/TrafficLightDataset/working_data/train/" + \
#           str(y) + "/" + str(np.random.randint(100000000, 999999999)) + ".png"
#     cv2.imwrite(loc, x)
    
# for x, y in D_val:
#     loc = "/home/dieslow/WORKSPACE/PROJECTS/CARZ/nanodegree/" + \
#           "Capstone/ros/src/tl_detector/data/TrafficLightDataset/working_data/val/" + \
#           str(y) + "/" + str(np.random.randint(100000000, 999999999)) + ".png"
#     cv2.imwrite(loc, x)