# Train model to detect images with an obstacle on track and make a lite version of the model

Author : Raphaël Matusiak / Johan Jublanc
    
Date : 02/10/2019

Description : 

Train a model to detect obstacles and use the Python interpreter to load a .tflite file and run inference

_TODO :_
* Generalize this procedure to each model in the car

In [1]:
import json
import shutil
import random
import pathlib
import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import IPython.display as display

import mlflow
import mlflow.tensorflow
import mlflow.keras

import tensorflow as tf

from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.metrics import MSLE, MAE, MSE
from tensorflow.keras.losses import MSE, MSLE
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

from sklearn.metrics import precision_recall_curve

from xebikart.images import transformer as T
import xebikart.dataset as dataset

from sklearn.model_selection import train_test_split

from tensorflow.compat.v1 import lite
from xebikart.lite_functions import predictor_builder

%matplotlib inline

ModuleNotFoundError: No module named 'mlflow'

In [None]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# Report whether TensorFlow can see a GPU on this machine.
tf.test.is_gpu_available()

Eager execution evaluates operations immediately, without building a graph first.
Note: enabling it explicitly is only needed when not using TF 2.0 (where it is on by default).

# Parameters

In [None]:
# dataset parameters
# Root location of the tubes, and the tube folders loaded for each class
# (obstacle tubes are labelled 1 and road tubes 0 in the "Load data" section).
tubes_root_folder = 'file:/workspace/xebikart-ml-tubes'
tubes_folders_obstacle = ["tub.v8.02"]
tubes_folders_road = ["tub.v5.01", "tub.v5.02", "tub.v4.02"]#, "tub.v5.03", "tub.v5.04"]

# Fraction of the samples held out by train_test_split.
test_size=0.2

# training parameters
batch_size = 16
# NOTE(review): shuffle_size is not referenced anywhere in the visible code;
# input_fn uses its own default SHUFFLE_SIZE = 400. Confirm which value is intended.
shuffle_size = 200
n_epochs = 10
learning_rate = 1e-3

# model name
# Used as the base file name for the saved .h5 and .tflite models.
model_name = "obstacle_detection_gray"

# Load data

Download tubes from : https://github.com/xebia-france/xebikart-ml-tubes

Get the tubes from tar.gz files into pd data frames.

In [None]:
# Road tubes form the negative class (label 0).
road_tubes_df = dataset.get_tubes_df(
    tubes_root_folder, tubes_folders_road, tubes_extension=".tar.gz"
).assign(label=0)

# Obstacle tubes form the positive class (label 1).
obstacle_tubes_df = dataset.get_tubes_df(
    tubes_root_folder, tubes_folders_obstacle, tubes_extension=".tar.gz"
).assign(label=1)

Concatenate the dataframes

In [None]:
%%capture
# Stack both classes into one frame with a fresh 0..n-1 index.
tubes_df = pd.concat([road_tubes_df, obstacle_tubes_df], ignore_index=True)

Rename the columns

In [None]:
# Give the image-path column a friendlier name.
tubes_df = tubes_df.rename({"cam/image_array": "images_path"}, axis="columns")

#### **- Display some examples**

In [None]:
# Draw three random samples side by side, each titled with its label.
fig, axs = plt.subplots(1, 3, figsize=(15,5), constrained_layout=True)
fig.suptitle("label", fontsize=20)

examples = tubes_df.sample(3)
for ax, example_path, example_label in zip(axs, examples["images_path"], examples["label"]):
    ax.set_title(example_label)
    ax.imshow(mpimg.imread(example_path))
    # Hide both axes: tick marks carry no information for a picture.
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)

#### **- Display the class distribution**

In [None]:
# Pie chart of the share of samples in each class.
labels = ('no obstacles', 'obstacles')
# Count the rows per class, in the same order as `labels`.
sizes = [(tubes_df.label == class_id).sum() for class_id in (0, 1)]
# Pull the 'obstacles' slice slightly out of the pie.
explode = (0, 0.1)

fig1, ax1 = plt.subplots()
ax1.pie(
    sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
# Equal aspect ratio so the pie is drawn as a circle.
ax1.axis('equal')

plt.show()

# Preprocessing Images

In [None]:
def get_process_from_path(process):
    """Wrap an image transformation so it can be applied to an image path.

    Args:
        process: callable applied to the image returned by ``T.read_image``.

    Returns:
        A function taking a path, reading the image at that path with
        ``T.read_image`` and returning ``process`` applied to it.
    """
    def _from_path(path):
        return process(T.read_image(path))

    return _from_path

In [None]:
# Single source of truth for the image transformation: `preprocess` is applied
# to already-loaded images, `preprocess_from_path` to image paths.
preprocess = T.normalize_gray_scale
# Derive the path-based variant from `preprocess` (instead of naming the
# transformation a second time) so the two can never drift apart.
preprocess_from_path = get_process_from_path(preprocess)

In [None]:
# Compare one random image before and after preprocessing.
random_image_path = tubes_df.sample()["images_path"].iloc[0]

tf_image_original = T.read_image(random_image_path)
# Convert the processed image back to 3 channels for display.
tf_processed_image = tf.image.grayscale_to_rgb(preprocess_from_path(random_image_path))

fig, (ax_original, ax_processed) = plt.subplots(1, 2, figsize=(15,15), constrained_layout=True)

ax_original.set_title("Original")
ax_original.imshow(tf_image_original)

ax_processed.set_title("processed")
ax_processed.imshow(tf_processed_image, cmap='gray')

plt.show()

# Build a dataset of images

#### **- Split data into test/train datasets**

Note : The label is the binary obstacle flag (0 = road, 1 = obstacle)

In [None]:
# Turn the two dataframe columns into plain Python lists (same order, one
# entry per image); they are the inputs of train_test_split below.
images_path = tubes_df["images_path"].tolist()
label = tubes_df["label"].tolist()

In [None]:
# Random train/test split of the (path, label) pairs; `test_size` is the
# held-out fraction.
(
    train_images_path,
    test_images_path,
    train_metas,
    test_metas,
) = train_test_split(images_path, label, test_size=test_size)

print(f"Train set : {len(train_images_path)} images")
print(f"Test set : {len(test_images_path)} images")

#### **- Create tensor for train and test datasets**

In [None]:
def input_fn(filepath, label, BATCH_SIZE = batch_size, SHUFFLE_SIZE = 400, NUM_EPOCHS = n_epochs):
    """Build a batched ``tf.data`` pipeline of (preprocessed image, label) pairs.

    Args:
        filepath: sequence of image paths.
        label: sequence of labels, aligned with ``filepath``.
        BATCH_SIZE: number of samples per batch.
        SHUFFLE_SIZE: size of the shuffle buffer; a value of 1 keeps the
            input order.
        NUM_EPOCHS: number of times the data is repeated.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches, where each
        image is ``preprocess_from_path`` applied to its path.
    """
    # Slice paths and labels together so every pair stays aligned.
    ds_x_y = tf.data.Dataset.from_tensor_slices((filepath, label))
    # Shuffle the lightweight (path, label) pairs *before* decoding, so the
    # shuffle buffer holds path strings rather than decoded image tensors.
    ds_x_y = ds_x_y.shuffle(SHUFFLE_SIZE)
    ds_x_y = ds_x_y.map(lambda path, target: (preprocess_from_path(path), target))

    return ds_x_y.repeat(NUM_EPOCHS).batch(BATCH_SIZE).prefetch(1)

In [None]:
# Training data: shuffled and repeated with the input_fn defaults.
ds_train = input_fn(train_images_path, train_metas)
# Test data: a single pass in the ORIGINAL order. A shuffle buffer of size 1
# disables shuffling, which keeps model.predict(ds_test) aligned with
# test_metas when the two are compared (e.g. for the precision/recall curve).
ds_test = input_fn(test_images_path, test_metas, SHUFFLE_SIZE=1, NUM_EPOCHS=1)

# Build Model

In [None]:
# Small CNN for binary classification of 120x160 single-channel images:
# three conv/pool stages, one dense layer with dropout, sigmoid output.
model = Sequential([
    Conv2D(16, (3, 3), activation='relu', input_shape=(120, 160,1)),
    BatchNormalization(),
    MaxPooling2D(2, 2),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),

    # Single sigmoid unit: output is a score in [0, 1].
    Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Adam with a decay set to learning_rate / n_epochs; binary cross-entropy
# matches the single sigmoid output of the model.
optimizer = Adam(learning_rate=learning_rate, decay=learning_rate / n_epochs)
model.compile(optimizer=optimizer, loss="binary_crossentropy")

In [None]:
mlflow.set_experiment("detect_obstacle")

# Number of full batches in each split (integer division drops the remainder).
train_steps = len(train_metas) // batch_size
validation_steps = len(test_metas) // batch_size

with mlflow.start_run():
    # NOTE(review): the key "images_exit" stores the obstacle tube folders;
    # the name looks inherited from another experiment - confirm before renaming.
    run_params = {
        "images_exit": str(tubes_folders_obstacle),
        "images_road": str(tubes_folders_road),
        "nb_images": len(train_images_path),
        "epochs": n_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
    }
    mlflow.log_params(run_params)
    mlflow.tensorflow.autolog()

    history = model.fit(
        ds_train,
        steps_per_epoch=train_steps,
        epochs=n_epochs,
        verbose=1,
        validation_data=ds_test,
        validation_steps=validation_steps,
    )

# Plot loss

In [None]:
# Evaluate on the test dataset; the model is compiled with a loss only
# (no extra metrics), so the value shown is the binary cross-entropy.
model.evaluate(ds_test)

In [None]:
def plot_results(history):
    """Plot the training and validation loss per epoch.

    Args:
        history: object whose ``history`` attribute maps metric names to one
            value per epoch (as returned by ``model.fit``). It must contain
            ``'loss'``; ``'val_loss'`` is plotted when present.
    """
    hist_df = pd.DataFrame(history.history)
    # Number epochs from 1 instead of 0 on the x axis.
    hist_df.index = np.arange(1, len(hist_df) + 1)

    plt.figure(figsize=(10,5))
    # Select curves by name rather than renaming columns by position, so extra
    # metrics or a different key order cannot break or mislabel the plot.
    if 'val_loss' in hist_df:
        plt.plot(hist_df['val_loss'], lw=3, label='Validation Loss')
    plt.plot(hist_df['loss'], lw=3, label='Training Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.grid()
    plt.legend(loc=0)

    plt.show()

In [None]:
# Plot the loss curves recorded by model.fit.
plot_results(history)

# Evaluation & Visualisation

Get the model from mlflow

In [None]:
#model_runid = "93c5025795394f50adfa5d6db4319c1b"
#model = mlflow.keras.load_model(f"runs:/{model_runid}/model", compile=True)

### - Prediction

In [None]:
# Predict over the whole test dataset (steps defaults to None, i.e. run until
# the dataset is exhausted). `workers` / `use_multiprocessing` are not passed:
# they only apply to generator / keras Sequence inputs, have no effect on a
# tf.data.Dataset, and are rejected by newer Keras versions.
test_pred = model.predict(ds_test)

In [None]:
# Histogram of the predicted scores on the test set.
df = pd.DataFrame(test_pred, columns=['label'])
df['label'].hist()

### - Visualisation predictions

In [None]:
# Show two random test images with their true label and the model's prediction.
fig, axs = plt.subplots(1, 2, figsize=(15,5), constrained_layout=True)

for ax in axs:
    nb_random = np.random.randint(len(test_metas))
    sample_path = [test_images_path[nb_random]]
    sample_label = [test_metas[nb_random]]

    # NUM_EPOCHS=1: one pass over the single image is enough; the default
    # would repeat it n_epochs times and run that many redundant inferences.
    ds_visu_test = input_fn(sample_path, sample_label, NUM_EPOCHS=1)
    sample_pred = model.predict(ds_visu_test)[0]

    ax.set_title("Value ({}) : Predict ({})".format(sample_label, sample_pred))
    # Display the original image, not the preprocessed one.
    ax.imshow(T.read_image(sample_path[0]))

plt.show()

### - Interpretation results

In [None]:
# Precision / recall for every decision threshold on the predicted scores.
# Arguments are passed positionally because the `probas_pred` keyword was
# renamed in newer scikit-learn releases; predictions are flattened from
# (n, 1) to (n,).
# NOTE(review): this assumes test_pred is in the same order as test_metas;
# input_fn shuffles its dataset, so confirm ds_test is built without shuffling.
precision, recall, thresholds = precision_recall_curve(test_metas, np.ravel(test_pred))

In [None]:
# precision and recall each have one more entry than thresholds, so the last
# value of each curve is dropped before plotting against the thresholds.
for curve, curve_name in ((precision, 'precision'), (recall, 'recall')):
    plt.plot(thresholds, curve[:-1], '--', label=curve_name)

plt.xlabel('threshold')
plt.legend()

plt.show()

### Save the model in .h5

In [None]:
# Make sure the target directory exists: depending on the TensorFlow / h5py
# version, saving to an .h5 path does not create missing parent directories.
os.makedirs("models", exist_ok=True)
model.save("models/{}.h5".format(model_name))

### Convert the model into a lite model

In [None]:
# Build a TFLite converter from the saved Keras .h5 file
# (`lite` is imported from tensorflow.compat.v1).
converter = lite.TFLiteConverter.from_keras_model_file("models/{}.h5".format(model_name))

In [None]:
# Run the conversion; the result is written to disk in binary mode below.
tflite_model = converter.convert()

In [None]:
# Write the converted model next to the notebook; the context manager
# guarantees the file is flushed and closed before it is read back.
with open("{}.tflite".format(model_name), "wb") as tflite_file:
    tflite_file.write(tflite_model)

### Build the predictor and test it on random images

In [None]:
# Build a predictor from the .tflite file written above and the same
# preprocessing function used for training.
# NOTE(review): predictor_builder is project code; its exact contract is not
# visible in this notebook.
predictor = predictor_builder(model_name + '.tflite', preprocess)

In [None]:
# Twelve independent single-row draws, so the same image may appear twice.
random_image_path = [
    tubes_df.sample()["images_path"].values[0]
    for _ in range(12)
]

In [None]:
# 3 x 4 grid of the sampled images, each titled with the TFLite prediction.
fig, axs = plt.subplots(3, 4,figsize=(20,15), constrained_layout=True)

# axs.flat walks the grid row by row, matching the order of the sampled paths.
for ax, image_path in zip(axs.flat, random_image_path):
    # The predictor receives the raw image read from disk.
    tf_image = T.read_image(image_path)
    prediction = predictor(tf_image)[0]

    # Each image is shown with its prediction as the title.
    ax.set_title("Prediction = {}".format(prediction))
    height, width = tf_image.shape[0], tf_image.shape[1]
    ax.imshow(tf.reshape(tf_image, (height, width, 3)))