In [None]:
import json
import shutil
import random
import pathlib
import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import IPython.display as display

import mlflow
import mlflow.tensorflow
import mlflow.keras

import tensorflow as tf

from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras.losses import MSE, MSLE
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.layers import Conv3D, MaxPooling3D, Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from xebikart.images import transformer as T
import xebikart.dataset as dataset

from sklearn.model_selection import train_test_split

%matplotlib inline

In [None]:
# Show the installed TensorFlow version.
tf.__version__

In [None]:
# Report whether TensorFlow can see a GPU.
tf.test.is_gpu_available()

Eager execution evaluates operations immediately, without building a graph first.
Note: enabling it explicitly is only needed with TensorFlow 1.x; it is the default in TF 2.0.

In [None]:
# Turn eager execution on only when the switch exists and eager mode is not
# already active. TF 2.x runs eagerly by default and no longer exposes
# `tf.enable_eager_execution`, so an unconditional call fails there; the guard
# also makes this cell safe to re-run on TF 1.x.
if hasattr(tf, "enable_eager_execution") and not tf.executing_eagerly():
    tf.enable_eager_execution()

In [None]:
# parameters
# dataset parameters
# Root location and names of the tub archives handed to dataset.get_tubes_df.
tubes_root_folder = "file:/workspace/xebikart-ml-tubes"
tubes_folders = [
    "tub.v5.01"
]
# Fraction of the samples given to train_test_split as the test set.
test_size=0.2

# training parameters
# batch_size is used to derive the step counts for fit / predict.
batch_size = 32
shuffle_size = 200
# Passed to model.fit as `epochs`.
n_epochs = 200
# Learning rate of the Adam optimizer.
learning_rate = 1e-4

# Load data

Download tubes from : https://github.com/xebia-france/xebikart-ml-tubes

In [None]:
# Read the tubes listed in `tubes_folders`, then give the columns used in
# this notebook shorter names.
column_names = {
    "cam/image_array": "images_path",
    "user/angle": "angles",
    "user/throttle": "throttles",
}
raw_tubes_df = dataset.get_tubes_df(
    tubes_root_folder,
    tubes_folders,
    tubes_extension=".tar.gz",
)
tubes_df = raw_tubes_df.rename(columns=column_names)
# Non-null count per column.
tubes_df.count()

# Explore data

#### **- Display some examples**

In [None]:
# Show three randomly drawn frames, each titled with its angle / throttle.
fig, axs = plt.subplots(1, 3, figsize=(15,5), constrained_layout=True)
fig.suptitle("Angle / Throttle", fontsize=20)

examples = tubes_df.sample(3).reset_index()
for n, sample in examples.iterrows():
    ax = axs[n]
    image = mpimg.imread(sample["images_path"])
    ax.set_title(f"{sample['angles']} / {sample['throttles']}")
    ax.imshow(image)
    # Hide both axes: pixel coordinates are not informative here.
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)

#### **- Display some sample distribution**

In [None]:
# Histogram of each label column, side by side.
fig, axs = plt.subplots(1,2, figsize=(15,5))
panels = (
    ("angles", 'distribution angles'),
    ("throttles", 'distribution throttles'),
)
for ax, (column, title) in zip(axs, panels):
    ax.hist(tubes_df[column])
    ax.set_title(title)

plt.show()

# Preprocessing Images

**Images will be :**
- Loaded
    - Read images
    - Decode jpeg images into uint8 tensor
- Cropped
    - Crop images on the lower part
- Augmented
    - Brightness : Adjust the brightness of images by a random factor.
    - Saturation : Adjust the saturation of images by a random factor (must be RGB images)
    - Contrast : Adjust the contrast of images by a random factor.
    - Jpeg quality : Randomly changes jpeg encoding quality for inducing jpeg noise
- Normalized
    - Images are converted to float32 values between 0 and 1
- Edged
    - Convert tensor uint8 type into float32 type
    - Convert rgb images to grayscale
    - Reshape into [1, 80, 160, 1] tensor
    - Apply sobel filter (see https://en.wikipedia.org/wiki/Sobel_operator)
    - Reshape into [80, 160, 2] tensor
    - Select image gradient up to 0.3
    - Binarize images by setting elements to 0 or 1

##### **- Display some examples before and after preprocessing**

In [None]:
# Cropping function shared by the loaders and the demo cell below.
# NOTE(review): presumably keeps a 160x80 window starting 40 px from the top;
# confirm against xebikart.images.transformer.generate_crop_fn.
crop_fn = T.generate_crop_fn(left_margin=0, width=160, height_margin=40, height=80)

def load_augmentation_preprocess(image_path):
    """Load one image and return its edge representation, with augmentation.

    Applies, in this order: ``T.read_image``, ``T.normalize``, ``crop_fn``,
    ``T.data_augmentation`` and ``T.edges``.

    NOTE(review): the demo cell further down augments *before* normalizing;
    confirm which order ``T.data_augmentation`` expects.
    """
    prepared = crop_fn(T.normalize(T.read_image(image_path)))
    return T.edges(T.data_augmentation(prepared))

def load_preprocess(image_path):
    """Load one image and return its edge representation, without augmentation.

    Applies, in this order: ``T.read_image``, ``T.normalize``, ``crop_fn``
    and ``T.edges``.
    """
    prepared = crop_fn(T.normalize(T.read_image(image_path)))
    return T.edges(prepared)

def flip_load_function(load_function):
    """Return a loader that calls `load_function` and mirrors its result left/right.

    Args:
        load_function: callable taking an image path and returning an image tensor.

    Returns:
        A callable with the same signature whose output has been passed
        through ``tf.image.flip_left_right``.
    """
    def load_function_flipped(image_path):
        return tf.image.flip_left_right(load_function(image_path))

    return load_function_flipped

In [None]:
# Walk one randomly chosen image through every stage and plot each result.
random_image_path = tubes_df.sample()["images_path"].values[0]

tf_image_original = T.read_image(random_image_path)
tf_image_cropped = crop_fn(tf_image_original)
tf_image_augmented = T.data_augmentation(tf_image_cropped)
tf_image_normalized = T.normalize(tf_image_augmented)
tf_image_edged = T.edges(tf_image_normalized)

# (title, image, colormap) for each panel; the two edge channels are drawn in gray.
panels = (
    ("Original", tf_image_original, None),
    ("Cropping", tf_image_cropped, None),
    ("Augmented", tf_image_augmented, None),
    ("Preprocessed channel 1", tf_image_edged[:,:,0], 'gray'),
    ("Preprocessed channel 2", tf_image_edged[:,:,1], 'gray'),
)

fig, axs = plt.subplots(1, 5, figsize=(15,15), constrained_layout=True)
for ax, (title, picture, colormap) in zip(axs, panels):
    ax.set_title(title)
    ax.imshow(picture, cmap=colormap)
plt.show()

# Build a dataset of images

_TODO: do the shuffle at the very end_

#### **- Split data into test/train datasets**

Note : We only use angle as label

In [None]:
# Pull the three columns out of the frame as plain Python lists.
images_path, metas_angle, metas_throttle = (
    tubes_df[column].tolist()
    for column in ("images_path", "angles", "throttles")
)

In [None]:
# Leading part of each file name (text before the first "_"), still as a string.
images_num = [path.rsplit('/', 1)[-1].split('_', 1)[0] for path in images_path]

In [None]:
# NOTE(review): these two values are not referenced anywhere else in the
# visible notebook and hard-code an absolute local path; confirm they are
# still needed.
folder_path = "/root/.keras/datasets/tub.v5.01/"
image_generic_path = "_cam-image_array_.jpg"

# Keep the expression last: Jupyter only displays the value of a cell's final
# statement, so placed first it showed nothing.
images_path[0]

In [None]:
# One row per image: its path and its frame number, ordered by frame number.
images_df = (
    pd.DataFrame({"path": images_path, "num": images_num})
    .astype({"num": "int"})
    .sort_values("num")
)

##### Stack the path of the tubes

In [None]:
# Sliding window over `images_path`: every stack holds 4 consecutive entries
# and is labelled with the angle of its last entry.
# NOTE(review): this relies on `images_path` already being in frame order; the
# frame-sorted `images_df` built above is not used here. Verify the ordering.
window = 4
images_path_stack = [images_path[start:start + window]
                     for start in range(len(images_path) - window + 1)]
metas_angle_stack = metas_angle[window - 1:]

##### Split the data

In [None]:
# Random train/test split of the stacks and their angle labels.
# NOTE(review): neighbouring stacks share 3 of their 4 frames, so a shuffled
# split places near-identical inputs in both sets; no random_state is set,
# so the split also changes on every run.
(train_images_path,
 test_images_path,
 train_metas,
 test_metas) = train_test_split(images_path_stack, metas_angle_stack, test_size=test_size)

for split_name, split_paths in (('Train set :', train_images_path),
                                ('Test set :', test_images_path)):
    print(split_name, len(split_paths), 'images')

In [None]:
# Display one randomly chosen training stack (a list of 4 image paths).
random.sample(train_images_path,1)

In [None]:
# Pick one random training stack and show its four frames, oldest first.
random_number = random.sample(range(len(train_images_path)),1)[0]
random_image_path = train_images_path[random_number]

(tf_image_original_0,
 tf_image_original_1,
 tf_image_original_2,
 tf_image_original_3) = (T.read_image(frame_path) for frame_path in random_image_path)

frames = (tf_image_original_0, tf_image_original_1,
          tf_image_original_2, tf_image_original_3)
titles = ("t-3", "t-2", "t-1", "t")

fig, axs = plt.subplots(1, 4, figsize=(15,15), constrained_layout=True)
for ax, title, frame in zip(axs, titles, frames):
    ax.set_title(title)
    ax.imshow(frame)
plt.show()

In [None]:
# Angle label of the stack drawn with `random_number`.
train_metas[random_number]

#### **- Create tensor for train and test datasets**

In [None]:
def input_fn_stack(filepath_stack, label, load_type = load_augmentation_preprocess):
    """Build a dataset of (stack of 4 loaded images, label) pairs.

    Args:
        filepath_stack: sequence of stacks, each holding 4 image paths.
        label: sequence with one label per stack, in the same order.
        load_type: callable mapping one image path to an image tensor. It is
            applied to each of the 4 paths of a stack. Defaults to
            ``load_augmentation_preprocess``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(stacked_images, label)`` tuples,
        neither shuffled nor batched.
    """
    # The caller's `load_type` must be honoured here. It used to be overwritten
    # with `load_augmentation_preprocess`, so the "not augmented" and "flipped"
    # datasets were in fact augmented and never flipped.
    def load_and_stack(paths):
        return tf.stack([load_type(paths[0]),
                         load_type(paths[1]),
                         load_type(paths[2]),
                         load_type(paths[3])])

    # Paths -> loaded and stacked image tensors.
    ds_x = tf.data.Dataset.from_tensor_slices(filepath_stack)
    ds_x = ds_x.map(load_and_stack)

    # Labels, in the same order as the stacks.
    ds_y = tf.data.Dataset.from_tensor_slices(label)

    # Finally pair every stack with its label.
    ds_x_y = tf.data.Dataset.zip((ds_x, ds_y))

    return ds_x_y

##### get the images not augmented

In [None]:
# Training dataset built by passing the non-augmenting loader `load_preprocess`.
ds_train_not_augmented = input_fn_stack(train_images_path, train_metas, 
                              load_type = load_preprocess)

In [None]:
# Display the dataset object.
ds_train_not_augmented

##### Get the data flipped (not augmented)

In [None]:
# Mirrored variant of the training set: every angle is negated and the plain
# loader is wrapped so that it flips each frame left/right.
flipped_load_function = flip_load_function(load_preprocess)
trains_meta_flipped = [-angle for angle in train_metas]

ds_train_flipped = input_fn_stack(
    train_images_path,
    trains_meta_flipped,
    load_type=flipped_load_function,
)

In [None]:
# Display the dataset object.
ds_train_flipped

##### Get the data augmented

In [None]:
# Training dataset built by passing the augmenting loader.
ds_train_augmented = input_fn_stack(train_images_path, train_metas, 
                              load_type = load_augmentation_preprocess)

In [None]:
# Display the dataset object.
ds_train_augmented

##### Get the data augmented and flipped

In [None]:
# Mirrored and augmented variant: negated angles, and the augmenting loader
# wrapped so that it flips each frame left/right.
flipped_load_function = flip_load_function(load_augmentation_preprocess)
trains_meta_flipped = [-angle for angle in train_metas]

ds_train_augmented_flipped = input_fn_stack(
    train_images_path,
    trains_meta_flipped,
    load_type=flipped_load_function,
)

# plt.imshow(flip_load_function(load_preprocess)(train_images_path[0])[:,:,0])
# plt.imshow(load_preprocess(train_images_path[0])[:,:,0])

In [None]:
# Display the dataset object.
ds_train_augmented_flipped

##### Concatenate, shuffle and batch the data

In [None]:
# Append the four training variants one after another. Order matters: the
# result is not shuffled at this point.
ds_train = (
    ds_train_not_augmented
    .concatenate(ds_train_flipped)
    .concatenate(ds_train_augmented)
    .concatenate(ds_train_augmented_flipped)
)

In [None]:
# Take batch and shuffle sizes from the parameters cell instead of redefining
# them. The training cell derives its step counts from `batch_size`, so a
# second, independent BATCH_SIZE could silently drift out of sync with it.
BATCH_SIZE = batch_size
SHUFFLE_SIZE = shuffle_size
# NOTE(review): NUM_EPOCHS bounds how many samples ds_train can yield; keep it
# large enough for epochs x steps_per_epoch in the training cell.
NUM_EPOCHS = 50

# NOTE(review): the shuffle buffer is much smaller than one concatenated
# variant, so batches mix samples only locally.
ds_train = ds_train.shuffle(SHUFFLE_SIZE).repeat(NUM_EPOCHS).batch(BATCH_SIZE).prefetch(1)

#### Get the data for test

In [None]:
# Test dataset, with two corrections:
#  - pass `load_preprocess` explicitly so evaluation images are not randomly
#    augmented (the default loader of input_fn_stack is the augmenting one);
#  - batch it: Keras expects a batched dataset, and the fit / predict cells
#    count their steps as len(test_metas)//batch_size batches.
ds_test = input_fn_stack(test_images_path, test_metas, load_type=load_preprocess).batch(batch_size)

In [None]:
# Display the dataset object.
ds_test

# Build Model

In [None]:
# Regressor over a stack of 4 two-channel edge images (input 4 x 80 x 160 x 2).
# Every convolution kernel and pooling window has size 1 along the first
# (frame) axis, so those layers never mix information between frames; the
# frames are only combined by the dense layers after Flatten.
model = Sequential([
    # Convolutional feature extractor
    Conv3D(16, (1,5,5), activation='relu',
           kernel_regularizer=l1_l2(l1=0.1, l2=0.01),
           input_shape=(4, 80, 160, 2)),
    MaxPooling3D((1,3,3)),
    Conv3D(32, (1,3,3), activation='relu',
           kernel_regularizer=l1_l2(l1=0.01, l2=0.01)),
    MaxPooling3D((1,3,3)),
    Conv3D(64, (1,3,3), activation='relu',
           kernel_regularizer=l1_l2(l1=0.01, l2=0.01)),
    MaxPooling3D((1,2,2)),

    # Fully connected head with dropout
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),

    # Single linear output: the predicted angle
    Dense(1, activation='linear'),
])

model.summary()

In [None]:
# Train with Adam (learning rate from the parameters cell) on mean squared error.
model.compile(optimizer=Adam(learning_rate=learning_rate, decay=1e-6), loss="mean_squared_error")

**- How to choose and interpret loss :**

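Mean Absolute Error (MAE) is the absolute value of the difference between the predicted and the actual value.
Let's see an example:
Actual angle = 0.21, predicted angle = 0.25 → absolute error = |0.25 − 0.21| = 0.04. Note that the model above is compiled with mean squared error, which squares this difference instead (here 0.04² = 0.0016).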


In [None]:
# Train the model inside an MLflow run of the "convolutional_neural_network" experiment.
mlflow.set_experiment("convolutional_neural_network")
# Overrides the value set in the parameters cell.
n_epochs=50

run_params = {
    "images": str(tubes_folders),
    "nb_images": len(train_images_path),
    "epochs": n_epochs,
    "batch_size": batch_size,
    "learning_rate": learning_rate
}

# NOTE(review): ds_train concatenates four variants of the training set, so
# len(train_metas)//batch_size steps cover about a quarter of it per epoch.
fit_options = dict(
    steps_per_epoch=len(train_metas)//batch_size,
    epochs=n_epochs,
    verbose=1,
    validation_data=ds_test,
    validation_steps=len(test_metas)//batch_size,
)

with mlflow.start_run():
    mlflow.log_params(run_params)
    mlflow.tensorflow.autolog()
    history = model.fit(x=ds_train, **fit_options)

_TODO: check that the concatenation worked as expected_

_TODO: make the number of epochs consistent_

In [None]:
# load model
# cnn_model = mlflow.keras.load_model("runs:/5d890004534c44f1880a0da3011c80d0/model")

# Plot loss

In [None]:
def plot_results(history):
    """Plot the training and validation loss per epoch.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by ``model.fit``). It must
            contain ``'loss'``; ``'val_loss'`` is plotted when present.
    """
    hist_df = pd.DataFrame(history.history)
    # Number the epochs from 1 instead of 0.
    hist_df.index = np.arange(1, len(hist_df)+1)

    plt.figure(figsize=(10,5))
    # Pick the curves by key. Renaming the columns by position mislabelled
    # them whenever the key order differed and failed outright when the
    # history held anything other than exactly two columns.
    if 'val_loss' in hist_df.columns:
        plt.plot(hist_df['val_loss'], lw=3, label='Validation Loss')
    plt.plot(hist_df['loss'], lw=3, label='Training Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.grid()
    plt.legend(loc=0)

    plt.show()

In [None]:
# Loss curves of the training run above.
plot_results(history)

# Evaluation

**- steps:** Total number of steps (batches of samples) before declaring the prediction round finished. Ignored with the default value of None. If x is a tf.data dataset or a dataset iterator, and steps is None, predict will run until the input dataset is exhausted.

In [None]:
# Run the model on ds_test for len(test_metas)//batch_size steps.
# NOTE(review): despite its name, `train_angles` holds predictions for the test set.
train_angles = model.predict(ds_test, steps=len(test_metas)//batch_size)

In [None]:
# Histogram of the predicted angles.
df = pd.DataFrame(train_angles, columns=['angles'])
df['angles'].hist()