In [None]:
import json
import shutil
import random
import pathlib
import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import IPython.display as display

import mlflow
import mlflow.tensorflow
import mlflow.keras

import tensorflow as tf

from xebikart.images import transformer as T
import xebikart.dataset as dataset
# remove warning
import xebikart.vae

from sklearn.model_selection import train_test_split

%matplotlib inline

In [None]:
# Report the TensorFlow version the kernel is running.
tf.__version__

In [None]:
# Ask TensorFlow whether a GPU is available to this kernel.
tf.test.is_gpu_available()

Eager execution evaluates operations immediately, without building a graph first.

Note: enabling it explicitly is only needed when not using TF 2.0.

In [None]:
# `tf.logging` and `tf.enable_eager_execution` only exist in TensorFlow 1.x.
# Guard on their presence so this cell also runs under TF 2.x instead of
# raising AttributeError.
if hasattr(tf, "enable_eager_execution"):
    # TF 1.x: eager mode has to be switched on explicitly.
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.enable_eager_execution()
else:
    # TF 2.x: nothing to enable here, only the log level needs setting.
    tf.get_logger().setLevel("INFO")

# Load data

Download tubes from : https://github.com/xebia-france/xebikart-ml-tubes

In [None]:
# ---------------------------------------------------------------------------
# Notebook parameters
# ---------------------------------------------------------------------------

# Dataset: root location of the tubes and the tub archives to load from it.
tubes_root_folder = "file:/workspace/xebikart-ml-tubes"
tubes_folders = ["tub.v4.02", "tub.v4.03"]

# MLflow run that holds the VAE model loaded further down.
vae_mlflow_run_id = "e653a6fafe5b4dd1b446d6e763ecaf6b"

# Fraction of the samples held out by train_test_split.
test_size = 0.2

# Training parameters.
# NOTE(review): learning_rate is not referenced by any cell of this notebook.
batch_size, shuffle_size = 32, 200
n_epochs = 10
learning_rate = 1e-4

Download tubes from : https://github.com/xebia-france/xebikart-ml-tubes

In [None]:
# Friendlier names for the tub columns used throughout the notebook.
column_names = {
    "cam/image_array": "images_path",
    "user/angle": "angles",
    "user/throttle": "throttles",
}

# Build the tubes DataFrame with xebikart's dataset helper, then rename its columns.
raw_tubes_df = dataset.get_tubes_df(tubes_root_folder, tubes_folders, tubes_extension=".tar.gz")
tubes_df = raw_tubes_df.rename(columns=column_names)

# Number of non-null values per column.
tubes_df.count()

#### **- Display some examples**

In [None]:
# Show three randomly sampled frames, each titled with its angle / throttle labels.
fig, axs = plt.subplots(1, 3, figsize=(15,15), constrained_layout=True)
fig.suptitle("Angle / Throttle", fontsize=20)

picked = tubes_df.sample(3).reset_index()
for ax, (_, sample) in zip(axs, picked.iterrows()):
    random_image_path = sample["images_path"]
    angle, throttle = sample["angles"], sample["throttles"]
    image = mpimg.imread(random_image_path)
    ax.set_title(f"{angle} / {throttle}")
    ax.imshow(image)
    # Hide both axes: tick marks carry no information for a photo.
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)

#### **- Display some sample distribution**

In [None]:
# Side-by-side histograms of the two recorded labels.
fig, axs = plt.subplots(1,2, figsize=(15,5))
histograms = (
    (tubes_df.angles, 'distribution angles'),
    (tubes_df.throttles, 'distribution throttles'),
)
for ax, (values, title) in zip(axs, histograms):
    ax.hist(values)
    ax.set_title(title)

plt.show()

# Preprocessing Images

**Images will be :**
- Loaded
    - Read images
    - Decode jpeg images into uint8 tensor
- Cropped
    - Crop images on the lower part
- Augmented
    - Brightness : Adjust the brightness of images by a random factor.
    - Saturation : Adjust the saturation of images by a random factor (must be RGB images)
    - Contrast : Adjust the contrast of images by a random factor.
    - Jpeg quality : Randomly changes jpeg encoding quality for inducing jpeg noise
- Normalized
    - Images are converted to float32 values between 0 and 1
- Edged
    - Convert tensor uint8 type into float32 type
    - Convert rgb images to grayscale
    - Reshape into [1, 80, 160, 1] tensor
    - Apply sobel filter (see https://en.wikipedia.org/wiki/Sobel_operator)
    - Reshape into [80, 160, 2] tensor
    - Select image gradient up to 0.3
    - Binarize images by setting elements to 0 or 1

##### **- Display some examples before and after preprocessing**

In [None]:
def generate_vae_fn(vae):
    """Build a function that encodes a single image with the VAE's encoder.

    NOTE(review): this notebook-local helper is not called anywhere in the
    notebook; the cell below uses ``T.generate_vae_fn`` instead.

    Args:
        vae: model exposing ``get_layer('encoder')``.

    Returns:
        A function taking one image tensor and returning the squeezed third
        output of the encoder's ``predict`` (presumably the sampled latent
        vector; confirm against the VAE definition).
    """
    encoder = vae.get_layer('encoder')

    def _transform(tf_image):
        # The encoder expects a batch, so wrap the single image in one.
        single_batch = tf.expand_dims(tf_image, 0)
        encoder_outputs = encoder.predict(single_batch, steps=1)
        return tf.squeeze(encoder_outputs[2])

    return _transform

# Load the trained VAE from its MLflow run (uncompiled: it is only used for
# inference) and wrap it as an image -> embedding function.
mlflow_vae = mlflow.keras.load_model(
    f"runs:/{vae_mlflow_run_id}/model",
    compile=False,
)
mlflow_vae_fn = T.generate_vae_fn(mlflow_vae)

# Crop function configured with a 160x80 window and a 40-pixel height margin.
crop_fn = T.generate_crop_fn(
    left_margin=0,
    width=160,
    height_margin=40,
    height=80,
)

def load_augmentation_preprocess(image_path):
    """Turn an image path into the feature dict fed to the estimator.

    Stages, in order: read, normalize, crop, random augmentation, edge
    extraction, then VAE encoding through ``tf.py_function``.

    NOTE(review): the demo cell below and the markdown description normalize
    *after* cropping and augmenting, whereas this function normalizes first.
    Confirm which order the VAE was trained with.

    Args:
        image_path: path of the image, handed to ``T.read_image``.

    Returns:
        dict with a single key ``'pixels'`` holding the float32 embedding,
        whose static shape is set to ``(32,)``.
    """
    stages = (T.read_image, T.normalize, crop_fn, T.data_augmentation, T.edges)
    tf_image = image_path
    for stage in stages:
        tf_image = stage(tf_image)

    embedding = tf.py_function(mlflow_vae_fn, [tf_image], tf.float32)
    # The output of py_function has no static shape, so it is set by hand:
    # https://github.com/tensorflow/tensorflow/issues/28257
    # https://stackoverflow.com/questions/42590431/output-from-tensorflow-py-func-has-unknown-rank-shape
    # TODO: Get latent dimension from VAE
    embedding.set_shape((32,))
    return {'pixels': embedding}

In [None]:
# Pick one random frame and push it through each preprocessing stage in turn.
random_image_path = tubes_df.sample()["images_path"].values[0]

tf_image_original = T.read_image(random_image_path)
tf_image_cropped = crop_fn(tf_image_original)
tf_image_augmented = T.data_augmentation(tf_image_cropped)
tf_image_normalized = T.normalize(tf_image_augmented)
tf_image_edged = T.edges(tf_image_normalized)
tf_image_embedded = mlflow_vae_fn(tf_image_edged)

# One panel per stage: (title, image, extra imshow arguments).
panels = [
    ("Original", tf_image_original, {}),
    ("Cropping", tf_image_cropped, {}),
    ("Augmented", tf_image_augmented, {}),
    ("Preprocessed channel 1", tf_image_edged[:,:,0], {"cmap": 'gray'}),
    ("Preprocessed channel 2", tf_image_edged[:,:,1], {"cmap": 'gray'}),
]

fig, axs = plt.subplots(1, len(panels), figsize=(15,15), constrained_layout=True)
for ax, (title, picture, imshow_kwargs) in zip(axs, panels):
    ax.set_title(title)
    ax.imshow(picture, **imshow_kwargs)
plt.show()

# The VAE embedding has no image representation, so print it.
print(tf_image_embedded)

# Build a dataset of images

#### **- Split data into test/train datasets**

Note: only the angle is used as the label.

In [None]:
# Plain Python lists of the image paths and of both labels.
images_path, metas_angle, metas_throttle = (
    tubes_df[column].tolist()
    for column in ("images_path", "angles", "throttles")
)

In [None]:
# Fix the seed so the train/test partition is identical on every run.
# Without it each Restart & Run All trains and evaluates on a different split
# and results cannot be compared between runs.
split_seed = 42

train_images_path, test_images_path, train_metas, test_metas = train_test_split(
    images_path,
    metas_angle,
    test_size=test_size,
    random_state=split_seed,
)
print('Train set :', len(train_images_path), 'images')
print('Test set :', len(test_images_path), 'images')

#### **- Create tensor for train and test datasets**

In [None]:
def make_input_fn(filepath, label, batch_size=32, shuffle_size=200, n_epochs=50):
    """Create a zero-argument input function producing (features, label) batches.

    Args:
        filepath: sequence of image paths; each one is mapped through
            ``load_augmentation_preprocess``.
        label: sequence of labels aligned with ``filepath``.
        batch_size: number of examples per batch.
        shuffle_size: size of the shuffle buffer.
        n_epochs: number of times the dataset is repeated.

    Returns:
        A callable that builds and returns the ``tf.data.Dataset`` when invoked.
    """
    def _input_fn():
        features = tf.data.Dataset.from_tensor_slices(filepath).map(load_augmentation_preprocess)
        targets = tf.data.Dataset.from_tensor_slices(label)
        # Pair features with labels, then shuffle -> repeat -> batch -> prefetch.
        paired = tf.data.Dataset.zip((features, targets))
        paired = paired.shuffle(shuffle_size)
        paired = paired.repeat(n_epochs)
        paired = paired.batch(batch_size)
        return paired.prefetch(1)

    return _input_fn

In [None]:
# Forward the notebook parameters explicitly. Otherwise make_input_fn falls
# back to its own defaults and the configured values (notably n_epochs) are
# silently ignored.
ds_train = make_input_fn(
    train_images_path,
    train_metas,
    batch_size=batch_size,
    shuffle_size=shuffle_size,
    n_epochs=n_epochs,
)

# A single pass is enough for evaluation: repeating the test set only
# repeats the same work.
ds_test = make_input_fn(
    test_images_path,
    test_metas,
    batch_size=batch_size,
    shuffle_size=shuffle_size,
    n_epochs=1,
)

# Build Model

- Model

**- n_batches_per_layer:** the number of batches used to collect statistics for each tree layer. Here it is set to the total number of batches, i.e. the number of training samples divided by the batch size.

In [None]:
# The VAE embedding is exposed to the estimator as one 32-value numeric
# feature under the 'pixels' key.
image_col = tf.feature_column.numeric_column('pixels', shape=[32,])

# One layer per pass over the training data. Clamp to at least 1 so a training
# set smaller than one batch does not yield n_batches_per_layer=0.
n_batches_per_layer = max(1, len(train_metas) // batch_size)

estimator = tf.estimator.BoostedTreesRegressor(
    feature_columns=[image_col],
    n_batches_per_layer=n_batches_per_layer,
)

In [None]:
# Fit the boosted-trees regressor on the training input function.
estimator.train(ds_train)

In [None]:
# Evaluate the trained estimator on the test input function and keep the result.
results = estimator.evaluate(ds_test)

# Plot loss

In [None]:
def plot_results(history):
    """Plot training and validation loss per epoch.

    Args:
        history: object exposing a ``history`` mapping that contains the
            ``'loss'`` and ``'val_loss'`` series, one value per epoch.

    Raises:
        KeyError: if ``'loss'`` or ``'val_loss'`` is missing from the mapping.
    """
    # Select the two series by name instead of relabelling columns by position.
    # Positional relabelling swaps the curves if the key order differs and
    # fails outright when extra metrics are recorded.
    hist_df = pd.DataFrame(history.history)[['loss', 'val_loss']]
    # Epochs are numbered from 1 on the x axis.
    hist_df.index = np.arange(1, len(hist_df) + 1)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(hist_df['val_loss'], lw=3, label='Validation Loss')
    ax.plot(hist_df['loss'], lw=3, label='Training Loss')
    ax.set_ylabel('Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylim(0, 0.2)
    ax.grid()
    ax.legend(loc=0)

    plt.show()

In [None]:
# No cell in this notebook defines `history`: the model is a tf.estimator,
# trained with `estimator.train`, and nothing assigns a history object.
# Calling plot_results(history) unconditionally therefore raises NameError on
# a fresh kernel. Plot only when a history exists; otherwise show the metrics
# returned by `estimator.evaluate`.
if "history" in globals():
    plot_results(history)
else:
    display.display(pd.Series(results, name="evaluation metrics"))

# Evaluation

**- steps:** Total number of steps (batches of samples) before declaring the prediction round finished. Ignored with the default value of None. If x is a tf.data dataset or a dataset iterator, and steps is None, predict will run until the input dataset is exhausted.

In [None]:
# Predict angles for the test set with the trained estimator.
# `Estimator.predict` takes an input function and yields one dict per example,
# with the regression output stored under "predictions".
# The input function may repeat its data, so stop after one pass over the
# test set (the range is exhausted first, so no extra prediction is consumed).
# The name `train_angles` is kept because the next cell reads it; the values
# are test-set predictions.
train_angles = [
    float(prediction["predictions"][0])
    for _, prediction in zip(range(len(test_metas)), estimator.predict(ds_test))
]

In [None]:
# Histogram of the predicted angles, 20 bins.
df = pd.DataFrame(data=train_angles, columns=['angles'])
df['angles'].hist(bins=20)


In [None]:
# Kernel density estimate of the predicted angles.
df['angles'].plot(kind='kde')