# Variational auto-encoder

Author : Nicolas Laille/ Johan Jublanc
    
Date : 05/10/2019

Description : 

Use a VAE to encode preprocessed images

In [None]:
import tensorflow as tf

from tensorflow.keras.models import Model

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import time
import cv2
import os
import uuid
import tempfile

import mlflow
import mlflow.tensorflow
import mlflow.keras

import pandas as pd

import seaborn as sns

import xebikart.dataset as dataset
from xebikart.vae import create_variational_auto_encoder, custom_vae_loss
import xebikart.images.transformer as T
from sklearn.model_selection import train_test_split

import matplotlib.patheffects as PathEffects

from sklearn.manifold import TSNE

# Parameters

In [None]:
# --- Dataset ---------------------------------------------------------------
# Root folder handed to dataset.get_tubes_df.
tubes_root_folder = "file:/workspace/xebikart-ml-tubes"

# Tube folders, grouped by the kind of scene they contain.
tubes_folders_road = ["tub.v7.01", "tub.v7.02"]
# "tub.v5.03" and "tub.v5.04" are currently left out of the exit group.
tubes_folders_exit = ["tub.v5.01", "tub.v5.02"]
tubes_folders_obstacles = ["tub.v8.02"]

# Every folder used in this notebook, in road / exit / obstacles order.
image_folders = [
    *tubes_folders_road,
    *tubes_folders_exit,
    *tubes_folders_obstacles,
]

# Fraction of the images held out for the test set.
test_size = 0.2

# --- Training --------------------------------------------------------------
learning_rate = 1e-4
batch_size = 64
n_epochs = 20
shuffle_size = 1024  # passed to Dataset.shuffle for the training stream

# --- VAE -------------------------------------------------------------------
latent_dim = 64

## Get the data

In [None]:
%%capture
%%time
# Load each group of tubes and tag it with an integer class label:
# 0 = road, 1 = exit, 2 = obstacles.
road_tubes_df = dataset.get_tubes_df(
    tubes_root_folder, tubes_folders_road, tubes_extension=".tar.gz"
)
road_tubes_df["label"] = 0

exit_tubes_df = dataset.get_tubes_df(
    tubes_root_folder, tubes_folders_exit, tubes_extension=".tar.gz"
)
exit_tubes_df["label"] = 1

obstacles_tubes_df = dataset.get_tubes_df(
    tubes_root_folder, tubes_folders_obstacles, tubes_extension=".tar.gz"
)
obstacles_tubes_df["label"] = 2

# Stack the three groups, rename the image column and drop the driving /
# timestamp columns that are not used in this notebook.
tubes_df = (
    pd.concat([road_tubes_df, exit_tubes_df, obstacles_tubes_df])
    .rename(columns={"cam/image_array": "images_path"})
    .drop(["user/angle", "user/throttle", "user/mode", "timestamp"], axis=1)
)

In [None]:
# Image paths and their class labels as two parallel Python lists.
images_path = tubes_df.loc[:, "images_path"].tolist()
label = tubes_df.loc[:, "label"].tolist()

In [None]:
# Random split of the (path, label) pairs. No seed is given, so the split
# differs from one execution to the next.
train_images_path, test_images_path, train_metas, test_metas = train_test_split(
    images_path,
    label,
    test_size=test_size,
)
print('Train set :', len(train_images_path), 'images')
print('Test set :', len(test_images_path), 'images')

## Utils

In [None]:
def plot_images(images, title):
    """Plot both channels of every image in a two-row grid.

    The top row shows channel 0 of each image and the bottom row shows
    channel 1, one column per image.  (The previous version drew channel 1
    in both rows, so the two rows were always identical.)

    Args:
        images: sequence of arrays indexed as ``[row, col, channel]`` with at
            least two channels.
        title: text displayed as the figure title.
    """
    n = len(images)
    fig = plt.figure(figsize=(20, 4))
    fig.suptitle(title, fontsize=20)
    for channel in (0, 1):
        for column, image in enumerate(images):
            # Subplot indices are 1-based and run row by row.
            ax = plt.subplot(2, n, channel * n + column + 1)
            ax.imshow(image[:, :, channel], cmap="gray")
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
    plt.show()

# Process the images and build the model

### Choose an image pre-process

**Images will be :**
- Loaded
    - Read images
    - Decode jpeg images into uint8 tensor
- Cropped
    - Crop images on the lower part
- Augmented (currently disabled: the `T.data_augmentation` call in `preprocess` is commented out)
    - Brightness : Adjust the brightness of images by a random factor.
    - Saturation : Adjust the saturation of images by a random factor (must be RGB images)
    - Contrast : Adjust the contrast of images by a random factor.
    - Jpeg quality : Randomly changes jpeg encoding quality for inducing jpeg noise
- Normalized
    - Images are converted to float32 values between 0 and 1
- Edged
    - Convert tensor uint8 type into float32 type
    - Convert rgb images to grayscale
    - Reshape into [1, 80, 160, 1] tensor
    - Apply sobel filter (see https://en.wikipedia.org/wiki/Sobel_operator)
    - Reshape into [80, 160, 2] tensor
    - Select image gradient up to 0.3
    - Binarize images by setting elements to 0 or 1

In [None]:
# Crop function used by the preprocessing step.
crop_fn = T.generate_crop_fn(
    left_margin=0,
    width=160,
    height_margin=40,
    height=80,
)

In [None]:
def preprocess(image_path):
    """Turn an image path into the edge image used by the VAE.

    The image is read, normalized, cropped with the module-level ``crop_fn``
    and finally passed through ``T.edges``.  Data augmentation
    (``T.data_augmentation``) is not applied.

    Args:
        image_path: path of the image to load, as accepted by ``T.read_image``.

    Returns:
        The result of ``T.edges`` on the cropped, normalized image.
    """
    normalized = T.normalize(T.read_image(image_path))
    cropped = crop_fn(normalized)
    return T.edges(cropped)

## Build tensorflow datasets

In [None]:
def input_fn(filepath, label, preprocess):
    """Build two parallel tf.data datasets: preprocessed images and labels.

    Args:
        filepath: sequence of image paths.
        label: sequence of labels, in the same order as ``filepath``.
        preprocess: function mapped over every element of the path dataset.

    Returns:
        A ``(images, labels)`` pair of datasets.  Neither is shuffled,
        repeated, batched or zipped here; callers do that themselves.
    """
    images_ds = tf.data.Dataset.from_tensor_slices(filepath).map(preprocess)
    labels_ds = tf.data.Dataset.from_tensor_slices(label)
    return images_ds, labels_ds

In [None]:
# Image and label datasets for each split; within a split both datasets
# follow the order of the input lists.
ds_train_image, ds_train_labels = input_fn(
    filepath=train_images_path,
    label=train_metas,
    preprocess=preprocess,
)
ds_test_images, ds_test_labels = input_fn(
    filepath=test_images_path,
    label=test_metas,
    preprocess=preprocess,
)

In [None]:
# Training stream: shuffle, repeat n_epochs times, then group into batches.
ds_train_image_batch = (
    ds_train_image
    .shuffle(shuffle_size)
    .repeat(n_epochs)
    .batch(batch_size)
)

## Build a VAE

## Compile the model

In [None]:
# The per-sample input shape is the batched dataset shape without its
# leading (batch) dimension.
vae = create_variational_auto_encoder(
    latent_dim=latent_dim,
    input_shape=tf.compat.v1.data.get_output_shapes(ds_train_image_batch)[1:],
)

In [None]:
# Adam optimizer; the loss function is built from the model by custom_vae_loss.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
vae.compile(
    optimizer=optimizer,
    loss=custom_vae_loss(vae),
)

### Test the vae before training

In [None]:
# Pull one training batch and keep its first four images.
# `tf.compat.v1.data.make_one_shot_iterator` replaces the deprecated
# `Dataset.make_one_shot_iterator` method, and the session is closed as soon
# as the batch has been fetched instead of being left open.
train_iterator = tf.compat.v1.data.make_one_shot_iterator(ds_train_image_batch)
with tf.compat.v1.Session() as session:
    preprocessed_images = session.run(train_iterator.get_next())[:4]

plot_images(preprocessed_images, "Preprocessed images")

# The model has not been fitted yet at this point of the notebook.
decoded_images = vae.predict(preprocessed_images)
plot_images(decoded_images, "Decoded images")

# Train the model

In [None]:
n_samples = len(train_images_path)

mlflow.set_experiment("variational_auto_encoder_edge")
# Create temp directory (only needed by the TFLite export kept below as comments)
run_tempdir = tempfile.mkdtemp()

with mlflow.start_run(nested=True):
    mlflow.log_params({
        "images": str(image_folders),
        # Number of training images (previously the shuffle buffer size was
        # logged under this key by mistake).
        "nb_images": n_samples,
        "shuffle_size": shuffle_size,
        "epochs": n_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        # Real per-sample shape, stringified, instead of a placeholder value.
        "input_shape": str(tf.compat.v1.data.get_output_shapes(ds_train_image_batch)[1:]),
        "latent_dim": latent_dim
    })
    mlflow.tensorflow.autolog()
    # The dataset is repeated n_epochs times, so each epoch consumes
    # n_samples // batch_size batches.
    vae.fit(ds_train_image_batch, epochs=n_epochs, steps_per_epoch=n_samples // batch_size)
    # save encoder and lite encoder
    # as keras model

    # TODO: TFLiteConverter reset tf graph, it creates issues if you want to continue to use this notebook afterwards
    # TODO: find a solution
    # as lite
    #encoder_save_path = os.path.join(run_tempdir, "encoder")
    #encoder.save(encoder_save_path + ".h5")
    #converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(encoder_save_path + ".h5")
    #converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    #encoder_tflite = converter.convert()
    #open(encoder_save_path + ".tflite", "wb+").write(encoder_tflite)
    #mlflow.log_artifact(encoder_save_path + ".tflite")

## Test the VAE after training

In [None]:
# Load vae mlflow
vae_id = "5e7e7673cbc24e9da2d33ca37df0dda5"
vae = mlflow.keras.load_model("runs:/{}/model".format(vae_id), compile=False)

In [None]:
# Fetch the first four test images.
# `tf.compat.v1.data.make_one_shot_iterator` replaces the deprecated
# `Dataset.make_one_shot_iterator` method, and the session is closed right
# after the fetch instead of being left open.
train_iterator = tf.compat.v1.data.make_one_shot_iterator(ds_test_images.batch(4))
with tf.compat.v1.Session() as session:
    preprocessed_images = session.run(train_iterator.get_next())[:4]

plot_images(preprocessed_images, "Preprocessed images")
decoded_images = vae.predict(preprocessed_images)
plot_images(decoded_images, "Decoded images")

# Visualization

### Get the encoder

In [None]:
# Encoder: from the input of the VAE's "encoder" sub-model to the output of
# its "z_mean" layer.
encoder = Model(
    inputs=vae.get_layer("encoder").input,
    outputs=vae.get_layer("encoder").get_layer("z_mean").output,
)

# Log the encoder to MLflow without the optimizer.
mlflow.keras.log_model(encoder, "encoder", include_optimizer=False)

### Get the test images

In [None]:
%%time
# Upper bound on the number of test images pulled into memory.
size = 5000
# One batch of at most `size` images. The batch can never exceed `size`, so no
# extra slicing is needed. The session is closed once the batch is fetched,
# and the non-deprecated iterator factory is used.
train_iterator = tf.compat.v1.data.make_one_shot_iterator(ds_test_images.batch(size))
with tf.compat.v1.Session() as session:
    X_images = session.run(train_iterator.get_next())

## Encode and decode the images

In [None]:
%%time
# Encoder output for every fetched test image.
X_images_encoded = encoder.predict(X_images)

# Matching labels: the label dataset is read with the same batch size and in
# the same (unshuffled) order as the images. The session is closed once the
# batch is fetched, and the non-deprecated iterator factory is used.
label_iterator = tf.compat.v1.data.make_one_shot_iterator(ds_test_labels.batch(size))
with tf.compat.v1.Session() as session:
    Y_labels = session.run(label_iterator.get_next())

In [None]:
# Full VAE pass (encode then decode) on the same test images.
X_images_decoded = vae.predict(x=X_images)

### TSNE : Reduce the dimension to 2D

In [None]:
%%time

# Fixed seed so that the t-SNE projection is reproducible.
RS = 2805

# Fit t-SNE on the encoded images and project them to 2D
# (TSNE's default number of components).
fashion_tsne = TSNE(random_state=RS).fit_transform(X=X_images_encoded)

#### Get a dataframe with labels and 2D coordinates

In [None]:
# Split the 2D coordinates into one list per axis ...
x_1 = [coords[0] for coords in fashion_tsne]
x_2 = [coords[1] for coords in fashion_tsne]
# ... and store them next to the class label of each point.
X_tsne = pd.DataFrame(
    list(zip(x_1, x_2, Y_labels)),
    columns=["x_1", "x_2", "color"],
)

## Plot the results

##### Choose an image to highlight

In [None]:
def annotate_axes(fig):
    """Write "ax<N>" at position (0.5, 0.5) of every axes of *fig*.

    Axes are numbered from 1 in the order of ``fig.axes``; the bottom and
    left tick labels of each axes are hidden as well.

    Args:
        fig: object exposing an iterable ``axes`` attribute (e.g. a
            matplotlib figure).
    """
    for number, axes in enumerate(fig.axes, start=1):
        axes.text(0.5, 0.5, "ax%d" % number, va="center", ha="center")
        axes.tick_params(labelbottom=False, labelleft=False)

def plot_viz(X_tsne, point, image_original, image_decoded):
    """Show the 2D scatter plot next to one image and its reconstruction.

    Layout: the scatter plot fills the left part of the figure; on the right,
    the top row shows channel 0 of the original and decoded images and the
    bottom row shows channel 1.

    Args:
        X_tsne: DataFrame with columns "x_1" and "x_2" (2D coordinates) and
            "color" (integer class label of each point, starting at 0).
        point: 2D coordinates of the sample to highlight; an "x" is drawn
            there.
        image_original: array indexed as ``[row, col, channel]`` with at
            least two channels.
        image_decoded: reconstruction of ``image_original``, same layout.
    """
    # `np.int` was removed from NumPy; the builtin `int` is the replacement.
    labels = np.asarray(X_tsne["color"].astype(int))

    # The palette is indexed by label value, so it needs max(label) + 1
    # entries. Counting distinct labels raised IndexError when a class was
    # missing.
    num_classes = int(labels.max()) + 1 if labels.size else 1
    palette = np.array(sns.color_palette("hls", num_classes))

    plt.figure(figsize=(20, 8))
    grid = (4, 9)

    # Scatter plot of every point, coloured by class.
    ax_scatter = plt.subplot2grid(grid, (0, 0), colspan=5, rowspan=4)
    ax_scatter.scatter(X_tsne["x_1"], X_tsne["x_2"], lw=0, s=40, c=palette[labels])
    ax_scatter.axis('off')

    # (grid position, 2D image) for the four image panels.
    panels = [
        ((0, 5), image_original[:, :, 0]),
        ((0, 7), image_decoded[:, :, 0]),
        ((2, 5), image_original[:, :, 1]),
        ((2, 7), image_decoded[:, :, 1]),
    ]
    for position, channel_image in panels:
        ax = plt.subplot2grid(grid, position, colspan=2, rowspan=2)
        ax.imshow(channel_image, cmap="gray")
        ax.axis('tight')
        ax.axis('off')

    # Mark the highlighted sample with an "x" outlined in white so that it
    # stays readable on top of the scatter points.
    marker = ax_scatter.text(point[0], point[1], "x", fontsize=24)
    marker.set_path_effects([
        PathEffects.Stroke(linewidth=5, foreground="w"),
        PathEffects.Normal()])

    plt.show()

#### Plot for the label 0 (on road)

In [None]:
# Index of one random test sample among those labelled 0.
rand = np.random.choice(np.flatnonzero(np.asarray(Y_labels) == 0))

# 2D position, input image and reconstruction of that sample.
point, image_original, image_decoded = (
    fashion_tsne[rand],
    X_images[rand],
    X_images_decoded[rand],
)

plot_viz(X_tsne, point, image_original, image_decoded)

#### Plot for the label 1 (road exit)

In [None]:
label = 1
# Index of one random test sample among those carrying this label.
rand = np.random.choice(np.flatnonzero(np.asarray(Y_labels) == label))

# 2D position, input image and reconstruction of that sample.
point, image_original, image_decoded = (
    fashion_tsne[rand],
    X_images[rand],
    X_images_decoded[rand],
)

plot_viz(X_tsne, point, image_original, image_decoded)

#### Plot for the label 2 (obstacle)

In [None]:
label = 2
# Index of one random test sample among those carrying this label.
rand = np.random.choice(np.flatnonzero(np.asarray(Y_labels) == label))

# 2D position, input image and reconstruction of that sample.
point, image_original, image_decoded = (
    fashion_tsne[rand],
    X_images[rand],
    X_images_decoded[rand],
)

plot_viz(X_tsne, point, image_original, image_decoded)