Donkey Car Trainer V5 *Alpha*
----------------------------

## Avant propos


#### Lister les GPU disponibles

In [None]:
# Import TensorFlow under the `tf` alias: the call below uses `tf`, which a
# plain `import tensorflow` does not define.
import tensorflow as tf

# Print the name of the GPU device reported by TensorFlow
print(tf.test.gpu_device_name())

In [None]:
from tensorflow.python.client import device_lib

def get_available_gpus():
    """Return the names of the local devices whose device type is 'GPU'."""
    gpu_names = []
    for device in device_lib.list_local_devices():
        if device.device_type == 'GPU':
            gpu_names.append(device.name)
    return gpu_names


print(get_available_gpus())

#### Choisir le GPU de lancement

- Avec la variable d'environnement `CUDA_VISIBLE_DEVICES`, assigner la valeur :
  * `-1` pour faire du calcul sur CPU
  * `0` ou `1` ou ... pour faire du calcul respectivement sur GPU 0, 1 ou ...
  * `0,1` pour faire du calcul sur les 2 GPU 0 et GPU 1.

- Si `CUDA_VISIBLE_DEVICES` ne fonctionne pas,
on peut encadrer le fit avec ce code :
```
with tf.device("/gpu:1"):
    model.fit(...)
```
pour lancer sur le GPU 1.

In [None]:
import os
# Restrict computation to GPU 1 through the CUDA_VISIBLE_DEVICES environment variable.
# NOTE(review): TensorFlow is already imported and queried for GPUs in the cells
# above; presumably this variable has to be set before the GPUs are initialized
# to take effect -- confirm the intended cell order.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

## Importer les librairies

In [None]:
import os
import numpy as np
import pandas as pd 
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from time import time
from tqdm import tqdm
import json
from PIL import Image
import base64
from io import BytesIO
import inspect
from matplotlib import pyplot as plt
import datetime

## Importer la dataset

### Depuis Colab via Google Drive

In [None]:
from google.colab import files
from google.colab import drive
# Mount Google Drive under /content/drive (remount requested explicitly)
drive.mount('/content/drive', force_remount=True)

In [None]:
!rm -Rf "corentin_renault_20000_record_controller"
!cp "drive/My Drive/ColabStorage/DonkeyCar/Simulator/Dataset/corentin_renault_20000_record_controller.eslr" "dataset.eslr"

### Autre source via CURL

In [None]:
!curl <LIEN_URL> --output dataset.eslr

## Configuration

In [None]:
# Timestamp captured once as a string; appended to MODEL_NAME below
TIME = str(time())

### Environnement

In [None]:
### ENVIRONMENT ###
# Absolute path of the "data" directory; the model, log and sample paths below are built from it
STORAGE_ROOT_DIR = os.path.abspath("data")

### Modèle

In [None]:
### MODEL ###
# Model name, suffixed with the TIME string
MODEL_NAME = "DCDeepModelV5.0-reda-renault-speed_accel_gyro-" + TIME
# Export the model name and its archive name as environment variables
# (presumably consumed by shell cells outside this section -- confirm)
os.environ["MODEL_NAME"] = MODEL_NAME
os.environ["MODEL_NAME_TAR"] = MODEL_NAME+".tar.gz"

### Sauvegarde

In [None]:
### SAVE PATH ###
# Directory for this model: <STORAGE_ROOT_DIR>/model/<MODEL_NAME>
SAVE_PATH = os.path.join(STORAGE_ROOT_DIR, "model", MODEL_NAME)
# Also exported as an environment variable
os.environ["SAVE_PATH"] = SAVE_PATH
os.makedirs(SAVE_PATH, exist_ok=True)

# Don't remove the last "s" in "checkpoints",
# the file `checkpoint` already exists
CHECKPOINT_PATH = os.path.join(SAVE_PATH, "checkpoints")
os.makedirs(CHECKPOINT_PATH, exist_ok=True)
# File name template; the `{epoch:02d}` placeholder is left unformatted here
# (presumably filled in by the checkpoint callback, not visible in this section)
CHECKPOINT_FILEPATH = os.path.join(CHECKPOINT_PATH, "checkpoint-{epoch:02d}.weight")

### Log d'entraînement

In [None]:
### LOG ###
# Root directory of the training logs for this model: <STORAGE_ROOT_DIR>/log/<MODEL_NAME>
ROOT_TENSORLOG_PATH = os.path.join(STORAGE_ROOT_DIR, "log", MODEL_NAME)
os.makedirs(ROOT_TENSORLOG_PATH, exist_ok=True)

In [None]:
def get_new_tensorlog_path():
    """
    Create a timestamped log directory under ROOT_TENSORLOG_PATH.

    The directory name is the current local time formatted as
    `YYYYmmdd-HHMMSS`. The resulting path is printed as a log tag, exported
    in the `LOG_PATH` environment variable and returned.

    :return: path of the log directory
    """
    special_log_name = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    special_log_path = os.path.join(ROOT_TENSORLOG_PATH, special_log_name)
    # exist_ok: two calls within the same second produce the same directory name
    os.makedirs(special_log_path, exist_ok=True)
    # Log the path that was just created (no `LOG_PATH` variable exists in Python scope)
    print(build_log_tag(LOG_PATH=special_log_path))
    os.environ["LOG_PATH"] = special_log_path
    return special_log_path

### Dataset

In [None]:
### DATASET ###
# Name of the dataset; also used as the name of the extraction directory
DATASET_NAME = "corentin_renault_30000_clean_record_controller"
# Location of the .eslr file: <STORAGE_ROOT_DIR>/sample/<DATASET_NAME>.eslr
DATASET_FILE_PATH = os.path.join(STORAGE_ROOT_DIR, "sample", DATASET_NAME + ".eslr")

# Names of the image sub-directory and of the label file inside the dataset directory
IMAGE_PATH = "images"
DATASET_LABEL_FILENAME = "label.csv"

# Both paths are relative (not rooted at STORAGE_ROOT_DIR)
DATASET_LABEL_PATH = os.path.join(DATASET_NAME, DATASET_LABEL_FILENAME)
DATASET_IMAGE_PATH = os.path.join(DATASET_NAME, IMAGE_PATH)

In [None]:
# Number of elements per batch
BATCH_SIZE = 256
# Fraction of the rows held out for validation (passed as `test_size` to train_test_split)
SPLIT_VALIDATION = 0.05
# Fraction held out for test; applied to what remains after the validation split
SPLIT_TEST = 0.05

### Entrée du réseau

In [None]:
# Shape every decoded image is reshaped to; the last dimension is the 3 color channels
# (presumably (height, width, channels) -- confirm against the recorded images)
IMAGE_SHAPE = (120,160, 3)

### Résumé

In [None]:
def build_log_tag(*args, **kwargs):
    """
    Build a bracketed tag string that makes log lines easy to parse.

    Positional values are rendered as `[value]`, keyword values as
    `[key="value"]`; positional tags come first, each group in call order.

    Example: `build_log_tag("arg1", "arg2", key1="value1", key2="value2")`
    returns `[arg1][arg2][key1="value1"][key2="value2"]`
    """
    positional_tags = ["[" + str(value) + "]" for value in args]
    keyword_tags = [
        "[" + str(key) + "=" + "\"" + str(value) + "\"]"
        for key, value in kwargs.items()
    ]
    return "".join(positional_tags + keyword_tags)

In [None]:
# Print every configuration value as a `[NAME="value"]` log tag, one per line
for _tag_name, _tag_value in (
    ("TIME", TIME),
    ("STORAGE_ROOT_DIR", STORAGE_ROOT_DIR),
    ("MODEL_NAME", MODEL_NAME),
    ("SAVE_PATH", SAVE_PATH),
    ("CHECKPOINT_PATH", CHECKPOINT_PATH),
    ("CHECKPOINT_FILEPATH", CHECKPOINT_FILEPATH),
    ("ROOT_TENSORLOG_PATH", ROOT_TENSORLOG_PATH),
    ("DATASET_NAME", DATASET_NAME),
    ("DATASET_FILE_PATH", DATASET_FILE_PATH),
    ("IMAGE_PATH", IMAGE_PATH),
    ("DATASET_LABEL_FILENAME", DATASET_LABEL_FILENAME),
    ("DATASET_LABEL_PATH", DATASET_LABEL_PATH),
    ("DATASET_IMAGE_PATH", DATASET_IMAGE_PATH),
    ("BATCH_SIZE", BATCH_SIZE),
    ("SPLIT_VALIDATION", SPLIT_VALIDATION),
    ("SPLIT_TEST", SPLIT_TEST),
    ("IMAGE_SHAPE", IMAGE_SHAPE),
):
    print(build_log_tag(**{_tag_name: _tag_value}))

## Extraire la dataset

On convertit chaque ligne du fichier *.eslr (envoyée par le serveur) en :
- une image qui sera stockée dans le dossier `<DATASET_NAME>/<IMAGE_PATH>`
- une ligne dans le fichier `label.csv` avec toutes les infos (reliées aux images par leur `path`)

In [None]:
class ESLRExtractor:
  """
  Extract a *.eslr recording into image files plus a CSV label file.

  Each line of the .eslr file is parsed as a JSON object; only the objects
  whose "msg_type" is "telemetry" are extracted.
  """

  def __init__(self, eslr_path):
    """
    :param eslr_path: path of the .eslr file to extract
    :raises FileNotFoundError: if `eslr_path` does not exist
    """
    self.eslr_path = eslr_path
    if not os.path.exists(self.eslr_path):
      # FileNotFoundError subclasses Exception, so `except Exception` callers still work
      raise FileNotFoundError("ESLR File not found !")

  def extract(self, label_path, images_path, image_ext = ".jpeg"):
    """
    Write one image per telemetry line and one CSV row describing it.

    Nothing is done (apart from an info message) when `images_path` already
    exists.

    :param label_path: path of the CSV label file to write
    :param images_path: directory that receives the decoded images
    :param image_ext: extension appended to each image file name
    """
    if os.path.exists(images_path):
      # NOTE(review): only the presence of the images directory is checked, so an
      # interrupted extraction is also reported as already done.
      print("[INFO] .eslr is already extracted !")
      return
    # Create the directory that will hold every image extracted from the .eslr
    os.makedirs(images_path, exist_ok=True)

    # The CSV header can only be built once the first telemetry line has been read
    label_head_list = None

    # Both files are managed by `with` so they are closed even if a line fails to decode
    with open(label_path, "w") as label_file, open(self.eslr_path, "r") as dataset_file:
      for i, line in enumerate(tqdm(dataset_file)):
        # Skip blank lines instead of failing in json.loads
        if not line.strip():
          continue
        data_line = json.loads(line)
        if data_line["msg_type"] != "telemetry":
          continue
        if label_head_list is None:
          # Header: 'path' first, then every telemetry key except the type and the image payload
          label_head_list = ['path'] + [k for k in data_line if k not in ("msg_type", "image")]
          label_file.write(",".join(label_head_list) + "\n")
        # The image file is named after the index of the line in the .eslr file
        image_focused_path = os.path.join(images_path, str(i) + image_ext)
        data_line['path'] = image_focused_path
        # Decode the base64 image payload and save it to disk
        Image.open(BytesIO(base64.b64decode(data_line["image"]))).save(image_focused_path)
        # Write the row; 0 is the default for header keys missing from this line
        data_list_to_write = [str(data_line.get(k, 0)) for k in label_head_list]
        label_file.write(",".join(data_list_to_write) + "\n")

  @staticmethod
  def read_csv(images_path):
    """
    Load a CSV file into a pandas DataFrame.

    :param images_path: path of the CSV file to read (the caller in this
                        notebook passes the label file path)
    :return: pandas DataFrame
    """
    return pd.read_csv(images_path)

In [None]:
# Extract the .eslr recording (skipped when the images directory already exists),
# then load the label CSV into a DataFrame
eslr_extractor = ESLRExtractor(DATASET_FILE_PATH)
eslr_extractor.extract(label_path = DATASET_LABEL_PATH, images_path = DATASET_IMAGE_PATH)
raw_data = eslr_extractor.read_csv(DATASET_LABEL_PATH)

In [None]:
# Display the label table loaded from the CSV
raw_data

In [None]:
# Plot histograms of the label columns (pandas DataFrame.hist)
raw_data.hist(figsize=(20,20))

## Préparer la dataset

### Split en 3 jeux : Train, Test et Validation

In [None]:
# Hold out the validation rows first, then take the test rows from what remains
train_and_test_set, validation_set = train_test_split(
    raw_data, test_size = SPLIT_VALIDATION, shuffle = True)
train_set, test_set = train_test_split(
    train_and_test_set, test_size = SPLIT_TEST, shuffle = True)

# Row counts of each split (used later as shuffle buffer sizes)
NBR_ROW_TRAIN_SET = len(train_set)
NBR_ROW_TEST_SET = len(test_set)
NBR_ROW_VALIDATION_SET = len(validation_set)

for _split in (train_set, test_set, validation_set):
    print(_split)

### Traitements avec TensorData

#### Donkey Car Data Augmentator

In [None]:
class DonkeyCarDataAugmentator:
  """
  Collection of static helpers to normalize images and apply random
  augmentations (brightness, contrast, saturation, noise, horizontal flip).
  """
  @staticmethod
  def normalize(img):
    """Rescale pixel values with `img / 127.5 - 1.0` (0 maps to -1.0, 255 maps to 1.0)."""
    return (img / 127.5) - 1.0
  
  @staticmethod
  def unnormalize(img):
    """Inverse of `normalize`: `(img + 1.0) * 127.5`."""
    return (img + 1.0) * 127.5

  @staticmethod
  def clip_image(img):
    """Clip every value of `img` to the range [0, 255]."""
    return tf.clip_by_value(img, clip_value_min=0, clip_value_max=255)

  @staticmethod
  def noiser(img, mean, stddev):
    """
    Add normally distributed noise to `img`.

    The noise is added in normalized space; the result is then un-normalized
    and clipped to [0, 255].

    :param img: image tensor (un-normalized values)
    :param mean: mean of the noise
    :param stddev: standard deviation of the noise
    :return: noisy image tensor
    """
    transformed_img = DonkeyCarDataAugmentator.normalize(img)
    noise_img = tf.random.normal(shape=tf.shape(img), mean=mean, stddev=stddev)
    transformed_img = tf.add(transformed_img, noise_img)
    transformed_img = DonkeyCarDataAugmentator.unnormalize(transformed_img)
    transformed_img = DonkeyCarDataAugmentator.clip_image(transformed_img)
    return transformed_img

  @staticmethod
  def transform(img, angle, ratio_augmentation = 0.75, ratio_flip_left_right = 0.5, max_brightness = 50,
                lower_contrast = 0.75, upper_contrast = 1.5, lower_saturation = 0.0, 
                upper_saturation = 2, mean_noise = 0.0, max_noise = 0.3):
    """
    Randomly augment an image and, independently, randomly flip it left/right.

    :param img: image tensor to transform
    :param angle: value paired with the image; multiplied by -1 when the image is flipped
    :param ratio_augmentation: the color/noise augmentation is applied when a
                               uniform draw in [0, 1) is <= this value
    :param ratio_flip_left_right: the flip is applied when a second uniform
                                  draw in [0, 1) is <= this value
    :param max_brightness: `max_delta` passed to tf.image.random_brightness
    :param lower_contrast: lower bound passed to tf.image.random_contrast
    :param upper_contrast: upper bound passed to tf.image.random_contrast
    :param lower_saturation: lower bound passed to tf.image.random_saturation
    :param upper_saturation: upper bound passed to tf.image.random_saturation
    :param mean_noise: mean of the added noise
    :param max_noise: upper bound of the uniformly drawn noise standard deviation
    :return: tuple (transformed image, possibly negated angle)
    """
    
    # NOTE(review): the two `if` conditions below compare tensors; presumably this
    # relies on the function being traced (e.g. through Dataset.map) -- confirm
    # before calling it in another context.
    random_do_augmentation = tf.random.uniform(shape=[], minval = 0., maxval = 1., dtype=tf.float32)
    if random_do_augmentation <= ratio_augmentation:
      # The image is clipped back to [0, 255] after each color transformation
      transformed_img = tf.image.random_brightness(img, max_delta = max_brightness)
      transformed_img = DonkeyCarDataAugmentator.clip_image(transformed_img)

      transformed_img = tf.image.random_contrast(transformed_img, lower = lower_contrast, upper = upper_contrast)
      transformed_img = DonkeyCarDataAugmentator.clip_image(transformed_img)
      
      transformed_img = tf.image.random_saturation(transformed_img, lower = lower_saturation, upper = upper_saturation)
      transformed_img = DonkeyCarDataAugmentator.clip_image(transformed_img)

      # The noise standard deviation is itself drawn uniformly in [0, max_noise)
      random_noise_gain = tf.random.uniform(shape=[], minval = 0.0, maxval = max_noise, dtype=tf.float32)
      transformed_img = DonkeyCarDataAugmentator.noiser(transformed_img, mean_noise, random_noise_gain)
    else:
      transformed_img = img
    
    random_do_flip = tf.random.uniform(shape=[], minval = 0., maxval = 1., dtype=tf.float32)
    if random_do_flip <= ratio_flip_left_right:
      transformed_img = tf.image.flip_left_right(transformed_img)
      # Negate the angle so that it stays consistent with the mirrored image
      angle *= -1
    
    return transformed_img, angle

#### Donkey Car Tensor Builder

In [None]:
class DonkeyCarTensorBuilder:
  """
  Build tf.data pipelines from a pandas DataFrame: column selection, image
  loading, random augmentation and normalization.

  Every dataset element is a pair (inputs dict, outputs dict). The image is
  stored under the 'input' key of the inputs dict (first as a path, then as
  the decoded image after `load_image`).
  """

  def __init__(self, input_label = None, output_label = None, num_parallel_calls = 3, image_shape = (120, 160, 3)):
    """
    :param input_label: dict mapping an input name to the list of DataFrame
                        columns feeding it; defaults to {'input': ['path']}
    :param output_label: dict mapping an output name to the list of DataFrame
                         columns feeding it; defaults to {'angle': ['user_angle']}
    :param num_parallel_calls: value forwarded to every `Dataset.map` call
    :param image_shape: shape images are reshaped to
    """
    # The default dicts are created per instance: dict literals used as default
    # arguments would be shared (and mutable) across every instance.
    self.input_label = {'input': ['path']} if input_label is None else input_label
    self.output_label = {'angle': ['user_angle']} if output_label is None else output_label

    self.num_parallel_calls = num_parallel_calls
    self.image_shape = image_shape

  @staticmethod
  def _select_columns(dataset, label_map):
    """Map each name of `label_map` to its column(s) of `dataset`; names with an empty column list are skipped."""
    selected = {}
    for name, columns in label_map.items():
      if len(columns) == 0:
        continue
      # A single label selects one column, several labels select a sub-table
      selected[name] = dataset[columns[0]] if len(columns) == 1 else dataset[columns]
    return selected

  def dataset_to_tensor(self, dataset):
    """
    Convert a DataFrame into a tf.data.Dataset of (inputs dict, outputs dict).

    With input_label = {'input': ['path'], 'speed_accel_gyro': ['speed', 'accel_x', ...]}
    and output_label = {'angle': ['user_angle']}, the slices are taken from
    {"input": dataset['path'], "speed_accel_gyro": dataset[['speed', 'accel_x', ...]]}, {"angle": dataset['user_angle']}

    :param dataset: pandas DataFrame holding the configured columns
    :return: tf.data.Dataset
    """
    input_dict = self._select_columns(dataset, self.input_label)
    output_dict = self._select_columns(dataset, self.output_label)
    return tf.data.Dataset.from_tensor_slices((input_dict, output_dict))

  def load_image(self, dataset_tensor):
    """
    Replace the path stored under inputs['input'] with the decoded image.

    The file is decoded as a 3-channel JPEG, cast to float32 and reshaped to
    `image_shape`.

    :param dataset_tensor: dataset whose inputs['input'] is a file path
    :return: dataset whose inputs['input'] is the image
    """
    def load_image_map_func(inputs, outputs):
      loaded_inputs = dict(inputs)

      img = tf.io.read_file(inputs['input'])
      img = tf.cast(tf.image.decode_jpeg(img, channels=3), dtype=tf.float32)
      img = tf.reshape(img, self.image_shape)
      loaded_inputs['input'] = img

      return loaded_inputs, outputs
    return dataset_tensor.map(load_image_map_func, num_parallel_calls = self.num_parallel_calls)

  def make_augmentation(self, dataset_tensor, ratio_augmentation = 0.5, ratio_flip_left_right = 0.5, max_brightness = 50,
                  lower_contrast = 0.75, upper_contrast = 1.5, lower_saturation = 0.0,
                  upper_saturation = 2, mean_noise = 0.0, max_noise = 0.3):
    """
    Apply DonkeyCarDataAugmentator.transform to every element.

    The image is read from inputs['input'] and the angle from
    outputs['angle']; both are replaced by the transformed values. All the
    keyword parameters are forwarded unchanged to `transform`.

    :param dataset_tensor: dataset whose inputs['input'] is an image
    :return: augmented dataset
    """
    def augmentation_map_func(inputs, outputs):
      transformed_inputs = dict(inputs)
      transformed_outputs = dict(outputs)
      img = inputs['input']
      angle = outputs['angle']

      transformed_img, transformed_angle = DonkeyCarDataAugmentator.transform(img,
                                                                  angle = angle,
                                                                  ratio_augmentation = ratio_augmentation,
                                                                  ratio_flip_left_right = ratio_flip_left_right,
                                                                  max_brightness = max_brightness,
                                                                  lower_contrast = lower_contrast,
                                                                  upper_contrast = upper_contrast,
                                                                  lower_saturation = lower_saturation,
                                                                  upper_saturation = upper_saturation,
                                                                  mean_noise = mean_noise,
                                                                  max_noise = max_noise)

      transformed_img = tf.reshape(transformed_img, self.image_shape)
      transformed_inputs['input'] = transformed_img
      transformed_outputs['angle'] = transformed_angle
      return transformed_inputs, transformed_outputs
    return dataset_tensor.map(augmentation_map_func, num_parallel_calls = self.num_parallel_calls)

  def normalize_dataset(self, dataset_tensor):
    """
    Normalize the image stored under inputs['input'] of every element.

    :param dataset_tensor: dataset whose inputs['input'] is an image
    :return: dataset with normalized images reshaped to `image_shape`
    """
    def normalize_map_func(inputs, outputs):
      transformed_inputs = dict(inputs)
      transformed_img = inputs['input']
      transformed_img = DonkeyCarDataAugmentator.normalize(transformed_img)
      transformed_img = tf.reshape(transformed_img, self.image_shape)
      transformed_inputs['input'] = transformed_img
      return transformed_inputs, outputs
    return dataset_tensor.map(normalize_map_func, num_parallel_calls = self.num_parallel_calls)


#### Préparer la dataset
Shape des IO, Convertir en Tensor, Preprocess, Augmentation possiblement

In [None]:
### <<< CONFIG >>> ###
input_label = {'input':['path'], 'speed_accel_gyro':['speed', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']}
output_label = {'angle':['user_angle']}

tensor_builder = DonkeyCarTensorBuilder(input_label = input_label,
                                        output_label = output_label,
                                        num_parallel_calls = 3,
                                        image_shape = IMAGE_SHAPE)

# Convert the pandas DataFrames to tf.data datasets
train_tensor = tensor_builder.dataset_to_tensor(train_set)
test_tensor = tensor_builder.dataset_to_tensor(test_set)
validation_tensor = tensor_builder.dataset_to_tensor(validation_set)

# Shuffle (and optionally repeat) BEFORE loading the images: the shuffle buffer
# holds as many elements as the split has rows, so at this stage it only stores
# paths and scalars instead of decoded float32 images.
# train_tensor can be repeated to get more augmented samples
train_tensor = train_tensor.shuffle(NBR_ROW_TRAIN_SET)#.repeat(2) ### <<< CONFIG >>> (repeat) ###
test_tensor = test_tensor.shuffle(NBR_ROW_TEST_SET)
validation_tensor = validation_tensor.shuffle(NBR_ROW_VALIDATION_SET)

# Load the images, i.e. read each path and store the decoded image in its place
train_tensor = tensor_builder.load_image(train_tensor)
test_tensor = tensor_builder.load_image(test_tensor)
validation_tensor = tensor_builder.load_image(validation_tensor)

### <<< CONFIG >>> (comment or not) ###
# Augmentation (disabled: the call is wrapped in a string literal)
"""
train_tensor = tensor_builder.make_augmentation(train_tensor, 
                                                ratio_augmentation = 0.5, 
                                                ratio_flip_left_right = 0.5, 
                                                max_brightness = 50,
                                                lower_contrast = 0.75, 
                                                upper_contrast = 1.5, 
                                                lower_saturation = 0.0, 
                                                upper_saturation = 2, 
                                                mean_noise = 0.0, 
                                                max_noise = 0.3)
"""
# Normalize the images, then batch and prefetch
train_tensor = tensor_builder.normalize_dataset(train_tensor).batch(BATCH_SIZE).prefetch(2)
test_tensor = tensor_builder.normalize_dataset(test_tensor).batch(BATCH_SIZE).prefetch(2)
validation_tensor = tensor_builder.normalize_dataset(validation_tensor).batch(BATCH_SIZE).prefetch(2)