# Notebook for the training of the models

In [None]:
import tensorflow as tf

from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
import skimage.measure
from datetime import datetime

import random
import copy
import math
import cv2
import os

# Variables definition

In [None]:
# Side length, in pixels, of the square crops fed to the network.
div = 64
# NOTE(review): only referenced by the commented-out file names further down.
step = 32 // 4

# Scenes used for the leave-one-out folds. 'stanford_gates1' sits at the
# front of the list rather than in its original slot (left commented out).
map_list = [
    'stanford_gates1',
    'stanford_bookstore0',
    'stanford_bookstore4',
    'stanford_coupa0',
    'stanford_coupa1',
    'stanford_coupa2',
    'stanford_coupa3',
    'stanford_gates0',
    # 'stanford_gates1',
    'stanford_gates2',
    'stanford_gates3',
    'stanford_hyang2',
    'stanford_hyang3',
    'stanford_hyang4',
    'stanford_hyang10',
    'stanford_little3',
    'stanford_nexus0',
    'stanford_nexus1',
    'stanford_deathCircle0',
]

# Semantic labels; the number of labels is the number of input channels.
# sem_dict = ['cash', 'entrance', 'light', 'sit', 'stairs', 'trash', 'tree','restricted','grass','intersection','shadow']
sem_dict = [
    'bicycle_road',
    'building',
    'entrance',
    'grass',
    'obstacle',
    'parking',
    'pedestrian_road',
    'tree',
    'vehicle_road',
    'sitting_area',
    'stairs',
    'intersection_zone',
    'shaded_area',
]
chans = len(sem_dict)

# 256-entry 8-bit lookup table obtained by piecewise-linear interpolation
# between the (lut_in, lut_out) control points.
lut_in = [0, 20, 50, 100, 150, 255]
lut_out = [0, 100, 180, 220, 240, 255]
lut_8u = np.interp(np.arange(256), lut_in, lut_out).astype(np.uint8)

spec = 'stan'
train_data_dir = 'training_data/64crop_size/13labels/1red/'

# Day-of-month followed by the lower-cased abbreviated month name.
now = datetime.now()
date = now.strftime("%d%b").lower()
print(date)

# filename = 'IRI_models/'+date+'_'+str(step)+'px_steps_'+spec+'_paths'
# filename1 = 'IRI_models/'+date+'_'+str(step)+'px_steps_'+spec+'_vels'
# filename2 = 'IRI_models/'+date+'_'+str(step)+'px_steps_'+spec+'_stops'
filename = 'IRI_models/final7/13labels'

# Loading data and training model

In [None]:
# To debug a single fold, replace the loop header below with:
#     index = 5
#     map_name = map_list[index]


def _read_shaped_csv(csv_path, n_dims):
    """Load an array stored as flat text with its shape in the header.

    The first ``n_dims`` values of the file are read as the array shape and
    the remaining values as the flattened array contents.
    """
    flat = np.loadtxt(csv_path)
    shape = flat[:n_dims].astype(int)
    return np.reshape(flat[n_dims:], shape)


def _load_maps(map_names):
    """Load and stack the inputs and targets of every map in ``map_names``.

    Returns ``(x, y)`` where ``x`` has shape (N, div, div, chans) and ``y``
    has shape (N, div, div). Each map's target is divided by its own maximum.

    Raises FileNotFoundError when a map directory is missing.
    """
    # Seeding with empty arrays keeps the result well-defined (and the
    # trailing dimensions validated) even for an empty ``map_names``.
    x_parts = [np.empty((0, div, div, chans))]
    y_parts = [np.empty((0, div, div))]
    for name in map_names:
        map_dir = train_data_dir + name
        if not os.path.exists(map_dir):
            raise FileNotFoundError(f'data_dir {map_dir} does not exist')

        x_parts.append(_read_shaped_csv(map_dir + '/train_X.csv', 4))
        # x_map = x_map[:, :, :, :-2]

        y_map = _read_shaped_csv(map_dir + '/train_Y.csv', 3)
        peak = np.max(y_map)
        # An all-zero target would otherwise become NaN (0/0) and poison the
        # loss; leave it as zeros instead.
        if peak != 0:
            y_map = y_map / peak
        y_parts.append(y_map)

    # Concatenate once instead of re-copying the accumulated array per map.
    return np.concatenate(x_parts, axis=0), np.concatenate(y_parts, axis=0)


# Leave-one-map-out: every map except the first one in map_list is held out
# once as the test map.
for index, map_name in enumerate(map_list[1:], start=1):
    print(f'Processing map {map_name} ({index+1}/{len(map_list)})')
    train_list = map_list.copy()
    test_list = [train_list.pop(index)]
    random.shuffle(train_list)
    # Three validation maps taken from the shuffled remainder; the modulus is
    # re-evaluated after every pop because the list shrinks.
    val_list = [train_list.pop(index % len(train_list)) for _ in range(3)]

    # train_list = [train_list.pop()]
    # val_list = [val_list.pop()]

    print(f'train list: {train_list}')
    print(f'test list: {test_list}')
    print(f'val list: {val_list}')

    train_x, train_y = _load_maps(train_list)
    val_x, val_y = _load_maps(val_list)

    # Create a MirroredStrategy and report how many replicas it will keep in
    # sync. NOTE(review): a new strategy is built on every loop iteration --
    # confirm whether it could be created once before the loop.
    strategy = tf.distribute.MirroredStrategy()
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

    # Open a strategy scope: the model, optimizer and compile() call below are
    # all created inside it.
    with strategy.scope():

        
        # Hyper-parameter tables, one column per architecture variant:
        # CNN-11  |  CNN-21  |  CNN-31
        fil_array = [4,8,8]                         #Num filters first conv: 4, 8 or 8
        lay_array = [1,3,5]                         #Layers per dense block: 1, 3 or 5
        learn_array = [8.71e-5,3.72e-4,1.51e-4]     #Learning rates
        decaylearn_array = [0.9984,0.9984,0.9985]   #Learning rate decays
        wdecay_array = [1.11e-6,5.53e-7,4.58e-5]    #Weight decays
        dropout_array = [0.307,0.120,0.349]         #Dropout probability

        # Index of the variant to train (column of the tables above).
        arc = 2                                     #[0,1,2]

        # `filters` is a running value: it is increased by `growth_rate`
        # further down while the encoder is built.
        filters = fil_array[arc]
        layers_in_dense = lay_array[arc]
        # Number of feature maps each dense layer adds.
        growth_rate = 12

        # Fixes TensorFlow's global seed only; the Python `random.shuffle`
        # that picks the validation maps is not seeded in this notebook.
        tf.random.set_seed(0)

        def dense_factor(inputs):
            """Apply one dense layer: BatchNorm -> 3x3 ReLU conv -> dropout.

            Returns a tensor with ``growth_rate`` channels and the same
            spatial size as ``inputs`` (stride 1, 'same' padding).
            """
            normalized = layers.BatchNormalization(axis=3)(inputs)
            features = layers.Conv2D(
                growth_rate,
                (3, 3),
                strides=1,
                padding='same',
                activation='relu',
            )(normalized)

            # The two permutations are exact inverses of each other, so the
            # tensor layout is unchanged after the second transpose.
            # NOTE(review): with an element-wise Dropout in between they look
            # redundant -- confirm before removing.
            features = tf.transpose(features, perm=[0, 2, 3, 1])
            features = layers.Dropout(dropout_array[arc])(features)
            features = tf.transpose(features, perm=[0, 3, 1, 2])

            return features

        def dense_block(inputs, upsampling):
            """Chain ``layers_in_dense`` dense layers with dense connectivity.

            Every layer receives the block input concatenated (on the channel
            axis) with the outputs of all previous layers of the block.

            When ``upsampling`` is true only the newly produced feature maps
            are returned; otherwise the block input concatenated with all new
            feature maps is returned.
            """
            new_features = []
            stacked = inputs
            for layer_idx in range(layers_in_dense):
                fresh = dense_factor(stacked)
                stacked = layers.concatenate([stacked, fresh], axis=3)
                new_features = (
                    fresh
                    if layer_idx == 0
                    else layers.concatenate([new_features, fresh], axis=3)
                )

            return new_features if upsampling else stacked

        # Disabled input normalization:
        # norm_layer = layers.Normalization(axis=None)
        # norm_layer.adapt(train_x)


        # One div x div crop with one channel per semantic label.
        input_img = layers.Input(shape=(div, div, chans))

        # Disabled data augmentation:
        # augment_input = norm_layer(input_img)
        # augment_input = layers.RandomRotation(factor=0.5)(augment_input)
        # augment_input = layers.RandomFlip(mode='horizontal_and_vertical')(augment_input)
        # augment_input = layers.RandomZoom(height_factor=0.2, width_factor=0.2)(augment_input)

        # --- Encoder -------------------------------------------------------
        # Each stage: 1x1 conv, dense block (kept in layerX / layerY for the
        # skip connections), then a 2x2 max-pool that halves the resolution.
        # `filters` grows by `growth_rate` before each following 1x1 conv.
        lays = layers.Conv2D(filters, kernel_size=(1, 1), strides=1, padding='same', activation='relu')(input_img)
        layerX = dense_block(lays,False)
        filters += growth_rate
        lays = layers.Conv2D(filters, kernel_size=(1, 1), strides=1, padding='same', activation='relu')(layerX)
        lays = layers.MaxPooling2D((2, 2), strides=2)(lays)
        layerY = dense_block(lays,False)
        filters += growth_rate
        lays = layers.Conv2D(filters, kernel_size=(1, 1), strides=1, padding='same', activation='relu')(layerY)
        lays = layers.MaxPooling2D((2, 2), strides=2)(lays)

        # --- Bottleneck: only the newly produced feature maps are kept ------
        lays = dense_block(lays,True)

        # --- Decoder -------------------------------------------------------
        # Each stride-2 transposed conv doubles the resolution; its output is
        # concatenated with the encoder output of the same resolution.
        filters = growth_rate * layers_in_dense
        lays = layers.Conv2DTranspose(filters, kernel_size=(3, 3), strides=2, padding='same')(lays)
        lays = layers.concatenate([layerY, lays], axis=3)
        lays = dense_block(lays,True)
        lays = layers.Conv2DTranspose(filters, kernel_size=(3, 3), strides=2, padding='same')(lays)
        lays = layers.concatenate([layerX, lays], axis=3)
        lays = dense_block(lays,False)
        # Head: reduce to 2 channels, then a softmax Dense layer producing the
        # 2-channel per-pixel output that is trained against the stacked
        # (y, 1 - y) targets passed to fit().
        # NOTE(review): a ReLU sits directly in front of the softmax layer --
        # confirm this is intended.
        lays = layers.Conv2D(2, kernel_size=(1, 1), strides=1, padding='same', activation='relu')(lays)
        lays = layers.Dense(units=2, activation='softmax')(lays)

        model = models.Model(input_img, lays)

        # model.summary()

        def get_lr_metric(optimizer):
            """Build a metric function that reports the optimizer's learning rate.

            The returned function has the ``(y_true, y_pred)`` signature Keras
            expects from a metric and ignores both arguments. Its name, ``lr``,
            is the name under which the metric would be reported.
            """
            def lr(y_true, y_pred):
                # Older optimizers expose the decayed rate through the private
                # `_decayed_lr` helper. Optimizers without it are read through
                # the public `learning_rate` attribute instead, so the metric
                # does not fail with an AttributeError on them.
                legacy_decayed_lr = getattr(optimizer, '_decayed_lr', None)
                if legacy_decayed_lr is not None:
                    return legacy_decayed_lr(tf.float32)
                return tf.cast(optimizer.learning_rate, tf.float32)
            return lr

        # Exponentially decaying learning rate: starts at the value selected
        # for this architecture and is multiplied by the decay rate every
        # 10 optimizer steps.
        lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
                            initial_learning_rate=learn_array[arc],
                            decay_steps=10,
                            decay_rate=decaylearn_array[arc])

        # Adam with the scheduled learning rate and the weight decay selected
        # for this architecture.
        opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule,weight_decay=wdecay_array[arc])
        # NOTE(review): lr_metric is built here but is not listed in `metrics`
        # below, so it is never evaluated during training.
        lr_metric = get_lr_metric(opt)

        model.compile(
            optimizer=opt,
            # Loss function to minimize
            loss='binary_crossentropy',
            # List of metrics to monitor
            metrics=['mean_squared_error'],
        ) 

    # Callbacks: early stopping with a patience of 15 epochs, plus TensorBoard
    # logging into ./logs (the same directory for every fold).
    stop_early = tf.keras.callbacks.EarlyStopping(patience=15)
    # tf.keras.callbacks.ModelCheckpoint(filepath='IRI_models/model.{epoch:02d}-{val_loss:.2f}.h5'),
    log_to_tensorboard = tf.keras.callbacks.TensorBoard(log_dir='./logs')
    my_callbacks = [stop_early, log_to_tensorboard]

    # Targets are stacked as two channels, (y, 1 - y), to match the model's
    # 2-channel softmax output.
    fit_options = dict(
        batch_size=300,
        epochs=100,  # 100
        callbacks=my_callbacks,
    )
    history = model.fit(
        x=train_x,
        y=np.stack((train_y, 1 - train_y), axis=3),
        validation_data=(val_x, np.stack((val_y, 1 - val_y), axis=3)),
        **fit_options,
    )

    # One saved model per fold, suffixed with the name of the held-out map.
    # if not os.path.exists(filename):
    model.save(f'{filename}_{test_list[0]}')

In [None]:
# aux_x = validation_x
# aux_y = validation_y

# output = model.predict(aux_x,verbose=0)[:,:,:,0]

# n = num_validation  # How many images we will display
# plt.figure(figsize=(25, 5))
# for i in range(n):
#     # Display original
#     ax = plt.subplot(2, n, i + 1)
#     plt.imshow(np.multiply(np.stack((aux_x[i,:,:,0],aux_x[i,:,:,0],aux_x[i,:,:,0]),axis=2), np.stack((np.full(aux_y[i].shape,1),1-aux_y[i],1-aux_y[i]),axis=2)))
#     ax.get_xaxis().set_visible(False)
#     ax.get_yaxis().set_visible(False)

#     # Display reconstruction
#     ax = plt.subplot(2, n, i + 1 + n)
#     plt.imshow(np.multiply(np.stack((aux_x[i,:,:,0],aux_x[i,:,:,0],aux_x[i,:,:,0]),axis=2), np.stack((np.full(output[i].shape,1),1-output[i],1-output[i]),axis=2)))
#     ax.get_xaxis().set_visible(False)
#     ax.get_yaxis().set_visible(False)
# # Display original
# plt.show()

In [None]:
# # test_loss, test_acc = model.evaluate(aux_x,  np.stack((val_y,1-val_y),axis=3), verbose=2)

# plt.plot(history.history['mean_squared_error'], label='mean_squared_error')
# plt.plot(history.history['val_mean_squared_error'], label = 'val_mean_squared_error')
# plt.xlabel('Epoch')
# plt.ylabel('MSE')
# plt.legend(loc='upper right')

# plt.show()

# plt.plot(history.history['loss'], label = 'loss')
# plt.plot(history.history['val_loss'], label = 'val_loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend(loc='upper right')

# plt.show()

# NOTES

- [x] Add benches, tables & chairs
- [x] Add also velocity information (two different maps for velocities and regions of stop)
- [x]    80% of the time people pass the stairs and do not sit
- [x]    white noise velocity 
- [ ] Add heading of motion (directional velocity)
- [ ] MAYBE Time of the day

- [ ] mobility: main cues they are looking for (narrow places, which other criteria?) "Criterion": what do we need to look for
(from computer vision)

- [ ] collect data with following robot/static robot

- Is adding the velocity and stop information enough novelty?
- Could the same network handle everything (all outputs: occupancy, velocity, stops)? The first try suggests not (only with occupancy and velocity), but it could be my fault: binary crossentropy!!!
- Could the same network handle people, cars and bicycles?
- Paper "Learning Occupancy Priors of Human Motion From Semantic Maps of Urban Environments" uses KL-divergence to compare to baselines.
    They say that "only a few methods explicitly highlight the performance in new environments outside the training scenario"
    Their future work section: "Furthermore, we plan to validate semapp with on-the-fly semantics estimation and extend it to first-person view for application in automated driving to infer potential pedestrians’ entrance points to the road."


- [x] Add barriers: completely close passage or partially
- [x] validate w/ simulation and exp (stanford dataset)
- [ ] check reference of new paper

- Mobility environment cues:
- what is important in mobility: velocity, narrow spaces, individual or group
- STOP is important