In [None]:
import logging
from math import floor

from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.models import Model, load_model
from tensorflow.python.keras.layers import Convolution2D
from tensorflow.python.keras.layers import Dropout, Flatten, Dense
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from keras_preprocessing.image import ImageDataGenerator

import matplotlib.pyplot as plt
import datetime

try:
    from pathlib import Path
except ImportError:
    from pathlib2 import Path

import pandas as pd
import os
import numpy as np

logger = logging.getLogger(__name__)

%matplotlib qt

In [None]:

# Size (in pixels) that images are resized to before being fed to the network.
IMAGE_INPUT_WIDTH=160
IMAGE_INPUT_HEIGHT=120
# Number of pixel rows to discard from the top of the image after scaling.
# NOTE(review): not referenced anywhere in the visible part of this notebook - confirm it is still needed.
TOP_MARGIN_IN_PIXELS=8


# Column names expected in the recorded CSV files (checked by read_csv).
col_img = 'image'
col_steering = 'msg.steering'
col_throttle = 'msg.throttle'
col_image_ts = 'image_timestamp'
# NOTE: the identifier below is misspelled ("steernig"); read_csv refers to it by this exact name.
col_steernig_ts = 'steering_timestamp'
# Column names that this notebook adds during preprocessing.
col_steering_scaled = 'steering_scaled'
col_throttle_scaled = 'throttle_scaled'
col_validation = 'validation'

# Root directory that holds one sub-directory per recording session.
base_dir = Path("./data/")
def read_csv(path):
    """Load one recording CSV and verify that it has the columns this notebook relies on.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed file, unmodified.

    Raises
    ------
    AssertionError
        If a required column is absent. The message names the first missing
        column (in the order image, steering, throttle, image timestamp,
        steering timestamp) and the offending file.
    """
    frame = pd.read_csv(path)
    required = (col_img, col_steering, col_throttle, col_image_ts, col_steernig_ts)
    absent = [column for column in required if column not in frame.columns]
    # The message is only evaluated when the assertion fails, so absent[0] always exists here.
    assert not absent, f'missing {absent[0]} in {path}'
    return frame

# Read files and concatenate them

In [None]:
def load_and_merge_csvs(paths_csv):
    """Read several CSV files with ``read_csv`` and stack them into one DataFrame.

    Parameters
    ----------
    paths_csv : iterable of str or path-like
        Files to load, in the order they should be concatenated. Any iterable
        is accepted (it is materialised into a list first).

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If ``paths_csv`` is empty - typically a wrong directory or glob pattern.
    """
    # Materialise once so that generators (e.g. a raw Path.glob) support len() and re-iteration.
    paths_csv = list(paths_csv)
    print('Loading {} csvs:\n{}'.format(len(paths_csv), '\n'.join(str(p) for p in paths_csv)))
    if not paths_csv:
        # pd.concat would fail anyway, but with an opaque "No objects to concatenate" message.
        raise ValueError('No CSV files to load: the list of paths is empty '
                         '(check the data directory and the glob pattern)')
    csvs = [read_csv(str(path)) for path in paths_csv]
    df = pd.concat(csvs, axis=0, ignore_index=True)
    print(f'loaded {df.shape}')
    return df

# Names of the recording sessions; each one is looked up as a sub-directory of base_dir.
db_names =  [
        "sergem_robocar_20191009_v2",
        "sergem_robocar_20191106",
        "sergem_robocar_20191126_entrance",
        "sergem_robocar_20191127",
        "sergem_robocar_20191202",
    ]
# One merged DataFrame per session, built from every *.csv found in that session's directory.
# The paths are sorted before loading, so the concatenation order follows the file names.
dfs = {
    db_name: load_and_merge_csvs(sorted(base_dir.glob(f"{db_name}/*.csv")))
    for db_name in db_names
}
# df1 = load_and_merge_csvs(sorted(base_dir.glob("sergem_robocar_20191003_v2/*.csv")))
# df2 = load_and_merge_csvs(sorted(base_dir.glob("*201911*/*.csv")))

In [None]:
dfs[db_names[0]].head()

In [None]:
dfs[db_names[2]].head()

# Defining scaling
The radio PWM signal has a certain range, and the data is recorded as is. Now it is time to preprocess it to make it generic.

In [None]:
import plotly.graph_objects as go
import plotly.io as pio
pio.renderers.default = "browser"



def plot_ranges(column, val_min=None, val_zero=None, val_max=None, update_layout_kwargs={}):
    """Show a signal together with its optional min / zero / max reference curves.

    Parameters
    ----------
    column : pandas.Series
        Signal to plot; its index is used as the x axis for every trace.
    val_min, val_zero, val_max : optional
        Reference curves drawn as extra line traces named 'min', 'zero'
        (thick dashed line) and 'max'. A curve is skipped when it is None.
    update_layout_kwargs : dict
        Keyword arguments forwarded to ``Figure.update_layout`` (e.g. a title).
        The dict is only read, never modified.

    The figure is displayed with ``Figure.show()``; nothing is returned.
    """
    x_axis = column.index
    traces = [go.Scatter(x=x_axis, y=column, mode='lines')]

    # (values, extra Scatter options) in drawing order; entries without values are dropped.
    reference_curves = (
        (val_min, dict(name='min')),
        (val_zero, dict(name='zero', line=dict(width=4, dash='dash'))),
        (val_max, dict(name='max')),
    )
    traces.extend(
        go.Scatter(x=x_axis, y=values, mode='lines', **options)
        for values, options in reference_curves
        if values is not None
    )

    figure = go.Figure()
    figure.add_traces(traces)
    figure.update_layout(**update_layout_kwargs)
    figure.show()

In [None]:
def zeros_series_like(series):
    """Return ``series`` multiplied by zero.

    For a pandas Series this keeps the index and yields 0 for every finite
    entry (NaN entries stay NaN, since NaN * 0 is NaN).
    """
    zeroed = series * 0
    return zeroed

In [None]:
# where we dont drive
def find_zeros(
    df,
    approx_zero_throttle = 1500,
    approx_zero_steering = 1400,
    th_zero_throttle = 150,
    th_zero_steering = 80,
    show=False,
):
    """Detect idle stretches and derive the neutral ("zero") steering/throttle level from them.

    A row counts as idle when, simultaneously:
      * the standard deviation of steering and of throttle over a centred
        30-sample window is below 5, and
      * the centred 5-sample mean of steering / throttle lies within
        ``th_zero_steering`` / ``th_zero_throttle`` of
        ``approx_zero_steering`` / ``approx_zero_throttle``.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording containing the raw steering and throttle columns. Not modified.
    approx_zero_throttle, approx_zero_steering : number
        Rough expected neutral value of each channel.
    th_zero_throttle, th_zero_steering : number
        Maximum allowed distance between the smoothed signal and the rough neutral value.
    show : bool
        When True, plot each raw channel together with its estimated zero level.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the added columns 'idle' (boolean mask),
        'steering_zero' and 'throttle_zero'.
    """
    result = df.copy()
    steering = result[col_steering]
    throttle = result[col_throttle]

    def _is_steady(signal):
        # Hardly any variation inside a centred 30-sample window.
        return signal.rolling(30, center=True, min_periods=1).std() < 5

    def _is_near(signal, centre, tolerance):
        # Short centred moving average stays close to the expected neutral value.
        smoothed = signal.rolling(5, center=True, min_periods=1).mean()
        return (smoothed - centre).abs() < tolerance

    def _zero_level(signal, idle_mask):
        # Keep the raw value on idle rows, fill the other rows from the nearest idle row,
        # then pad whatever is still missing at either end.
        return signal.where(idle_mask > 0).interpolate(method='nearest', axis=0).ffill().bfill()

    is_idle = (
        _is_steady(steering)
        & _is_steady(throttle)
        & _is_near(steering, approx_zero_steering, th_zero_steering)
        & _is_near(throttle, approx_zero_throttle, th_zero_throttle)
    )
    result['idle'] = is_idle
    result['steering_zero'] = _zero_level(result[col_steering], is_idle)
    result['throttle_zero'] = _zero_level(result[col_throttle], is_idle)

    if show:
        for raw_column, zero_column in ((col_steering, 'steering_zero'), (col_throttle, 'throttle_zero')):
            plot_ranges(
                result[raw_column],
                None,
                result[zero_column],
            )
    return result


In [None]:
# Pick ONE recording to inspect visually; move the comment marker to look at another one.
# (Previously all five assignments ran and only the last one had any effect.)
# df = dfs['sergem_robocar_20191127'].copy()
# df = dfs['sergem_robocar_20191009_v2'].copy()
# df = dfs['sergem_robocar_20191106'].copy()
# df = dfs['sergem_robocar_20191126_entrance'].copy()
df = dfs['sergem_robocar_20191202'].copy()

find_zeros(df, show=True)


In [None]:
# Add the 'idle', 'steering_zero' and 'throttle_zero' columns to every recording.
# Each entry of dfs is replaced by the frame returned from find_zeros.
for name in db_names:
    dfs[name] = find_zeros(dfs[name], show=False)


In [None]:
def min_max_throttle(
    df,
    name='',
    show=False
):
    """Estimate the time-varying lower and upper bound of the raw throttle signal.

    Requires the 'idle' and 'throttle_zero' columns produced by ``find_zeros``.

    * ``throttle_max``: 80 % quantile over centred 100-sample windows of the
      non-idle samples that exceed ``throttle_zero + 200``. A window only
      yields a value when all 100 samples qualify; gaps are filled from the
      nearest estimate, and ``throttle_zero + 500`` is the last-resort fallback.
    * ``throttle_min``: minimum over centred 500-sample windows (at least 10
      qualifying samples) of the non-idle samples below ``throttle_zero - 50``.
      Any estimate that is not below ``throttle_zero - 200`` (including
      missing ones) is replaced by ``throttle_zero - 500``.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with raw throttle, 'idle' and 'throttle_zero'. Not modified.
    name : str
        Text appended to the plot title.
    show : bool
        When True, plot the raw throttle with its min / zero / max curves.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the added columns 'throttle_min' and 'throttle_max'.
    """
    df = df.copy()
    throttle = df[col_throttle]
    zero = df['throttle_zero']
    driving = df['idle'] == 0

    throttle_max = (
        throttle
        .where(driving)
        .where(throttle > zero + 200)
        .rolling(100, center=True, min_periods=100).quantile(0.80)
        .interpolate(method='nearest', axis=0).ffill().bfill()
        .fillna(zero + 500)
    )
    throttle_min = (
        throttle
        .where(driving)
        .where(throttle < zero - 50)
        .rolling(500, center=True, min_periods=10).min()
    )
    # NaN < x is False, so windows without an estimate also fall back to zero - 500.
    throttle_min = throttle_min.where(throttle_min < zero - 200, zero - 500)

    df['throttle_min'] = throttle_min
    df['throttle_max'] = throttle_max
    if show:
        plot_ranges(
            df[col_throttle],
            df['throttle_min'],
            df['throttle_zero'],
            df['throttle_max'],
            # Space added so the title reads "throttle <name>", like "steering <name>".
            update_layout_kwargs={'title': 'throttle ' + name}
        )
    return df

In [None]:
# df = dfs['sergem_robocar_20191127'].copy()
# df = dfs['sergem_robocar_20191009_v2'].copy()
# df = dfs['sergem_robocar_20191106'].copy()
# # df = dfs['sergem_robocar_20191126_entrance'].copy()

# _ = min_max_throttle(
#     df, 
#     show=True)

In [None]:
def min_max_steering(
    df,
    name='',
    show=False
):
    """Estimate the time-varying lower and upper bound of the raw steering signal.

    Requires the 'idle' and 'steering_zero' columns produced by ``find_zeros``.
    Both bounds are extreme quantiles (0.001 and 0.999) of the raw steering
    over centred 1000-sample windows (at least 100 samples). Estimates on idle
    rows are discarded and refilled from the nearest non-idle estimate; if
    nothing is left, ``steering_zero - 250`` / ``steering_zero + 250`` is used.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with raw steering, 'idle' and 'steering_zero'. Not modified.
    name : str
        Text appended to the plot title.
    show : bool
        When True, plot the raw steering with its min / zero / max curves.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the added columns 'steering_min' and 'steering_max'.
    """
    result = df.copy()
    not_idle = result['idle'] == 0

    def _rolling_bound(quantile, fallback):
        windowed = result[col_steering].rolling(1000, center=True, min_periods=100).quantile(quantile)
        filled = windowed.where(not_idle).interpolate(method='nearest', axis=0).ffill().bfill()
        return filled.fillna(fallback)

    result['steering_min'] = _rolling_bound(0.001, result['steering_zero'] - 250)
    result['steering_max'] = _rolling_bound(0.999, result['steering_zero'] + 250)

    if show:
        layout = {'title': 'steering ' + name}
        plot_ranges(
            result[col_steering],
            result['steering_min'],
            result['steering_zero'],
            result['steering_max'],
            update_layout_kwargs=layout,
        )
    return result

In [None]:
# df = dfs['sergem_robocar_20191127'].copy()
# df = dfs['sergem_robocar_20191009_v2'].copy()
# df = dfs['sergem_robocar_20191106'].copy()
# df = dfs['sergem_robocar_20191126_entrance'].copy()

# _ = min_max_steering(
#     df, 
#     show=True)

In [None]:
# Add throttle_min/throttle_max, then steering_min/steering_max, to every recording.
# Both helpers need the columns added by find_zeros, so that cell must have run first.
for name in db_names:
    dfs[name] = min_max_steering(min_max_throttle(dfs[name]))
    

In [None]:
def scale_minus1_plus1(x, xmin, xzero, xmax, do_clip):
    """Map a raw signal to a range centred on zero using separate positive/negative scales.

    Values above ``xzero`` are divided by ``xmax - xzero`` (so ``xmax`` maps
    to +1); all other values are divided by ``xzero - xmin`` (so ``xmin`` maps
    to -1 and ``xzero`` to 0).

    Parameters
    ----------
    x : pandas.Series
        Raw signal.
    xmin, xzero, xmax : pandas.Series or scalar
        Lower bound, neutral level and upper bound, aligned with ``x``.
    do_clip : bool
        When truthy, the result is clipped to [-1, 1].

    Returns
    -------
    pandas.Series
        The scaled signal.
    """
    offset = x - xzero
    above_zero = x > xzero
    scaled = (offset / (xmax - xzero)).where(above_zero, offset / (xzero - xmin))
    return scaled.clip(-1., 1.) if do_clip else scaled

def scale_minus1_plus1_df(df):
    """Add clipped, [-1, 1]-scaled versions of steering and throttle to a recording.

    Requires the '<channel>_min', '<channel>_zero' and '<channel>_max' columns
    for both channels. The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the scaled steering and scaled throttle columns added.
    """
    steering_scaled = scale_minus1_plus1(
        df[col_steering], df['steering_min'], df['steering_zero'], df['steering_max'], True
    )
    throttle_scaled = scale_minus1_plus1(
        df[col_throttle], df['throttle_min'], df['throttle_zero'], df['throttle_max'], True
    )
    result = df.copy()
    result[col_steering_scaled] = steering_scaled
    result[col_throttle_scaled] = throttle_scaled
    return result

In [None]:
# df = dfs['sergem_robocar_20191127'].copy()
# # df = dfs['sergem_robocar_20191009_v2'].copy()
# # df = dfs['sergem_robocar_20191106'].copy()
# # df = dfs['sergem_robocar_20191126_entrance'].copy()


# plot_ranges(
#     scale_minus1_plus1(
#         x = df[col_steering],
#         xzero = df['steering_zero'],
#         xmin = df['steering_min'],
#         xmax = df['steering_max'],
#         do_clip=True
#     )
# )

In [None]:
# plot_ranges(
#     scale_minus1_plus1(
#         x = df[col_throttle],
#         xzero = df['throttle_zero'],
#         xmin = df['throttle_min'],
#         xmax = df['throttle_max'],
#         do_clip=True
#     )
# )

In [None]:
# Add the scaled steering/throttle columns to every recording.
# Needs the *_zero, *_min and *_max columns computed in the cells above.
for name in db_names:
    dfs[name] = scale_minus1_plus1_df(dfs[name])
    

In [None]:
# Visual sanity check: for every recording show four figures - raw steering with its
# min/zero/max curves, scaled steering, raw throttle with its curves, scaled throttle.
for name in db_names:
    df = dfs[name]
    plot_ranges(df[col_steering], df['steering_min'], df['steering_zero'], df['steering_max'], 
                update_layout_kwargs = {'title': 'steering orig ' + name})
    plot_ranges(df[col_steering_scaled],
                update_layout_kwargs = {'title': 'steering scaled ' + name})
    plot_ranges(df[col_throttle], df['throttle_min'], df['throttle_zero'], df['throttle_max'], 
                update_layout_kwargs = {'title': 'throttle orig ' + name})
    plot_ranges(df[col_throttle_scaled],
                update_layout_kwargs = {'title': 'throttle scaled ' + name})

#### Discard a frame if

1) there is a reverse within the next 3 seconds

2) there is no throttle and no steering


In [None]:
def find_moments_before_reverse(
    df,
    col_target = 'reverse_in_the_future',
    threshold_ns = 3 * 1e9,
    th_reverse = -0.03,
):
    """Flag every frame that is followed by a reverse within ``threshold_ns``.

    A frame is a "reverse" when its scaled throttle is below ``th_reverse``.
    Walking backwards through the recording, each frame is compared with the
    nearest reverse frame at or after it; the frame is flagged when that
    reverse happens less than ``threshold_ns`` later (by image timestamp).
    Reverse frames themselves are flagged for any positive threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with the scaled throttle and image timestamp columns. Not modified.
    col_target : str
        Name given to the returned Series.
    threshold_ns : number
        Look-ahead horizon, in the unit of the image timestamp column
        (nanoseconds, judging by the parameter name - TODO confirm).
    th_reverse : float
        Scaled-throttle level below which a frame counts as reversing.

    Returns
    -------
    pandas.Series
        Integer Series (1 = reverse ahead, 0 = not) sharing ``df``'s index.
    """
    # Plain arrays instead of df.iloc[i][col]: the latter materialises a whole row
    # for every access, which makes the scan needlessly slow on long recordings.
    throttle = df[col_throttle_scaled].values
    timestamps = df[col_image_ts].values

    flags = np.zeros(len(df), dtype=np.int64)
    ts_nearest_reverse = 1.0e30  # big value in the future: "no reverse seen yet"

    for i_cur in range(len(df) - 1, -1, -1):
        if throttle[i_cur] < th_reverse:
            ts_nearest_reverse = timestamps[i_cur]
        if ts_nearest_reverse - timestamps[i_cur] < threshold_ns:
            flags[i_cur] = 1

    return pd.Series(flags, index=df.index, name=col_target)


In [None]:
def before_reversing_and_goodness(
    df,
    time_bofore_reverse_ns
):
    """Mark frames that precede a reverse and derive the 0/1 'goodness' training filter.

    A frame is "good" (1) when no reverse is coming up within
    ``time_bofore_reverse_ns``, the car is not idle, and the scaled throttle
    is above 0.3; otherwise it is 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording with 'idle' and the scaled throttle column. Not modified.
    time_bofore_reverse_ns : number
        Look-ahead horizon forwarded to ``find_moments_before_reverse``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the added columns 'reverse_in_the_future' and 'goodness'.
    """
    result = df.copy()
    result['reverse_in_the_future'] = find_moments_before_reverse(
        result, threshold_ns=time_bofore_reverse_ns
    )

    no_reverse_ahead = result['reverse_in_the_future'] == 0
    driving = result['idle'] == 0
    accelerating = result[col_throttle_scaled] > 0.3

    # Multiplying the boolean mask by 1 stores it as integers.
    result['goodness'] = (no_reverse_ahead & driving & accelerating) * 1
    return result



In [None]:
db_names

In [None]:
# For these recordings, frames within 3 * 1e9 timestamp units (3 s if the timestamps are
# nanoseconds - TODO confirm) before a reverse are excluded from the "good" frames.
for name in [
    'sergem_robocar_20191009_v2',
    'sergem_robocar_20191106',
    'sergem_robocar_20191126_entrance',
]:
    dfs[name] = before_reversing_and_goodness(dfs[name], time_bofore_reverse_ns= 3*1e9)

In [None]:
# For these recordings the look-ahead horizon is 1 timestamp unit, so in practice only
# frames whose timestamp coincides with a reverse frame are flagged as "reverse ahead".
for name in [
    'sergem_robocar_20191127',
    'sergem_robocar_20191202',
]:
    dfs[name] = before_reversing_and_goodness(dfs[name], time_bofore_reverse_ns= 1)
    

In [None]:
# Inspect one recording (the fourth entry of db_names): scaled signals overlaid with the
# 'reverse_in_the_future', 'idle' and 'goodness' flags.
df = dfs[db_names[3]]
go.Figure(data=[
    go.Scatter(x=df.index, y=df[col_steering_scaled], mode='lines', name='steeing_scaled'),
    go.Scatter(x=df.index, y=df[col_throttle_scaled], mode='lines', name='throttle_scaled'),
    go.Scatter(x=df.index, y=df['reverse_in_the_future'], mode='lines', name='reverse_in_the_future',
              line=dict(width=1,dash='dash')),
    go.Scatter(x=df.index, y=df['idle'], mode='lines', name='idle',
              line=dict(width=1,dash='dash')),
    go.Scatter(x=df.index, y=df['goodness'], mode='lines', name='goodness',
              line=dict(width=2,dash='dash')),

]).show()
# Note: DataFrame.size is rows * columns, not the number of rows.
df.size

In [None]:
df[df['reverse_in_the_future']!=0]

## Saving filtered data

In [None]:
# Keep only the "good" frames of every recording and store them next to the raw CSVs
# as <db_name>/<db_name>.filteredcsv, with image paths made relative to base_dir.
for db_name in db_names:
    df = dfs[db_name].copy()
    # Prefix the image path with the session directory so it resolves from base_dir.
    df[col_img] = df[col_img].map(lambda path: str(Path(db_name) / path))
    is_good = df['goodness'] != 0
    # Report BEFORE filtering: counting discarded rows on the already-filtered frame
    # (as was done previously) always printed 0.
    print(f"{db_name}: keeping {int(is_good.sum())} of {len(df)} frames, "
          f"discarding {int((~is_good).sum())}")
    df = df[is_good]
    df.to_csv(base_dir / db_name / f"{db_name}.filteredcsv", index=False)

In [None]:
# df['image'].iloc[0]

# Loading filtered data 

In [None]:
filtered_csv_paths = sorted(base_dir.glob("**/*.filteredcsv"))
filtered_csv_paths

In [None]:
df_filtered = load_and_merge_csvs(filtered_csv_paths)

In [None]:
df_filtered.size, df_filtered.shape

In [None]:
df_filtered.head()

## Flip images

In [None]:
# Mirrored images are written under base_dir/_flip_lr/, mirroring the original layout.
dirname_flip_lr = "_flip_lr" 
base_flip_path = base_dir / dirname_flip_lr
# exist_ok=True makes this cell safe to re-run.
base_flip_path.mkdir(exist_ok=True, parents=True)

In [None]:
import PIL
from keras_preprocessing.image.utils import load_img
def create_flipped_image(path_src, path_dst, skip_if_exists=False, exceptions_ok=False, overwrite=False):
    """Write a left-right mirrored copy of an image and report what happened.

    Parameters
    ----------
    path_src : path-like
        Image to read.
    path_dst : pathlib.Path
        Where to store the mirrored image; missing parent directories are created.
    skip_if_exists : bool
        If the destination already exists, do nothing and report it as existing.
    exceptions_ok : bool
        If True, a failure while creating the image is printed and reported
        instead of being raised.
    overwrite : bool
        Allow replacing an existing destination (only consulted when
        ``skip_if_exists`` is False).

    Returns
    -------
    numpy.ndarray
        One-hot counter ``[processed, already_existed, failed]`` so the results
        of many calls can simply be summed.

    Raises
    ------
    FileExistsError
        If the destination exists and neither ``skip_if_exists`` nor ``overwrite`` is set.
    Exception
        Any error from loading/flipping/saving, unless ``exceptions_ok`` is True.
    """
    destination_present = path_dst.exists()
    if destination_present and skip_if_exists:
        return np.array([0, 1, 0])
    if destination_present and not overwrite:
        raise FileExistsError(f'file exists: {path_dst}')

    try:
        path_dst.parent.mkdir(exist_ok=True, parents=True)
        mirrored = load_img(path_src).transpose(PIL.Image.FLIP_LEFT_RIGHT)
        mirrored.save(path_dst)
    except Exception as e:
        if exceptions_ok:
            print(f"Filed processing of image {path_src}. Exception: {e}")
            return np.array([0, 0, 1])
        raise
    else:
        return np.array([1, 0, 0])
        

In [None]:
from tqdm import tqdm_notebook

In [None]:
# Create a mirrored copy of every filtered image (skipping ones that already exist) and
# add up the one-hot [processed, existed, failed] vectors returned by each call.
# exceptions_ok is left at its default, so the first unreadable image aborts the cell.
stats = sum(
    (
        create_flipped_image(base_dir / img_path, base_flip_path / img_path, skip_if_exists=True) 
        for img_path in tqdm_notebook(df_filtered[col_img])
    ), 
    np.array([0, 0, 0])
)
print(f"all {np.sum(stats)}, processed {stats[0]}, exist {stats[1]}, faild {stats[2]}")
    

## Split into training and validation by index
The index is just a proxy for time.

In [None]:
# Within every block of 1000 consecutive rows, rows 801..999 go to validation (199 rows,
# roughly 20 %). Splitting in contiguous chunks rather than at random presumably keeps
# near-identical neighbouring frames out of both sets at once - TODO confirm intent.
is_valid = df_filtered.index % 1000 > 800
# Stored as integers (0 = training, 1 = validation).
df_filtered[col_validation] = is_valid * 1


In [None]:
def plot_scaled(df):
    """Plot scaled steering, scaled throttle and the validation flag against the frame index.

    The figure is displayed with ``Figure.show()``; nothing is returned.
    """
    # (column, legend label) in drawing order.
    curves = (
        (col_steering_scaled, 'steeing_scaled'),
        (col_throttle_scaled, 'throttle_scaled'),
        (col_validation, 'validation'),
    )
    traces = [
        go.Scatter(x=df.index, y=df[column], mode='lines', name=label)
        for column, label in curves
    ]
    go.Figure(data=traces).show()
    
plot_scaled(df_filtered)

In [None]:
# Build the table for the mirrored images: same rows, image paths pointing into the
# _flip_lr directory, and steering negated because left and right are swapped.
df_filtered_filp_lr = df_filtered.copy()
# NOTE: `.str[:]` is a full slice, i.e. it returns the path strings unchanged.
df_filtered_filp_lr[col_img] = dirname_flip_lr + '/' + df_filtered_filp_lr[col_img].str[:]
# NOTE: a bare expression in the middle of a cell is not displayed; this line has no effect.
df_filtered_filp_lr
df_filtered_filp_lr[col_steering_scaled] = -df_filtered_filp_lr[col_steering_scaled]

In [None]:
# Original rows followed by their mirrored counterparts, re-indexed 0..2n-1.
# The 'validation' flag was assigned before this step, so an image and its mirror
# always land in the same split.
df_filtered_normal_and_flip_lr = pd.concat([
    df_filtered, df_filtered_filp_lr
], ignore_index=True)
plot_scaled(df_filtered_normal_and_flip_lr)
df_filtered_normal_and_flip_lr

In [None]:
# shuffle to avoid issue: https://github.com/keras-team/keras-preprocessing/issues/205
# A fixed random_state keeps the shuffled order identical between runs, so training is
# reproducible after Restart & Run All (the generators below do not shuffle themselves).
df_train = df_filtered_normal_and_flip_lr[df_filtered_normal_and_flip_lr[col_validation] == 0].sample(frac=1, random_state=0)

df_val = df_filtered_normal_and_flip_lr[df_filtered_normal_and_flip_lr[col_validation] == 1]
# (rows, columns) of the training and validation tables.
df_train.shape, df_val.shape

In [None]:
df_train[df_train.index == 123]

In [None]:
df_val[df_val.index == 850]

# Visualization

In [None]:
def visualize(imgs, y=None, pred=None, ncols=4, subplots_kwargs=dict(figsize=(15,20), )):
    """Show a batch of images in a grid, annotated with targets and predictions.

    Parameters
    ----------
    imgs : sequence of images
        Images accepted by ``Axes.imshow``; one grid cell per image.
    y, pred : sequence of arrays or None
        One array per model output; element ``i`` of every array is printed for
        image ``i`` with three decimals. ``None`` is rendered as "-".
    ncols : int
        Number of grid columns; the number of rows is derived from ``len(imgs)``.
    subplots_kwargs : dict
        Extra keyword arguments for ``plt.subplots`` (only read, never modified).

    Grid cells beyond the last image are left blank, so the batch size does not
    have to be a multiple of ``ncols``. An empty batch draws nothing.
    """
    def format_(arr, idx):
        if arr is None:
            return "-"
        return ' '.join(['{:.3f}'.format(a.ravel()[idx]) for a in arr])

    n_images = len(imgs)
    if n_images == 0:
        # plt.subplots cannot create a grid with zero rows.
        return

    n_rows = (n_images + ncols - 1) // ncols
    _, axes = plt.subplots(n_rows, ncols,
                           squeeze=False, sharex=True, sharey=True,
                           gridspec_kw={'hspace': 0, 'wspace': 0},
                           **subplots_kwargs
                          )
    for i, ax in enumerate(axes.flat):
        if i >= n_images:
            # The last row may have spare cells (e.g. a partial final batch);
            # indexing imgs[i] there used to raise IndexError.
            ax.axis('off')
            continue
        ax.imshow(imgs[i])

        text = "y {} \npred {}".format(format_(y, i), format_(pred, i))
        ax.text(20, 20, text, color='cyan')

# Training

In [None]:
def create_generator(datagen, df):
    """Create a batch iterator that reads images listed in ``df`` and yields them with both targets.

    Parameters
    ----------
    datagen : ImageDataGenerator
        Supplies the preprocessing/augmentation; its ``flow_from_dataframe`` is called.
    df : pandas.DataFrame
        Must contain the image path column (relative to ``base_dir``) and the
        scaled steering and throttle columns.

    Returns
    -------
    The iterator returned by ``flow_from_dataframe``: batches of 32 images
    resized to (IMAGE_INPUT_HEIGHT, IMAGE_INPUT_WIDTH), with the targets
    [steering, throttle] as separate outputs, in the row order of ``df``
    (no shuffling).
    """
    flow_options = dict(
        dataframe=df,
        directory=str(base_dir),
        x_col=col_img,
        y_col=[col_steering_scaled, col_throttle_scaled],
        class_mode="multi_output",
        target_size=(IMAGE_INPUT_HEIGHT, IMAGE_INPUT_WIDTH),
        batch_size=32,
        shuffle=False,
    )
    return datagen.flow_from_dataframe(**flow_options)

In [None]:
# Training generator: pixel values are rescaled by 1/255 and every image is randomly
# augmented (rotation, zoom, shifts, brightness, shear, channel shift).
train_generator=create_generator(
    datagen=ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,
        zoom_range=[0.8, 1.2],
        width_shift_range=0.1,
        height_shift_range=0.1,
        brightness_range=(0.2, 1.5),
        shear_range=5.,
        channel_shift_range=50,
        vertical_flip=False, # we don't need it
        horizontal_flip=False, # We do it manually because we have to flip steering
    ),
    df=df_train
)
# Pull one batch to eyeball the augmentation.
# NOTE(review): next() presumably advances the iterator, so training would start at the
# second batch - confirm whether that matters.
dbg_imgs, dbg_y = train_generator.next()
visualize(dbg_imgs, dbg_y, subplots_kwargs={'figsize': (10, 10)})

In [None]:
# Validation generator: only the 1/255 rescaling, no random augmentation, so the
# validation loss is measured on unmodified images.
val_generator=create_generator(
    datagen=ImageDataGenerator(
        rescale=1./255,
        vertical_flip=False, # we don't need it
        horizontal_flip=False, # We do it manually because we have to flip steering
    ),
    df=df_val
)

In [None]:
dbg_imgs, dbg_y = val_generator[50]
visualize(dbg_imgs, dbg_y, subplots_kwargs={'figsize': (10, 10)})

In [None]:
# np.set_printoptions(precision=3)

In [None]:
from keras_preprocessing.image.utils import load_img

In [None]:
dbg_y

In [None]:
def default_linear(input_shape=None):
    """Build and compile the convolutional network that predicts steering and throttle.

    Architecture: five ReLU convolutions (24, 32, 64, 64, 64 filters), a flatten
    layer, two ReLU dense layers (100 and 50 units) each followed by 10 %
    dropout, and two linear single-unit heads named 'angle_out' and
    'throttle_out'.

    Parameters
    ----------
    input_shape : tuple of int, optional
        (height, width, channels) of the input images. Defaults to
        ``(IMAGE_INPUT_HEIGHT, IMAGE_INPUT_WIDTH, 3)`` so the model always
        matches the size the data generators resize images to.

    Returns
    -------
    Model
        Compiled with Adam and a weighted sum of mean squared errors
        (0.97 for the angle, 0.03 for the throttle).
    """
    if input_shape is None:
        # Derive from the notebook constants instead of repeating 120x160 here.
        input_shape = (IMAGE_INPUT_HEIGHT, IMAGE_INPUT_WIDTH, 3)

    img_in = Input(shape=input_shape, name='img_in')
    x = img_in

    # Convolution2D class name is an alias for Conv2D
    # (filters, kernel_size, strides) of each convolution, in order.
    conv_specs = (
        (24, (5, 5), (2, 2)),
        (32, (5, 5), (2, 2)),
        (64, (5, 5), (2, 2)),
        (64, (3, 3), (2, 2)),
        (64, (3, 3), (1, 1)),
    )
    for filters, kernel_size, strides in conv_specs:
        x = Convolution2D(filters=filters, kernel_size=kernel_size, strides=strides, activation='relu')(x)

    x = Flatten(name='flattened')(x)
    x = Dense(units=100, activation='relu')(x)
    x = Dropout(rate=.1)(x)
    x = Dense(units=50, activation='relu')(x)
    x = Dropout(rate=.1)(x)

    angle_out = Dense(units=1, activation='linear', name='angle_out')(x)

    # continuous output of throttle
    throttle_out = Dense(units=1, activation='linear', name='throttle_out')(x)

    # Output order must match the y_col order used by create_generator: steering, throttle.
    model = Model(inputs=[img_in], outputs=[
        angle_out,
        throttle_out
    ])

    model.compile(optimizer='adam',
                  loss={
                      'angle_out': 'mean_squared_error',
                      'throttle_out': 'mean_squared_error'
                  },
                  loss_weights={
                      'angle_out': 0.97,
                      'throttle_out': .03
                  }
                 )

    return model

In [None]:
model = default_linear()

In [None]:
# Training configuration.
use_early_stop = True   # add the EarlyStopping callback to the callback list
epochs = 12             # upper bound on the number of epochs
# File the best model (lowest val_loss) is written to by ModelCheckpoint.
saved_model_path = Path("./models/linear_20191204_aug2_fliplr_relu4dense_noold2.model")
verbose = True
# EarlyStopping settings: stop when val_loss has not improved by at least min_delta
# for `patience` consecutive epochs.
min_delta=.0005
patience=5

In [None]:
len(train_generator), train_generator.n, train_generator.batch_size, 40*32

In [None]:
saved_model_path.parent.mkdir(exist_ok=True, parents=True)

In [None]:
model.summary()

In [None]:
# Keep only the model with the lowest validation loss on disk.
save_best = ModelCheckpoint(str(saved_model_path),
                            monitor='val_loss',
                            verbose=verbose,
                            save_best_only=True,
                            mode='min')

# stop training if the validation error stops improving.
early_stop = EarlyStopping(monitor='val_loss',
                           min_delta=min_delta,
                           patience=patience,
                           verbose=verbose,
                           mode='auto')

# One TensorBoard directory per run: logs/<timestamp>_<model file stem>.
log_dir = Path("logs") / f'{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}_{saved_model_path.stem}'
# No exist_ok here: re-running the cell within the same second would raise FileExistsError.
log_dir.mkdir(parents=True)

tb = TensorBoard(log_dir=str(log_dir))

callbacks_list = [save_best, tb]

if use_early_stop:
    callbacks_list.append(early_stop)

# NOTE(review): after fitting, `model` holds the weights of the last epoch run; the best
# weights are the ones ModelCheckpoint wrote to saved_model_path.
hist = model.fit_generator(
    train_generator,
    epochs=epochs,
    verbose=1,
    validation_data=val_generator,
    callbacks=callbacks_list)

In [None]:
vis_imgs, vis_y = val_generator.next()
vis_pred = model.predict(vis_imgs)
visualize(vis_imgs, vis_y, vis_pred)

In [None]:
# steering
# Start a fresh figure: with the interactive qt backend, pyplot otherwise keeps drawing
# into whatever figure is current, mixing this plot with earlier ones.
plt.figure()
plt.plot(vis_y[0], label="manual")
plt.plot(vis_pred[0], label="prediction")
plt.title("steering")
plt.legend()

In [None]:
# Throttle
# Own figure (otherwise the curves land on top of the steering plot under the qt backend)
# and the same labels/legend as the steering plot so the two curves can be told apart.
plt.figure()
plt.plot(vis_y[1], label="manual")
plt.plot(vis_pred[1], label="prediction")
plt.title("throttle")
plt.legend()