In [None]:
import sys

# Extra interpreter search paths: the library directories of the local
# "donkey" conda environment plus the donkeycar source checkout.
_EXTRA_SYS_PATHS = [
    '',
    '/Users/kristjan.roosild/opt/miniconda3/envs/donkey/lib/python37.zip',
    '/Users/kristjan.roosild/opt/miniconda3/envs/donkey/lib/python3.7',
    '/Users/kristjan.roosild/opt/miniconda3/envs/donkey/lib/python3.7/lib-dynload',
    '/Users/kristjan.roosild/.local/lib/python3.7/site-packages',
    '/Users/kristjan.roosild/opt/miniconda3/envs/donkey/lib/python3.7/site-packages',
    '/Users/kristjan.roosild/projects/donkeycar',
    '/Users/kristjan.roosild/opt/miniconda3/envs/donkey/lib/python3.7/site-packages/locket-0.2.1-py3.7.egg',
]

# Only append entries that are not listed yet, so re-running this cell does
# not keep growing sys.path with duplicates. An entry that is already present
# keeps its earlier (higher priority) position, so import resolution is
# unaffected by skipping it.
for _path in _EXTRA_SYS_PATHS:
    if _path not in sys.path:
        sys.path.append(_path)

In [None]:
from donkeycar.parts.keras import KerasCategorical, KerasLinear, KerasLSTM, Keras3D_CNN, KerasLinearOnlySteering, KerasInterpreter

import donkeycar as dk

import math
import os
from typing import List, Dict, Union, Tuple

from donkeycar.config import Config
from donkeycar.parts.keras import KerasPilot
from donkeycar.pipeline.database import PilotDatabase
from donkeycar.pipeline.sequence import TubRecord, TubSequence, TfmIterator
from donkeycar.pipeline.types import TubDataset
from donkeycar.pipeline.augmentations import ImageAugmentation
from donkeycar.utils import normalize_image, train_test_split
import tensorflow as tf
import numpy as np


class BatchSequence(object):
    """
    Wraps a list of tub records in a record -> (x, y) pipeline and exposes
    that pipeline as a batched tf.data.Dataset.

    The records stay shallow until the pipeline is iterated; the model's own
    transform / translate hooks turn each record into the structures that
    tf.data consumes.
    """
    def __init__(self,
                 model: KerasPilot,
                 config: Config,
                 records: List[TubRecord],
                 is_train: bool) -> None:
        """
        :param model:    pilot whose transform / translate hooks shape x and y
        :param config:   config providing BATCH_SIZE and the image
                         AUGMENTATIONS / TRANSFORMATIONS settings
        :param records:  tub records to draw samples from
        :param is_train: when True, images are additionally augmented
        """
        self.model = model
        self.config = config
        self.sequence = TubSequence(records)
        self.batch_size = config.BATCH_SIZE
        self.is_train = is_train
        self.augmentation = ImageAugmentation(config, 'AUGMENTATIONS')
        self.transformation = ImageAugmentation(config, 'TRANSFORMATIONS')
        self.pipeline = self._create_pipeline()

    def __len__(self) -> int:
        """ Number of batches needed to cover the pipeline once; the last
            batch may be partial. """
        n_samples = len(self.pipeline)
        return math.ceil(n_samples / self.batch_size)

    def image_processor(self, img_arr):
        """ Applies the configured transformations to the image, augments it
            only when training, and finally passes it through
            normalize_image. """
        transformed = self.transformation.run(img_arr)
        processed = (self.augmentation.run(transformed)
                     if self.is_train else transformed)
        return normalize_image(processed)

    def _create_pipeline(self) -> TfmIterator:
        """ Builds the record -> (x, y) pipeline. Override this when a more
            elaborate pipeline is needed. """
        def record_to_x(record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Model input for one record """
            # The model returns a tuple; x_translate turns it into the
            # dictionary form that tf.data understands.
            return self.model.x_translate(
                self.model.x_transform_and_process(record,
                                                   self.image_processor))

        def record_to_y(record: TubRecord) -> Dict[str, Union[float, np.ndarray]]:
            """ Training target for one record """
            return self.model.y_translate(self.model.y_transform(record))

        return self.sequence.build_pipeline(x_transform=record_to_x,
                                            y_transform=record_to_y)

    def create_tf_data(self) -> tf.data.Dataset:
        """ Wraps the pipeline in a tf.data.Dataset that repeats and is
            batched with batch_size. """
        def iterate_pipeline():
            return self.pipeline

        dataset = tf.data.Dataset.from_generator(
            generator=iterate_pipeline,
            output_types=self.model.output_types(),
            output_shapes=self.model.output_shapes())
        repeated = dataset.repeat()
        return repeated.batch(self.batch_size)


def get_model_train_details(database: PilotDatabase, model: str = None) -> Tuple[str, int]:
    """
    Resolve the name and number of the model that is about to be trained.

    :param database: pilot database that is asked to generate a name when
                     ``model`` is not given
    :param model:    optional model path; when given, its absolute path
                     becomes the model name
    :return:         ``(model_name, model_num)``; ``model_num`` is 0 whenever
                     ``model`` is given
    """
    if model:
        return os.path.abspath(model), 0
    model_name, model_num = database.generate_model_name()
    return model_name, model_num


In [None]:

# Load the car configuration from the local mycar project.
cfg = dk.load_config(config_path='/Users/kristjan.roosild/mycar/config.py')

# Override the loaded config for this experiment. 'TRANSFORMATIONS' is the
# config attribute that ImageAugmentation(cfg, 'TRANSFORMATIONS') is built
# from elsewhere in this notebook.
# NOTE(review): presumably 'CROP' with ROI_CROP_TOP = 60 removes the top 60
# pixel rows of every image - confirm against ImageAugmentation.
cfg.TRANSFORMATIONS = ['CROP']
cfg.ROI_CROP_TOP = 60

# Image shape handed to the model constructor: (height, width, depth).
input_shape = (cfg.IMAGE_H, cfg.IMAGE_W, cfg.IMAGE_DEPTH)

# Tub directory names grouped by the number at the end of the name.
# NOTE(review): the names look like <track>-<run>-<CC|CW>-<speed>, presumably
# counter-clockwise / clockwise laps at a given speed setting - confirm.
# NOTE(review): there is no '3-1-CW-80' entry although the 85 and 90 lists
# both contain a tub starting with '3-' - confirm this is intentional.
tubs_names_80_speed = [
    '1-1-CC-80',
    '2-1-CW-80',
    '4-1-CC-80',
]

tubs_names_85_speed = [
    '1-3-CC-85',
    '2-3-CW-85',
    '3-3-CW-85',
    '4-3-CC-85'
]

# Note the spelling: this list is `tub_names_...` while the two lists above
# are `tubs_names_...`.
tub_names_90_speed = [
    '1-2-CC-90',
    '2-2-CW-90',
    '3-2-CW-90',
    '4-2-CC-90',
]


def load_records(tub_name):
    """
    Load the records of a single tub from the local data directory.

    :param tub_name: tub directory name, appended to the data directory path
    :return:         the result of ``TubDataset.get_records()`` for that tub,
                     built with the notebook-level ``cfg`` and ``seq_size=0``
    """
    tub_path = os.path.expanduser(
        '/Users/kristjan.roosild/OneDrive/kool/maka/data/' + tub_name)
    dataset = TubDataset(config=cfg, tub_paths=[tub_path], seq_size=0)
    return dataset.get_records()


# tub name -> loaded records, for every tub of every speed group
# (80 first, then 85, then 90).
tub_records = {
    tub_name: load_records(tub_name)
    for speed_group in (tubs_names_80_speed, tubs_names_85_speed, tub_names_90_speed)
    for tub_name in speed_group
}


In [None]:

def prep_fold_data(kl, cfg, data):
    """
    Split ``data`` into a training and a validation part and wrap each part
    in a prefetching tf.data dataset.

    Both parts go through BatchSequence; only the training part is built with
    ``is_train=True``.

    :param kl:   model handed to BatchSequence
    :param cfg:  config providing TRAIN_TEST_SPLIT (and everything
                 BatchSequence reads from it)
    :param data: records to split; the split is shuffled
    :return:     ``(dataset_train, dataset_validate, train_size, val_size)``
                 where both sizes are the lengths of the two BatchSequences
    """
    train_part, val_part = train_test_split(
        data, shuffle=True, test_size=(1. - cfg.TRAIN_TEST_SPLIT))
    print(f'Records # Training {len(train_part)}')
    print(f'Records # Validation {len(val_part)}')

    train_pipe = BatchSequence(kl, cfg, train_part, is_train=True)
    val_pipe = BatchSequence(kl, cfg, val_part, is_train=False)
    both_pipes = (train_pipe, val_pipe)

    autotune = tf.data.experimental.AUTOTUNE
    datasets = [pipe.create_tf_data().prefetch(autotune) for pipe in both_pipes]
    sizes = [len(pipe) for pipe in both_pipes]
    assert sizes[1] > 0, "Not enough validation data, decrease the batch size or add more data."
    return datasets[0], datasets[1], sizes[0], sizes[1]

import wandb

def init_wandb(fold=None, model=None, tubs=None):
    """
    Start a Weights & Biases run whose config describes the current fold.

    Every argument is optional. An argument that is left out is read from the
    notebook globals, so a bare ``init_wandb()`` call keeps working:

    :param fold:  fold index to log; defaults to the global ``fold``
    :param model: model to log (stored as ``str(model)``); defaults to the
                  global ``kl``
    :param tubs:  iterable of tub names to log (stored comma-joined);
                  defaults to the global ``tubs`` and, if that does not
                  exist, to ``tub_names_90_speed``
    """
    notebook_globals = globals()
    if fold is None:
        fold = notebook_globals.get('fold')
    if model is None:
        model = notebook_globals.get('kl')
    if tubs is None:
        tubs = notebook_globals.get('tubs')
    if tubs is None:
        # No visible cell of this notebook assigns `tubs`, so reading it
        # unconditionally raises NameError on a fresh kernel. The training
        # folds are built from the 90-speed tubs, so those are logged instead.
        # NOTE(review): confirm these are the tubs that should be recorded.
        tubs = notebook_globals.get('tub_names_90_speed', [])

    config = {
        "fold": fold,
        "model": str(model),
        "tubs": ','.join(tubs),
    }
    wandb.init(project="master-thesis", entity="kristjan", config=config)


def train(kl, cfg, data):
    """
    Train ``kl`` on ``data`` and return whatever ``kl.train`` returns.

    The records are split and wrapped by prep_fold_data, a wandb run is
    started through init_wandb, and the model is then trained with the
    hyper-parameters taken from ``cfg``.

    Note: the destination file is read from the notebook-level ``model_path``
    variable, which has to be set before this function is called.

    :param kl:   model to train
    :param cfg:  config providing BATCH_SIZE, MAX_EPOCHS, VERBOSE_TRAIN,
                 MIN_DELTA, EARLY_STOP_PATIENCE and SHOW_PLOT
    :param data: records to train and validate on
    """
    dataset_train, dataset_validate, train_size, val_size = prep_fold_data(kl, cfg, data)
    init_wandb()

    training_kwargs = {
        'model_path': model_path,
        'train_data': dataset_train,
        'train_steps': train_size,
        'batch_size': cfg.BATCH_SIZE,
        'validation_data': dataset_validate,
        'validation_steps': val_size,
        'epochs': cfg.MAX_EPOCHS,
        'verbose': cfg.VERBOSE_TRAIN,
        'min_delta': cfg.MIN_DELTA,
        'patience': cfg.EARLY_STOP_PATIENCE,
        'show_plot': cfg.SHOW_PLOT,
    }
    return kl.train(**training_kwargs)

def mse(v1, v2):
    """
    Mean squared error between two sequences of numbers.

    Both inputs are converted to numpy arrays and subtracted elementwise, so
    numpy's broadcasting rules apply when their shapes differ.
    """
    residuals = np.asarray(v1) - np.asarray(v2)
    return np.mean(np.square(residuals))

In [None]:
from collections import defaultdict

from sklearn.model_selection import KFold

# Number of cross-validation folds.
N_FOLDS = 10

kf = KFold(n_splits=N_FOLDS)

# fold index -> records of that fold, pooled over all 90-speed tubs.
training_folds = defaultdict(list)
testing_folds = defaultdict(list)

# Each 90-speed tub is split on its own, so every fold receives a train and a
# test share of every tub.
for tub_name in tub_names_90_speed:
    records = tub_records[tub_name]
    for fold, (train_index, test_index) in enumerate(kf.split(records)):
        training_folds[fold].extend(records[i] for i in train_index)
        testing_folds[fold].extend(records[i] for i in test_index)

In [None]:
# All records of the 85-speed tubs in one flat list, in tub order.
all_records_85_speed = [
    record
    for tub_name in tubs_names_85_speed
    for record in tub_records[tub_name]
]

# Same for the 80-speed tubs.
all_records_80_speed = [
    record
    for tub_name in tubs_names_80_speed
    for record in tub_records[tub_name]
]


In [None]:
import gc
gc.enable()

def get_loss(kl, test_records):
    """
    Mean squared error between the model's predictions and the recorded
    'user/angle' values of ``test_records``.

    Every image is run through the notebook-level ``transformation`` and
    through normalize_image before it is handed to ``kl.inference``; the
    first element of the inference result is used as the prediction.

    :param kl:           model providing ``inference(image, other)``
    :param test_records: records to evaluate; iterated twice
    :return:             the value of ``mse(ground_truth, predictions)``
    """
    def predict(record):
        # A full garbage collection is forced before every single prediction.
        # NOTE(review): presumably a workaround for memory growth during
        # inference - confirm before removing, it is slow.
        gc.collect()
        prepared = normalize_image(transformation.run(record.image()))
        return kl.inference(prepared, None)[0]

    predictions = [predict(record) for record in test_records]
    targets = [record.underlying['user/angle'] for record in test_records]
    return mse(targets, predictions)

In [None]:
# Image transformation applied to every test image; get_loss() reads this
# notebook-level variable.
transformation = ImageAugmentation(cfg, 'TRANSFORMATIONS')

# One test MSE per fold, for each speed group.
test_losses_for_80_speed = []
test_losses_for_85_speed = []
test_losses_for_90_speed = []

for fold, train_records in training_folds.items():
    # `fold` and `kl` are read as globals by init_wandb(), `model_path` is
    # read as a global by train(), so they have to be set before train() runs.
    model_name = f'test-fold{fold}'
    model_path = f'/Users/kristjan.roosild/OneDrive/kool/maka/models/{model_name}.h5'
    kl = KerasLinearOnlySteering(interpreter=KerasInterpreter(), input_shape=input_shape)
    train(kl, cfg, train_records)

    # (speed label, records to evaluate on, list collecting the per-fold MSE).
    # The 80- and 85-speed tubs are never trained on, so they are evaluated in
    # full; for the 90-speed tubs only this fold's held-out records are used.
    evaluations = [
        (80, all_records_80_speed, test_losses_for_80_speed),
        (85, all_records_85_speed, test_losses_for_85_speed),
        (90, testing_folds[fold], test_losses_for_90_speed),
    ]
    for speed, eval_records, fold_losses in evaluations:
        print(f'Getting {speed}-speed mse for fold {fold}')
        fold_mse = get_loss(kl, eval_records)
        fold_losses.append(fold_mse)
        wandb.run.summary[f"{speed}_speed_test_loss"] = fold_mse
        print(f'{speed} speed mse for fold {fold} is {fold_mse}')

    # Close this fold's run so its summary is uploaded and the next fold is
    # logged as a separate run instead of reusing the active one.
    wandb.finish()


