In [None]:
%matplotlib inline
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from commons.configuration_manager import ConfigurationManager
from src.learning.training.label_collector import LabelCollector
from src.learning.training.training_file_reader import TrainingFileReader
from src.learning.training.training_transformer import TrainingTransformer

In [None]:
# Build the project configuration once; `config` is handed to the transformer below.
config_manager = ConfigurationManager()
config = config_manager.config

# Notebook-wide helpers shared by the functions and cells that follow:
#   reader      - used for read_specific_telemetry_columns() and read_video();
#                 presumably resolves file names relative to ../../training/laps/ (confirm)
#   transformer - used for resize_and_normalize_video()
#   collector   - supplies the steering / diff-steering column lists
reader = TrainingFileReader(path_to_training='../../training/laps/')
transformer = TrainingTransformer(config)
collector = LabelCollector()


def plot_stuff(title, plot_elems, bins, figsize=(18, 10), xlabel='Value', ylabel='Count'):
    """Draw one or more overlaid histograms on a single figure and show it.

    Args:
        title: Text placed above the figure.
        plot_elems: Iterable of dicts, each providing the keys 'data' (values
            to bin), 'label' (legend entry) and 'alpha' (bar opacity).
        bins: Bin specification forwarded unchanged to ``plt.hist``.
        figsize: Figure size passed to ``plt.figure``.
        xlabel: Label for the horizontal axis, i.e. the binned quantity.
        ylabel: Label for the vertical axis, i.e. the number of samples per bin.
    """
    plt.figure(figsize=figsize)
    plt.title(title)
    # plt.hist draws vertical bars by default: the binned values run along x and
    # the per-bin counts along y, so 'Count' belongs on the y axis.
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    for plot_elem in plot_elems:
        plt.hist(plot_elem['data'], bins=bins, label=plot_elem['label'], alpha=plot_elem['alpha'])

    plt.grid(axis='y')
    plt.legend(loc='best')
    plt.show()


def memory_creator(instance, memory, length=4, interval=2, axis=2):
    """Push ``instance`` into ``memory`` and build a stacked window from it.

    ``memory`` is a caller-owned list that is mutated in place: the new
    instance is appended, and once the list holds ``length * interval``
    entries its oldest entry is dropped.

    The window is every ``interval``-th entry counted backwards from the
    newest one, so the newest instance comes first in the result.

    Args:
        instance: Array to add to the history.
        memory: List holding the previously seen instances.
        length: Number of instances a complete window must contain.
        interval: Step between the instances picked for the window.
        axis: Concatenation axis; use 2 for frames and 0 for telemetry rows.

    Returns:
        The window concatenated along ``axis``, or ``None`` while fewer than
        ``length`` instances can be picked.
    """
    memory.append(instance)

    # Newest entry first, then every `interval`-th older one
    window = memory[::-interval]

    if len(window) >= length:
        # `window` is an independent list, so trimming the history here does
        # not affect what gets concatenated below
        if len(memory) >= length * interval:
            memory.pop(0)
        return np.concatenate(window, axis=axis)

    return None


def read_stored_data(filename, numeric_columns, diff_columns):
    """Load the frames, telemetry and diff labels stored for one recording.

    Reads ``<filename>.csv`` twice through the notebook-level ``reader`` (once
    per column list) and ``<filename>_resized.avi`` for the video.

    Args:
        filename: Recording name without extension.
        numeric_columns: Columns to load as the numeric (telemetry) input.
        diff_columns: Columns to load as the diff labels.

    Returns:
        Tuple ``(frames, telemetry, diffs)``: the frames after they have gone
        through ``transformer.resize_and_normalize_video`` and the two column
        selections converted with ``.to_numpy()``.
    """
    csv_name = filename + '.csv'
    telemetry = reader.read_specific_telemetry_columns(csv_name, numeric_columns)
    diffs = reader.read_specific_telemetry_columns(csv_name, diff_columns)

    frames = reader.read_video(filename + '_resized.avi').astype(np.float32)
    # Previously this result was computed and then thrown away while the raw
    # frames were returned; hand back the transformed frames instead.
    # NOTE(review): presumably the transformer resizes and normalizes, as its
    # name says - confirm that consumers of the exported samples expect that.
    resized_frames = transformer.resize_and_normalize_video(frames)

    return resized_frames, telemetry.to_numpy(), diffs.to_numpy()
    
    
def create_memorized_dataset(frames, telemetry, diffs, length, interval):
    """Stack every sample with ``length - 1`` earlier ones taken ``interval`` apart.

    The first ``(length - 1) * interval`` samples have no complete history, so
    all three outputs are shorter than the inputs by that amount.

    Args:
        frames: Array indexed by sample on its first axis; each ``frames[i]``
            is concatenated along axis 2, and the output's last axis is
            ``length`` times the input's last axis.
        telemetry: 2-D array with one row per sample; rows are concatenated
            along axis 0.
        diffs: Labels, one entry per sample; only trimmed, never stacked.
        length: Number of samples combined into one output sample.
        interval: Step between the combined samples.

    Returns:
        Tuple ``(mem_frames, mem_telems, mem_diffs)`` of equal length.
    """
    # Number of leading samples that cannot form a full window
    warmup = (length - 1) * interval
    n_samples = frames.shape[0]

    frames_shape = (n_samples - warmup,) + tuple(frames.shape[1:-1]) + (frames.shape[-1] * length,)
    mem_frames = np.zeros(frames_shape)
    telems_shape = (telemetry.shape[0] - warmup, telemetry.shape[1] * length)
    mem_telems = np.zeros(telems_shape)

    # Rolling histories that memory_creator appends to and trims in place
    frame_history = []
    telem_history = []

    for idx in range(n_samples):
        stacked_frame = memory_creator(frames[idx], frame_history, length=length, interval=interval, axis=2)
        stacked_telem = memory_creator(telemetry[idx], telem_history, length=length, interval=interval, axis=0)

        # Both histories fill at the same pace, so checking the frame suffices
        if stacked_frame is None:
            continue

        mem_frames[idx - warmup] = stacked_frame
        mem_telems[idx - warmup] = stacked_telem

    # Labels are only shifted so that they line up with the stacked samples
    mem_diffs = diffs[warmup:]

    assert mem_frames.shape[0] == mem_telems.shape[0] == mem_diffs.shape[0], "Lengths differ!"
    return mem_frames, mem_telems, mem_diffs


def balanced_sampling(frames, numerics, diffs, zero_threshold=0.001, drop_fraction=0.9, rng=None):
    """Thin out the samples whose diff labels are (almost) zero.

    A sample counts as "near zero" when every value in its ``diffs`` row lies
    strictly between ``-zero_threshold`` and ``zero_threshold``. A random
    ``drop_fraction`` of those samples is removed from all three arrays; every
    other sample is kept, in its original order.

    Args:
        frames: Array with one entry per sample on axis 0.
        numerics: Array with one entry per sample on axis 0.
        diffs: Array with one entry per sample on axis 0; may be 1-D or have
            any number of label columns.
        zero_threshold: Half-width of the open interval treated as zero.
        drop_fraction: Share of the near-zero samples to remove, in [0, 1].
        rng: Optional object with a ``choice(a, size, replace=...)`` method
            (e.g. ``np.random.default_rng(seed)``) for reproducible sampling.
            When omitted, the global ``np.random`` state is used.

    Returns:
        Tuple ``(sampled_frames, sampled_numerics, sampled_diffs)``.

    Raises:
        ValueError: If ``drop_fraction`` is outside [0, 1].
    """
    if not 0.0 <= drop_fraction <= 1.0:
        raise ValueError('drop_fraction must be within [0, 1], got {}'.format(drop_fraction))

    near_zero = np.asarray((diffs > -zero_threshold) & (diffs < zero_threshold))
    if near_zero.ndim > 1:
        # Collapse the label columns to one flag per row. Taking np.where(...)[0]
        # on the 2-D mask would list a row once per matching column, which both
        # duplicates indices and drops rows where only one label is near zero.
        near_zero = near_zero.all(axis=tuple(range(1, near_zero.ndim)))
    zero_indexes = np.flatnonzero(near_zero)

    drop_count = int(zero_indexes.shape[0] * drop_fraction)
    choose = np.random.choice if rng is None else rng.choice
    sampled_zero_indexes = choose(zero_indexes, drop_count, replace=False)

    sampled_frames = np.delete(frames, sampled_zero_indexes, axis=0)
    sampled_numerics = np.delete(numerics, sampled_zero_indexes, axis=0)
    sampled_diffs = np.delete(diffs, sampled_zero_indexes, axis=0)

    return sampled_frames, sampled_numerics, sampled_diffs

In [None]:
# Recordings available for export and the (length, interval) pairs to try;
# the chosen pair is unpacked into create_memorized_dataset below.
filenames = ['lap_5_2020_01_24', 'lap_6_2020_01_24', 'lap_7_2020_01_24']
experiments = [(1, 1), (4, 1), (4, 3), (16, 1)]
experiment = experiments[1]

frames, numerics, diffs = read_stored_data(filenames[2], collector.steering_columns(), collector.diff_steering_columns())
mem_frames, mem_numerics, mem_diffs = create_memorized_dataset(frames, numerics, diffs, *experiment)

# Shapes before stacking ...
print(frames.shape)
print(numerics.shape)
print(diffs.shape)

# ... and after stacking
print(mem_frames.shape)
print(mem_numerics.shape)
print(mem_diffs.shape)

# One output folder per experiment, e.g. ../../training/n4_m1/
base_path = '../../training/'
folder = 'n{}_m{}/'.format(*experiment)
full_path = base_path + folder

# makedirs also creates a missing parent folder and does not fail when the
# target already exists, unlike the isdir() + mkdir() pair it replaces.
os.makedirs(full_path, exist_ok=True)

# Every sample is written as three files (frame, numeric, diff), so a third of
# the directory listing is the first free sample index. This assumes the
# folder contains nothing but previously exported samples.
existing_count = len(os.listdir(full_path)) // 3

video_filename = 'frame_n{}_m{}_{:07}.npy'
numeric_filename = 'numeric_n{}_m{}_{:07}.npy'
diff_filename = 'diff_n{}_m{}_{:07}.npy'

# Thin out the near-zero samples before anything is written to disk
mem_frames, mem_numerics, mem_diffs = balanced_sampling(mem_frames, mem_numerics, mem_diffs)
print(mem_frames.shape)
print(mem_numerics.shape)
print(mem_diffs.shape)

# Append the new samples after the ones already present in the folder
for i in range(0, mem_frames.shape[0]):
    np.save(full_path + video_filename.format(*experiment, i + existing_count), mem_frames[i])
    np.save(full_path + numeric_filename.format(*experiment, i + existing_count), mem_numerics[i])
    np.save(full_path + diff_filename.format(*experiment, i + existing_count), mem_diffs[i])

In [None]:
# Histogram of the steering diffs currently held in `mem_diffs`;
# bin edges are spaced 0.01 apart, starting at -0.3 and stopping before 0.3.
bins = np.arange(-0.3, 0.3, 0.01)
plot_elem_1 = dict(data=mem_diffs, label='steering diffs', alpha=0.5)

plot_stuff(title='steering', plot_elems=[plot_elem_1], bins=bins)