In [None]:
%matplotlib inline
import os
import sys
import gc
import numpy as np
import matplotlib.pyplot as plt

# Put the parent of the current working directory on the import path so that
# packages living one level up can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from commons.configuration_manager import ConfigurationManager
from src.learning.training.collector import Collector
from src.learning.training.generator import GenFiles
from src.learning.training.training_file_reader import TrainingFileReader
from notebooks.notebook_commons import read_stored_data, create_memorized_dataset
from src.utilities.transformer import Transformer

In [None]:
# Build the shared helper objects used by the cells below.
# The configuration object is only consumed here, by the Transformer.
config_manager = ConfigurationManager()
config = config_manager.config

# Reader is pointed at the directory holding the recorded laps.
reader = TrainingFileReader(path_to_training='../../training/laps/')
transformer = Transformer(config)
# The collector is queried for the steering / diff-steering column selections
# when stored data is read.
collector = Collector()


def plot_stuff(title, plot_elems, bins=None, figsize=(18, 10), xlabel=None):
    """Draw one or more overlaid histograms on a single new figure and show it.

    Parameters
    ----------
    title : str
        Figure title.
    plot_elems : iterable of dict
        One entry per histogram. Each dict must provide the keys ``'data'``
        (values to histogram), ``'label'`` (legend entry) and ``'alpha'``
        (opacity passed to ``plt.hist``).
    bins : optional
        Forwarded unchanged to ``plt.hist`` for every histogram.
    figsize : tuple
        Figure size forwarded to ``plt.figure``.
    xlabel : str, optional
        Label for the value axis. Left unlabeled when omitted.
    """
    plt.figure(figsize=figsize)
    plt.title(title)
    # In a (vertical) histogram the number of samples per bin is on the y axis;
    # the x axis carries the histogrammed values themselves.
    plt.ylabel('Count')
    if xlabel is not None:
        plt.xlabel(xlabel)

    for plot_elem in plot_elems:
        plt.hist(plot_elem['data'], bins=bins, label=plot_elem['label'], alpha=plot_elem['alpha'])

    plt.grid(axis='both')
    plt.legend(loc='best')
    plt.show()


def balanced_sampling_haltuura(frames, numerics, diffs, lower_bound=-0.001, upper_bound=0.001, p_drop=0.96, rng=None):
    """Randomly drop a fraction of the samples whose diff is exactly zero.

    Rows are removed consistently from ``frames``, ``numerics`` and ``diffs``
    (all indexed along axis 0), so the three outputs stay aligned.

    Parameters
    ----------
    frames, numerics, diffs : array-like
        Parallel arrays with one sample per row (axis 0).
    lower_bound, upper_bound : float
        Currently unused; kept so existing callers keep working. Only values
        equal to ``0.0`` are treated as "zero" samples.
    p_drop : float
        Fraction of the zero rows to remove; the number of removed rows is
        ``int(n_zero_rows * p_drop)``.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted the global ``np.random`` state is
        used, exactly as before.

    Returns
    -------
    tuple
        ``(sampled_frames, sampled_numerics, sampled_diffs)`` as new arrays.
    """
    # np.where reports one hit per zero *element*, so a multi-column `diffs`
    # would list the same row several times; de-duplicate to get candidate rows.
    # A row counts as "zero" as soon as any of its columns equals 0.0.
    zero_indexes = np.unique(np.where(np.asarray(diffs) == 0.0)[0])
    n_drop = int(zero_indexes.shape[0] * p_drop)

    if n_drop > 0:
        sampler = np.random if rng is None else rng
        sampled_zero_indexes = sampler.choice(zero_indexes, n_drop, replace=False)
    else:
        # Nothing to drop: avoid sampling from a possibly empty candidate set.
        sampled_zero_indexes = np.empty(0, dtype=np.intp)

    sampled_frames = np.delete(frames, sampled_zero_indexes, axis=0)
    sampled_numerics = np.delete(numerics, sampled_zero_indexes, axis=0)
    sampled_diffs = np.delete(diffs, sampled_zero_indexes, axis=0)

    return sampled_frames, sampled_numerics, sampled_diffs

In [None]:
# Laps to convert, and the candidate (n, m) settings. The selected pair is
# unpacked into create_memorized_dataset and also names the output directory.
filenames = ['lap_5_2020_01_24', 'lap_6_2020_01_24', 'lap_7_2020_01_24']
experiments = [(1, 1), (4, 1), (4, 3), (8, 1), (16, 1)]

memory = experiments[3]
select_filename = filenames[2]  # not used within this cell

base_path = '../../training/'
memory_string = 'n{}_m{}'.format(*memory)
full_path = base_path + memory_string + '/'

# makedirs(exist_ok=True) creates missing parent directories as well and does
# not fail when the directory already exists (no check-then-create race).
os.makedirs(full_path, exist_ok=True)

for filename in filenames:
    # Continue numbering after the samples already on disk. Three files are
    # written per sample below, hence the division; this assumes the directory
    # contains nothing but such triples.
    existing_count = len(os.listdir(full_path)) // 3

    frames, numerics, diffs = read_stored_data(reader, transformer, filename, collector.steering_columns(), collector.diff_steering_columns())
    mem_frames, mem_numerics, mem_diffs = create_memorized_dataset(frames, numerics, diffs, *memory)
    balanced_frames, balanced_numerics, balanced_diffs = balanced_sampling_haltuura(mem_frames, mem_numerics, mem_diffs, p_drop=0.95)
    print(balanced_frames.shape)
    print(balanced_numerics.shape)
    print(balanced_diffs.shape)

    # Store every sample as its own frame / numeric / diff file.
    for i in range(balanced_frames.shape[0]):
        sample_index = i + existing_count
        np.save(full_path + GenFiles.frame_file.format(memory_string, sample_index), balanced_frames[i])
        np.save(full_path + GenFiles.numeric_file.format(memory_string, sample_index), balanced_numerics[i])
        np.save(full_path + GenFiles.diff_file.format(memory_string, sample_index), balanced_diffs[i])
    # Release the per-lap arrays before loading the next lap.
    gc.collect()

In [None]:
# Count how many memorized diffs (from the last lap processed above, before
# balancing) fall into each 0.001-wide bin between -0.1 and 0.1.
bins = np.arange(-0.1, 0.1, 0.001)
# np.digitize returns, for every value, the index of the bin it belongs to;
# values outside the bin range map to index 0 or len(bins).
indices = np.digitize(mem_diffs, bins)
unique, counts = np.unique(indices, return_counts=True)

# Bin index -> sample count mapping; built for inspection, only `counts` is printed.
stuff = dict(zip(unique, counts))
print(counts)

In [None]:
# Histogram of the memorized steering diffs, using 0.01-wide bins over [-0.3, 0.3).
bins = np.arange(-0.3, 0.3, 0.01)
plot_elems = [
    #{'data': diffs, 'label': 'd_steering pre', 'alpha': 0.5},
    {'data': mem_diffs, 'label': 'd_steering post', 'alpha': 0.5},
]

plot_stuff('steering', plot_elems, bins)