In [None]:
%matplotlib inline
import os
import sys
import gc
import numpy as np
import matplotlib.pyplot as plt

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from commons.configuration_manager import ConfigurationManager
from src.learning.training.collector import Collector
from src.learning.training.generator import GenFiles
from src.learning.training.training_file_reader import TrainingFileReader
from notebooks.notebook_commons import read_stored_data, create_memorized_dataset
from src.utilities.transformer import Transformer

In [None]:
def plot_stuff(title, plot_elems, bins=None, figsize=(18, 10)):
    """Draw overlaid histograms of several data series on a single figure.

    Args:
        title: Title shown above the figure.
        plot_elems: Iterable of dicts, each providing the keys ``'data'``
            (values to histogram), ``'label'`` (legend entry) and ``'alpha'``
            (opacity of that series' bars).
        bins: Forwarded unchanged to ``plt.hist`` as its ``bins`` argument.
        figsize: Forwarded unchanged to ``plt.figure`` as the figure size.
    """
    plt.figure(figsize=figsize)
    plt.title(title)
    # In a histogram the counts run along the y-axis; the x-axis carries the
    # binned values themselves.
    plt.xlabel('Value')
    plt.ylabel('Count')

    for plot_elem in plot_elems:
        plt.hist(plot_elem['data'], bins=bins, label=plot_elem['label'], alpha=plot_elem['alpha'])

    plt.grid(axis='both')
    plt.legend(loc='best')
    plt.show()


def downsample_zeros(frames, numerics, diffs, data_column, bin_size=0.001):
    """Randomly drop exact-zero samples so the zero bin stops dominating.

    ``data_column`` is histogrammed into bins of width ``bin_size`` over
    ``[-1.0, 1.0)``. The bin that contains 0.0 is trimmed down to the size of
    the fullest *other* bin by deleting randomly chosen rows whose value is
    exactly 0.0. The same rows are removed from all three arrays.

    Args:
        frames: Array whose axis 0 is aligned with ``data_column``.
        numerics: Array whose axis 0 is aligned with ``data_column``.
        diffs: Array whose axis 0 is aligned with ``data_column``.
        data_column: 1-D array of values used to decide which rows to drop.
        bin_size: Width of the histogram bins.

    Returns:
        Tuple ``(sampled_frames, sampled_numerics, sampled_diffs)`` with the
        selected rows removed. The arrays are returned as unmodified copies
        when there is nothing to trim (no exact zeros, only one occupied bin,
        or the zero bin is not larger than the fullest other bin).

    Note:
        Rows are chosen with ``np.random.choice``, i.e. the global NumPy RNG.
    """
    data_column = np.asarray(data_column)
    bins = np.arange(-1.0, 1.0, bin_size)
    indices = np.digitize(data_column, bins)
    unique_bins, counts = np.unique(indices, return_counts=True)

    zero_indexes = np.where(data_column == 0.0)[0]

    # Identify the zero bin explicitly instead of assuming it is the fullest
    # one, and compare it against the fullest of the remaining bins.
    in_zero_bin = unique_bins == np.digitize(0.0, bins)
    other_counts = counts[~in_zero_bin]

    if zero_indexes.size == 0 or other_counts.size == 0:
        count_to_del = 0
    else:
        excess = int(counts[in_zero_bin][0]) - int(other_counts.max())
        # The zero bin may also hold non-zero values, so never ask for more
        # rows than there are exact zeros (sampling is without replacement).
        count_to_del = min(max(excess, 0), int(zero_indexes.size))

    if count_to_del > 0:
        zero_indexes_to_del = np.random.choice(zero_indexes, count_to_del, replace=False)
    else:
        zero_indexes_to_del = zero_indexes[:0]

    sampled_frames = np.delete(frames, zero_indexes_to_del, axis=0)
    sampled_numerics = np.delete(numerics, zero_indexes_to_del, axis=0)
    sampled_diffs = np.delete(diffs, zero_indexes_to_del, axis=0)

    return sampled_frames, sampled_numerics, sampled_diffs


def upsampling(data_column, bin_size=0.001):
    """Compute an integer upsampling multiplier for every sample.

    ``data_column`` is histogrammed into bins of width ``bin_size`` over
    ``[-1.0, 1.0)``. Each sample gets ``max_count // count`` where ``count``
    is the population of the sample's own bin and ``max_count`` that of the
    fullest bin, so samples from sparse bins receive larger multipliers.

    Args:
        data_column: Array of values to balance.
        bin_size: Width of the histogram bins.

    Returns:
        Integer array with the same shape as ``data_column``. An empty input
        yields an empty array.
    """
    data_column = np.asarray(data_column)
    if data_column.size == 0:
        # np.max over zero bins would raise; there is nothing to weight.
        return np.zeros(data_column.shape, dtype=int)

    # TODO this should be variable per column data type
    bins = np.arange(-1.0, 1.0, bin_size)
    indices = np.digitize(data_column, bins)

    # `inverse` maps every sample to the position of its bin in `counts`,
    # which replaces a per-sample dictionary lookup with one indexing op.
    _, inverse, counts = np.unique(indices, return_inverse=True, return_counts=True)
    per_sample_counts = counts[inverse].reshape(data_column.shape)

    return (np.max(counts) // per_sample_counts).astype(int)

def store_upsampling(new_sampling, path, filename):
    """Append ``new_sampling`` to the array stored at ``path + filename``.

    If the file already exists its contents are loaded and ``new_sampling``
    is concatenated after them along axis 0; otherwise ``new_sampling`` is
    stored as-is. The result is written back with ``np.save``.

    Args:
        new_sampling: Array of values to append.
        path: Directory prefix; it is concatenated with ``filename`` verbatim,
            so it must already end with a path separator.
        filename: Name of the ``.npy`` file. The extension is added when
            missing.
    """
    # np.save appends '.npy' when the name lacks it. Resolve the real on-disk
    # name up front so the existence check and the load look at the same file
    # that np.save writes; otherwise earlier data would be overwritten.
    target = path + filename
    if not target.endswith('.npy'):
        target += '.npy'

    if os.path.isfile(target):
        # NOTE(review): allow_pickle=True can execute arbitrary code when
        # loading an untrusted file; plain numeric arrays do not need it.
        stored_sampling = np.load(target, allow_pickle=True)
        full_sampling = np.concatenate((stored_sampling, new_sampling), axis=0)
    else:
        full_sampling = new_sampling

    np.save(target, full_sampling)

In [None]:
# --- Export configuration ---------------------------------------------------
config_manager = ConfigurationManager()
config = config_manager.config

# Root of the training data; raw laps are read from its 'laps/' subfolder and
# the generated per-sample files are written to a per-experiment subfolder.
base_path = '../../training/'

reader = TrainingFileReader(path_to_training=base_path + 'laps/')
transformer = Transformer(config)
collector = Collector()

# Recorded laps to export.
filenames = ['lap_3_2020_01_24', 'lap_4_2020_01_24', 'lap_5_2020_01_24', 'lap_6_2020_01_24', 'lap_7_2020_01_24']

# Candidate memory settings; each tuple is forwarded as two positional
# arguments to create_memorized_dataset and named 'n{}_m{}' on disk.
# NOTE(review): exact meaning of the two values is defined by
# create_memorized_dataset -- confirm there.
experiments = [(1, 1), (4, 1), (4, 4), (8, 1), (8, 4), (12, 1)]

# Setting exported by this run: (8, 4).
memory = experiments[4]

memory_string = 'n{}_m{}'.format(*memory)
path = base_path + memory_string + '/'

# NOTE(review): downsample_zeros draws from the global NumPy RNG; seed it
# before running the export if reproducible output is required.

# Creates missing parent directories as well and is a no-op when the target
# directory already exists.
os.makedirs(path, exist_ok=True)

# NB: this export is steering specific -- column 1 of the diffs array drives
# both the zero-downsampling and the stored upsampling multipliers.
for filename in filenames:
    # Samples already on disk determine the index offset, so every lap is
    # written after the files produced by earlier laps.
    existing_count = sum(1 for fn in os.listdir(path) if fn.startswith('frame_'))

    frames, numerics, diffs = read_stored_data(
        reader,
        transformer,
        filename,
        collector.numeric_columns(),
        collector.diff_columns(),
    )
    mem_frames, mem_numerics, mem_diffs = create_memorized_dataset(frames, numerics, diffs, *memory)

    mem_frames, mem_numerics, mem_diffs = downsample_zeros(
        mem_frames, mem_numerics, mem_diffs, mem_diffs[:, 1]
    )
    for mem_array in (mem_frames, mem_numerics, mem_diffs):
        print(mem_array.shape)

    # One file per sample and per array, numbered continuously across laps.
    for i in range(mem_frames.shape[0]):
        sample_index = i + existing_count
        np.save(path + GenFiles.frame.format(memory_string, sample_index), mem_frames[i])
        np.save(path + GenFiles.steer.format(memory_string, sample_index), mem_numerics[i])
        np.save(path + GenFiles.steer_diff.format(memory_string, sample_index), mem_diffs[i])

    # Multipliers are computed per lap and appended to one shared file.
    steer_diffs_upsampling = upsampling(mem_diffs[:, 1])
    store_upsampling(steer_diffs_upsampling, path, GenFiles.steer_sampling.format(memory_string))

    print(steer_diffs_upsampling.shape)
    gc.collect()

In [None]:
# `mem_diffs` holds the zero-downsampled diffs of the last lap handled by the
# export loop. The previously referenced `balanced_diffs` is not defined in
# any cell, so the plot failed on a fresh kernel.
plot_elems = [{'data': mem_diffs[:, 1], 'label': 'post', 'alpha': 0.5}]
bins = np.arange(-0.6, 0.6, 0.01)

plot_stuff('steering', plot_elems, bins)