In [1]:
%matplotlib inline
import os
import sys
import gc
import numpy as np
import matplotlib.pyplot as plt

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from commons.configuration_manager import ConfigurationManager
from src.learning.training.collector import Collector
from src.learning.training.generator import GenFiles
from src.learning.training.training_file_reader import TrainingFileReader
from notebooks.notebook_commons import read_stored_data, create_memorized_dataset
from src.utilities.transformer import Transformer

In [2]:
def plot_stuff(title, plot_elems, bins=None, figsize=(18, 10), xlabel='Value', ylabel='Count'):
    """Draw overlaid histograms, one per entry of ``plot_elems``, on a single figure.

    Parameters
    ----------
    title : str
        Title shown above the axes.
    plot_elems : iterable of dict
        Each dict must provide the keys ``'data'`` (values to histogram),
        ``'label'`` (legend entry) and ``'alpha'`` (bar opacity).
    bins : optional
        Forwarded unchanged to ``hist``; ``None`` lets matplotlib pick its default.
    figsize : tuple
        Figure size forwarded to matplotlib.
    xlabel, ylabel : str
        Axis labels. A histogram puts the binned values on the x-axis and the
        number of samples per bin on the y-axis, so 'Count' belongs on y.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots(figsize=figsize)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    for plot_elem in plot_elems:
        ax.hist(plot_elem['data'], bins=bins, label=plot_elem['label'], alpha=plot_elem['alpha'])

    ax.grid(axis='both')
    ax.legend(loc='best')
    plt.show()


def balanced_steering_sampling(frames, numerics, diffs, lower_bound=-0.001, upper_bound=0.001, p_drop=0.96, rng=None):
    """Randomly drop a fraction of the samples whose steering diff is (near) zero.

    The steering diff is read from column 1 of ``diffs``. Rows whose value lies
    in the closed interval ``[lower_bound, upper_bound]`` are candidates for
    removal; ``int(n_candidates * p_drop)`` of them are chosen without
    replacement and removed from all three arrays. Row order of the survivors
    is preserved.

    Parameters
    ----------
    frames, numerics, diffs : array-like
        Arrays aligned on their first axis (one row per sample). ``diffs``
        must be 2-D with at least two columns.
    lower_bound, upper_bound : float
        Inclusive range of steering diffs treated as "no steering change".
    p_drop : float
        Fraction of the near-zero rows to drop; must be within [0, 1].
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When ``None`` the global ``np.random`` state is
        used, so ``np.random.seed`` still controls the outcome.

    Returns
    -------
    tuple of numpy.ndarray
        ``(sampled_frames, sampled_numerics, sampled_diffs)``.

    Raises
    ------
    ValueError
        If ``p_drop`` is outside [0, 1].
    """
    if not 0.0 <= p_drop <= 1.0:
        raise ValueError('p_drop must be within [0, 1], got {}'.format(p_drop))

    frames = np.asarray(frames)
    numerics = np.asarray(numerics)
    diffs = np.asarray(diffs)

    # steering column
    steering_diffs = diffs[:, 1]
    # Range test instead of an exact float comparison with 0.0, so the bounds
    # given by the caller are actually honoured.
    near_zero_indexes = np.where((steering_diffs >= lower_bound) & (steering_diffs <= upper_bound))[0]

    drop_count = int(near_zero_indexes.shape[0] * p_drop)
    keep_mask = np.ones(diffs.shape[0], dtype=bool)
    if drop_count > 0:
        # Both the np.random module and Generator/RandomState expose .choice.
        sampler = np.random if rng is None else rng
        dropped_indexes = sampler.choice(near_zero_indexes, drop_count, replace=False)
        keep_mask[dropped_indexes] = False

    sampled_frames = frames[keep_mask]
    sampled_numerics = numerics[keep_mask]
    sampled_diffs = diffs[keep_mask]

    return sampled_frames, sampled_numerics, sampled_diffs

In [3]:
config_manager = ConfigurationManager()
config = config_manager.config

reader = TrainingFileReader(path_to_training='../../training/laps/')
transformer = Transformer(config)
collector = Collector()

# All recorded laps; only the last one is processed in this run.
all_filenames = ['lap_5_2020_01_24', 'lap_6_2020_01_24', 'lap_7_2020_01_24']
filenames = all_filenames[-1:]
experiments = [(1, 1), (4, 1), (4, 3), (8, 1), (16, 1)]

# The selected pair is unpacked into create_memorized_dataset and names the output folder.
memory = experiments[3]

# Set to True to write the balanced samples to disk; False only builds the
# arrays in memory so the cells below can inspect them.
SAVE_TO_DISK = False
# Seed for the global numpy RNG so the random down-sampling is repeatable.
SAMPLING_SEED = 42

base_path = '../../training/'
memory_string = 'n{}_m{}'.format(*memory)
full_path = base_path + memory_string + '/'

# makedirs also creates missing parent directories and tolerates an existing target.
os.makedirs(full_path, exist_ok=True)

np.random.seed(SAMPLING_SEED)

# NB this is steering specific
for filename in filenames:
    frames, numerics, diffs = read_stored_data(reader, transformer, filename, collector.numeric_columns(), collector.diff_columns())
    mem_frames, mem_numerics, mem_diffs = create_memorized_dataset(frames, numerics, diffs, *memory)
    balanced_frames, balanced_numerics, balanced_diffs = balanced_steering_sampling(mem_frames, mem_numerics, mem_diffs, p_drop=0.95)
    print(balanced_frames.shape)
    print(balanced_numerics.shape)
    print(balanced_diffs.shape)

    if SAVE_TO_DISK:
        # Continue numbering after the files already present in the target folder.
        # NOTE(review): assumes GenFiles.frame_file produces names starting with 'frame_' - confirm.
        existing_count = len([fn for fn in os.listdir(full_path) if fn.startswith('frame_')])

        for i in range(balanced_frames.shape[0]):
            sample_index = i + existing_count
            np.save(full_path + GenFiles.frame_file.format(memory_string, sample_index), balanced_frames[i])
            np.save(full_path + GenFiles.steering_file.format(memory_string, sample_index), balanced_numerics[i])
            np.save(full_path + GenFiles.steering_diff_file.format(memory_string, sample_index), balanced_diffs[i])
    gc.collect()

(2713, 40, 60, 24)
(2713, 32)
(2713, 4)


In [None]:
# TODO create separate files and balancing for throttle and gear

In [17]:
# Bin the steering diffs (column 1) into 0.001-wide bins and count the samples per bin.
bins = np.arange(-0.6, 0.6, 0.001)
indices = np.digitize(balanced_diffs[:, 1], bins)
# `inverse` maps every sample to the position of its bin inside `unique`/`counts`.
unique, inverse, counts = np.unique(indices, return_inverse=True, return_counts=True)
count_dict = dict(zip(unique, counts))
max_count = np.max(counts)

# Per-sample factor: how many times (floored) the most populated bin outnumbers
# the sample's own bin. Stored with the same dtype as the diffs array.
some_turbo_counts = (max_count // counts[inverse]).astype(balanced_diffs.dtype)

print(balanced_diffs[1050:1100, 1])
print(some_turbo_counts[1050:1100])

[-0.02999878 -0.1199646  -0.02664185 -0.02996826 -0.01998901 -0.1187439
  0.          0.02212524  0.10992432  0.02667236  0.15991211  0.01998901
  0.01998901  0.05996704 -0.05664062 -0.01998901 -0.17990112 -0.01000977
 -0.00997925 -0.01000977 -0.01998901 -0.01000977  0.         -0.10098267
 -0.01104736 -0.01000977 -0.04666138 -0.04330444 -0.00997925 -0.04998779
 -0.0166626  -0.01998901 -0.00997925 -0.01000977 -0.04998779 -0.0166626
 -0.04663086 -0.00997925 -0.01998901 -0.01000977 -0.04330444 -0.01000977
 -0.02331543 -0.03665161 -0.01998901 -0.00665283 -0.00668335 -0.00997925
 -0.01000977 -0.00997925]
[  4. 103.   6.   4.   3. 207.   1.  69.  18.   5. 103.   4.   4.   9.
   9.   3.  69.   1.   2.   1.   3.   1.   1. 207. 103.   1.  10.   6.
   2.   7.   3.   3.   2.   1.   7.   3.  10.   2.   3.   1.   6.   1.
   5.   7.   3.   2.   2.   2.   1.   2.]


In [None]:
# Histogram the steering diffs (column 1) with 0.01-wide bins and report
# the occupancy of every non-empty bin together with the mean occupancy.
bins = np.arange(-0.6, 0.6, 0.01)
indices = np.digitize(balanced_diffs[:, 1], bins)
unique, counts = np.unique(indices, return_counts=True)

# bin index -> number of samples that fell into it
stuff = {bin_index: bin_count for bin_index, bin_count in zip(unique, counts)}

print(counts)
print(counts.mean())

In [None]:
# Histogram of the balanced steering diffs (column 1) using 0.01-wide bins.
# To compare with the unbalanced data, add an entry built from mem_diffs[:, 1]
# labelled 'pre' in front of the 'post' entry.
bins = np.arange(-0.6, 0.6, 0.01)
plot_elems = [
    {'data': balanced_diffs[:, 1], 'label': 'post', 'alpha': 0.5},
]

plot_stuff('steering', plot_elems, bins)