# Training of the model
## Extra lib instalation

In [35]:
!pip install speechpy
!pip install soundfile
!pip install tables



## Lib imports

In [1]:
import numpy as np
import pandas as pd
import soundfile as sf
import scipy.io.wavfile as wav
import speechpy
import json
import os
import math
import tensorflow.keras as k
import dask.dataframe as dd
from IPython.display import display, Markdown
from time import sleep
from pprint import pprint
from multiprocessing import Queue, Process, Pool

%config IPCompleter.greedy=True

For audio processing I will use `speechpy` as it is the fastest of the well known libraries:

![speed_comp](https://camo.githubusercontent.com/1465ddaba9f99df4ff86ef800ef4598f35c12698/68747470733a2f2f696d61676573322e696d67626f782e636f6d2f64652f31302f6f4e5a776b49694b5f6f2e706e67)
> Source: [sonopy](https://github.com/MycroftAI/sonopy)

## Processing function for dataset creation

In [16]:
def df_col2numpy(df, col_names):
    ser = df.apply(lambda row: np.array([row[col] for col in col_names]).flatten(), axis=1)
    arr = np.array(ser.values.tolist())
    return arr

def df_col2series(df, col_names):
    if len(col_names) == 1:
        ser = df[col_names[0]].map(lambda cell: np.array([cell]).flatten())
    ser = df.apply(lambda row: np.array([row[col] for col in col_names]).flatten(), axis=1)
    return ser

def file2mfcc(file_name, frame_length=0.20, frame_stride=0.1, recreate=False):
    """ recreate: whether to recreate existing .npy MFCC"""
    
    dir_name = file_name[:file_name.index('blocks')]
    file_wav, file_ogg = None, None
    
#     check for existing .wav or .npy cache
    for file in os.listdir(dir_name):
#         if file.endswith('.wav'):
#             file_wav = os.path.join(dir_name, file)
#         if file.endswith('.npy') and not recreate:
#             return np.load(os.path.join(dir_name, file))
        if file.endswith('.mfcc') and not recreate:
            return pd.read_hdf((os.path.join(dir_name, file)))
            
#     if none .wav found, create it
    for file in os.listdir(dir_name):
        if file.endswith('.ogg'):
            file_ogg = os.path.join(dir_name, file)
            if not file_wav:
                data, samplerate = sf.read(file_ogg)
                file_wav = f'{file_ogg[:-4]}.wav'
                sf.write(file_wav, data, samplerate)

    fs, signal = wav.read(file_wav)
    
#     Stereo to mono
    if signal.shape[1] == 2:
        signal = (signal[:, 0] + signal[:, 1]) / 2
    else:
        signal = signal[:, 0]

    # Pre-emphasize
    signal_preemphasized = speechpy.processing.preemphasis(signal, cof=0.98)

    # Extract MFCC features
    mfcc = speechpy.feature.mfcc(signal, sampling_frequency=fs, frame_length=frame_length, 
                                 frame_stride=frame_stride, num_filters=40, fft_length=512,
                                 low_frequency=0, high_frequency=None, num_cepstral=13)
    
#     Normalize
    mfcc_cmvn = speechpy.processing.cmvnw(mfcc,win_size=301,variance_normalization=True)
    
#     Cache results and clean .wav to save space
#     np.save(f'{file_wav[:-4]}.mfcc.npy', mfcc_cmvn)
    if file_ogg:
        os.remove(file_wav)

#     Recalculate the time differences
    index = np.arange(0, (len(mfcc) - 0.5) * frame_stride, frame_stride) + frame_length
    df = pd.DataFrame(data=mfcc_cmvn, index=index).apply(np.array, axis=1)
    df.to_hdf(f'{file_wav[:-4]}.mfcc', 'mfcc', mode='w', format='fixed')
    return df

def process_cell(cell, side):
    res = cell[:, :, side, :]
    
    mx = res.max()
    mx_index = np.unravel_index(res.argmax(), res.shape)
    pred = [None for _ in range(3)]
    
    for dim in range(3):
#         pred[dim] = np.zeros(res.shape[dim] + 1)
        pred[dim] = np.zeros(res.shape[dim])
        
        if mx < 0.5:
#             pred[dim][-1] = 1
            pass
        else:
            pred[dim][mx_index[dim]] = 1
    
    if mx < 0.5:
        res = cell[:, :, (side+1) % 2, :]
    
        mx = res.max()
        mx_index = np.unravel_index(res.argmax(), res.shape)
        for dim in range(3):
            pred[dim][mx_index[dim]] = 1
        
    return pred

def change_output(df: pd.DataFrame):
    left, right = df.apply(lambda cell: process_cell(cell, 0)), df.apply(lambda cell: process_cell(cell, 1))
    
    left = pd.DataFrame(left.to_list(), columns=[f'l_dim{x}' for x in range(3)], index=left.index)
    right = pd.DataFrame(right.to_list(), columns=[f'r_dim{x}' for x in range(3)], index=right.index)
    
    return left.join(right)
    
def process_file(file_path, recreate=False):
    """ Processing needed to be done per file """
    print(f'Processing {file_path}')
    try:
        df = pd.read_pickle(file_path)
    
        # all in one serialization (99.5 % with bad metric)
        #     df['output'] = df_col2series(df, ['output'])
        df = df.join(change_output(df['output']))
        df['shifted'] = df['output'].shift(1, fill_value=[np.zeros(df['output'].iloc[0].shape)])
        
        df['times'] = df_col2series(df, ['prev', 'next'])
        df['name'] = f'{file_path}'

        mfcc = file2mfcc(file_path, recreate=recreate)
        mfcc.name = 'mfcc'
        round_index = mfcc.index.values[1] - mfcc.index.values[0]
        df.index = np.floor(df['time'] / round_index).astype(int)
        mfcc.index = (mfcc.index / round_index).astype(int)

        df = df.join(mfcc)
        df.index = df['time']
        df = df.dropna()
    except Exception as e:
        print(f'Caught Error: {e}')
        return None

    return df

def process_df(df, X_cols, y_cols):
    """ Post processing on the whole DF """
    
    # Add shifter y (predictions)
    y_cols_shifted = [f'{x}_shifted' for x in y_cols]
    shifted = df[y_cols].groupby('name').shift(1)
    df[y_cols_shifted] = shifted
    df = df.dropna()
    
    return df
    
file = '../data/Army Of The Night/blocks/Expert.pkl'
file2mfcc(file, recreate=False)
process_file(file).iloc[0]

Processing ../data/Army Of The Night/blocks/Expert.pkl


output     [[[[0. 1. 0. 0. 0. 0. 0. 0. 0.], [0. 0. 0. 0. ...
time                                                 3.08211
prev                                                 3.08211
next                                               0.0821053
l_dim0                                       [1.0, 0.0, 0.0]
l_dim1                                  [1.0, 0.0, 0.0, 0.0]
l_dim2         [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
r_dim0                                       [1.0, 0.0, 0.0]
r_dim1                                  [1.0, 0.0, 0.0, 0.0]
r_dim2         [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
shifted    [[[[0. 0. 0. 0. 0. 0. 0. 0. 0.], [0. 0. 0. 0. ...
times              [3.0821052631578945, 0.08210526315789446]
name             ../data/Army Of The Night/blocks/Expert.pkl
mfcc       [0.36976948, -1.4347119, 0.909154, 0.62588304,...
Name: 3.0821052631578945, dtype: object

## Generate urls to blocks

In [3]:
def get_file_paths(path, hard_max):
    """ 
    Create a list of all pregenerated blocks files 
    Search in full subtree of :path:
    """
    
    file_paths = []
    counter = 0
    
    for root, dirs, files in os.walk(path, topdown=False):
        if counter > hard_max:
                break
        for name in files:
            if root[-6:] == 'blocks':
                print(f'#{counter:5} {root}/{name}')
                file_paths.append(os.path.join(root, name))
                
                counter += 1
                
    return file_paths

## Set which cols to use as X and Y

In [4]:
X_list = []
Y = []
HARD_MAX = 20000
path = '../data'

y_cols = [f'{side}_dim{i}' for side in 'rl' for i in range(3) ]
X_cols = ['times', 'mfcc']
# X_cols = y_cols

columns = ['name', 'time'] + X_cols + y_cols

## Create and save training data

In [None]:
multi_core = True
if multi_core:
    # Embarrassingly parallel problem, but RAM heavy
    pool = Pool(processes=None)
    X_list = pool.map(process_file, get_file_paths(path, HARD_MAX))
    pool.close()
    pool.join()
else:
    X_list = [process_file(x) for x in get_file_paths(path, HARD_MAX)]

X_list = [x for x in X_list if x is not None]
        
print(f'Passes {len(X_list):6}/{HARD_MAX:6} hard max')
X = pd.DataFrame(pd.concat(X_list), columns=columns)
X = X.set_index(['name', 'time'])
X = process_df(X, X_cols, y_cols)

In [25]:
X.to_pickle(os.path.join(path, 'X_saved.pkl'))

## Load training data

In [None]:
X = pd.read_pickle(os.path.join(path, 'X_saved.pkl'))
print(f'Loaded {len(X)} rows')

# Puff, please here upload it like above, but from the _internet_

In [None]:
# X = magic()

# Training approaches

## Generator training approach

### Advantages
- Every batch can have different length
    - No crop of songs in batches needed
    - Minimal padding
    - $\Rightarrow$ model learns from songs with the context of beginning and end
    
### Disadvantages
- Less convinient then standard `model.fit(X, y, **kwargs)`
- Big memory movement overhead
    - Would be even more significant if trained on a GPU
    

## Standard training approach
- Different lengths solved by generating snippets of songs of len $N$
    - Effectively creates more versitile dataset, since songs are "starting" at different places
    
### Advantages
- No padding and no crop
    - No crop of songs in batches needed
    - Minimal padding
    - $\Rightarrow$ model learns from songs with the context of beginning and end
- All "heavy altilery" from TF can be used
    
### Disadvantages
- No differentitation between beggining and end of the song
    - $\Rightarrow$ could be solved by adding procentage column which indicates in interval $(0, 1)$ where the beat lies in the song

# Generator training approach
## Helper functions for train_generator

In [6]:
precision = 2

def pp(row):
    print('*' * 69)
    print(row)
    
def round_up(num:float, prec: int) -> int:
    return int(math.ceil((10 ** -prec) * num) / (10 ** -prec))
    
def get_len_category(song, prec: int=precision):
    return int(round_up(len(song), 2))

def get_mask(X, prec: int=precision):
    mask = X.groupby('name').apply(get_len_category)
    return mask.to_dict()

def create_batch(group, ceil_len, verbose=True):
    if verbose:
        print(f'Creating batch of ceil_len {ceil_len:6} with {len(group)} rows')
    ceil_len = int(ceil_len)
    
    batch = []
    for name, song in group.groupby('name'):
        empty_row = song.head(1).squeeze().apply(np.zeros_like)
#         print(f'{ceil_len} | {len(song)} | {empty_row}')
        df_to_add = pd.DataFrame([empty_row] * (ceil_len - len(song)))
        batch.append(pd.concat([song, df_to_add]))

    return pd.concat(batch)

# LESON: Don't forget about the NaNs!


def list2numpy(batch, col_name, groupby=('name')):
    return np.array(batch.groupby(list(groupby))[col_name].apply(list).to_list())

## Show bucketing results

In [7]:
grouped = X.groupby(get_mask(X), level=0)

adjust = 6
stats = []
print(f'{"from":>{adjust}} ‒ {"to":>{adjust}}: {"# of songs":>{adjust*2}}')
      
for name, group in grouped:
    print(f'{name - 10 ** precision:{adjust}} ‒ {name:{adjust}}: {len(group.groupby("name").groups):{adjust*2}}')
    stats.append({'from': name - 10 ** precision, 'to': name, '# of songs': len(group.groupby("name").groups)})
          
# print in your favorite way
# pd.DataFrame(stats, columns=['from', 'to', '# of songs'])

  from ‒     to:   # of songs
     0 ‒    100:            7
   100 ‒    200:           27
   200 ‒    300:          105
   300 ‒    400:          245
   400 ‒    500:          310
   500 ‒    600:          261
   600 ‒    700:          187
   700 ‒    800:          125
   800 ‒    900:           73
   900 ‒   1000:           49
  1000 ‒   1100:           31
  1100 ‒   1200:           16
  1200 ‒   1300:            4
  1300 ‒   1400:            6
  1400 ‒   1500:            5
  1500 ‒   1600:            3
  1600 ‒   1700:            1
  1700 ‒   1800:            1
  2200 ‒   2300:            1


In [None]:
def train_generator(df, X_cols, y_cols, verbose=True):
    grouped = X.groupby(get_mask(df), level=0)
    
#     p = Pool(2)  # slowe because of memory
#     batches = p.starmap(create_batch, [(group, ceil_len) for ceil_len, group in grouped])
#     grouped = list(grouped)[:2]
    batches = [create_batch(group, ceil_len, verbose) for ceil_len, group in grouped]
    batches = [batch for batch in batches if len(batch.groupby('name').groups) > 8]
    
    while True:
        for batch in batches:
            yield [list2numpy(batch, col) for col in X_cols],\
                  [list2numpy(batch, col) for col in y_cols]

# test generated shapes
generator = train_generator(X, X_cols, y_cols)
for x, y in generator:
    print(f'x.shapes {[np.array(x_t).shape for x_t in x]}')
    print(f'y.shape {[np.array(y_t).shape for y_t in y]}\n')
    break

# Model

In [8]:
from tensorflow.keras.layers import Dense, LSTM, Flatten, Input, Activation, TimeDistributed, concatenate
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model

In [26]:
def get_model(X, X_cols, y_cols):
    demo_row = X.iloc[0]
    X_shapes = [demo_row[col].shape[0] for col in X_cols]
    y_shapes = [demo_row[col].shape[0] for col in y_cols]
    
#     in1 = Input(shape=(None, 216)) # last blocks
#     in2 = Input(shape=(None, 2))   # time difference of previous and next beat
    inputs = [Input(shape=(None, shape)) for shape in X_shapes]
    
    time_dist = [TimeDistributed(Dense(shape, activation='sigmoid'))(inputs[i]) for i, shape in enumerate(X_shapes)]
#     out = time_dist
#     x1 = TimeDistributed(Dense(50, activation='elu'))(in1)
#     x2 = TimeDistributed(Dense(3, activation='elu'))(in2)
    
    out = concatenate(time_dist, axis=-1)
    out = LSTM(64, return_sequences=True)(out)
    out = LSTM(64, return_sequences=True)(out)
#     out = LSTM(64, return_sequences=True)(out)
#     out = LSTM(128, return_sequences=True)(out)
#     out = TimeDistributed(Dense(216, activation='sigmoid'))(out)
    outputs = [TimeDistributed(Dense(shape, activation='softmax'), name=col)(out) for shape, col in zip(y_shapes, y_cols)]

    model = Model(inputs=inputs, outputs=outputs)
    
    model.compile(optimizer='rmsprop',
#                   loss='binary_crossentropy',
                  loss='categorical_crossentropy',
#                   loss_weights=[1,2,3,1,2,3],
                  metrics=['accuracy'])
    
    return model

## Get a new model and show it

In [None]:
model = get_model(X, X_cols, y_cols)
model.summary()

## Train model on generator

In [None]:
def important_metric(metric_name):
    return 'val' in metric_name and 'acc' in metric_name
        

def train_on_generator(X, X_cols, y_cols, verbose_level=1, model=None, epochs=300):
    if not model:
        model = get_model(X, X_cols, y_cols)
    
    acc_results = {}
    stats_len = 30

    generator = train_generator(X, X_cols, y_cols, verbose_level>=2)
    for i, (x, y) in enumerate(generator):
        if i > epochs:
            break
        res = model.fit(x, y, batch_size=128, validation_split=0.1, verbose=verbose_level>=3)
        
        if verbose_level >= 1:
            if i % stats_len == 0:
                total_acc = (np.array(list(acc_results.values())) / stats_len).mean()
                display(Markdown(f'### Batch {i:4} | {total_acc:4.4}'))
                pprint([f'{key:30}: {val/stats_len}' for key, val in acc_results.items()])

                acc_results = {key: val[0] for key, val in res.history.items() if important_metric(key)}
            else:
                for key in acc_results:
                    acc_results[key] += res.history[key][0]
    
    
    acc_results = {key: val[0] for key, val in res.history.items() if important_metric(key)}
    total_acc = (np.array(list(acc_results.values()))).mean()
    display(Markdown(f'### Last epoch {i:4} results | {total_acc:4.4}'))
    pprint(acc_results)
    
    return model
    
# Test train_on_generator
# train_on_generator(X, X_cols, y_cols, 2, None, 2)

In [None]:
# X.join(X.shift)
y_cols_shifted = [f'{x}_shifted' for x in y_cols]
shifted = X[y_cols].groupby('name').shift(1)
X[y_cols_shifted] = shifted
X = X.dropna()

In [None]:
# Sanity check, try if the model performs well on _identity_
train_on_generator(X, y_cols, y_cols, verbose_level=1, model=None, epochs=300)

model = train_on_generator(X, ['times', 'mfcc'] + y_cols_shifted, y_cols, verbose_level=2, model=model, epochs=490)

### Empirical findings
- If the `y_cols_shifted` input is not provided, model tends to learn and stay on most common value of each classification.
- Sanity check porforms quickly over 90 % acc

### Improvements to be tested

1. More normalization of the data
    - Force the model to catch the underlying principle and not "mean"
1. Flip L / R hand and horizontal mirorring and rotations
1. Flip vertically with rotation
1. If one hand not used, mirror the other hand instead of "0"
1. Instead of generator, create snippets of 100 beats
    - Easier GPU training
    - Train it on gColab

## Hand evaluation
- Is needed since good results can be caused by a wrongly chosen matric!

In [None]:
import copy

generator = train_generator(X, ['times', 'mfcc'] + y_cols_shifted, y_cols)
x, y = generator.__next__()
prediction = model.predict(x)

In [None]:
f, t = 0, 20
for dim, (p, y_t) in enumerate(zip(prediction, y)):    
    df = pd.DataFrame(p[0][f:t])
    df = df.eq(df.where(df != 0).max(1), axis=0).astype(int)
    df.index.name = y_cols[dim]
    df_y = pd.DataFrame(y_t[0][f:t]).astype(int)
    df = df.join(df_y, rsuffix='_true')
    display(df)

In [None]:
def eval_generator():
    while True:
        for x1, x2, y in zip(X['blocks'][300:], X['times'][300:], Y[300:]):
            yield [np.array([x1, ]), np.array([x2, ])], np.array([y, ])
            
model = get_model()
model.batch_size = 8            

model.fit_generator(train_generator(), steps_per_epoch=300, epochs=1, verbose=1, 
                    use_multiprocessing=False)
model.evaluate_generator(eval_generator(), steps=19, )

# Song snippets training

In [17]:
def generate_snippets(song_df, window=100, skip=50):
    stack = []
    ln = len(song_df)
    
    # Check if at least 1 window is possible
    if ln < window:
        return None
    
    # name information is contained in the grouping operation
    song_df = song_df.reset_index(level='name').drop(columns='name')

    for s in range(0, ln, skip):
        # Make sure the dataset contains ends of the songs
        if s + window > ln:
            stack.append(song_df.iloc[-window:])
        else:
            stack.append(song_df.iloc[s:s+window])
    
    df = pd.concat(stack, keys=list(range(0, len(song_df), skip)), names=['snippet', 'time'])
    return df
    
X = process_df(X, X_cols, y_cols)
X2 = X.iloc[:].groupby('name').apply(generate_snippets)


In [21]:
X_cols

['times',
 'mfcc',
 'r_dim0_shifted',
 'r_dim1_shifted',
 'r_dim2_shifted',
 'l_dim0_shifted',
 'l_dim1_shifted',
 'l_dim2_shifted']

In [22]:
def create_training_data(X, groupby=('name', 'snippet')):
    return [list2numpy(X, col, groupby) for col in X_cols],\
           [list2numpy(X, col, groupby) for col in y_cols]

x, y = create_training_data(X2)

In [34]:
X.to_pickle('X_gdrive.zip')
# X

Unnamed: 0_level_0,Unnamed: 1_level_0,times,mfcc,r_dim0,r_dim1,r_dim2,l_dim0,l_dim1,l_dim2,r_dim0_shifted,r_dim1_shifted,r_dim2_shifted,l_dim0_shifted,l_dim1_shifted,l_dim2_shifted
name,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
../data/1My Truth/blocks/Hard.pkl,3.349315,"[0.8219178082191778, 0.8219178082191778]","[0.39882427, 0.12761503, 1.154948, 1.1284789, ...","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,4.171233,"[0.8219178082191778, 0.2054794520547949]","[0.3719555, -1.0339408, -0.80485827, -0.730802...","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,4.376712,"[0.2054794520547949, 0.2054794520547949]","[0.5503798, 1.1068511, 0.5231095, -0.30168983,...","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,4.582192,"[0.2054794520547949, 0.2054794520547949]","[0.4554713, -0.27926528, -0.7364389, -1.554207...","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,4.787671,"[0.2054794520547949, 0.2054794520547949]","[0.47580615, -0.4372961, -1.2905254, -1.564763...","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,4.993151,"[0.2054794520547949, 0.8219178082191778]","[0.45567393, -0.9843975, -1.6274992, -1.968211...","[1.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 1.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,5.815068,"[0.8219178082191778, 0.8219178082191769]","[0.42838782, 0.4628631, 2.3629167, -0.43580657...","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 1.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,6.636986,"[0.8219178082191769, 0.2054794520547949]","[0.3499777, 0.52470785, 1.3818997, 0.3557627, ...","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,6.842466,"[0.2054794520547949, 0.2054794520547949]","[0.5071396, 0.41296625, 0.6892415, -1.0343406,...","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 1.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"
../data/1My Truth/blocks/Hard.pkl,7.047945,"[0.2054794520547949, 0.2054794520547949]","[0.4219901, 0.5208144, -0.5722012, -0.7572905,...","[1.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 0.0, 0.0, 1.0]","[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0]","[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]"


In [23]:
model = get_model(X, X_cols, y_cols)

# model.fit(x, y, batch_size=32, verbose=True)

In [27]:
res = model.fit(x, y, batch_size=128, epochs=10, validation_split=0.1, verbose=True)

Train on 15109 samples, validate on 1679 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [28]:
res = model.fit(x, y, batch_size=128, epochs=20, validation_split=0.1, verbose=True)

Train on 15109 samples, validate on 1679 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [29]:
res = model.fit(x, y, batch_size=128, epochs=60, validation_split=0.1, verbose=True)

Train on 15109 samples, validate on 1679 samples
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60


Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60


Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
