In [1]:
# Put the parent of the current working directory on the import path so the
# project's other modules can be imported from this notebook.
# Technique from https://stackoverflow.com/a/35273613
import os
import sys
nb_dir = os.path.dirname(os.getcwd())
# Guard against adding the same entry again when the cell is re-run.
if nb_dir not in sys.path:
    sys.path.append(nb_dir)

# Data Preparation
-----

### Constants and Folder Paths

In [2]:
import os
# Relative path to the dataset folder: one directory up, then files/dataset.
dataset_folder_path = os.path.join(os.pardir, "files", "dataset")

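### Load Data and Split into *Test*, *Train/Valid*

Note: both split percentages are set to 0 below, so every sample stays in the training set; evaluation is done later with cross validation.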

In [3]:
from data.DataSet import DataSet
# Load everything found under `dataset_folder_path` into a fresh DataSet.
# Both split percentages are passed as 0, i.e. no test or validation share is
# requested at load time.
dataset = DataSet()
dataset.load(
    dataset_folder_path,
    test_set_percentage=0,
    validation_set_percentage=0,
)

In [4]:
# Show how many samples ended up in the train and test sets (one count per line).
print("%d\n%d" % (len(dataset.train_data), len(dataset.test_data)))

3600
0


### Data Preprocessing

In [5]:
# The star import supplies the preprocessing helpers used in this cell; a later
# cell also calls `spline_interpolate_and_resample`, which presumably comes from
# here as well, so the import is kept as-is.
from utils.preprocessing import *
from functools import partial
# Run the two normalisation steps on `dataset`, in this order.
for preprocessing_step in (apply_mean_centering, apply_unit_distance_normalization):
    dataset.apply(preprocessing_step)
# Expand the data using `reverse_digit_sequence`.
# NOTE: this cell changes `dataset` itself, so re-running it without reloading
# the data would apply the steps (and the expansion) a second time.
dataset.expand(reverse_digit_sequence)

In [6]:
# Re-check the train and test set sizes after preprocessing (one count per line).
print("%d\n%d" % (len(dataset.train_data), len(dataset.test_data)))

7200
0


----------
## Test different sampling rates with cross validation, then compare the results to determine the optimal sampling rate
----------

In [7]:
# Experiment configuration for the sample-count comparison below.

# Each value is passed as `num_samples` to the spline resampling step, one
# cross-validation run per value.
NUM_SAMPLES_TO_TRY = [10, 25, 50, 75, 100, 125, 150, 200, 250, 300]
# Number of folds handed to `cross_validate_model`.
N_FOLDS = 4
# Assigned to the model's `num_epochs` attribute before cross validation.
PARAM_NUM_EPOCHS = 30
# Assigned to the model's `batch_size` attribute before cross validation.
PARAM_BATCH_SIZE = 300

In [8]:
import numpy as np
import pandas as pd
from utils.evaluation import cross_validate_model
from models.regularized_deep_gru import NaiveRegularizedDeepGRU

# Cross-validation scores keyed by the number of samples they were obtained
# with; converted into a summary table once the sweep is finished.
results = {}

for num_samples in NUM_SAMPLES_TO_TRY:
    print("\n".join([
        "\n\n\n-------------------------------",
        "Evaluating Spline interpolation using %d samples" % num_samples,
        "-------------------------------",
    ]))
    # Resample a copy of the dataset with the current number of samples
    # (presumably so that `dataset` itself is left untouched for the next run).
    data = dataset.copy()
    data.apply(partial(spline_interpolate_and_resample, num_samples=num_samples))
    x, y = np.array(data.train_data), np.array(data.train_labels)
    # Build a new model for this sample count; it is given the shape of `x`
    # without its leading axis.
    mymodel = NaiveRegularizedDeepGRU(x.shape[1:])
    mymodel.batch_size = PARAM_BATCH_SIZE
    mymodel.num_epochs = PARAM_NUM_EPOCHS
    # Cross-validate and remember the scores for this sample count.
    scores = cross_validate_model(x, y, mymodel, N_FOLDS)
    results[num_samples] = scores

# One row per sample count; the first two entries of each score go into the
# "Categorical Accuracy" and "Std Deviation" columns.
results = pd.DataFrame(
    [[key, res[0], res[1]] for key, res in results.items()],
    columns=["Number of Samples", "Categorical Accuracy", "Std Deviation"],
)
results

Using TensorFlow backend.


Evaluating Spline interpolation using 10 samples
-------------------------------


Cross validation fold [1]
..........

Train on 5400 samples, validate on 1800 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
categorical_accuracy: 96.28%


Cross validation fold [2]
..........

Train on 5400 samples, validate on 1800 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
categorical_

KeyboardInterrupt: 