In [12]:
import os
import sys
# Make the project root (two levels above the parent of the current working
# directory) importable so the `analysis.*` imports below resolve.
# The parent hops are passed as separate os.pardir components rather than the
# literal '..\..': that literal contains the invalid escape sequence '\.' and
# is only understood as a two-level path on Windows.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), os.pardir, os.pardir)))

# import tensorflow as tf

from analysis.predictive.rnn_model.pipeline import load_data, transform_dataset, get_matrix_combination
from analysis.predictive.rnn_model.settings import TRAINING_LABEL, logger

# Central experiment settings, grouped by the phase that consumes them.
CONFIG = dict(
    # --- Dataset ---
    file_name='race_record_first_included',
    target_column=TRAINING_LABEL,

    # --- Construction phase ---
    max_length=20,
    n_steps=20,
    # One extra input when the label is 'y_run_time_1000'
    # (NOTE(review): the reason for the 215/214 split is not visible here - confirm).
    n_inputs=(215 if TRAINING_LABEL == 'y_run_time_1000' else 214),
    n_neurons=100,
    n_outputs=1,
    learning_rate=0.001,

    # --- Execution phase ---
    n_epochs=100,
    batch_size=150,
)

# DATA PREPARATION =============================================================================================

# Sequence-length setting, read once from CONFIG for later use
max_length = CONFIG['max_length']

# Load the three splits returned by load_data
logger.warning('Current training label is {}. Fetching training and testing set...'.format(TRAINING_LABEL))
train, test, validation = load_data(file_name=CONFIG['file_name'])

# Run the same transformation over the training split, then the testing split
# (the validation split is not transformed here)
logger.warning('Transforming training and testing set...')
train_transformed, test_transformed = (
    transform_dataset(split, target_column=CONFIG['target_column'])
    for split in (train, test)
)

[2018-04-01 21:32:19,940] Current training label is y_run_time_1000. Fetching training and testing set...
[2018-04-01 21:32:40,185] Stored datasets found...
[2018-04-01 21:32:41,780] Training: 2000-01-08 10:00:00 -> 2014-07-27 13:10:00
[2018-04-01 21:32:41,783] Testing: 2014-07-27 13:20:00 -> 2016-11-19 13:35:00
[2018-04-01 21:32:41,785] Validation: 2016-11-19 13:45:00 -> 2018-02-18 16:25:00
[2018-04-01 21:32:44,958] Transforming training and testing set...


In [15]:
# Get matrix transformation
logger.warning('Getting matrix representation of training and testing set...')


def _convert_to_matrices(transformed, max_length):
    """Replace, in place, every non-None entry of `transformed` with the
    result of `get_matrix_combination(entry, max_length=max_length)`.

    Entries that are None are left untouched.
    """
    for key, series in transformed.items():
        if series is not None:
            # Only existing keys are re-assigned, so the dict never changes
            # size while it is being iterated.
            transformed[key] = get_matrix_combination(series, max_length=max_length)


def _unpack_matrices(transformed):
    """Return `(X, y, mapped, seq_length)` from a converted dataset dict.

    `X` and `y` are the 'transformed' members of their entries, `seq_length`
    is the 'length' member of the 'X' entry, and `mapped` is None when the
    dataset has no 'mapped' entry value.
    """
    mapped = transformed['mapped']
    if mapped is not None:
        mapped = mapped['transformed']
    return (
        transformed['X']['transformed'],
        transformed['y']['transformed'],
        mapped,
        transformed['X']['length'],
    )


# NOTE: the conversion overwrites the entries of train_transformed and
# test_transformed, so this cell is not idempotent - re-running it would feed
# already-converted entries back into get_matrix_combination. Re-run the
# transform_dataset cell first if this cell needs to be executed again.
_convert_to_matrices(train_transformed, max_length)
_convert_to_matrices(test_transformed, max_length)

# Assign the variables
train_X, train_y, train_mapped, train_seq_length = _unpack_matrices(train_transformed)
test_X, test_y, test_mapped, test_seq_length = _unpack_matrices(test_transformed)

[2018-04-01 21:33:46,270] Getting matrix representation of training and testing set...


In [18]:
# Sanity check: display the dimensions of the training input array.
# The recorded output is (54067, 20, 215); the last two values equal
# CONFIG['max_length'] and CONFIG['n_inputs'] for the 'y_run_time_1000' label
# (presumably the axes are sequences x time steps x features - TODO confirm).
train_X.shape

(54067, 20, 215)