# Import Library

In [127]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras import optimizers

# Preprocessing

In [132]:
df = pd.read_csv('dataset.csv')
df.head(3), df.tail(3)

(   Average travel time (min)        date  link
 0                  97.496377  2015-01-01  17:0
 1                 196.464871  2015-01-01   0:4
 2                 295.190476  2015-01-01   4:6,
      Average travel time (min)        date link
 327                 281.385621  2020-06-01  4:6
 328                 191.181373  2020-06-01  6:3
 329                 123.992327  2020-06-01  3:0)

In [35]:
df['link'].unique(), len(df['link'].unique())

(array(['17:0', '0:4', '4:6', '6:3', '3:0'], dtype=object), 5)

In [133]:
data = df.pivot(index = 'date',columns ='link', values =['Average travel time (min)'])
link_col = [data.columns[i][1] for i in range(len(data.columns))]
data.columns = link_col
data

Unnamed: 0_level_0,0:4,17:0,3:0,4:6,6:3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-01,196.464871,97.496377,120.291801,295.190476,191.139286
2015-02-01,196.354369,97.368735,120.431304,298.144330,193.148000
2015-03-01,199.736585,99.058394,121.882997,298.211268,193.826087
2015-04-01,199.399103,97.729399,120.936909,297.732394,193.895911
2015-05-01,198.408991,97.530474,120.757258,296.504587,193.298182
...,...,...,...,...,...
2020-02-01,200.912351,99.187831,120.417085,282.523810,187.873077
2020-03-01,201.477848,98.978261,120.000000,283.630000,187.591549
2020-04-01,190.777778,102.928571,124.982456,283.630000,176.478261
2020-05-01,197.241206,103.240000,124.469767,278.765625,197.888889


In [115]:
lags = 5
preds = 3
x = np.stack([np.roll(data, i, axis = 0) for i in range(lags, 0, -1)], axis = 1)[lags:-preds,:,:,np.newaxis,np.newaxis]
y = np.stack([np.roll(data, -i, axis = 0) for i in range(0, preds, 1)], axis = 1)[lags:-preds,:,:,np.newaxis,np.newaxis]
x.shape, y.shape

((57, 6, 5, 1, 1), (57, 3, 5, 1, 1))

# Modelling

In [154]:
input_timesteps = lags
output_timesteps = preds
num_links = len(df['link'].unique())

def build_model(input_timesteps, output_timesteps, num_links):
    model = Sequential()
    model.add(BatchNormalization(name = 'batch_norm_0', input_shape = (input_timesteps, num_links, 1, 1)))
    model.add(ConvLSTM2D(name ='conv_lstm_1',
                            filters = 64, kernel_size = (5, 1),                       
                            padding = 'same', 
                            return_sequences = True))

    model.add(Dropout(0.2, name = 'dropout_1'))
    model.add(BatchNormalization(name = 'batch_norm_1'))

    model.add(ConvLSTM2D(name ='conv_lstm_2',
                            filters = 64, kernel_size = (5, 1), 
                            padding='same',
                            return_sequences = False))

    model.add(Dropout(0.1, name = 'dropout_2'))
    model.add(BatchNormalization(name = 'batch_norm_2'))

    model.add(Flatten())
    model.add(RepeatVector(output_timesteps))
    model.add(Reshape((output_timesteps, num_links, 1, 64)))

    model.add(ConvLSTM2D(name ='conv_lstm_3',
                            filters = 64, kernel_size = (5, 1), 
                            padding='same',
                            return_sequences = True))

    model.add(Dropout(0.1, name = 'dropout_3'))
    model.add(BatchNormalization(name = 'batch_norm_3'))

    model.add(ConvLSTM2D(name ='conv_lstm_4',
                            filters = 64, kernel_size = (5, 1), 
                            padding='same',
                            return_sequences = True))

    model.add(TimeDistributed(Dense(units=1, name = 'dense_1', activation = 'relu')))
    #model.add(Dense(units=1, name = 'dense_2'))

    optimizer = optimizers.RMSprop() #lr=0.0001, rho=0.9, epsilon=1e-08, decay=0.9)
    model.compile(loss = "mse", optimizer = optimizer)
    return model
model = build_model(input_timesteps, output_timesteps, num_links)
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_norm_0 (BatchNormaliz  (None, 6, 5, 1, 1)       4         
 ation)                                                          
                                                                 
 conv_lstm_1 (ConvLSTM2D)    (None, 6, 5, 1, 64)       83456     
                                                                 
 dropout_1 (Dropout)         (None, 6, 5, 1, 64)       0         
                                                                 
 batch_norm_1 (BatchNormaliz  (None, 6, 5, 1, 64)      256       
 ation)                                                          
                                                                 
 conv_lstm_2 (ConvLSTM2D)    (None, 5, 1, 64)          164096    
                                                                 
 dropout_2 (Dropout)         (None, 5, 1, 64)        

## Train & Test Split

In [147]:
bootstrap_size = int(len(y)*0.75)
n_test = int(len(y)*0.15)
n_windows = 5
stop = min(bootstrap_size + n_windows * n_test, len(y))
bootstrap_size, stop, n_test

(42, 57, 8)

In [158]:
hist = []
windows = []
for i in range(bootstrap_size, stop, n_test):
    x_train = tf.constant(x[:i,])
    y_train = tf.constant(y[:i,])
    print("shape x_train: ", x_train.shape)
    x_test = x[i:i+n_test,]
    y_test = y[i:i+n_test,] 
    print("shape x_test: ", x_test.shape)

    model = build_model(input_timesteps, output_timesteps, num_links)
    history = model.fit(x_train, y_train,
                        batch_size = 64, epochs = 30,
                        shuffle = False, validation_data = (x_test, y_test))
    hist.append(history)
    # y_pred = model.predict(x_test).squeeze()
    # print(y_pred)

shape x_train:  (42, 6, 5, 1, 1)
shape x_test:  (8, 6, 5, 1, 1)
Epoch 1/30


2022-07-23 11:04:14.242994: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


InvalidArgumentError: Graph execution error:

Detected at node 'sequential_15/batch_norm_0/FusedBatchNormV3' defined at (most recent call last):
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 504, in dispatch_queue
      await self.process_one()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 493, in process_one
      await dispatch(*args)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell
      await result
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 724, in execute_request
      reply_content = await reply_content
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 390, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/tm/lmhqcfxj7sd9mp5ws42m_s3m0000gn/T/ipykernel_23459/3965623943.py", line 12, in <cell line: 3>
      history = model.fit(x_train, y_train,
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/sequential.py", line 374, in call
      return super(Sequential, self).call(inputs, training=training, mask=mask)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/functional.py", line 451, in call
      return self._run_internal_graph(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/layers/normalization/batch_normalization.py", line 767, in call
      outputs = self._fused_batch_norm(inputs, training=training)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/layers/normalization/batch_normalization.py", line 623, in _fused_batch_norm
      output, mean, variance = control_flow_util.smart_cond(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/control_flow_util.py", line 105, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/layers/normalization/batch_normalization.py", line 589, in _fused_batch_norm_training
      return tf.compat.v1.nn.fused_batch_norm(
Node: 'sequential_15/batch_norm_0/FusedBatchNormV3'
Detected at node 'sequential_15/batch_norm_0/FusedBatchNormV3' defined at (most recent call last):
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 504, in dispatch_queue
      await self.process_one()
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 493, in process_one
      await dispatch(*args)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell
      await result
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 724, in execute_request
      reply_content = await reply_content
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 390, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/var/folders/tm/lmhqcfxj7sd9mp5ws42m_s3m0000gn/T/ipykernel_23459/3965623943.py", line 12, in <cell line: 3>
      history = model.fit(x_train, y_train,
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/sequential.py", line 374, in call
      return super(Sequential, self).call(inputs, training=training, mask=mask)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/functional.py", line 451, in call
      return self._run_internal_graph(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/layers/normalization/batch_normalization.py", line 767, in call
      outputs = self._fused_batch_norm(inputs, training=training)
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/layers/normalization/batch_normalization.py", line 623, in _fused_batch_norm
      output, mean, variance = control_flow_util.smart_cond(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/utils/control_flow_util.py", line 105, in smart_cond
      return tf.__internal__.smart_cond.smart_cond(
    File "/opt/homebrew/Caskroom/miniforge/base/envs/tf_env/lib/python3.8/site-packages/keras/layers/normalization/batch_normalization.py", line 589, in _fused_batch_norm_training
      return tf.compat.v1.nn.fused_batch_norm(
Node: 'sequential_15/batch_norm_0/FusedBatchNormV3'
2 root error(s) found.
  (0) INVALID_ARGUMENT:  input must be 4-dimensional[42,6,5,1,1]
	 [[{{node sequential_15/batch_norm_0/FusedBatchNormV3}}]]
	 [[Func/gradient_tape/sequential_15/conv_lstm_2/while/sequential_15/conv_lstm_2/while_grad/body/_2193/input/_4391/_646]]
  (1) INVALID_ARGUMENT:  input must be 4-dimensional[42,6,5,1,1]
	 [[{{node sequential_15/batch_norm_0/FusedBatchNormV3}}]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_58268]

### Testing

In [50]:
means = { }
scales = { }
low = { }
upr = { }
smooth = 7
freq = '15min'
for k, v in df.groupby('link', sort = False):
    v_nonan = v.copy()
    median = np.median(v_nonan['Average travel time (min)'])        
    mad = 1.4826 * np.median(np.abs(v_nonan['Average travel time (min)'] - median))
    low[k] = max(median - 3 * mad, 0) # v['LinkTravelTime'].quantile(0.1)
    upr[k] = median + 3 * mad # v['LinkTravelTime'].quantile(0.9)

    ix_ref = pd.DatetimeIndex(pd.to_datetime(v['date']))
    v_ = v.set_index(ix_ref)

    mean = v_[(low[k] < v_['Average travel time (min)']) & (v_['Average travel time (min)'] < upr[k])]['Average travel time (min)'].resample(freq).mean()
    mean = mean.interpolate().rolling(window = smooth, center = True).mean()
    means[k] = mean 
    #scales[k] = v_[(low[k] < v_['LinkTravelTime']) & (v_['LinkTravelTime'] < upr[k])]['LinkTravelTime'].std()
    scales[k] = 1
means_df = pd.DataFrame(data = means).fillna(method='pad').fillna(method='bfill')
means_df

Unnamed: 0_level_0,17:0,0:4,4:6,6:3,3:0
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-01 00:00:00,97.496248,196.46476,295.193454,191.141311,120.291941
2015-01-01 00:15:00,97.496248,196.46476,295.193454,191.141311,120.291941
2015-01-01 00:30:00,97.496248,196.46476,295.193454,191.141311,120.291941
2015-01-01 00:45:00,97.496248,196.46476,295.193454,191.141311,120.291941
2015-01-01 01:00:00,97.496205,196.464723,295.194446,191.141986,120.291988
2015-01-01 01:15:00,97.496162,196.464686,295.195439,191.142661,120.292035
2015-01-01 01:30:00,97.496119,196.464648,295.196432,191.143336,120.292082
2015-01-01 01:45:00,97.496077,196.464611,295.197424,191.144011,120.292129
2015-01-01 02:00:00,97.496034,196.464574,295.198417,191.144685,120.292176
2015-01-01 02:15:00,97.495991,196.464537,295.199409,191.14536,120.292223


In [32]:
ts = pd.DataFrame(df['Average travel time (min)'])
lags = 20
preds = 3
ts

Unnamed: 0,Average travel time (min)
0,97.496377
1,196.464871
2,295.190476
3,191.139286
4,120.291801
...,...
325,103.027027
326,200.052910
327,281.385621
328,191.181373


In [24]:
x = np.stack([np.roll(ts, i, axis = 0) for i in range(lags, 0, -1)], axis = 1)[lags:-preds,:,:,np.newaxis,np.newaxis]


In [31]:
np.roll(ts, 1, axis = 0)

array([[123.99232737],
       [ 97.49637681],
       [196.46487119],
       [295.19047619],
       [191.13928571],
       [120.29180064],
       [ 97.36873508],
       [196.35436893],
       [298.1443299 ],
       [193.148     ],
       [120.43130435],
       [ 99.05839416],
       [199.73658537],
       [298.21126761],
       [193.82608696],
       [121.88299663],
       [ 97.72939866],
       [199.39910314],
       [297.73239437],
       [193.89591078],
       [120.93690852],
       [ 97.53047404],
       [198.40899123],
       [296.50458716],
       [193.29818182],
       [120.75725807],
       [ 97.3826087 ],
       [197.79437229],
       [298.27751196],
       [193.72284644],
       [120.36356467],
       [ 97.46613995],
       [197.43078512],
       [296.55454545],
       [191.66778523],
       [120.52881041],
       [ 97.48295455],
       [196.24012474],
       [294.69545455],
       [190.51623377],
       [120.56188605],
       [ 97.42358079],
       [197.90280778],
       [295

In [161]:
x_train = pd.read_csv('/Users/kasidej/Documents/bus_time/GSTA/NYC Data/X_train.csv')
x_train

Unnamed: 0,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,pickup_cluster,dropoff_cluster,center_latitude,center_longitude,pickup_geohash,dropoff_geohash,...,dropoff_pca0,dropoff_pca1,pickup_pca0,pickup_pca1,Public_Holiday,Weekend_day,Work_day,Peak_Hour,pickup_counts_on_clusterid,dropoff_counts_on_clusterid
0,-0.647005,-0.059677,-0.280492,-0.641036,25,52,-0.344698,-0.541596,dr5ru2,dr5rs2,...,-0.275801,-0.634223,-0.666390,-0.102839,-1.0,-1.0,-1.0,-1.0,-0.630189,-0.783550
1,-0.668234,-0.211759,-0.265152,-0.517766,19,90,-0.309171,-0.542434,dr5rs5,dr5ru2,...,-0.267607,-0.514509,-0.681939,-0.251295,-1.0,-1.0,-1.0,-1.0,-0.883019,0.056277
2,-0.603200,0.185911,-0.233324,-0.352958,63,17,-0.017582,-0.492759,dr72h8,dr72hd,...,-0.245378,-0.353754,-0.631728,0.137795,-1.0,-1.0,-1.0,-1.0,0.403774,-0.536797
3,-0.573351,0.067690,-0.078348,-0.687572,56,16,-0.329698,-0.386069,dr5rus,dr5rmv,...,-0.071768,-0.666553,-0.597321,0.026786,-1.0,-1.0,-1.0,-1.0,0.177358,-0.961039
4,-0.614111,-0.117622,-0.269592,-0.532266,31,1,-0.282383,-0.519577,dr5rsq,dr5rsr,...,-0.271197,-0.528754,-0.631206,-0.155519,-1.0,-1.0,-1.0,-1.0,-0.622642,-0.580087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44995,-0.684263,-0.104535,-0.242651,-0.479130,42,11,-0.235129,-0.536542,dr5rsp,dr5rud,...,-0.247416,-0.475893,-0.702067,-0.149565,-1.0,-1.0,-1.0,-1.0,-0.849057,1.000000
44996,-0.579951,0.017954,-0.143704,-0.299822,61,57,-0.043858,-0.428239,dr5rue,dr72jm,...,-0.159162,-0.297016,-0.602063,-0.021732,-1.0,-1.0,-1.0,-1.0,-0.373585,-0.969697
44997,-0.591859,0.164179,-0.218493,-0.458445,63,12,-0.109667,-0.478550,dr5rux,dr5ruu,...,-0.224541,-0.454469,-0.619530,0.117943,-1.0,-1.0,-1.0,-1.0,-0.513208,-0.766234
44998,-0.078220,-0.254113,0.212587,-0.485797,22,96,-0.301141,0.021187,dr5rxf,dr5xb6,...,0.206434,-0.454153,-0.088519,-0.236164,-1.0,-1.0,-1.0,-1.0,-1.000000,-1.000000


In [162]:
x_train.columns

Index(['pickup_longitude', 'pickup_latitude', 'dropoff_longitude',
       'dropoff_latitude', 'pickup_cluster', 'dropoff_cluster',
       'center_latitude', 'center_longitude', 'pickup_geohash',
       'dropoff_geohash', 'DayofMonth_sin', 'DayofMonth_cos', 'Hour_sin',
       'Hour_cos', 'dayofweek_sin', 'dayofweek_cos', 'tempm', 'dewptm', 'hum',
       'rain', 'snow', 'wdird', 'vism', 'fog', 'thunder', 'tornado',
       'conds_Clear', 'conds_Haze', 'conds_Heavy Rain', 'conds_Heavy Snow',
       'conds_Light Rain', 'conds_Light Snow', 'distance_haversine',
       'distance_dummy_manhattan', 'direction', 'avg_speed_KMperHour',
       'dropoff_pca0', 'dropoff_pca1', 'pickup_pca0', 'pickup_pca1',
       'Public_Holiday', 'Weekend_day', 'Work_day', 'Peak_Hour',
       'pickup_counts_on_clusterid', 'dropoff_counts_on_clusterid'],
      dtype='object')

In [160]:
pd.read_csv('/Users/kasidej/Documents/bus_time/GSTA/NYC Data/Y_train.csv')

Unnamed: 0,trip_duration
0,781
1,707
2,228
3,2180
4,354
...,...
44995,908
44996,1039
44997,440
44998,870
