<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Load-Data" data-toc-modified-id="Load-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Load Data</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Support-Vector-Machine" data-toc-modified-id="Support-Vector-Machine-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Support Vector Machine</a></span></li><li><span><a href="#Neural-Network" data-toc-modified-id="Neural-Network-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Neural Network</a></span></li></ul></div>

# Imports

In [34]:
import pandas as pd
import matplotlib as plt

from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import KFold
#import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load Data

In [13]:
# Temporal bin widths (in hours) and H3 spatial resolutions for which a
# pre-aggregated demand table exists on disk.
time_periods = [1, 2, 6, 24]
resolution = ['h3_res_4', 'h3_res_6', 'h3_res_8']

# Nested lookup: prediction_data[<hours>][<h3 resolution>] -> DataFrame read from
# ../data/<hours>hours_<resolution>.csv
prediction_data = {}
for periods in time_periods:
    res_data = {
        res: pd.read_csv(f'../data/{periods}hours_{res}.csv', index_col=False)
        for res in resolution
    }
    prediction_data[periods] = res_data

In [15]:
# Select the 1-hour / H3 resolution 4 table as the working frame for the cells below.
# (A stray trailing character after the lookup previously made this cell a SyntaxError.)
df = prediction_data[1]['h3_res_4']
df.head(4)

Unnamed: 0,h3_res_4,temperature,precipitation,number_of_trips,weekday,month,hour,hour_sin,hour_cos,weekday_sin,weekday_cos,lagged_1h,lagged_1day
0,8426645ffffffff,-20.555556,0.0,2,1.0,1.0,0.0,0.0,1.0,-2.449294e-16,1.0,,
1,842664dffffffff,-20.555556,0.0,2321,1.0,1.0,0.0,0.0,1.0,-2.449294e-16,1.0,,
2,8427593ffffffff,-20.555556,0.0,35,1.0,1.0,0.0,0.0,1.0,-2.449294e-16,1.0,,
3,8426645ffffffff,-18.333333,0.0,2,1.0,1.0,1.0,0.269797,0.962917,-2.449294e-16,1.0,2.0,


In [37]:
# One-hot encode the H3 cell id so the hexagon can be used as numeric model input.
# scikit-learn renamed OneHotEncoder's `sparse` keyword to `sparse_output` in 1.2 and
# removed the old name in 1.4, so try the new spelling first and fall back for older installs.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

encoded_data = encoder.fit_transform(df[['h3_res_4']])
# Carry over df's index so the column-wise concat below lines rows up one-to-one
# even if df does not have a default 0..n-1 index.
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(['h3_res_4']),
    index=df.index,
)
df = pd.concat([df, encoded_df], axis=1).drop('h3_res_4', axis=1)


# Functions

In [47]:
def k_fold_validation(k, X, Y, model):
    '''
    Iterate over a shuffled k-fold split of (X, Y) and prepare each fold.

    For every fold the train/validation arrays are built with `prepare_data`
    and a few diagnostics are printed: the training feature matrix, followed by
    the number of rows in X_train, Y_train and Y_val.

    Parameters
    ----------
    k : number of folds passed to KFold.
    X : feature table handed on to `prepare_data`.
    Y : target values handed on to `prepare_data`.
    model : accepted but not used yet; training and evaluation are still to be wired in.

    Returns
    -------
    None
    '''
    # Fixed seed so the shuffled fold assignment is the same on every run.
    splitter = KFold(n_splits=k, shuffle=True, random_state=47)

    for fold_train_idx, fold_val_idx in splitter.split(X, Y):
        X_train, Y_train, X_val, Y_val = prepare_data(X, Y, fold_train_idx, fold_val_idx)
        # TODO: train `model` on the training arrays and evaluate it on the validation arrays.
        print(X_train)
        for fold_part in (X_train, Y_train, Y_val):
            print(len(fold_part))
    
def prepare_data(X, Y, train_index, val_index):
    '''
    Build the standardized train/validation arrays for a single fold.

    The scaling statistics (mean and standard deviation) are learned from the
    training rows only and then re-used for the validation rows. Features and
    target each get their own scaler, so fitting one does not overwrite the
    statistics of the other.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; rows are selected by position.
    Y : pandas.Series
        Target values; rows are selected by position.
    train_index, val_index : array-like of int
        Positional row indices of the training and validation rows, e.g. as
        produced by KFold.split.

    Returns
    -------
    X_train, Y_train, X_val, Y_val : numpy.ndarray
        Standardized arrays. The Y arrays are column vectors of shape (n_rows, 1).
    '''
    feature_scaler = StandardScaler()
    target_scaler = StandardScaler()

    # Fit on the training fold only ...
    X_train = feature_scaler.fit_transform(X.iloc[train_index])
    # .iloc keeps the lookup positional, matching how X is indexed; plain Y[...]
    # would be label-based and break on a non-default index.
    Y_train = target_scaler.fit_transform(Y.iloc[train_index].values.reshape(-1, 1))

    # ... and only apply the learned statistics to the validation fold, so that
    # validation data is scaled consistently with what the model was trained on.
    X_val = feature_scaler.transform(X.iloc[val_index])
    Y_val = target_scaler.transform(Y.iloc[val_index].values.reshape(-1, 1))

    return X_train, Y_train, X_val, Y_val

def train_nn():
    '''Placeholder for the neural-network training step; does nothing and returns None.'''
    return

def train_svm():
    '''Placeholder for the support-vector-machine training step; does nothing and returns None.'''
    return

# Support Vector Machine

# Neural Network

In [38]:
# Display the column labels of the working frame to pick the model inputs from.
df.columns

Index(['temperature', 'precipitation', 'number_of_trips', 'weekday', 'month',
       'hour', 'hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos',
       'lagged_1h', 'lagged_1day', 'h3_res_4_8426641ffffffff',
       'h3_res_4_8426645ffffffff', 'h3_res_4_842664dffffffff',
       'h3_res_4_8427593ffffffff'],
      dtype='object')

In [42]:
# Columns fed to the models, in input order. The target `number_of_trips` and the
# raw `weekday`, `month` and `hour` columns are not part of the list.
features = [
    # weather
    'temperature',
    'precipitation',
    # sine/cosine encodings of hour and weekday
    'hour_sin',
    'hour_cos',
    'weekday_sin',
    'weekday_cos',
    # lagged columns
    'lagged_1h',
    'lagged_1day',
    # one-hot H3 resolution-4 cell columns
    'h3_res_4_8426641ffffffff',
    'h3_res_4_8426645ffffffff',
    'h3_res_4_842664dffffffff',
    'h3_res_4_8427593ffffffff',
]

In [39]:
# Fully connected network: three ReLU hidden layers (12 -> 8 -> 4 units) followed by a
# single output unit with no activation. The input width follows the feature list.
nn_model = Sequential([
    Dense(12, input_shape=(len(features),), activation='relu'),
    Dense(8, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1),
])

In [48]:
# 4-fold run on the selected feature columns with the trip count as target.
k_fold_validation(k=4, X=df[features], Y=df['number_of_trips'], model=nn_model)

[[-1.45002271  0.          0.00892757 ...  1.93649167 -0.76376262
  -0.73994007]
 [-1.45002271  0.          0.00892757 ... -0.51639778  1.30930734
  -0.73994007]
 [-1.45002271  0.          0.00892757 ... -0.51639778 -0.76376262
   1.3514608 ]
 ...
 [ 2.41645789  0.         -0.89989707 ...  1.93649167 -0.76376262
  -0.73994007]
 [ 2.41645789  0.         -0.89989707 ... -0.51639778  1.30930734
  -0.73994007]
 [ 2.58275813  0.         -1.16758416 ...  1.93649167 -0.76376262
  -0.73994007]]
342
342
114
[[-1.48040872  0.         -0.02151472 ... -0.51639778 -0.73994007
   1.34286213]
 [-1.22411509  0.          0.36876441 ... -0.51639778  1.3514608
  -0.74467809]
 [-1.30818781  0.          0.36876441 ... -0.51639778 -0.73994007
   1.34286213]
 ...
 [ 2.40930487  0.         -1.20331647 ... -0.51639778 -0.73994007
   1.34286213]
 [ 2.54061047  0.         -1.38455835 ... -0.51639778  1.3514608
  -0.74467809]
 [ 2.54032949  0.         -1.38455835 ... -0.51639778 -0.73994007
   1.34286213]]
342
34