<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Load-Data" data-toc-modified-id="Load-Data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Load Data</a></span></li><li><span><a href="#Functions" data-toc-modified-id="Functions-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Functions</a></span></li><li><span><a href="#Support-Vector-Machine" data-toc-modified-id="Support-Vector-Machine-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Support Vector Machine</a></span></li><li><span><a href="#Neural-Network" data-toc-modified-id="Neural-Network-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Neural Network</a></span></li></ul></div>

# Imports

In [1]:
import pandas as pd
import matplotlib as plt

from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import KFold
#import seaborn as sns

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

ImportError: DLL load failed while importing _imaging: The specified module could not be found.

# Load Data

In [None]:
# Temporal bin widths (in hours) for which demand is predicted
time_periods = [1, 2, 6, 24]
# Spatial resolutions (H3 column names) for which demand is predicted
resolution = ['h3_res_4', 'h3_res_6', 'h3_res_8']

# Nested lookup: prediction_data[<hours>][<resolution>] -> DataFrame read from the matching CSV
prediction_data = {
    periods: {
        res: pd.read_csv(f'../data/{periods}hours_{res}.csv', index_col=False)
        for res in resolution
    }
    for periods in time_periods
}

In [None]:
# Pick the data set with 1-hour bins at spatial resolution 'h3_res_4' and preview it
hourly_by_resolution = prediction_data.get(1)
df = hourly_by_resolution.get('h3_res_4')
df.head(4)

In [None]:
# One-hot encode the H3 cell id so it can be used as a numeric model input.
# The `sparse=False` keyword was renamed to `sparse_output` in scikit-learn 1.2 and removed
# in 1.4; densifying the default sparse result with .toarray() works on every version.
encoder = OneHotEncoder()
encoded_data = encoder.fit_transform(df[['h3_res_4']]).toarray()
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(['h3_res_4']),
    index=df.index,  # keep row alignment with df when concatenating column-wise
)
# Replace the raw 'h3_res_4' column by its one-hot columns
df = pd.concat([df, encoded_df], axis=1).drop('h3_res_4', axis=1)


# Functions

In [None]:
def k_fold_validation(k, X, Y, model, params=None):
    '''
    Method that trains and validates the model using k-fold validation
    param k:      Number of folds (iterations)
    param X:      Feature data (pandas DataFrame)
    param Y:      Target data (pandas Series)
    param model:  Model to be trained; objects with a callable `compile` attribute are
                  trained with train_nn, everything else with train_svm
    param params: Dictionary with the training parameters that is forwarded to the
                  training function (optional, defaults to no parameters)
    returns:      Nothing
    '''
    base_params = dict(params) if params else {}
    # choose the training routine once, based on the kind of model that was passed in
    is_neural_network = callable(getattr(model, "compile", None))
    trainer = train_nn if is_neural_network else train_svm

    # initialize the folds
    k_fold = KFold(n_splits=k, random_state=47, shuffle=True)
    # NOTE(review): the same model object is trained in every fold, so for models that keep
    # their state between fit calls the folds are not independent - confirm this is intended
    # iterate through all folds
    for train_index, val_index in k_fold.split(X, Y):
        # prepare data and get splits
        X_train, Y_train, X_val, Y_val = prepare_data(X, Y, train_index, val_index)
        # copy per fold so the caller's dictionary is never modified
        fold_params = dict(base_params)
        if is_neural_network and fold_params.get("validation_data") is None:
            # use the held-out fold for validation unless the caller supplied its own data
            fold_params["validation_data"] = (X_val, Y_val)
        # train & validate the model
        trainer(X_train=X_train, Y_train=Y_train, model=model, params=fold_params)
    
    
def prepare_data(X,Y,train_index, val_index):
    '''
    Method that prepares the data for training (splits and standardizes the data)
    param X: feature data to be prepared (pandas DataFrame)
    param Y: target data to be prepared (pandas Series)
    param train_index: positional indices of the rows used for training
    param val_index: positional indices of the rows used for validation
    returns X_train, Y_train, X_val, Y_val: prepared training & validation data
             (standardized; the targets are returned as column vectors of shape (n, 1))
    '''
    # separate scalers, because features and target have different statistics
    feature_scaler = StandardScaler()
    target_scaler = StandardScaler()

    # the scalers are fitted on the training split only; the validation split is transformed
    # with the training statistics so that no information leaks from validation into training
    X_train = feature_scaler.fit_transform(X.iloc[train_index])
    X_val = feature_scaler.transform(X.iloc[val_index])

    # .iloc: the fold indices are positions, not index labels
    Y_train = target_scaler.fit_transform(Y.iloc[train_index].values.reshape(-1,1))
    Y_val = target_scaler.transform(Y.iloc[val_index].values.reshape(-1,1))

    return X_train, Y_train, X_val, Y_val


def train_nn(X_train, Y_train, model, params):
    '''
    This method compiles and trains a neural network with the given data and parameters
    param X_train: Training data-set
    param Y_train: Target variable for training
    param model:   NN to be trained (must offer compile(**kwargs) and fit(**kwargs))
    param params:  Dictionary with the parameters to compile and fit the NN. Entries that
                   are missing or None are not passed on, so the model's own defaults apply.
                   The entries "x" and "y" are ignored; the training data always comes
                   from X_train / Y_train.
    returns:       Nothing
    '''
    params = params or {}

    compile_keys = (
        "optimizer", "loss", "loss_weights", "metrics", "weighted_metrics",
        "run_eagerly", "steps_per_execution", "jit_compile", "auto_scale_loss",
    )
    fit_keys = (
        "batch_size", "epochs", "verbose", "callbacks", "validation_split",
        "validation_data", "shuffle", "class_weight", "sample_weight",
        "initial_epoch", "steps_per_epoch", "validation_steps",
        "validation_batch_size", "validation_freq",
    )

    # forward only the values that were actually set: an explicit None would override the
    # defaults of compile()/fit() (e.g. epochs=None) instead of selecting them
    compile_kwargs = {key: params[key] for key in compile_keys if params.get(key) is not None}
    fit_kwargs = {key: params[key] for key in fit_keys if params.get(key) is not None}

    model.compile(**compile_kwargs)
    model.fit(x=X_train, y=Y_train, **fit_kwargs)
    return None


def train_svm(X_train, Y_train, model, params):
    '''
    This method trains a SVM with the given data and parameters
    param X_train: Training data-set
    param Y_train: Target variable for training
    param model:   SVM to be trained (must offer set_params(**kwargs) and fit(X, y))
    param params:  Dictionary with parameters that are set on the model before training
                   (None or an empty dictionary leaves the model untouched)
    returns:       Nothing
    '''
    # NOTE(review): params is interpreted as estimator hyper-parameters - confirm
    if params:
        model.set_params(**params)

    # a target given as a single column of shape (n, 1) is flattened to shape (n,)
    is_single_column = (
        getattr(Y_train, "ndim", 1) == 2
        and Y_train.shape[1] == 1
        and hasattr(Y_train, "ravel")
    )
    if is_single_column:
        Y_train = Y_train.ravel()

    model.fit(X_train, Y_train)
    return None

# Support Vector Machine

# Neural Network

In [None]:
# Show the column names of df; the feature list in the next cell refers to columns by name
df.columns

In [None]:
# Names of the df columns that are fed into the model
features = [
    'temperature',
    'precipitation',
    'hour_sin',
    'hour_cos',
    'weekday_sin',
    'weekday_cos',
    'lagged_1h',
    'lagged_1day',
    # one-hot columns of the 'h3_res_4' cell id
    'h3_res_4_8426641ffffffff',
    'h3_res_4_8426645ffffffff',
    'h3_res_4_842664dffffffff',
    'h3_res_4_8427593ffffffff',
]

In [None]:
# Fully connected network: three ReLU hidden layers (12, 8 and 4 units) followed by a
# single output unit without activation; the input width equals the number of features
nn_model = Sequential([
    Dense(12, input_shape=(len(features),), activation='relu'),
    Dense(8, activation='relu'),
    Dense(4, activation='relu'),
    Dense(1),
])

In [None]:
# Parameters for compiling and fitting the neural network.
# The keys mirror the keyword arguments read by train_nn.
params = {
    # --- compile ---
    "optimizer": "rmsprop",
    # a loss is required for training; mean squared error fits the single-output
    # regression network (the original None leaves the model without anything to optimise)
    "loss": "mean_squared_error",
    "loss_weights": None,
    "metrics": None,
    "weighted_metrics": None,
    "run_eagerly": False,
    "steps_per_execution": 1,
    "jit_compile": "auto",
    "auto_scale_loss": True,
    # --- fit ---
    "x": None,
    "y": None,
    "batch_size": None,
    "epochs": 1,
    "verbose": "auto",
    "callbacks": None,
    "validation_split": 0.0,
    "validation_data": None,
    "shuffle": True,
    "class_weight": None,
    "sample_weight": None,
    "initial_epoch": 0,
    "steps_per_epoch": None,
    "validation_steps": None,
    "validation_batch_size": None,
    "validation_freq": 1,
}

In [None]:
# 4-fold cross-validation of the neural network on the selected features;
# params carries the compile/fit settings defined above
k_fold_validation(4, df[features], df['number_of_trips'], nn_model, params)