In [1]:
import pandas as pd
from datetime import datetime
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.linear_model import Lasso 

from src.paths import TEMPORARY_DATA, TRAINING_DATA
from src.hyperparameter_tuning import optimise_hyperparams

  from .autonotebook import tqdm as notebook_tqdm


## Importing and making some changes to the datasets

In [2]:
# Load the "starts" training table from parquet and order its rows by index.
start_table = (
    pd.read_parquet(TRAINING_DATA / "starts.parquet")
    .sort_index()
)

In [3]:
# Display the loaded starts table; the notebook renders a truncated preview
# (lagged `trips_previous_*_hour` columns, `start_hour`, `start_station_id`,
# and the `trips_next_hour` column that the tuning cells use as the target).
start_table

Unnamed: 0,trips_previous_672_hour,trips_previous_671_hour,trips_previous_670_hour,trips_previous_669_hour,trips_previous_668_hour,trips_previous_667_hour,trips_previous_666_hour,trips_previous_665_hour,trips_previous_664_hour,trips_previous_663_hour,...,trips_previous_7_hour,trips_previous_6_hour,trips_previous_5_hour,trips_previous_4_hour,trips_previous_3_hour,trips_previous_2_hour,trips_previous_1_hour,start_hour,start_station_id,trips_next_hour
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-10-30 01:00:00,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-10-31 01:00:00,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-11-01 01:00:00,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-11-02 01:00:00,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-11-03 01:00:00,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2293615,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-10-25 01:00:00,10667,0
2293616,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-10-26 01:00:00,10667,0
2293617,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-10-27 01:00:00,10667,0
2293618,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,2023-10-28 01:00:00,10667,0


## Tuning on Starts Data

#### Lasso

In [None]:
# Tune Lasso on the starts table: every column except `trips_next_hour` is
# passed as the feature matrix and `trips_next_hour` as the target.
# NOTE(review): `hyperparam_trials` is presumably the number of search trials
# and the return value the best hyperparameters found -- confirm against
# src.hyperparameter_tuning.optimise_hyperparams.
best_lasso_hyperparams_for_starts = optimise_hyperparams(
    model_fn=Lasso,
    hyperparam_trials=5,
    scenario="start",
    X=start_table.drop(columns="trips_next_hour"),
    y=start_table["trips_next_hour"],
)

#### LGBMRegressor

In [None]:
# Tune LGBMRegressor on the starts table: every column except
# `trips_next_hour` is passed as the feature matrix and `trips_next_hour`
# as the target.
# The result is stored under an `lgb` / `_for_starts` name so that it is not
# mistaken for the Lasso result and follows the `best_<model>_hyperparams_for_<scenario>s`
# naming pattern (previously it was assigned to `best_lasso_hyperparams_for_start`,
# a copy-paste slip).
# NOTE(review): `hyperparam_trials` is presumably the number of search trials --
# confirm against src.hyperparameter_tuning.optimise_hyperparams.
best_lgb_hyperparams_for_starts = optimise_hyperparams(
    model_fn=LGBMRegressor,
    hyperparam_trials=5,
    scenario="start",
    X=start_table.drop("trips_next_hour", axis=1),
    y=start_table["trips_next_hour"],
)

#### XGBRegressor

In [None]:
# Tune XGBRegressor on the starts table: every column except
# `trips_next_hour` is passed as the feature matrix and `trips_next_hour`
# as the target.
# The result must live under a `_for_starts` name: it was previously assigned
# to `best_xgb_hyperparams_for_stops`, which the stops-section XGBRegressor
# cell also assigns to, so the starts result was silently overwritten.
# NOTE(review): `hyperparam_trials` is presumably the number of search trials --
# confirm against src.hyperparameter_tuning.optimise_hyperparams.
best_xgb_hyperparams_for_starts = optimise_hyperparams(
    model_fn=XGBRegressor,
    hyperparam_trials=5,
    scenario="start",
    X=start_table.drop("trips_next_hour", axis=1),
    y=start_table["trips_next_hour"],
)

## Tuning on Stops Data

In [None]:
# Load the "stops" training table from parquet and order its rows by index.
stop_table = (
    pd.read_parquet(TRAINING_DATA / "stops.parquet")
    .sort_index()
)

#### Lasso

In [None]:
# Tune Lasso on the stops table: every column except `trips_next_hour` is
# passed as the feature matrix and `trips_next_hour` as the target.
# NOTE(review): `hyperparam_trials` is presumably the number of search trials
# and the return value the best hyperparameters found -- confirm against
# src.hyperparameter_tuning.optimise_hyperparams.
best_lasso_hyperparams_for_stops = optimise_hyperparams(
    model_fn=Lasso,
    hyperparam_trials=5,
    scenario="stop",
    X=stop_table.drop(columns="trips_next_hour"),
    y=stop_table["trips_next_hour"],
)

#### LGBMRegressor

In [None]:
# Tune LGBMRegressor on the stops table: every column except
# `trips_next_hour` is passed as the feature matrix and `trips_next_hour`
# as the target.
# NOTE(review): `hyperparam_trials` is presumably the number of search trials
# and the return value the best hyperparameters found -- confirm against
# src.hyperparameter_tuning.optimise_hyperparams.
best_lgb_hyperparams_for_stops = optimise_hyperparams(
    model_fn=LGBMRegressor,
    hyperparam_trials=5,
    scenario="stop",
    X=stop_table.drop(columns="trips_next_hour"),
    y=stop_table["trips_next_hour"],
)

#### XGBRegressor

In [None]:
# Tune XGBRegressor on the stops table: every column except
# `trips_next_hour` is passed as the feature matrix and `trips_next_hour`
# as the target.
# NOTE(review): `hyperparam_trials` is presumably the number of search trials
# and the return value the best hyperparameters found -- confirm against
# src.hyperparameter_tuning.optimise_hyperparams.
best_xgb_hyperparams_for_stops = optimise_hyperparams(
    model_fn=XGBRegressor,
    hyperparam_trials=5,
    scenario="stop",
    X=stop_table.drop(columns="trips_next_hour"),
    y=stop_table["trips_next_hour"],
)