In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from itertools import product
import warnings

In [2]:
import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2

from modules.config import *
from modules.storage import get_results_df, get_demand_model_data, get_demand_orig_dest_model_data
from modules.neural_network import execute_stage, get_first_stage_hyperparameters,get_second_stage_hyperparameters, get_third_stage_hyperparameters

In [3]:
def get_model_data(h3_res, time_interval_length):
    """Load the demand train/test split for one resolution / interval length.

    Thin wrapper around ``get_demand_model_data`` whose signature matches the
    data-loading callable that is handed to ``execute_stage``.

    Args:
        h3_res: forwarded unchanged to ``get_demand_model_data``.
        time_interval_length: forwarded unchanged to ``get_demand_model_data``.

    Returns:
        A 2-tuple ``(train, test)`` exactly as produced by
        ``get_demand_model_data``.
    """
    train_split, test_split = get_demand_model_data(h3_res, time_interval_length)
    # For quicker experiments the training split can be sub-sampled here,
    # e.g. ``train_split.sample(frac=0.1)``; the full split is used by default.
    return train_split, test_split


In [4]:
# Stage 1: hand execute_stage the data loader, the stage-1 results path and the
# (uncalled) get_first_stage_hyperparameters function, for the tuning setting
# (TUNE_H3_RESOLUTION, TUNE_TIME_INTERVAL_LENGTH).
# The results at NN_FIRST_STAGE_DEMAND_RESULTS_PATH are read back in a later cell.
# NOTE(review): the logged "# already trained" lines suggest that finished
# configurations are skipped on re-run - confirm in modules.neural_network.
execute_stage(
    get_model_data,
    NN_FIRST_STAGE_DEMAND_RESULTS_PATH,
    get_first_stage_hyperparameters,
    TUNE_H3_RESOLUTION,
    TUNE_TIME_INTERVAL_LENGTH,
)

  0%|          | 0/7 [00:00<?, ?it/s]

[15:52:17] batch_size: 8 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:17] batch_size: 16 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:17] batch_size: 32 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:17] batch_size: 64 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:17] batch_size: 128 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:17] batch_size: 256 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:17] batch_size: 512 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained


In [5]:
# Read the stage-1 results back from disk.
results = get_results_df(NN_FIRST_STAGE_DEMAND_RESULTS_PATH)

# Restrict to the tuning setting, order by validation MSE (lowest first) and
# take the batch size of the top row.
best_batch_size = (
    results.loc[
        (results["time_interval_length"] == TUNE_TIME_INTERVAL_LENGTH)
        & (results["h3_res"] == TUNE_H3_RESOLUTION)
    ]
    .sort_values("val_mse")
    .loc[:, "batch_size"]
    .iloc[0]
)

# Range of batch sizes that stage 1 searched, printed next to the winner
# (presumably to spot a winner sitting on the edge of the grid).
first_stage_hyperparameters = get_first_stage_hyperparameters()
batch_sizes = [params["batch_size"] for params in first_stage_hyperparameters]
min_batch_size, max_batch_size = min(batch_sizes), max(batch_sizes)

print(f"best batch_size: **{best_batch_size}** - min: {min_batch_size} - max: {max_batch_size}")


best batch_size: **256** - min: 8 - max: 512


In [6]:
# Stage 2: same call shape as stage 1, but the configurations come from
# get_second_stage_hyperparameters, which is given the best stage-1 batch size.
# The lambda looks up best_batch_size when it is called, not when it is defined.
get_hyperparameters = lambda : get_second_stage_hyperparameters(best_batch_size)
execute_stage(
    get_model_data,
    NN_SECOND_STAGE_DEMAND_RESULTS_PATH,
    get_hyperparameters,
    TUNE_H3_RESOLUTION,
    TUNE_TIME_INTERVAL_LENGTH,
)

  0%|          | 0/18 [00:00<?, ?it/s]

[15:52:18] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 1 - activation: tanh - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 2 - activation: relu - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 2 - activation: tanh - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 3 - activation: relu - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 3 - activation: tanh - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 1 - n_layers: 1 - activation: tanh - dropout: -1 # already trained
[15:52:18] batch_size: 256 - nodes_per_feature: 1 - n_layers: 2 - ac

In [7]:
# Read the stage-2 results and pick the row with the lowest validation MSE
# among those recorded for the tuning setting.
results = get_results_df(NN_SECOND_STAGE_DEMAND_RESULTS_PATH)
best_model = (
    results.loc[
        (results["time_interval_length"] == TUNE_TIME_INTERVAL_LENGTH)
        & (results["h3_res"] == TUNE_H3_RESOLUTION)
    ]
    .sort_values("val_mse")
    .iloc[0]
)

# Copy the tuned hyperparameters of that row into a plain dict.
best_config = {
    name: best_model[name]
    for name in ("batch_size", "nodes_per_feature", "n_layers", "activation")
}

best_config


{'batch_size': 256,
 'nodes_per_feature': 1.5,
 'n_layers': 2,
 'activation': 'relu'}

In [8]:
# Stage 3: the configurations come from get_third_stage_hyperparameters, which
# receives the batch size, nodes_per_feature, n_layers and activation of the
# best stage-2 row (best_model). In the logged output only dropout varies.
# The lambda reads best_model when it is called, not when it is defined.
get_hyperparameters = lambda : get_third_stage_hyperparameters(
    best_batch_size=best_model["batch_size"],
    best_nodes_per_feature=best_model["nodes_per_feature"],
    best_n_layers=best_model["n_layers"],
    best_activation=best_model["activation"],
)
execute_stage(
    get_model_data,
    NN_THIRD_STAGE_DEMAND_RESULTS_PATH,
    get_hyperparameters,
    TUNE_H3_RESOLUTION,
    TUNE_TIME_INTERVAL_LENGTH,
)


  0%|          | 0/5 [00:00<?, ?it/s]

[15:52:19] batch_size: 256 - nodes_per_feature: 1.5 - n_layers: 2 - activation: relu - dropout: 0 # already trained
[15:52:19] batch_size: 256 - nodes_per_feature: 1.5 - n_layers: 2 - activation: relu - dropout: 0.05 # already trained
[15:52:19] batch_size: 256 - nodes_per_feature: 1.5 - n_layers: 2 - activation: relu - dropout: 0.1 # already trained
[15:52:19] batch_size: 256 - nodes_per_feature: 1.5 - n_layers: 2 - activation: relu - dropout: 0.2 # already trained
[15:52:19] batch_size: 256 - nodes_per_feature: 1.5 - n_layers: 2 - activation: relu - dropout: 0.5 # already trained


In [9]:
# Read the stage-3 results and take the dropout value of the row with the
# lowest validation MSE for the tuning setting.
results = get_results_df(NN_THIRD_STAGE_DEMAND_RESULTS_PATH)
best_dropout = (
    results.loc[
        (results["time_interval_length"] == TUNE_TIME_INTERVAL_LENGTH)
        & (results["h3_res"] == TUNE_H3_RESOLUTION)
    ]
    .sort_values("val_mse")
    .loc[:, "dropout"]
    .iloc[0]
)

# Extend the stage-2 configuration with the tuned dropout (builds a new dict).
best_config = dict(best_config, dropout=best_dropout)
best_config
		

{'batch_size': 256,
 'nodes_per_feature': 1.5,
 'n_layers': 2,
 'activation': 'relu',
 'dropout': 0.1}

In [10]:
def reduce_model_data_func(get_model_data: callable, frac: float, random_state=None):
    """Wrap a data loader so that it returns a random sub-sample of the data.

    The loader used in this notebook returns a ``(train, test)`` tuple, which
    has no ``.sample`` method. For a tuple (or list) result only the first
    element (the training split) is sub-sampled and the remaining elements are
    passed through untouched. A loader that returns a single frame is still
    supported: the frame itself is sub-sampled.

    Args:
        get_model_data: callable taking ``(h3_res, time_interval_length)``.
        frac: fraction of rows to keep, forwarded to ``DataFrame.sample``.
        random_state: optional seed forwarded to ``DataFrame.sample`` to make
            the sub-sample reproducible; ``None`` keeps pandas' default.

    Returns:
        A callable with the same ``(h3_res, time_interval_length)`` signature
        as ``get_model_data``.
    """

    def new_get_model_data(h3_res, time_interval_length):
        model_data = get_model_data(h3_res, time_interval_length)
        if isinstance(model_data, (tuple, list)):
            # (train, test, ...): shrink the training split only.
            train_split, *remaining = model_data
            reduced_train = train_split.sample(frac=frac, random_state=random_state)
            return (reduced_train, *remaining)
        return model_data.sample(frac=frac, random_state=random_state)

    return new_get_model_data

In [11]:
# Preview the (h3_res, time_interval_length) combinations the fourth-stage loop
# iterates over (that loop additionally appends ADDITIONAL_PREDICTIVE_RESOLUTIONS).
list(product(PREDICTIVE_H3_RESOLUTIONS, CALC_TIME_INTERVAL_LENGTHS))

[(7, 1), (7, 2), (7, 6), (7, 24), (8, 1), (8, 2), (8, 6), (8, 24)]

In [12]:
# Stage 4: run the tuned configuration in test phase for every
# (h3_res, time_interval_length) combination. When execute_stage raises, the
# batch size is halved and the same combination is retried.
for h3_res, time_interval_length in tqdm(
    list(product(PREDICTIVE_H3_RESOLUTIONS, CALC_TIME_INTERVAL_LENGTHS))
    + ADDITIONAL_PREDICTIVE_RESOLUTIONS
):
    tqdm.write(f"executing h3_res: {h3_res}, time_interval_length: {time_interval_length}")
    # Work on a copy: a batch-size reduction for one combination must not leak
    # into best_config and thereby into every later combination / cell.
    current_best_config = dict(best_config)
    while current_best_config["batch_size"] > 1:
        try:
            execute_stage(
                get_model_data,
                NN_FOURTH_STAGE_DEMAND_RESULTS_PATH,
                lambda: [current_best_config],
                h3_res,
                time_interval_length,
                test_phase=True,
                silent=True,
            )
            # Success: leave the retry loop (a `continue` here would re-run the
            # same combination forever).
            break
        except Exception as error:
            # `Exception` rather than a bare except, so that interrupting the
            # kernel (KeyboardInterrupt) still stops the cell.
            tqdm.write(
                f"Failed to execute stage for h3_res: {h3_res}, time_interval_length: {time_interval_length}"
                f" - error: {error!r}"
            )
            # Integer halving keeps the batch size a whole number.
            current_best_config["batch_size"] = int(current_best_config["batch_size"]) // 2
            tqdm.write(f"reducing batch_size to: {current_best_config['batch_size']}")
    else:
        # The while condition became false without a successful run.
        tqdm.write(
            f"giving up on h3_res: {h3_res}, time_interval_length: {time_interval_length}"
        )

  0%|          | 0/9 [00:00<?, ?it/s]

executing h3_res: 7, time_interval_length: 1


## Origin-destination demand

Now run the fourth stage on the origin-destination demand data.

In [None]:
# Fourth stage once more, this time for the origin-destination data set, using
# the single configuration in best_config and the same results path as above.
execute_stage(
	# The loader accepts two arguments, ignores both, and always returns
	# get_demand_orig_dest_model_data().
	lambda _, __: get_demand_orig_dest_model_data(),
	NN_FOURTH_STAGE_DEMAND_RESULTS_PATH,
	lambda : [best_config],
	# Writes the resolution twice into a string and parses it as one integer
	# (a resolution of 7 would give 77).
	# NOTE(review): presumably a pseudo h3_res key that marks origin and
	# destination resolution in the shared results file - confirm.
	int(f"{ORIGIN_DESTINATION_H3_RESOLUTION}{ORIGIN_DESTINATION_H3_RESOLUTION}"),
	ORIGIN_DESTINATION_TIME_INTERVAL_LENGTH,
	test_phase=True,
	silent=True,
)

In [None]:
# Read the fourth-stage (test phase) results back and display them.
# NOTE(review): the table saved with this notebook lists n_layers 3 and
# dropout 0.05, while best_config shown earlier is n_layers 2 and dropout 0.1 -
# the results file may stem from an older run; confirm before relying on it.
results = get_results_df(NN_FOURTH_STAGE_DEMAND_RESULTS_PATH)
results

Unnamed: 0,h3_res,time_interval_length,batch_size,nodes_per_feature,n_layers,activation,dropout,train_duration,test_mse,test_rmse,test_mae,test_non_zero_mape,test_zero_accuracy
0,7,1,256,1.5,3,relu,0.05,38.772582,43.330603,6.582599,2.81154,0.440948,0.870796
1,7,2,256,1.5,3,relu,0.05,13.391915,132.560881,11.513509,5.070047,0.582317,0.850733
2,7,6,256,1.5,3,relu,0.05,10.626538,52.098566,7.217934,3.240938,0.411989,0.847098
3,7,24,256,1.5,3,relu,0.05,4.414453,82.041731,9.057689,4.436109,0.345948,0.404363
4,8,1,256,1.5,3,relu,0.05,188.396887,4.710293,2.170321,0.954229,0.533906,0.861909
5,8,2,256,1.5,3,relu,0.05,56.297401,10.275211,3.205497,1.532907,0.652608,0.753162
6,8,6,256,1.5,3,relu,0.05,13.658923,6.079685,2.465702,1.120937,0.532901,0.854849
7,8,24,256,1.5,3,relu,0.05,3.364491,34.048421,5.835102,2.393364,1.0,0.664017
8,9,24,256,1.5,3,relu,0.05,10.969515,3.39367,1.842192,0.731061,1.0,0.726059


In [None]:
def get_demand_stats(row):
    """Describe the ``outcome`` column of the data behind one results row.

    Loads the splits for the row's ``h3_res`` / ``time_interval_length`` via
    ``get_model_data``, stacks them into one frame and summarises ``outcome``.

    Args:
        row: record providing ``"h3_res"`` and ``"time_interval_length"``.

    Returns:
        ``pd.Series`` with the entries ``median``, ``mean``, ``std``,
        ``maximum`` and ``perc_0`` (share of rows whose outcome equals 0).
    """
    combined = pd.concat(
        get_model_data(row["h3_res"], row["time_interval_length"])
    )
    demand = combined["outcome"]
    # Number of exact zeros relative to all rows of the stacked frame.
    zero_share = (demand == 0).sum() / len(combined)

    return pd.Series(
        {
            "median": demand.median(),
            "mean": demand.mean(),
            "std": demand.std(),
            "maximum": demand.max(),
            "perc_0": zero_share,
        }
    )


# Compute one row of demand statistics per results row.
demand_stats = results.apply(get_demand_stats, axis=1)

# Drop statistic columns left over from an earlier run of this cell before
# appending the fresh ones, so that re-running the cell does not produce
# duplicate "median" / "mean" / ... columns. On the first run nothing is dropped.
results = pd.concat(
    [
        results.drop(columns=demand_stats.columns, errors="ignore"),
        demand_stats,
    ],
    axis=1,
)


In [None]:
# Fourth-stage results with the demand statistics columns appended.
results

Unnamed: 0,h3_res,time_interval_length,batch_size,nodes_per_feature,n_layers,activation,dropout,train_duration,test_mse,test_rmse,test_mae,test_non_zero_mape,test_zero_accuracy,median,mean,std,maximum,perc_0
0,7,1,256,1.5,3,relu,0.05,38.772582,43.330603,6.582599,2.81154,0.440948,0.870796,0.0,5.755109,13.763612,190.0,0.624848
1,7,2,256,1.5,3,relu,0.05,13.391915,132.560881,11.513509,5.070047,0.582317,0.850733,0.0,5.934785,14.219029,183.0,0.623708
2,7,6,256,1.5,3,relu,0.05,10.626538,52.098566,7.217934,3.240938,0.411989,0.847098,0.0,6.711749,16.032654,174.0,0.619004
3,7,24,256,1.5,3,relu,0.05,4.414453,82.041731,9.057689,4.436109,0.345948,0.404363,0.0,10.485084,22.668667,174.0,0.597615
4,8,1,256,1.5,3,relu,0.05,188.396887,4.710293,2.170321,0.954229,0.533906,0.861909,0.0,1.327515,3.423869,70.0,0.719383
5,8,2,256,1.5,3,relu,0.05,56.297401,10.275211,3.205497,1.532907,0.652608,0.753162,0.0,1.369063,3.520954,70.0,0.717059
6,8,6,256,1.5,3,relu,0.05,13.658923,6.079685,2.465702,1.120937,0.532901,0.854849,0.0,1.548785,3.911882,64.0,0.707291
7,8,24,256,1.5,3,relu,0.05,3.364491,34.048421,5.835102,2.393364,1.0,0.664017,0.0,2.421447,5.357067,54.0,0.663272
8,9,24,256,1.5,3,relu,0.05,10.969515,3.39367,1.842192,0.731061,1.0,0.726059,0.0,0.727274,1.682266,30.0,0.726547


: 