In [1]:
from itertools import product
from typing import Callable

import pandas as pd
from tqdm.notebook import tqdm

In [2]:
import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2

from modules.config import *
from modules.storage import get_results_df, get_demand_model_data, get_demand_orig_dest_model_data
from modules.neural_network import execute_stage, get_first_stage_hyperparameters,get_second_stage_hyperparameters, get_third_stage_hyperparameters

In [3]:
def get_model_data(h3_res, time_interval_length):
    """Load the demand model data for one resolution / interval combination.

    Args:
        h3_res: H3 resolution forwarded to ``get_demand_model_data``.
        time_interval_length: Time interval length forwarded to
            ``get_demand_model_data``.

    Returns:
        A ``(train, test)`` tuple exactly as produced by
        ``get_demand_model_data``.
    """
    train_frame, test_frame = get_demand_model_data(h3_res, time_interval_length)
    return train_frame, test_frame


In [4]:
# Stage 1: run every configuration returned by get_first_stage_hyperparameters
# on the tuning resolution / interval and record the outcome under
# NN_FIRST_STAGE_DEMAND_RESULTS_PATH. Judging by the log below, only batch_size
# varies in this stage and configurations that are already recorded are
# reported as "already trained" (presumably skipped - confirm in execute_stage).
execute_stage(
    get_model_data,
    NN_FIRST_STAGE_DEMAND_RESULTS_PATH,
    get_first_stage_hyperparameters,
    TUNE_H3_RESOLUTION,
    TUNE_TIME_INTERVAL_LENGTH,
)

  0%|          | 0/5 [00:00<?, ?it/s]

[19:21:57] batch_size: 512 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[19:21:57] batch_size: 256 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[19:21:57] batch_size: 128 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[19:21:57] batch_size: 64 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[19:21:57] batch_size: 32 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained


In [5]:
results = get_results_df(NN_FIRST_STAGE_DEMAND_RESULTS_PATH)

# Keep only the runs for the tuning resolution / interval and take the batch
# size of the run with the lowest validation MSE.
is_tuning_run = (results["h3_res"] == TUNE_H3_RESOLUTION) & (
    results["time_interval_length"] == TUNE_TIME_INTERVAL_LENGTH
)
tuning_runs = results[is_tuning_run].sort_values(by="val_mse", ascending=True)
best_batch_size = tuning_runs["batch_size"].iloc[0]

# Range of batch sizes that was searched, printed so that a winner sitting on
# the edge of the search range is easy to spot.
first_stage_hyperparameters = get_first_stage_hyperparameters()
batch_sizes = [candidate["batch_size"] for candidate in first_stage_hyperparameters]
min_batch_size = min(batch_sizes)
max_batch_size = max(batch_sizes)

print(f"best batch_size: **{best_batch_size}** - min: {min_batch_size} - max: {max_batch_size}")


best batch_size: **256** - min: 32 - max: 512


In [6]:
def get_hyperparameters():
    """Return the second-stage configurations for the best first-stage batch size."""
    return get_second_stage_hyperparameters(best_batch_size)


# Stage 2: run the second-stage configurations on the tuning resolution /
# interval and record them under NN_SECOND_STAGE_DEMAND_RESULTS_PATH.
execute_stage(
    get_model_data,
    NN_SECOND_STAGE_DEMAND_RESULTS_PATH,
    get_hyperparameters,
    TUNE_H3_RESOLUTION,
    TUNE_TIME_INTERVAL_LENGTH,
)

  0%|          | 0/18 [00:00<?, ?it/s]

[19:21:58] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 1 - activation: tanh - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 2 - activation: relu - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 2 - activation: tanh - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 3 - activation: relu - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 0.5 - n_layers: 3 - activation: tanh - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 1 - n_layers: 1 - activation: relu - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 1 - n_layers: 1 - activation: tanh - dropout: -1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 1 - n_layers: 2 - ac

In [7]:
results = get_results_df(NN_SECOND_STAGE_DEMAND_RESULTS_PATH)

# Second-stage run with the lowest validation MSE on the tuning
# resolution / interval.
is_tuning_run = (results["h3_res"] == TUNE_H3_RESOLUTION) & (
    results["time_interval_length"] == TUNE_TIME_INTERVAL_LENGTH
)
best_model = results[is_tuning_run].sort_values(by="val_mse", ascending=True).iloc[0]

# Architecture hyperparameters of that run; dropout is added in a later cell.
best_config = {
    name: best_model[name]
    for name in ("batch_size", "nodes_per_feature", "n_layers", "activation")
}

best_config


{'batch_size': 256,
 'nodes_per_feature': 1.0,
 'n_layers': 2,
 'activation': 'relu'}

In [8]:
def get_hyperparameters():
    """Return the third-stage configurations built from the best second-stage model."""
    return get_third_stage_hyperparameters(
        best_batch_size=best_model["batch_size"],
        best_nodes_per_feature=best_model["nodes_per_feature"],
        best_n_layers=best_model["n_layers"],
        best_activation=best_model["activation"],
    )


# Stage 3: run the third-stage configurations on the tuning resolution /
# interval and record them under NN_THIRD_STAGE_DEMAND_RESULTS_PATH.
execute_stage(
    get_model_data,
    NN_THIRD_STAGE_DEMAND_RESULTS_PATH,
    get_hyperparameters,
    TUNE_H3_RESOLUTION,
    TUNE_TIME_INTERVAL_LENGTH,
)


  0%|          | 0/5 [00:00<?, ?it/s]

[19:21:58] batch_size: 256 - nodes_per_feature: 1.0 - n_layers: 2 - activation: relu - dropout: 0 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 1.0 - n_layers: 2 - activation: relu - dropout: 0.05 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 1.0 - n_layers: 2 - activation: relu - dropout: 0.1 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 1.0 - n_layers: 2 - activation: relu - dropout: 0.2 # already trained
[19:21:58] batch_size: 256 - nodes_per_feature: 1.0 - n_layers: 2 - activation: relu - dropout: 0.5 # already trained


In [9]:
results = get_results_df(NN_THIRD_STAGE_DEMAND_RESULTS_PATH)

# Dropout value of the third-stage run with the lowest validation MSE on the
# tuning resolution / interval.
is_tuning_run = (results["h3_res"] == TUNE_H3_RESOLUTION) & (
    results["time_interval_length"] == TUNE_TIME_INTERVAL_LENGTH
)
tuning_runs = results[is_tuning_run].sort_values(by="val_mse", ascending=True)
best_dropout = tuning_runs["dropout"].iloc[0]

# Extend the configuration chosen so far with the selected dropout.
best_config = {**best_config, "dropout": best_dropout}
best_config
		

{'batch_size': 256,
 'nodes_per_feature': 1.0,
 'n_layers': 2,
 'activation': 'relu',
 'dropout': 0.5}

In [10]:
def reduce_model_data_func(
    get_model_data: Callable, frac: float, random_state=None
) -> Callable:
    """Wrap a model-data loader so that it returns a random subsample.

    Loaders in this notebook return a ``(train, test)`` tuple, which has no
    ``.sample`` method. For tuple results only the first element (the training
    frame) is subsampled and the remaining elements are passed through
    unchanged, so evaluation still uses the complete test data. Loaders that
    return a single frame are subsampled directly.

    Args:
        get_model_data: Callable taking ``(h3_res, time_interval_length)`` and
            returning either one DataFrame or a tuple whose first element is
            the training DataFrame.
        frac: Fraction of rows to keep, forwarded to ``DataFrame.sample``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make
            the subsample reproducible. ``None`` keeps pandas' default
            (non-deterministic) behaviour.

    Returns:
        A callable with the same ``(h3_res, time_interval_length)`` signature
        as ``get_model_data`` that yields the reduced data.
    """

    def new_get_model_data(h3_res, time_interval_length):
        model_data = get_model_data(h3_res, time_interval_length)
        if isinstance(model_data, tuple):
            # Only the training frame is reduced; everything else (e.g. the
            # test frame) is returned untouched.
            train_data, *remaining = model_data
            reduced_train = train_data.sample(frac=frac, random_state=random_state)
            return (reduced_train, *remaining)
        return model_data.sample(frac=frac, random_state=random_state)

    return new_get_model_data

In [11]:
# Preview every (h3_res, time_interval_length) pair formed from
# PREDICTIVE_H3_RESOLUTIONS x CALC_TIME_INTERVAL_LENGTHS.
list(product(PREDICTIVE_H3_RESOLUTIONS, CALC_TIME_INTERVAL_LENGTHS))

[(7, 1), (7, 2), (7, 6), (7, 24), (8, 1), (8, 2), (8, 6), (8, 24)]

In [12]:
# Every (h3_res, time_interval_length) pair to evaluate: the full grid plus
# the extra pairs listed in ADDITIONAL_PREDICTIVE_RESOLUTIONS.
evaluation_grid = list(product(PREDICTIVE_H3_RESOLUTIONS, CALC_TIME_INTERVAL_LENGTHS))
evaluation_grid = evaluation_grid + ADDITIONAL_PREDICTIVE_RESOLUTIONS

# Stage 4: run the single selected configuration for each pair in test phase
# and record the outcome under NN_FOURTH_STAGE_DEMAND_RESULTS_PATH.
for h3_res, time_interval_length in tqdm(evaluation_grid):
    # The "\r" line ending lets the "... done" message below overwrite this one.
    tqdm.write(f"executing h3_res: {h3_res}, time_interval_length: {time_interval_length}", end="\r")
    execute_stage(
        get_model_data,
        NN_FOURTH_STAGE_DEMAND_RESULTS_PATH,
        lambda: [best_config],
        h3_res,
        time_interval_length,
        test_phase=True,
        silent=True,
    )
    tqdm.write(f"executing h3_res: {h3_res}, time_interval_length: {time_interval_length} done")

  0%|          | 0/9 [00:00<?, ?it/s]

executing h3_res: 7, time_interval_length: 1 done
executing h3_res: 7, time_interval_length: 2 done
executing h3_res: 7, time_interval_length: 6 done
executing h3_res: 7, time_interval_length: 24 done
executing h3_res: 8, time_interval_length: 1 done
executing h3_res: 8, time_interval_length: 2 done
executing h3_res: 8, time_interval_length: 6 done
executing h3_res: 8, time_interval_length: 24 done
executing h3_res: 9, time_interval_length: 24 done


Now the same evaluation for the origin-destination (orig-dest) demand model.

In [13]:
# The origin-destination run is stored in the same results file as the other
# fourth-stage runs. Its h3_res key is the resolution's digits written twice
# and parsed as an int, so it cannot be confused with a plain resolution.
orig_dest_h3_res_key = int(
    f"{ORIGIN_DESTINATION_H3_RESOLUTION}{ORIGIN_DESTINATION_H3_RESOLUTION}"
)

execute_stage(
    # The origin-destination loader takes no arguments, so both positional
    # arguments passed by execute_stage are ignored.
    lambda _h3_res, _time_interval_length: get_demand_orig_dest_model_data(),
    NN_FOURTH_STAGE_DEMAND_RESULTS_PATH,
    lambda: [best_config],
    orig_dest_h3_res_key,
    ORIGIN_DESTINATION_TIME_INTERVAL_LENGTH,
    test_phase=True,
    silent=True,
)

2022-07-07 19:22:04.598087: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 19:22:04.601683: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 19:22:04.601855: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 19:22:04.602365: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropri

In [19]:
# Load everything recorded under NN_FOURTH_STAGE_DEMAND_RESULTS_PATH (one row
# per evaluated h3_res / time_interval_length pair) and display it.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours - re-check with Restart Kernel -> Run All.
results = get_results_df(NN_FOURTH_STAGE_DEMAND_RESULTS_PATH)
results

Unnamed: 0,h3_res,time_interval_length,batch_size,nodes_per_feature,n_layers,activation,dropout,train_duration,test_mse,test_rmse,test_mae,test_non_zero_mape,test_zero_accuracy
0,7,1,256,1.0,2,relu,0.5,186.338808,2.112337,1.453388,0.354515,0.415404,0.952907
1,7,2,256,1.0,2,relu,0.5,118.628595,17.64597,4.200711,1.260489,0.358513,0.917816
2,7,6,256,1.0,2,relu,0.5,84.846213,121.57857,11.026267,3.922868,0.269336,0.239784
3,7,24,256,1.0,2,relu,0.5,51.822149,385.978276,19.64633,8.396101,0.180705,0.426461
4,8,1,256,1.0,2,relu,0.5,827.665968,0.907417,0.952584,0.161567,1.0,0.939471
5,8,2,256,1.0,2,relu,0.5,439.822626,2.682283,1.637768,0.471627,0.519434,0.911342
6,8,6,256,1.0,2,relu,0.5,203.167135,14.918592,3.862459,1.138881,0.415263,0.922531
7,8,24,256,1.0,2,relu,0.5,58.10199,44.829146,6.695457,2.68533,0.267945,0.34455
8,9,24,256,1.0,2,relu,0.5,139.595377,8.76821,2.961116,1.323776,0.407589,0.888043
9,77,24,256,1.0,2,relu,0.5,492.197467,7.997945,2.828064,0.346599,0.496868,0.958253


In [15]:
def get_demand_stats(row):
    """Summarise the ``outcome`` column of the model data behind one results row.

    Args:
        row: Results row providing ``h3_res`` and ``time_interval_length``.

    Returns:
        A ``pd.Series`` with the median, mean, standard deviation and maximum
        of ``outcome`` over all frames returned by ``get_model_data``
        concatenated together, plus ``perc_0``, the share of rows whose
        ``outcome`` equals zero.
    """
    frames = get_model_data(row["h3_res"], row["time_interval_length"])
    combined = pd.concat(frames)
    outcome = combined.outcome

    # Number of zero-outcome rows relative to all rows.
    zero_share = outcome[outcome == 0].count() / len(combined)

    return pd.Series(
        {
            "median": outcome.median(),
            "mean": outcome.mean(),
            "std": outcome.std(),
            "maximum": outcome.max(),
            "perc_0": zero_share,
        }
    )


# Compute the demand statistics for every results row (one data load per row).
demand_stats = results.apply(get_demand_stats, axis=1)

# Append the statistics as extra columns. Columns left over from an earlier
# run of this cell are dropped first, so re-running the cell replaces the
# statistics instead of adding duplicate columns.
results = pd.concat(
    [
        results.drop(columns=demand_stats.columns, errors="ignore"),
        demand_stats,
    ],
    axis=1,
)


In [16]:
# Display the fourth-stage results together with the demand statistics columns.
results

Unnamed: 0,h3_res,time_interval_length,batch_size,nodes_per_feature,n_layers,activation,dropout,train_duration,test_mse,test_rmse,test_mae,test_non_zero_mape,test_zero_accuracy,median,mean,std,maximum,perc_0
0,7,1,256,1.0,2,relu,0.5,186.338808,2.112337,1.453388,0.354515,0.415404,0.952907,0.0,0.67669,3.510246,100.0,0.906053
1,7,2,256,1.0,2,relu,0.5,118.628595,17.64597,4.200711,1.260489,0.358513,0.917816,0.0,2.744735,12.121042,372.0,0.846248
2,7,6,256,1.0,2,relu,0.5,84.846213,121.57857,11.026267,3.922868,0.269336,0.239784,0.0,11.27633,40.433588,703.0,0.757468
3,7,24,256,1.0,2,relu,0.5,51.822149,385.978276,19.64633,8.396101,0.180705,0.426461,0.0,37.177494,105.456392,1011.0,0.574879
4,8,1,256,1.0,2,relu,0.5,827.665968,0.907417,0.952584,0.161567,1.0,0.939471,0.0,0.161691,0.941036,43.0,0.939523
5,8,2,256,1.0,2,relu,0.5,439.822626,2.682283,1.637768,0.471627,0.519434,0.911342,0.0,0.628188,3.062797,163.0,0.893093
6,8,6,256,1.0,2,relu,0.5,203.167135,14.918592,3.862459,1.138881,0.415263,0.922531,0.0,2.561613,9.744621,259.0,0.806417
7,8,24,256,1.0,2,relu,0.5,58.10199,44.829146,6.695457,2.68533,0.267945,0.34455,0.0,8.410319,24.519968,344.0,0.651968
8,9,24,256,1.0,2,relu,0.5,139.595377,8.76821,2.961116,1.323776,0.407589,0.888043,0.0,2.653434,6.890685,113.0,0.652465
9,77,24,256,1.0,2,relu,0.5,492.197467,7.997945,2.828064,0.346599,0.496868,0.958253,3.0,12.146287,23.857396,343.0,0.0


In [17]:
# Run the selected configuration in test phase on the tuning resolution /
# interval and record it under NN_FOURTH_STAGE_DEMAND_RESULTS_PATH.
# NOTE(review): this cell sits after the results table was displayed; confirm
# whether it belongs before the cell that loads the fourth-stage results.
execute_stage(
	get_model_data,
	NN_FOURTH_STAGE_DEMAND_RESULTS_PATH,
	lambda: [best_config],
	TUNE_H3_RESOLUTION,
	TUNE_TIME_INTERVAL_LENGTH,
	test_phase=True,
	silent=True,
)