In [1]:
import pandas as pd
from itertools import product
from tqdm.notebook import tqdm

import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2
from modules.config import *
from modules.storage import (
    get_model_data,
    store_results
)
from modules.svm import *

In [2]:
def get_svm_metas():
    """Return the complete SVM hyper-parameter search space, one grid per kernel.

    Returns:
        list[dict]: three grids (``linear``, ``rbf``, ``poly``). Each grid maps a
        parameter name to the list of candidate values for that parameter. All
        grids share the same ``C`` candidates and ``max_iter`` cap.

    Note:
        ``gamma`` / ``degree`` hold ``[-1]`` in the grids that do not vary them.
        NOTE(review): this looks like a "not applicable" placeholder that the
        consumers of these grids translate — confirm against modules.svm.
    """
    def build_grid(kernel, gamma, degree):
        # Every call creates fresh list objects, so grids never share mutable state.
        return {
            'kernel': [kernel],
            'C': [1, 10, 100],
            'gamma': gamma,
            'degree': degree,
            'max_iter': [1000000],
        }

    return [
        build_grid('linear', gamma=[-1], degree=[-1]),
        build_grid('rbf', gamma=[0.001, 0.0001], degree=[-1]),
        build_grid('poly', gamma=[-1], degree=[2, 3, 4, 5]),
    ]

In [3]:
def execute_stage(path, h3_res, time_interval_length, get_available_model_metas_for_stage, do_evaluate_model, silent,
                  sample_size=1000, random_state=None):
    """Run one SVM tuning stage for a single (h3 resolution, time interval) combination.

    For every parameter grid selected for this stage, a subsample of the model
    data is drawn, split and scaled, the grid is trained, and the resulting
    records are stored under ``path``.

    Args:
        path: Destination handed to ``store_results`` for each grid's results.
        h3_res: H3 resolution; used to load the model data and forwarded to
            ``get_results``.
        time_interval_length: Time interval length; used the same way as ``h3_res``.
        get_available_model_metas_for_stage: Callable invoked as
            ``f(h3_res, time_interval_length, all_possible_metas)``; returns the
            iterable of parameter grids to run. Each item is indexed as
            ``item[0]["kernel"][0]`` to build the feedback label.
        do_evaluate_model: Forwarded unchanged to ``get_results``.
        silent: If true, a tqdm progress bar over the grids is shown and the
            per-grid text feedback is suppressed; if false, one feedback line
            per grid is written instead.
        sample_size: Maximum number of rows drawn from the model data per grid
            (default 1000, the previously hard-coded value). ``None`` uses every
            available row. The value is clamped to the number of available rows
            so that small data sets no longer raise ``ValueError`` in
            ``DataFrame.sample``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to make the
            subsample reproducible. The default ``None`` keeps the original
            behaviour of an unseeded draw per grid.
    """
    all_possible_metas = get_svm_metas()
    metas = get_available_model_metas_for_stage(h3_res, time_interval_length, all_possible_metas)

    # The stage data does not depend on the parameter grid, so it is loaded at
    # most once (lazily, so nothing is loaded when there are no grids to run).
    full_model_data = None

    iterator = tqdm(metas) if silent else metas
    for param_grid in iterator:
        if not silent:
            feedback = f"h3: {h3_res} | t:{time_interval_length} | - " + param_grid[0]["kernel"][0]
            # Carriage return lets the final "✓" line overwrite this one in a terminal.
            tqdm.write(feedback, end="\r")

        if full_model_data is None:
            full_model_data = get_model_data(h3_res, time_interval_length)

        # Never request more rows than exist; sampling without replacement would raise.
        available_rows = len(full_model_data)
        n_rows = available_rows if sample_size is None else min(sample_size, available_rows)
        model_data = full_model_data.sample(n_rows, random_state=random_state)

        # "demand" is passed as the second argument — presumably the target column; confirm in modules.svm.
        X_train, X_test, y_train, y_test = split_and_scale_data(model_data, "demand")
        models = train_model(param_grid, X_train, y_train)
        results = get_results(models, h3_res, time_interval_length, do_evaluate_model, X_test, y_test)
        store_results(results, path)

        if not silent:
            tqdm.write(feedback + " ✓")

In [4]:
# First tuning stage: a single run on the tuning resolution / interval, with
# per-grid text feedback and model evaluation switched off.
execute_stage(
    path=SVM_FIRST_STAGE_RESULTS_PATH,
    h3_res=TUNE_H3_RESOLUTION,
    time_interval_length=TUNE_TIME_INTERVAL_LENGTH,
    get_available_model_metas_for_stage=get_availabe_models_metas_first_stage,
    do_evaluate_model=False,
    silent=False,
)

h3: 8 | t:6 | - linear



h3: 8 | t:6 | - linear ✓
h3: 8 | t:6 | - rbf ✓
h3: 8 | t:6 | - poly ✓


In [5]:
# Load the stored first-stage results and show the two rows with the highest
# `mean_train_score`.
# NOTE(review): the ranking uses the training score; confirm whether a
# validation/test score column was intended for model selection.
results = pd.read_parquet(SVM_FIRST_STAGE_RESULTS_PATH)
results.sort_values(by='mean_train_score', ascending=False).iloc[:2]

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,param_max_iter,params,...,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score,n_iter,h3_res,time_interval_length,param_gamma,param_degree
20,0,77,0.003004,9e-06,0.001003,8e-06,100,poly,1000000,"{'C': 100, 'degree': 2.0, 'gamma': None, 'kern...",...,-0.009338,-0.00986,-0.009585,-0.009679,0.000195,0,8,6,,2.0
21,0,77,0.002997,1.1e-05,0.0004,0.00049,100,poly,1000000,"{'C': 100, 'degree': 3.0, 'gamma': None, 'kern...",...,-0.010972,-0.009572,-0.036832,-0.015437,0.010708,0,8,6,,3.0


In [6]:
# All (h3 resolution, time interval length) pairs from the cartesian product of
# the configured values, followed by the explicitly listed additional pairs.
resolutions = (
    list(product(PREDICTIVE_H3_RESOLUTIONS, CALC_TIME_INTERVAL_LENGTHS))
    + ADDITIONAL_PREDICTIVE_RESOLUTIONS
)
resolutions

[(7, 1), (7, 2), (7, 6), (7, 24), (8, 1), (8, 2), (8, 6), (8, 24), (9, 24)]

In [7]:
# Second stage: run the stage once per (h3 resolution, time interval length)
# pair, with model evaluation enabled and progress bars instead of text feedback.
for h3_res, time_interval_length in tqdm(resolutions):
    execute_stage(
        path=SVM_SECOND_STAGE_RESULTS_PATH,
        h3_res=h3_res,
        time_interval_length=time_interval_length,
        get_available_model_metas_for_stage=get_availabe_models_metas_second_stage,
        do_evaluate_model=True,
        silent=True,
    )

  0%|          | 0/9 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

In [8]:
# Show the two first-stage rows with the highest `mean_train_score`.
# NOTE(review): this cell runs after the second-stage loop but still reads the
# first-stage results path — confirm that re-displaying stage one is intended.
results = pd.read_parquet(SVM_FIRST_STAGE_RESULTS_PATH)
results.sort_values(by='mean_train_score', ascending=False).iloc[:2]

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,param_max_iter,params,...,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score,n_iter,h3_res,time_interval_length,param_gamma,param_degree
20,0,77,0.003004,9e-06,0.001003,8e-06,100,poly,1000000,"{'C': 100, 'degree': 2.0, 'gamma': None, 'kern...",...,-0.009338,-0.00986,-0.009585,-0.009679,0.000195,0,8,6,,2.0
21,0,77,0.002997,1.1e-05,0.0004,0.00049,100,poly,1000000,"{'C': 100, 'degree': 3.0, 'gamma': None, 'kern...",...,-0.010972,-0.009572,-0.036832,-0.015437,0.010708,0,8,6,,3.0


In [9]:
# Load every record stored under the second-stage results path and display the
# full table (one row per stored result).
results_final = pd.read_parquet(SVM_SECOND_STAGE_RESULTS_PATH)
results_final

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,param_kernel,param_max_iter,...,split4_train_score,mean_train_score,std_train_score,n_iter,h3_res,time_interval_length,mse,mae,mape,rmse
0,0,700,0.094798,0.008173,0.005003,1e-05,100,2.0,poly,1000000,...,-0.597791,-0.875397,0.284594,21274,7,1,9.382943,2.353098,1.153479,3.063159
1,0,700,0.110498,0.003413,0.004597,0.000599,100,2.0,poly,1000000,...,-6.260171,-5.354192,0.608826,28613,7,2,23.033967,3.390855,1.050885,4.799372
2,0,700,0.10116,0.012082,0.004801,0.001167,100,2.0,poly,1000000,...,-28.281038,-36.000983,4.939644,28462,7,6,80.508167,5.303404,1.023823,8.972634
3,0,700,0.056994,0.009572,0.004602,0.000487,100,2.0,poly,1000000,...,-491.326329,-420.977401,65.621148,13466,7,24,495.580942,12.470649,1.089774,22.261647
4,0,700,0.051532,0.001782,0.007605,0.000789,100,2.0,poly,1000000,...,-0.010381,-0.009905,0.000641,4416,8,1,0.933754,0.621257,0.517714,0.96631
5,0,700,0.066337,0.002946,0.00905,0.001953,100,2.0,poly,1000000,...,-0.029367,-0.021426,0.010082,8040,8,2,4.903391,0.877397,0.709485,2.21436
6,0,700,0.192638,0.063073,0.02302,0.011662,100,2.0,poly,1000000,...,-0.009459,-0.009511,9.6e-05,3286,8,6,1.195327,0.853376,0.570184,1.09331
7,0,700,0.063513,0.003041,0.008251,0.001384,100,2.0,poly,1000000,...,-0.998329,-0.919021,0.22546,7951,8,24,7.114808,1.847271,0.83461,2.66736
8,0,700,0.296464,0.084701,0.053609,0.028031,100,2.0,poly,1000000,...,-0.024159,-0.017974,0.005527,2512,9,24,0.702102,0.468359,0.381815,0.837915
