In [1]:
import pandas as pd
from itertools import product
from tqdm.notebook import tqdm

import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2
from modules.config import *
from modules.storage import (
    get_model_data,
    store_results
)
from modules.svm import *

In [2]:
def get_svm_metas():
    return [
        {'kernel': ['linear'], 'C': [1, 10, 100], 'gamma': [-1],            'degree': [-1],         'max_iter': [1000000]},
        {'kernel': ['rbf'],    'C': [1, 10, 100], 'gamma': [0.001, 0.0001], 'degree': [-1],         'max_iter': [1000000]},
        {'kernel': ['poly'],   'C': [1, 10, 100], 'gamma': [-1],            'degree': [2, 3, 4, 5], 'max_iter': [1000000]}
    ]

In [3]:
def execute_stage(path, h3_res, time_interval_length, get_available_model_metas_for_stage, do_evaluate_model, silent):
    all_possible_metas = get_svm_metas()
    metas = get_available_model_metas_for_stage(h3_res, time_interval_length, all_possible_metas)
    
    iterator = tqdm(metas) if silent else metas 
    for param_grid in iterator:
        if iterator:
            feedback = f"h3: {h3_res} | t:{time_interval_length} | - " + param_grid[0]["kernel"][0]
            tqdm.write(feedback, end="\r")
        
        model_data = get_model_data(h3_res, time_interval_length)
        model_data = model_data.sample(1000)

        X_train, X_test, y_train, y_test = split_and_scale_data(model_data, "demand")
        models = train_model(param_grid, X_train, y_train)
        results = get_results(models, h3_res, time_interval_length, do_evaluate_model, X_test, y_test)
        store_results(results, path)  
        
        if iterator:
            tqdm.write(feedback + " ✓")

In [4]:
execute_stage(
    SVM_FIRST_STAGE_RESULTS_PATH,
    TUNE_H3_RESOLUTION,
    TUNE_TIME_INTERVAL_LENGTH,
    get_availabe_models_metas_first_stage,
    do_evaluate_model=False,
    silent=False,
)

  0%|          | 0/3 [00:00<?, ?it/s]

h3: 8 | t:6 | - linear ✓
h3: 8 | t:6 | - rbf ✓
h3: 8 | t:6 | - poly ✓


In [5]:
results = pd.read_parquet(SVM_FIRST_STAGE_RESULTS_PATH)
results.sort_values(by=['mean_train_score'], ascending=False).head(2)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,param_max_iter,params,...,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score,n_iter,h3_res,time_interval_length,param_gamma,param_degree
20,0,77,0.002971,4.7e-05,0.000802,0.000401,100,poly,1000000,"{'C': 100, 'degree': 2.0, 'gamma': None, 'kern...",...,-0.01,-0.010002,-0.009945,-0.009969,4.4e-05,0,8,6,,2.0
22,0,77,0.002798,0.000407,0.001001,9e-06,100,poly,1000000,"{'C': 100, 'degree': 4.0, 'gamma': None, 'kern...",...,-0.01,-0.010001,-0.009894,-0.00998,4.3e-05,0,8,6,,4.0


In [6]:
for h3_res in PREDICTIVE_H3_RESOLUTIONS:
    for time_interval_length in CALC_TIME_INTERVAL_LENGTHS:
        execute_stage(
            SVM_SECOND_STAGE_RESULTS_PATH,
            h3_res,
            time_interval_length,
            get_availabe_models_metas_second_stage,
            do_evaluate_model=True,
            silent=True,
        )

  0%|          | 0/1 [00:00<?, ?it/s]

h3: 7 | t:1 | - poly ✓


  0%|          | 0/1 [00:00<?, ?it/s]

h3: 7 | t:2 | - poly ✓


  0%|          | 0/1 [00:00<?, ?it/s]

h3: 7 | t:6 | - poly ✓


  0%|          | 0/1 [00:00<?, ?it/s]

h3: 7 | t:24 | - poly ✓


  0%|          | 0/1 [00:00<?, ?it/s]

h3: 8 | t:1 | - poly ✓


  0%|          | 0/1 [00:00<?, ?it/s]

h3: 8 | t:2 | - poly ✓


  0%|          | 0/1 [00:00<?, ?it/s]

h3: 8 | t:6 | - poly ✓


  0%|          | 0/1 [00:00<?, ?it/s]

h3: 8 | t:24 | - poly ✓


In [7]:
results = pd.read_parquet(SVM_FIRST_STAGE_RESULTS_PATH)
results.sort_values(by=['mean_train_score'], ascending=False).head(2)

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,param_max_iter,params,...,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score,n_iter,h3_res,time_interval_length,param_gamma,param_degree
20,0,77,0.002971,4.7e-05,0.000802,0.000401,100,poly,1000000,"{'C': 100, 'degree': 2.0, 'gamma': None, 'kern...",...,-0.01,-0.010002,-0.009945,-0.009969,4.4e-05,0,8,6,,2.0
22,0,77,0.002798,0.000407,0.001001,9e-06,100,poly,1000000,"{'C': 100, 'degree': 4.0, 'gamma': None, 'kern...",...,-0.01,-0.010001,-0.009894,-0.00998,4.3e-05,0,8,6,,4.0


In [8]:
results_final = pd.read_parquet(SVM_SECOND_STAGE_RESULTS_PATH)
results_final

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_degree,param_kernel,param_max_iter,...,split4_train_score,mean_train_score,std_train_score,n_iter,h3_res,time_interval_length,mse,mae,mape,rmse
0,0,700,0.085993,0.008556,0.005001,0.000631,100,2.0,poly,1000000,...,-0.921735,-1.474168,0.315868,20034,7,1,10.215582,2.453907,1.070018,3.196182
1,0,700,0.115137,0.009045,0.004662,0.000615,100,2.0,poly,1000000,...,-4.22778,-3.649198,0.576507,42492,7,2,20.247135,3.462498,1.210664,4.499682
2,0,700,0.075061,0.008247,0.003855,0.000955,100,2.0,poly,1000000,...,-79.818517,-101.326197,13.329427,39690,7,6,65.700426,5.506617,1.116206,8.10558
3,0,700,0.048791,0.00248,0.005403,0.001743,100,2.0,poly,1000000,...,-571.77359,-666.423665,84.385445,9846,7,24,629.835237,12.356898,1.01675,25.096518
4,0,700,0.050784,0.005983,0.007007,2e-05,100,2.0,poly,1000000,...,-0.02757,-0.021068,0.0079,3395,8,1,0.396341,0.454566,0.395275,0.629556
5,0,700,0.058474,0.002833,0.007188,0.000403,100,2.0,poly,1000000,...,-0.025367,-0.032442,0.01291,7478,8,2,1.179758,0.792153,0.625384,1.086167
6,0,700,0.260489,0.018463,0.022787,0.007764,100,2.0,poly,1000000,...,-0.009669,-0.01874,0.007038,2926,8,6,1.459292,0.888741,0.568491,1.208012
7,0,700,0.060897,0.001352,0.007272,0.00121,100,2.0,poly,1000000,...,-2.249394,-2.54199,0.318314,10005,8,24,9.884722,2.001288,0.888145,3.143998
