In [10]:
import pandas as pd
from itertools import product
from tqdm.notebook import tqdm

import sys, os

sys.path.append(os.path.abspath('..'))
%load_ext autoreload
%autoreload 2
from modules.config import *
from modules.storage import (
    get_demand_model_data,
    store_results
)
from modules.svm import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
def get_svm_metas():
    """Return the full list of SVM hyper-parameter grids, one per kernel.

    Each grid is a dict with the keys ``kernel``, ``C``, ``gamma``, ``degree``
    and ``max_iter`` (in that order); every value is a list of candidates.
    The grids cover the ``linear``, ``rbf`` and ``poly`` kernels. A fresh set
    of dicts and lists is built on every call, so callers may mutate the
    result freely.

    NOTE(review): ``[-1]`` is used for ``gamma``/``degree`` where the kernel
    does not get a real candidate list here — presumably a "not applicable"
    placeholder that downstream code filters out; confirm in ``modules.svm``.
    """
    def build_grid(kernel, c_values, gamma_values=(-1,), degree_values=(-1,)):
        # Convert to new lists so no two grids ever share a mutable object.
        return {
            'kernel': [kernel],
            'C': list(c_values),
            'gamma': list(gamma_values),
            'degree': list(degree_values),
            'max_iter': [1000000],
        }

    linear_grid = build_grid('linear', (0.1, 1, 10, 100))
    rbf_grid = build_grid('rbf', (0.1, 1, 10, 100), gamma_values=(0.1, 0.01, 0.001, 0.0001))
    poly_grid = build_grid('poly', (1, 10, 100, 1000), degree_values=(2, 3, 4, 5))
    return [linear_grid, rbf_grid, poly_grid]

In [12]:
def execute_stage(path, h3_res, time_interval_length, get_available_model_metas_for_stage, do_evaluate_model, silent,
                  max_train_samples=50000, random_state=None):
    """Train and store SVM models for every parameter grid selected for one stage.

    For each grid returned by ``get_available_model_metas_for_stage`` the
    function loads the train/test data for the given resolution, optionally
    subsamples the training set, scales the data, trains the models, collects
    the results and writes them to ``path`` via ``store_results``.

    Parameters
    ----------
    path
        Destination handed to ``store_results``.
    h3_res
        H3 resolution passed to the data loader and recorded with the results.
    time_interval_length
        Time-interval length passed to the data loader and recorded with the results.
    get_available_model_metas_for_stage
        Callable ``(h3_res, time_interval_length, all_possible_metas)`` that
        returns the iterable of parameter grids to run. Each grid is indexed
        as ``param_grid[0]["kernel"][0]`` for the progress message, so it must
        be a sequence whose first element is a dict with a ``"kernel"`` list.
    do_evaluate_model
        Forwarded unchanged to ``get_results``.
    silent
        When true, no progress bar or status lines are emitted.
    max_train_samples
        Upper bound on the number of training rows; larger training sets are
        randomly subsampled to this size. ``None`` disables subsampling.
        Defaults to 50000, the previously hard-coded value.
    random_state
        Seed forwarded to ``DataFrame.sample`` so the subsample (and therefore
        the stored results) can be reproduced. ``None`` (default) keeps the
        original unseeded behaviour.
    """
    all_possible_metas = get_svm_metas()
    metas = get_available_model_metas_for_stage(h3_res, time_interval_length, all_possible_metas)

    iterator = metas if silent else tqdm(metas)
    for param_grid in iterator:
        if not silent:
            feedback = f"h3: {h3_res} | t:{time_interval_length} | - " + param_grid[0]["kernel"][0]
            # Carriage return lets the "✓" line below overwrite this status line.
            tqdm.write(feedback, end="\r")

        # NOTE(review): the data is reloaded for every grid. It could be hoisted
        # out of the loop if the loader and split_and_scale_data are confirmed
        # not to mutate the frames they return/receive.
        model_data_train, model_data_test = get_demand_model_data(h3_res, time_interval_length)
        if max_train_samples is not None and len(model_data_train) > max_train_samples:
            # Cap the training-set size; pass random_state to make the draw repeatable.
            model_data_train = model_data_train.sample(max_train_samples, random_state=random_state)

        X_train, X_test, y_train, y_test = split_and_scale_data(model_data_train, model_data_test)
        models = train_model(param_grid, X_train, y_train)
        results = get_results(models, h3_res, time_interval_length, do_evaluate_model, X_test, y_test)
        store_results(results, path)

        if not silent:
            tqdm.write(feedback + " ✓")

In [13]:
# Run the first stage with progress output enabled and do_evaluate_model
# switched off; results are written to SVM_FIRST_STAGE_RESULTS_PATH.
execute_stage(
    path=SVM_FIRST_STAGE_RESULTS_PATH,
    h3_res=TUNE_H3_RESOLUTION,
    time_interval_length=TUNE_TIME_INTERVAL_LENGTH,
    get_available_model_metas_for_stage=get_availabe_models_metas_first_stage,
    do_evaluate_model=False,
    silent=False,
)

  0%|          | 0/1 [00:00<?, ?it/s]

[{'kernel': ['linear'], 'C': [0.1], 'max_iter': [1000000]}]


In [10]:
# Load the stored first-stage results and display the five rows with the
# highest `mean_test_score`.
results = pd.read_parquet(SVM_FIRST_STAGE_RESULTS_PATH)
results.sort_values('mean_test_score', ascending=False).head()

Unnamed: 0,iter,n_resources,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,param_max_iter,params,...,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score,n_iter,h3_res,time_interval_length,param_gamma,param_degree
4,0,8290,54.808515,0.62936,11.699974,0.465198,1.0,rbf,1000000,"{'C': 1.0, 'degree': None, 'gamma': 0.01, 'ker...",...,-1.24073,-1.419944,-1.493995,-1.319479,0.116704,434001,8,6,0.01,
7,0,8290,61.887783,2.143711,13.11701,0.57545,10.0,rbf,1000000,"{'C': 10.0, 'degree': None, 'gamma': 0.01, 'ke...",...,-0.968602,-1.101078,-1.162574,-1.023994,0.091734,0,8,6,0.01,
11,0,8290,61.977852,1.651096,8.008226,0.643548,100.0,rbf,1000000,"{'C': 100.0, 'degree': None, 'gamma': 0.001, '...",...,-1.251774,-1.431241,-1.494112,-1.327835,0.114344,0,8,6,0.001,
21,0,8290,72.496359,2.332309,11.504219,0.713604,10.0,poly,1000000,"{'C': 10.0, 'degree': 2.0, 'gamma': None, 'ker...",...,-1.278043,-1.45353,-1.51318,-1.35258,0.110961,0,8,6,,2.0
16,2,74610,9881.55261,513.575073,284.903122,5.123617,10.0,rbf,1000000,"{'C': 10.0, 'degree': None, 'gamma': 0.01, 'ke...",...,-1.367001,-1.403957,-1.402731,-1.397316,0.020037,0,8,6,0.01,


In [6]:
# Highest `mean_test_score` per kernel: sort descending, then take the first
# entry of each `param_kernel` group.
(
    results
    .sort_values('mean_test_score', ascending=False)
    .groupby('param_kernel')['mean_test_score']
    .first()
)

373066

In [None]:
# Sum of `mean_fit_time` per kernel, divided by 60
# (presumably seconds -> minutes; confirm the unit of `mean_fit_time`).
results.groupby('param_kernel')['mean_fit_time'].sum() / 60

In [7]:
# resolutions = list(product(PREDICTIVE_H3_RESOLUTIONS, CALC_TIME_INTERVAL_LENGTHS))
# resolutions = resolutions + (ADDITIONAL_PREDICTIVE_RESOLUTIONS)
# resolutions

In [8]:
# for h3_res, time_interval_length in tqdm(resolutions):
#     execute_stage(
#         SVM_SECOND_STAGE_RESULTS_PATH,
#         h3_res,
#         time_interval_length,
#         get_availabe_models_metas_second_stage,
#         do_evaluate_model=True,
#         silent=True,
#     )

In [9]:
# results = pd.read_parquet(SVM_FIRST_STAGE_RESULTS_PATH)
# results.sort_values(by=['mean_train_score'], ascending=False).head(2)

In [10]:
# results_final = pd.read_parquet(SVM_SECOND_STAGE_RESULTS_PATH)
# results_final