# Imports

In [1]:
# (Re)load IPython's autoreload extension and switch it to mode 2, so that edits
# to imported project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [2]:
import pickle
import itertools
import time
import os
import statistics
# NOTE(review): this rebinds the name `time` from the module imported above to
# the function `time.time`; from here on `time` is the function, so module
# attributes such as `time.sleep` are no longer reachable through this name.
from time import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from evaluation.regression import generate_reg_results, get_regression_norm_results, get_regression_div_results

In [3]:
# Render every float in displayed DataFrames with thousands separators and 4 decimals.
pd.set_option("display.float_format", "{:,.4f}".format)

# Results generation

In [5]:
# NOTE(review): both locations are absolute paths tied to one workspace; the
# notebook will not run elsewhere without editing them.
# Common prefix of the per-size dataset pickles ("<prefix><n_tips>.pkl").
pickle_base = '/workspace/coniferas(1)/data_inference/pickles/treepar_dataset/dataset_'
# Root folder of the regression models, organised as "<res_path><scenario>/...".
res_path = "/workspace/coniferas(1)/data_inference/models/reg/"
# Tree sizes, kept as strings because they are spliced into file names and
# used as dictionary keys throughout the notebook.
n_tips = ['674', '489', '87']

# data[<n_tips>] holds whatever object was pickled in the matching dataset file.
# NOTE(review): pickle.load can execute arbitrary code - only use trusted files.
data = {}
for tips in n_tips:
    dataset_file = f"{pickle_base}{tips}.pkl"
    with open(dataset_file, 'rb') as handle:
        data[tips] = pickle.load(handle)

In [6]:
# Run every regression model on the test split and collect, per tree size,
# diversification scenario and normalisation variant:
#   results[n_tips][scenario][norm]   -> first value returned by generate_reg_results
#   inf_times[n_tips][scenario][norm] -> second value returned by generate_reg_results
# NOTE(review): in the recorded output the very first call is roughly ten times
# slower than all later ones - possibly a one-off warm-up cost; confirm before
# interpreting the timing statistics computed from inf_times.

n_trees_tested = 1000  # not referenced anywhere in this cell
norm_types = ['norm', 'no_norm']  # loop-invariant, so defined once up front
results = dict()
inf_times = dict()

for i in n_tips:
    print('---', i, 'tips ---')
    results[i] = dict()
    inf_times[i] = dict()
    tip_data = data[i]

    for label in np.unique(tip_data['div_info_test']):
        # Scenario name = first '_'-separated token of the label's second
        # '/'-separated component.
        div_scenario = label.split('/')[1].split('_')[0]
        results[i][div_scenario] = dict()
        inf_times[i][div_scenario] = dict()

        for norm in norm_types:
            # Model files share the "<n_tips>_regression_" prefix; every variant
            # other than 'no_norm' appends "<norm>_" to it.
            model_path = res_path + div_scenario + '/' + i + "_regression_"
            if norm != 'no_norm':
                model_path += norm + '_'

            # The innermost entries are assigned directly from the call's
            # return values, so no placeholder dicts are created for them.
            results[i][div_scenario][norm], ex_time = generate_reg_results(
                model_path,
                tip_data['X_test'],
                tip_data['y_reg_test'],
                tip_data['y_reg_norm_test'],
                tip_data['div_info_test'],
                tip_data['resc_factor_test'],
                div_scenario, label, norm,
            )
            inf_times[i][div_scenario][norm] = ex_time

--- 674 tips ---
--- Inference time:  BD scenario & norm 3.3448855876922607 seconds ---
--- Inference time:  BD scenario & no_norm 0.25420546531677246 seconds ---
--- Inference time:  HE scenario & norm 0.28528881072998047 seconds ---
--- Inference time:  HE scenario & no_norm 0.2760653495788574 seconds ---
--- Inference time:  ME scenario & norm 0.330324649810791 seconds ---
--- Inference time:  ME scenario & no_norm 0.3203620910644531 seconds ---
--- Inference time:  SAT scenario & norm 0.2215569019317627 seconds ---
--- Inference time:  SAT scenario & no_norm 0.49443483352661133 seconds ---
--- Inference time:  SR scenario & norm 0.2858107089996338 seconds ---
--- Inference time:  SR scenario & no_norm 0.3448355197906494 seconds ---
--- Inference time:  WW scenario & norm 0.28818583488464355 seconds ---
--- Inference time:  WW scenario & no_norm 0.30864548683166504 seconds ---
--- 489 tips ---
--- Inference time:  BD scenario & norm 0.2675907611846924 seconds ---
--- Inference time:

## Inference time

In [7]:
# Summarise the recorded inference times per tree size.
# Only the 'norm' variant of each scenario is included in the statistics.
# np.std is the population standard deviation (ddof=0).
# NOTE(review): in the recorded output the 674-tip maximum is more than ten
# times the other timings - possibly a one-off warm-up on the first call; confirm
# before comparing the means across tree sizes.
for i, per_scenario in inf_times.items():
    norm_times = [per_variant['norm'] for per_variant in per_scenario.values()]

    print(f"\nInference time statistics for {i}:")
    print(f"Mean: {np.mean(norm_times):.4f}")
    print(f"Standard deviation: {np.std(norm_times):.4f}")
    print(f"Minimum: {np.min(norm_times):.4f}")
    print(f"Maximum: {np.max(norm_times):.4f}")


Inference time statistics for 674:
Mean: 0.7927
Standard deviation: 1.1418
Minimum: 0.2216
Maximum: 3.3449

Inference time statistics for 489:
Mean: 0.2362
Standard deviation: 0.0300
Minimum: 0.1930
Maximum: 0.2676

Inference time statistics for 87:
Mean: 0.2382
Standard deviation: 0.0362
Minimum: 0.1889
Maximum: 0.2883


## Training time

In [8]:
# Summarise the stored training times per tree size, one value per scenario.
# The metadata pickle is read from the un-suffixed "<n_tips>_regression_" prefix,
# i.e. the same prefix the 'no_norm' models use in the results-generation cell.
# NOTE(review): pickle.load can execute arbitrary code - only use trusted files.
summary_stats = (('Mean:', np.mean), ('Std Dev:', np.std), ('Max:', np.max), ('Min:', np.min))

for i in n_tips:
    scenario_labels = np.unique(data[i]['div_info_test'])
    train_times = []

    for label in scenario_labels:
        div_scenario = label.split('/')[1].split('_')[0]
        model_path = f"{res_path}{div_scenario}/{i}_regression_"

        # Each metadata file unpickles to a (n_params, train_time) pair;
        # only the training time is used here.
        with open(f"{model_path}model_data.pkl", 'rb') as f:
            n_params, train_time = pickle.load(f)

        train_times.append(train_time)

    print('\nTraining times for', i, 'tips')
    print('-----')
    for stat_label, stat_fn in summary_stats:
        print(stat_label, stat_fn(train_times))


Training times for 674 tips
-----
Mean: 80.98654202620189
Std Dev: 23.909542822776093
Max: 121.00370073318481
Min: 53.84978413581848

Training times for 489 tips
-----
Mean: 146.08997031052908
Std Dev: 40.89618029143705
Max: 204.01095628738403
Min: 106.88540458679199

Training times for 87 tips
-----
Mean: 178.52716255187988
Std Dev: 47.54313057765572
Max: 278.6980650424957
Min: 133.32786893844604


# Regression metrics MAE vs MAE_norm

In [9]:
# Scenario names are read from the first tree-size entry of `results`.
# Note that `div_scenario` is a list here, whereas the loops above bind the
# same name to a single scenario string.
first_tips = list(results)[0]
div_scenario = list(results[first_tips])
get_regression_norm_results(results, '674', div_scenario, 'norm')

Unnamed: 0,norm,no_norm
MAE,6.6471,0.91
MAE_norm,4.8571,1.7434


In [10]:
# Scenario names are read from the first tree-size entry of `results`
# (not necessarily the '489' entry that is being reported).
first_tips = list(results)[0]
div_scenario = list(results[first_tips])
get_regression_norm_results(results, '489', div_scenario, 'norm')

Unnamed: 0,norm,no_norm
MAE,6.3643,0.8653
MAE_norm,4.2064,1.6158


In [11]:
# Scenario names are read from the first tree-size entry of `results`
# (not necessarily the '87' entry that is being reported).
first_tips = list(results)[0]
div_scenario = list(results[first_tips])
get_regression_norm_results(results, '87', div_scenario, 'norm')

Unnamed: 0,norm,no_norm
MAE,6.7094,0.9587
MAE_norm,5.3895,1.9278


## Comparison between diversification scenarios

In [12]:
# Display the table returned for the 'BD' scenario on the 674-tip results
# (the recorded output has MAE / MAE_norm rows and one column per parameter).
get_regression_div_results(results, '674', 'BD', 'norm')

Unnamed: 0,r,a,lambda,mu
MAE,1.19,0.094,1.2742,0.3312
MAE_norm,0.758,0.094,0.8178,0.0859


In [13]:
# Display the table returned for the 'HE' scenario on the 674-tip results
# (the recorded output has MAE / MAE_norm rows and one column per parameter).
get_regression_div_results(results, '674', 'HE', 'norm')

Unnamed: 0,r,a,lambda,mu
MAE,0.445,0.1704,0.4974,0.0555
MAE_norm,0.2817,0.1704,0.341,0.0615


In [14]:
# Display the table returned for the 'ME' scenario on the 674-tip results
# (the recorded output has MAE / MAE_norm rows and one column per parameter).
get_regression_div_results(results, '674', 'ME', 'norm')

Unnamed: 0,r,a,time,frac,lambda,mu
MAE,0.9504,0.0755,33.5415,0.05,2.1865,1.2539
MAE_norm,0.6602,0.0755,26.0458,0.05,1.3647,0.7227


In [15]:
# Display the table returned for the 'SR' scenario on the 674-tip results
# (the recorded output has MAE / MAE_norm rows and one column per parameter).
get_regression_div_results(results, '674', 'SR', 'norm')

Unnamed: 0,r0,r1,a0,a1,time,lambda0,lambda1,mu0,mu1
MAE,1.4599,0.1349,0.0912,0.0934,16.307,2.0013,0.5425,0.559,0.4102
MAE_norm,0.8802,0.0959,0.0912,0.0934,11.5338,1.2297,0.4772,0.3688,0.3842


In [16]:
# Display the table returned for the 'SAT' scenario on the 674-tip results
# (the recorded output has MAE / MAE_norm rows and a single 'lambda 0' column).
get_regression_div_results(results, '674', 'SAT', 'norm')

Unnamed: 0,lambda 0
MAE,9.4073
MAE_norm,5.9115


In [17]:
# Display the table returned for the 'WW' scenario on the 674-tip results
# (the recorded output has MAE / MAE_norm rows and one column per parameter).
get_regression_div_results(results, '674', 'WW', 'norm')

Unnamed: 0,r0,r1,a0,a1,time,lambda0,lambda1,mu0,mu1
MAE,1.2006,10.0092,0.1474,0.0903,144.9705,1.9398,17.4393,3.1385,7.5262
MAE_norm,0.7143,7.0542,0.1474,0.0903,114.1477,1.1808,13.1925,1.8926,6.252
