# Imports

In [34]:
# Enable IPython's autoreload extension: in mode 2, imported modules are
# re-imported before each cell executes, so edits to project code are picked
# up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [84]:
import pickle
import itertools
import time
import os
import statistics
from time import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from evaluation.regression import generate_reg_results, get_regression_norm_results, get_regression_div_results

In [36]:
# Render floats in displayed frames with thousands separators and 4 decimals.
pd.set_option("display.float_format", "{:,.4f}".format)

# Results generation

In [38]:
# Common prefix of the per-size dataset pickles, and root folder of the
# regression models used further down in the notebook.
pickle_base = '/workspace/coniferas(1)/data_inference/pickles/simulations_no_fossil/dataset_'
res_path = "/workspace/coniferas(1)/data_inference/models/reg/"

# Tree sizes are kept as strings because they are spliced into file names.
n_tips = ['674', '489', '87']

# data[<tips>] holds whatever object is stored in "<pickle_base><tips>_10k.pkl".
# NOTE(review): pickle.load can execute arbitrary code; only point this at
# trusted, locally generated files.
data = {}
for tips in n_tips:
    dataset_file = f"{pickle_base}{tips}_10k.pkl"
    with open(dataset_file, 'rb') as fh:
        data[tips] = pickle.load(fh)

In [41]:
# NOTE(review): n_trees_tested is not read anywhere in this cell; kept because
# other cells may rely on it -- confirm or remove.
n_trees_tested = 1000

# Model variants evaluated for every (tree size, scenario) pair. Defined once
# here instead of being rebuilt on every scenario iteration.
norm_types = ['norm', 'no_norm']

# Both dictionaries are indexed as [n_tips][div_scenario][norm]:
#   results   -> first value returned by generate_reg_results
#   inf_times -> second value returned by generate_reg_results (execution time)
results = dict()
inf_times = dict()

for i in n_tips:
    print('---', i, 'tips ---')
    tips_data = data[i]
    results[i] = dict()
    inf_times[i] = dict()

    for label in np.unique(tips_data['div_info_test']):
        # The scenario name is the part of the second '/'-separated field
        # that precedes the first underscore.
        div_scenario = label.split('/')[1].split('_')[0]
        results[i][div_scenario] = dict()
        inf_times[i][div_scenario] = dict()

        for norm in norm_types:
            # Model path prefix: "<res_path><scenario>/<tips>_regression_",
            # with an extra "<norm>_" suffix for every variant except 'no_norm'.
            model_path = res_path + div_scenario + '/' + i + "_regression_"
            if norm != 'no_norm':
                model_path += norm + '_'

            # The entries are assigned directly from the call; no placeholder
            # dicts are created first since they would be overwritten at once.
            results[i][div_scenario][norm], ex_time = generate_reg_results(
                model_path,
                tips_data['X_test'],
                tips_data['y_reg_test'],
                tips_data['y_reg_norm_test'],
                tips_data['div_info_test'],
                tips_data['resc_factor_test'],
                div_scenario,
                label,
                norm,
            )
            inf_times[i][div_scenario][norm] = ex_time

--- 674 tips ---
--- Inference time:  BD scenario & norm 1.3218464851379395 seconds ---
--- Inference time:  BD scenario & no_norm 0.4149961471557617 seconds ---
--- Inference time:  HE scenario & norm 0.2879617214202881 seconds ---
--- Inference time:  HE scenario & no_norm 0.2892918586730957 seconds ---
--- Inference time:  ME scenario & norm 0.36864519119262695 seconds ---
--- Inference time:  ME scenario & no_norm 0.3339557647705078 seconds ---
--- Inference time:  SAT scenario & norm 0.2581517696380615 seconds ---
--- Inference time:  SAT scenario & no_norm 0.274233341217041 seconds ---
--- Inference time:  SR scenario & norm 0.4832584857940674 seconds ---
--- Inference time:  SR scenario & no_norm 0.4050312042236328 seconds ---
--- Inference time:  WW scenario & norm 0.3301835060119629 seconds ---
--- Inference time:  WW scenario & no_norm 0.39681053161621094 seconds ---
--- 489 tips ---
--- Inference time:  BD scenario & norm 0.2342076301574707 seconds ---
--- Inference time:  B

## Inference time

In [45]:
# Summarise, per tree size, the inference times recorded for the 'norm'
# variant across all diversification scenarios.
for tips, per_scenario in inf_times.items():
    norm_times = [per_variant['norm'] for per_variant in per_scenario.values()]

    mean_t = np.mean(norm_times)
    std_t = np.std(norm_times)
    min_t = np.min(norm_times)
    max_t = np.max(norm_times)

    print(f"\nInference time statistics for {tips}:")
    print(f"Mean: {mean_t:.4f}")
    print(f"Standard deviation: {std_t:.4f}")
    print(f"Minimum: {min_t:.4f}")
    print(f"Maximum: {max_t:.4f}")


Inference time statistics for 674:
Mean: 0.5083
Standard deviation: 0.3708
Minimum: 0.2582
Maximum: 1.3218

Inference time statistics for 489:
Mean: 0.2903
Standard deviation: 0.0705
Minimum: 0.2210
Maximum: 0.3958

Inference time statistics for 87:
Mean: 0.3269
Standard deviation: 0.1084
Minimum: 0.1869
Maximum: 0.5040


## Training time

In [46]:
# Summarise, per tree size, the training times stored next to each scenario's
# regression model (one "<prefix>model_data.pkl" file per scenario).
for tips in n_tips:
    train_times = []

    for label in np.unique(data[tips]['div_info_test']):
        scenario = label.split('/')[1].split('_')[0]

        # Same path prefix as the un-normalised regression model.
        model_prefix = f"{res_path}{scenario}/{tips}" + "_regression_"

        # The pickle holds a 2-item sequence; only the second item (training
        # time) is used here.
        # NOTE(review): pickle.load can execute arbitrary code; trusted files only.
        with open(model_prefix + 'model_data.pkl', 'rb') as fh:
            _n_params, elapsed = pickle.load(fh)

        train_times.append(elapsed)

    print('\nTraining times for', tips, 'tips')
    print('-'*5)
    for stat_label, stat_fn in (('Mean:', np.mean), ('Std Dev:', np.std),
                                ('Max:', np.max), ('Min:', np.min)):
        print(stat_label, stat_fn(train_times))


Training times for 674 tips
-----
Mean: 80.98654202620189
Std Dev: 23.909542822776093
Max: 121.00370073318481
Min: 53.84978413581848

Training times for 489 tips
-----
Mean: 146.08997031052908
Std Dev: 40.89618029143705
Max: 204.01095628738403
Min: 106.88540458679199

Training times for 87 tips
-----
Mean: 178.52716255187988
Std Dev: 47.54313057765572
Max: 278.6980650424957
Min: 133.32786893844604


# Regression metrics MAE vs MAE_norm

In [80]:
# Scenario names are the second-level keys of `results`; they are read from
# the first tree-size entry (presumably identical for every size -- confirm).
div_scenario = list(results[list(results)[0]])
get_regression_norm_results(results, '674', div_scenario, 'norm')

Unnamed: 0,norm,no_norm
MAE,7.7588,0.8391
MAE_norm,4.7085,1.9024


In [81]:
# Scenario names are the second-level keys of `results`; they are read from
# the first tree-size entry (presumably identical for every size -- confirm).
div_scenario = list(results[list(results)[0]])
get_regression_norm_results(results, '489', div_scenario, 'norm')

Unnamed: 0,norm,no_norm
MAE,6.8932,0.775
MAE_norm,4.613,1.72


In [82]:
# Scenario names are the second-level keys of `results`; they are read from
# the first tree-size entry (presumably identical for every size -- confirm).
div_scenario = list(results[list(results)[0]])
get_regression_norm_results(results, '87', div_scenario, 'norm')

Unnamed: 0,norm,no_norm
MAE,8.1259,0.9861
MAE_norm,5.5902,2.4266


## Comparison between diversification scenarios

In [86]:
# Error table for the 'BD' scenario on the '674'-tip results.
# NOTE(review): the last argument presumably selects the 'norm' model variant
# stored in `results` -- confirm in evaluation.regression.
get_regression_div_results(results, '674', 'BD', 'norm')

Unnamed: 0,r,a,lambda,mu
MAE,1.2578,0.0905,1.3917,0.1701
MAE_norm,0.8398,0.0905,0.9825,0.1601


In [87]:
# Error table for the 'HE' scenario on the '674'-tip results.
# NOTE(review): the last argument presumably selects the 'norm' model variant
# stored in `results` -- confirm in evaluation.regression.
get_regression_div_results(results, '674', 'HE', 'norm')

Unnamed: 0,r,a,lambda,mu
MAE,0.4919,0.1432,0.5824,0.1062
MAE_norm,0.3159,0.1432,0.8849,0.5904


In [88]:
# Error table for the 'ME' scenario on the '674'-tip results.
# NOTE(review): the last argument presumably selects the 'norm' model variant
# stored in `results` -- confirm in evaluation.regression.
get_regression_div_results(results, '674', 'ME', 'norm')

Unnamed: 0,r,a,time,frac,lambda,mu
MAE,1.0698,0.0693,37.8917,0.0489,2.35,1.288
MAE_norm,0.6829,0.0693,26.3903,0.0489,1.5639,0.889


In [89]:
# Error table for the 'SR' scenario on the '674'-tip results.
# NOTE(review): the last argument presumably selects the 'norm' model variant
# stored in `results` -- confirm in evaluation.regression.
get_regression_div_results(results, '674', 'SR', 'norm')

Unnamed: 0,r0,r1,a0,a1,time,lambda0,lambda1,mu0,mu1
MAE,1.6267,0.1627,0.1048,0.0949,22.6898,2.2163,0.617,0.6061,0.4584
MAE_norm,1.2011,0.1305,0.1048,0.0949,15.5514,1.7638,0.616,0.5816,0.4886


In [90]:
# Error table for the 'SAT' scenario on the '674'-tip results.
# NOTE(review): the last argument presumably selects the 'norm' model variant
# stored in `results` -- confirm in evaluation.regression.
get_regression_div_results(results, '674', 'SAT', 'norm')

Unnamed: 0,lambda 0
MAE,10.3071
MAE_norm,6.4437


In [91]:
# Error table for the 'WW' scenario on the '674'-tip results.
# NOTE(review): the last argument presumably selects the 'norm' model variant
# stored in `results` -- confirm in evaluation.regression.
get_regression_div_results(results, '674', 'WW', 'norm')

Unnamed: 0,r0,r1,a0,a1,time,lambda0,lambda1,mu0,mu1
MAE,1.4068,11.6825,0.1344,0.0854,175.9286,2.2068,20.281,3.6084,8.7008
MAE_norm,0.7867,7.3123,0.1344,0.0854,91.7129,1.2261,13.0962,2.0071,5.8897
