In [1]:

# import external libs
import pandas as pd
import warnings
from tqdm import tqdm
import os
import sys
sys.path.append('../src/')
import re
import plotly.express as px

# import internal libs
from model.evaluation import classification_report, regression_metrics, get_classification_report
from model.config import create_experiment_configs_dummy, create_experiment_configs_tf
from data.preparation import load_dataset

In [2]:
# remove warning
warnings.filterwarnings('ignore')

In [3]:
# define paths
PATH_REPORTS = '../reports/'
lstm_results_path = 'test_results/LSTM_with_Attention_{asset}_test_results.csv'
mlp_results_path =  'test_results/MLP_{asset}_test_results.csv'
DATA_DIR = '../data/'

In [4]:
# define experiments
ASSETS = [
    "PETR3.SA", 
    "PRIO3.SA", 
    "VALE3.SA", 
    "GGBR3.SA", 
    "ABCB4.SA", 
    "ITUB3.SA", 
    "FLRY3.SA", 
    "RADL3.SA"
    ]

seq_len_list = [1,2,3,4,5,6,7,14,21,28,35,42,49,56,63,70]

moving_windows = [7,14,21]

dict_experiments_dummy = create_experiment_configs_dummy(ASSETS, moving_windows)
dict_experiments_tf = create_experiment_configs_tf(ASSETS, seq_len_list, moving_windows)

## General results

### Create table

In [5]:
list_results_clf = []
list_results_reg = []
# list_results = []

for name, dict_experiments, path_results in [
    ("tf", dict_experiments_tf, PATH_REPORTS + 'test_results/{algorithm}_{asset}_features={features}__label={label_col}__sql_len={seq_len}__scaling_method={scaling_method}_test_results.csv'),
    ('dummy', dict_experiments_dummy, PATH_REPORTS + "test_results/Dummy_model_{asset}_features={feature_col}__label={label_col}_test_results.csv")
]:
    
    for exp_name, config in tqdm(dict_experiments.items()):
        
        if name == "tf":
            
            feature_cols = config['feature_cols']
            label_col = config['label_col']
            seq_len = config['seq_len']
            asset = config['asset']
            scaling_method = config['scaling_method']
            algorithm = config['algorithm']
            asset = config['asset']
            prediction_type = config['prediction_type']
            
            filepath = path_results.format(
                algorithm = algorithm,
                features = "_".join(feature_cols),
                label_col = label_col,
                asset = asset.replace(".", "_"),
                scaling_method = scaling_method.__str__(),
                seq_len = seq_len
            )
            
            if not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue

            results = pd.read_csv(filepath)
            
        elif name == 'dummy':
            feature_cols = [config['feature_col']]
            label_col = config['label_col']
            asset = config['asset']
            algorithm = "Dummy_model"
            seq_len = 1
            scaling_method = None
            prediction_type = 'dummy'
            asset_formated = asset.replace(".", "_")
            
            filepath = path_results.format(
                algorithm = algorithm,
                feature_col = feature_cols[0],
                label_col = label_col,
                asset = asset_formated
            )
            
            if  not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue
            
            results = pd.read_csv(filepath)
        
        # concat with the test dataset
        full_test = load_dataset(asset=asset, data_dir=DATA_DIR,dataset_split='test')
        results = pd.concat([results,full_test], axis =1)
        
        # find the window for label calculation (get the number of the string)
        window = int(re.findall(r'\d+', label_col)[0])

        # results without leak
        results_wo_leak = results.iloc[max(seq_len, window):]
        
        # remove invalid days
        results = results[results.Invalid_Days == 0]
        
        # raise error if there is nan values
        if results.isna().sum().sum() > 0: 
            raise ValueError('Há dados nulos no dataframe de resultados')
                
                
        new_coluns = [asset, str(feature_cols), str(label_col), seq_len, algorithm, scaling_method, prediction_type]
        new_columns_nms = ['asset','feature_cols','label_col','seq_len','model','scaling_method','prediction_type', 'leak']
                
        # regression metrics
        reg_metrics = regression_metrics(results.y_test, results.y_pred)
        reg_metrics_wo_leak = regression_metrics(results_wo_leak.y_test, results_wo_leak.y_pred)
        
    
        # add columns with experiment config 

        reg_metrics[
            new_columns_nms
            ] = new_coluns + [True]
        reg_metrics_wo_leak[
            new_columns_nms
            ] = new_coluns+ [False]
        
        list_results_reg.append(reg_metrics)
        list_results_reg.append(reg_metrics_wo_leak)
    
    
        # obtem metricas de classificacao, truncando os valores (se o valor já for a meta, não terá diferenca)
        y_test_trunc = [int(i) for i in results.y_test]
        y_pred_trunc = [int(i) for i in results.y_pred]
        df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        
        y_test_trunc_woleak = [int(i) for i in results_wo_leak.y_test]
        y_pred_trunc_woleak = [int(i) for i in results_wo_leak.y_pred]
        df_cr_wo_leak = get_classification_report(y_test_trunc_woleak, y_pred_trunc_woleak)
        
        df_cr[new_columns_nms] = new_coluns + [True]
        df_cr_wo_leak[new_columns_nms] = new_coluns + [False]
        
        list_results_clf.append(df_cr)
        list_results_clf.append(df_cr_wo_leak)

100%|██████████| 2304/2304 [01:34<00:00, 24.37it/s]
100%|██████████| 48/48 [00:02<00:00, 20.39it/s]


In [6]:
# concat results
final_results_reg = pd.concat(list_results_reg).reset_index(drop=True)
final_results_clf = pd.concat(list_results_clf).reset_index(drop=True)

In [7]:
# only the "right" labels
final_results_clf = final_results_clf[final_results_clf.label_col.str.contains('meta')]
final_results_reg = final_results_reg[final_results_reg.label_col.str.contains('diff_close_mean_z_score')]

### Results

In [8]:
assets_domain = [(["PETR3.SA","PRIO3.SA"], 'Petróleo'), (["VALE3.SA", "GGBR3.SA"], "Mineração"), (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'), (["FLRY3.SA", "RADL3.SA"], 'Saúde')]

# Configuração para não omitir linhas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


#### Dummy results

In [9]:
# general 
macro_dummy = final_results_clf[
    (final_results_clf['class'] == 'macro avg') &
    (final_results_clf['prediction_type'] == 'dummy')
    
    ].round(2)
reg_dummy = final_results_reg[
    (final_results_reg['prediction_type'] == 'dummy')
    ].round(2)

In [10]:
for label in macro_dummy.label_col.unique():
    for model in macro_dummy.model.unique():

        print(label, model)
        
        fig = px.box(macro_dummy, x="model", y="f1-score", points="all")
        fig.show()

meta_7 Dummy_model


meta_14 Dummy_model


meta_21 Dummy_model


In [11]:

for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    
    
    print('Metricas macro - label meta')
    metrics_clf = macro_dummy[macro_dummy.label_col.str.contains('meta') & macro_dummy.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'seq_len', 'class', 'model', 'feature_cols'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','alvo','precision','recall', 'f1-score']]
        
    display(metrics_clf)
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    metrics_reg = reg_dummy[reg_dummy.label_col.str.contains('diff_close_mean_z_score') & reg_dummy.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'seq_len', 'model', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)
    display(metrics_reg)


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,alvo,precision,recall,f1-score
39560,PETR3.SA,meta_7,0.33,0.33,0.33
39568,PETR3.SA,meta_7,0.54,0.54,0.54
39594,PETR3.SA,meta_14,0.43,0.43,0.43
39603,PETR3.SA,meta_14,0.67,0.67,0.67
39630,PETR3.SA,meta_21,0.49,0.49,0.49
39639,PETR3.SA,meta_21,0.71,0.7,0.7
39664,PRIO3.SA,meta_7,0.34,0.34,0.34
39672,PRIO3.SA,meta_7,0.48,0.48,0.48
39696,PRIO3.SA,meta_14,0.49,0.49,0.49
39704,PRIO3.SA,meta_14,0.65,0.65,0.65


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak
4608,0.71,0.92,0.96,0.26,2.07,PETR3.SA,diff_close_mean_z_score_7,True
4609,0.49,0.63,0.79,0.48,1.42,PETR3.SA,diff_close_mean_z_score_7,False
4612,0.57,0.65,0.81,0.58,1.78,PETR3.SA,diff_close_mean_z_score_14,True
4613,0.39,0.45,0.67,0.71,1.23,PETR3.SA,diff_close_mean_z_score_14,False
4616,0.48,0.48,0.69,0.7,1.08,PETR3.SA,diff_close_mean_z_score_21,True
4617,0.33,0.33,0.58,0.79,0.74,PETR3.SA,diff_close_mean_z_score_21,False
4620,0.69,0.84,0.92,0.33,2.15,PRIO3.SA,diff_close_mean_z_score_7,True
4621,0.48,0.58,0.76,0.53,1.48,PRIO3.SA,diff_close_mean_z_score_7,False
4624,0.56,0.58,0.76,0.61,1.58,PRIO3.SA,diff_close_mean_z_score_14,True
4625,0.38,0.4,0.63,0.73,1.09,PRIO3.SA,diff_close_mean_z_score_14,False



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,alvo,precision,recall,f1-score
39760,VALE3.SA,meta_7,0.4,0.4,0.4
39768,VALE3.SA,meta_7,0.6,0.6,0.6
39792,VALE3.SA,meta_14,0.56,0.56,0.56
39800,VALE3.SA,meta_14,0.7,0.7,0.7
39830,VALE3.SA,meta_21,0.45,0.45,0.45
39839,VALE3.SA,meta_21,0.71,0.71,0.71
39864,GGBR3.SA,meta_7,0.34,0.34,0.34
39872,GGBR3.SA,meta_7,0.52,0.52,0.52
39896,GGBR3.SA,meta_14,0.53,0.53,0.53
39903,GGBR3.SA,meta_14,0.69,0.69,0.69


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak
4632,0.66,0.76,0.87,0.41,264745000000.0,VALE3.SA,diff_close_mean_z_score_7,True
4633,0.45,0.51,0.72,0.61,182029500000.0,VALE3.SA,diff_close_mean_z_score_7,False
4636,0.52,0.52,0.72,0.67,1.19,VALE3.SA,diff_close_mean_z_score_14,True
4637,0.35,0.35,0.59,0.78,0.78,VALE3.SA,diff_close_mean_z_score_14,False
4640,0.43,0.37,0.61,0.79,1.37,VALE3.SA,diff_close_mean_z_score_21,True
4641,0.29,0.23,0.48,0.87,0.92,VALE3.SA,diff_close_mean_z_score_21,False
4644,0.71,0.88,0.94,0.27,1.76,GGBR3.SA,diff_close_mean_z_score_7,True
4645,0.48,0.6,0.78,0.49,1.2,GGBR3.SA,diff_close_mean_z_score_7,False
4648,0.56,0.57,0.76,0.62,3.22,GGBR3.SA,diff_close_mean_z_score_14,True
4649,0.38,0.39,0.62,0.75,2.2,GGBR3.SA,diff_close_mean_z_score_14,False



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,alvo,precision,recall,f1-score
39968,ABCB4.SA,meta_7,0.35,0.35,0.35
39976,ABCB4.SA,meta_7,0.51,0.51,0.51
40000,ABCB4.SA,meta_14,0.47,0.47,0.47
40008,ABCB4.SA,meta_14,0.65,0.65,0.65
40035,ABCB4.SA,meta_21,0.45,0.45,0.45
40045,ABCB4.SA,meta_21,0.57,0.57,0.57
40068,ITUB3.SA,meta_7,0.35,0.35,0.35
40076,ITUB3.SA,meta_7,0.43,0.43,0.43
40103,ITUB3.SA,meta_14,0.46,0.46,0.46
40112,ITUB3.SA,meta_14,0.58,0.58,0.58


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak
4656,0.76,1.0,1.0,0.17,265.8,ABCB4.SA,diff_close_mean_z_score_7,True
4657,0.52,0.68,0.82,0.44,182.74,ABCB4.SA,diff_close_mean_z_score_7,False
4660,0.62,0.71,0.84,0.5,2.74,ABCB4.SA,diff_close_mean_z_score_14,True
4661,0.43,0.49,0.7,0.66,1.9,ABCB4.SA,diff_close_mean_z_score_14,False
4664,0.54,0.52,0.72,0.66,1.69,ABCB4.SA,diff_close_mean_z_score_21,True
4665,0.37,0.36,0.6,0.77,1.17,ABCB4.SA,diff_close_mean_z_score_21,False
4668,0.68,0.88,0.94,0.33,4948931000000.0,ITUB3.SA,diff_close_mean_z_score_7,True
4669,0.46,0.59,0.77,0.54,3402713000000.0,ITUB3.SA,diff_close_mean_z_score_7,False
4672,0.52,0.55,0.74,0.67,167.98,ITUB3.SA,diff_close_mean_z_score_14,True
4673,0.36,0.37,0.61,0.78,116.34,ITUB3.SA,diff_close_mean_z_score_14,False



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,alvo,precision,recall,f1-score
40176,FLRY3.SA,meta_7,0.33,0.33,0.33
40184,FLRY3.SA,meta_7,0.51,0.51,0.51
40210,FLRY3.SA,meta_14,0.38,0.38,0.38
40220,FLRY3.SA,meta_14,0.63,0.63,0.63
40247,FLRY3.SA,meta_21,0.48,0.48,0.48
40255,FLRY3.SA,meta_21,0.7,0.7,0.7
40280,RADL3.SA,meta_7,0.33,0.33,0.33
40288,RADL3.SA,meta_7,0.5,0.5,0.5
40312,RADL3.SA,meta_14,0.5,0.5,0.5
40320,RADL3.SA,meta_14,0.66,0.66,0.66


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak
4680,0.72,0.91,0.95,0.25,2.83,FLRY3.SA,diff_close_mean_z_score_7,True
4681,0.49,0.62,0.79,0.48,1.94,FLRY3.SA,diff_close_mean_z_score_7,False
4684,0.58,0.56,0.75,0.61,2.25,FLRY3.SA,diff_close_mean_z_score_14,True
4685,0.39,0.39,0.62,0.74,1.56,FLRY3.SA,diff_close_mean_z_score_14,False
4688,0.49,0.42,0.65,0.73,203.79,FLRY3.SA,diff_close_mean_z_score_21,True
4689,0.34,0.29,0.54,0.81,142.19,FLRY3.SA,diff_close_mean_z_score_21,False
4692,0.68,0.82,0.91,0.36,2.85,RADL3.SA,diff_close_mean_z_score_7,True
4693,0.47,0.56,0.75,0.57,1.96,RADL3.SA,diff_close_mean_z_score_7,False
4696,0.56,0.58,0.76,0.62,3164.58,RADL3.SA,diff_close_mean_z_score_14,True
4697,0.39,0.4,0.63,0.74,2191.88,RADL3.SA,diff_close_mean_z_score_14,False


#### NN results

In [12]:
# general 
macro_clf = final_results_clf[(final_results_clf['class'] == 'macro avg')].round(5)
reg= final_results_reg.round(5)

In [13]:
for label in reg.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        
        if 'meta' in label: label_formated = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label_formated = label.replace('diff_close_mean_z_score', 'z')
        
        df_plt = reg[(reg.label_col == label) & (reg.leak == leak)]
        
        fig = px.box(
            df_plt, 
            x="model", 
            y="R-squared (R2)", 
            points="all",
            title = f'Distribuição de f1 para os experimentos \n label:{label_formated}|leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label_formated}__leak={leak}_reg.png")
        # fig.update_traces(boxpoints=False) 
        fig.show()

label: diff_close_mean_z_score_7 - leak: False


label: diff_close_mean_z_score_14 - leak: False


label: diff_close_mean_z_score_21 - leak: False


In [14]:
for label in macro_clf.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        if 'meta' in label: label = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label = label.replace('diff_close_mean_z_score', 'z')
        
        fig = px.box(
            macro_clf[(macro_clf.label_col == label_col) & (macro_clf.leak == leak)], 
            x="model", 
            y="f1-score", 
            points="all",
            title = f'Distribuição dos resultados de f1 para os experimentos - label: {label} | leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label}__leak={leak}_clf.png")
        fig.show()

label: meta_7 - leak: False


label: meta_14 - leak: False


label: meta_21 - leak: False


In [15]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak', 'feature_cols']]
    
    display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak', 'feature_cols'])['f1-score'].idxmax()])
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    
    metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
    
    display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
111,PETR3.SA,1,meta_14,0.55597,0.56142,0.5586,LSTM_with_Attention,False,['meta_14']
39603,PETR3.SA,1,meta_14,0.66893,0.66658,0.66773,Dummy_model,False,['past_meta_14']
101,PETR3.SA,1,meta_14,0.43912,0.44128,0.44018,LSTM_with_Attention,True,['meta_14']
39594,PETR3.SA,1,meta_14,0.43335,0.43335,0.43335,Dummy_model,True,['past_meta_14']
15227,PRIO3.SA,7,meta_14,0.66826,0.64541,0.6554,MLP,False,['meta_14']
39704,PRIO3.SA,1,meta_14,0.65342,0.65342,0.65342,Dummy_model,False,['past_meta_14']
15219,PRIO3.SA,7,meta_14,0.50495,0.48843,0.49515,MLP,True,['meta_14']
39696,PRIO3.SA,1,meta_14,0.49031,0.49031,0.49031,Dummy_model,True,['past_meta_14']
7651,PETR3.SA,4,meta_21,0.75945,0.66258,0.70115,KAN,False,['meta_21']
39639,PETR3.SA,1,meta_21,0.70554,0.70281,0.70415,Dummy_model,False,['past_meta_21']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
21,0.42061,0.42128,0.64906,0.72737,1.13465,PETR3.SA,diff_close_mean_z_score_14,1,MLP,False
596,0.53242,0.58694,0.76612,0.61799,1.49762,PETR3.SA,diff_close_mean_z_score_14,3,MLP,True
921,0.42128,0.37641,0.61352,0.74747,1.06357,PRIO3.SA,diff_close_mean_z_score_14,4,MLP,False
920,0.52801,0.52085,0.7217,0.64911,1.43553,PRIO3.SA,diff_close_mean_z_score_14,4,MLP,True
33,0.35096,0.31627,0.56238,0.80074,0.77326,PETR3.SA,diff_close_mean_z_score_21,1,MLP,False
32,0.45601,0.44708,0.66864,0.72368,0.99606,PETR3.SA,diff_close_mean_z_score_21,1,MLP,True
69,0.356,0.28617,0.53495,0.82653,0.93495,PRIO3.SA,diff_close_mean_z_score_21,1,MLP,False
68,0.46039,0.40088,0.63315,0.75425,1.27224,PRIO3.SA,diff_close_mean_z_score_21,1,MLP,True
585,0.51527,0.54541,0.73852,0.54907,1.27261,PETR3.SA,diff_close_mean_z_score_7,3,MLP,False
2310,0.66265,0.749,0.86545,0.39481,1.40114,PETR3.SA,diff_close_mean_z_score_7,21,LSTM_with_Attention,True



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
10883,GGBR3.SA,5,meta_14,0.7217,0.64954,0.67334,LSTM_with_Attention,False,['meta_14']
39903,GGBR3.SA,1,meta_14,0.69012,0.69012,0.69012,Dummy_model,False,['past_meta_14']
18273,GGBR3.SA,14,meta_14,0.57122,0.49252,0.51335,LSTM_with_Attention,True,['meta_14']
39896,GGBR3.SA,1,meta_14,0.52911,0.52871,0.52891,Dummy_model,True,['past_meta_14']
10587,VALE3.SA,5,meta_14,0.70274,0.67898,0.68914,MLP,False,['meta_14']
39800,VALE3.SA,1,meta_14,0.70389,0.70389,0.70389,Dummy_model,False,['past_meta_14']
10579,VALE3.SA,5,meta_14,0.56838,0.55392,0.55995,MLP,True,['meta_14']
39792,VALE3.SA,1,meta_14,0.55919,0.55919,0.55919,Dummy_model,True,['past_meta_14']
6091,GGBR3.SA,3,meta_21,0.65164,0.57703,0.60619,KAN,False,['meta_21']
39944,GGBR3.SA,1,meta_21,0.66197,0.66161,0.66179,Dummy_model,False,['past_meta_21']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
129,0.42524,0.37198,0.6099,0.75846,2.00956,GGBR3.SA,diff_close_mean_z_score_14,1,MLP,False
128,0.53326,0.52139,0.72208,0.65578,2.69358,GGBR3.SA,diff_close_mean_z_score_14,1,MLP,True
93,0.39608,0.34049,0.58352,0.78793,0.78142,VALE3.SA,diff_close_mean_z_score_14,1,MLP,False
92,0.49646,0.47386,0.68837,0.6977,1.09138,VALE3.SA,diff_close_mean_z_score_14,1,MLP,True
141,0.35077,0.28001,0.52916,0.83157,1.1967,GGBR3.SA,diff_close_mean_z_score_21,1,MLP,False
140,0.45798,0.40463,0.63611,0.75283,1.67892,GGBR3.SA,diff_close_mean_z_score_21,1,MLP,True
105,0.31313,0.2225,0.4717,0.87689,0.94048,VALE3.SA,diff_close_mean_z_score_21,1,MLP,False
104,0.4164,0.34656,0.5887,0.80222,1.2942,VALE3.SA,diff_close_mean_z_score_21,1,MLP,True
693,0.51688,0.52503,0.72459,0.55768,1.05747,GGBR3.SA,diff_close_mean_z_score_7,3,MLP,False
114,0.66171,0.72284,0.8502,0.40137,1.24128,GGBR3.SA,diff_close_mean_z_score_7,1,LSTM_with_Attention,True



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
6281,ABCB4.SA,3,meta_14,0.66345,0.65114,0.657,MLP,False,['meta_14']
40008,ABCB4.SA,1,meta_14,0.64893,0.64893,0.64893,Dummy_model,False,['past_meta_14']
6273,ABCB4.SA,3,meta_14,0.48402,0.47445,0.47889,MLP,True,['meta_14']
40000,ABCB4.SA,1,meta_14,0.47122,0.47122,0.47122,Dummy_model,True,['past_meta_14']
1643,ITUB3.SA,1,meta_14,0.57765,0.57957,0.5786,LSTM_with_Attention,False,['meta_14']
40112,ITUB3.SA,1,meta_14,0.57602,0.5763,0.57616,Dummy_model,False,['past_meta_14']
1634,ITUB3.SA,1,meta_14,0.46318,0.46499,0.46407,LSTM_with_Attention,True,['meta_14']
40103,ITUB3.SA,1,meta_14,0.4604,0.46071,0.46056,Dummy_model,True,['past_meta_14']
21205,ABCB4.SA,21,meta_21,0.62418,0.61196,0.61751,KAN,False,['meta_21']
40045,ABCB4.SA,1,meta_21,0.5693,0.56882,0.56906,Dummy_model,False,['past_meta_21']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1029,0.46039,0.447,0.66858,0.68662,1.77036,ABCB4.SA,diff_close_mean_z_score_14,4,MLP,False
162,0.59619,0.62396,0.78991,0.56245,2.07131,ABCB4.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
201,0.40605,0.35764,0.59803,0.78431,106.2147,ITUB3.SA,diff_close_mean_z_score_14,1,MLP,False
200,0.50626,0.49884,0.70629,0.69884,153.2851,ITUB3.SA,diff_close_mean_z_score_14,1,MLP,True
177,0.39372,0.34028,0.58333,0.78025,1.18984,ABCB4.SA,diff_close_mean_z_score_21,1,MLP,False
176,0.51252,0.48195,0.69423,0.68704,1.59071,ABCB4.SA,diff_close_mean_z_score_21,1,MLP,True
4677,0.29513,0.26442,0.51422,0.85552,1.93657,ITUB3.SA,diff_close_mean_z_score_21,1,Dummy_model,False
212,0.42709,0.37301,0.61074,0.79576,2.5871,ITUB3.SA,diff_close_mean_z_score_21,1,MLP,True
1593,0.55276,0.57438,0.75788,0.52193,129.6199,ABCB4.SA,diff_close_mean_z_score_7,6,MLP,False
1878,0.69537,0.78788,0.88762,0.34793,153.0098,ABCB4.SA,diff_close_mean_z_score_7,7,LSTM_with_Attention,True



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
4428,FLRY3.SA,2,meta_14,0.52365,0.52896,0.52603,LSTM_with_Attention,False,['meta_14']
40220,FLRY3.SA,1,meta_14,0.6327,0.63292,0.63281,Dummy_model,False,['past_meta_14']
14279,FLRY3.SA,6,meta_14,0.41068,0.38572,0.39369,LSTM_with_Attention,True,['meta_14']
40210,FLRY3.SA,1,meta_14,0.37578,0.37601,0.3759,Dummy_model,True,['past_meta_14']
4741,RADL3.SA,2,meta_14,0.66935,0.65568,0.66191,LSTM_with_Attention,False,['meta_14']
40320,RADL3.SA,1,meta_14,0.66057,0.6603,0.66044,Dummy_model,False,['past_meta_14']
22003,RADL3.SA,21,meta_14,0.5402,0.4968,0.51122,LSTM_with_Attention,True,['meta_14']
40312,RADL3.SA,1,meta_14,0.50464,0.50435,0.5045,Dummy_model,True,['past_meta_14']
11969,FLRY3.SA,5,meta_21,0.73944,0.65042,0.6829,KAN,False,['meta_21']
40255,FLRY3.SA,1,meta_21,0.69998,0.70159,0.70078,Dummy_model,False,['past_meta_21']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
237,0.42929,0.36396,0.60329,0.75248,1.4492,FLRY3.SA,diff_close_mean_z_score_14,1,MLP,False
236,0.54371,0.50705,0.71207,0.65219,2.02373,FLRY3.SA,diff_close_mean_z_score_14,1,MLP,True
273,0.41696,0.37753,0.61444,0.75685,1978.84622,RADL3.SA,diff_close_mean_z_score_14,1,MLP,False
848,0.53352,0.52831,0.72685,0.65624,2365.65046,RADL3.SA,diff_close_mean_z_score_14,3,MLP,True
249,0.37334,0.28285,0.53184,0.81967,340.72122,FLRY3.SA,diff_close_mean_z_score_21,1,MLP,False
536,0.47316,0.39225,0.6263,0.74608,71.38017,FLRY3.SA,diff_close_mean_z_score_21,2,MLP,True
285,0.35329,0.28109,0.53018,0.82624,2.79281,RADL3.SA,diff_close_mean_z_score_21,1,MLP,False
284,0.46245,0.39842,0.63121,0.75261,3.98159,RADL3.SA,diff_close_mean_z_score_21,1,MLP,True
513,0.5364,0.5447,0.73804,0.54997,1.61485,FLRY3.SA,diff_close_mean_z_score_7,2,MLP,False
2240,0.66824,0.73317,0.85625,0.39484,2.02946,FLRY3.SA,diff_close_mean_z_score_7,14,MLP,True


In [16]:
# for assets, domain in assets_domain:
#     print(f'''
# ##############################################
# # DOMAIN: {domain}
# # ASSETS: {assets}
# ##############################################
#           ''')
    
#     print('Metricas macro - label meta')
#     metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
#     drop(['support', 'scaling_method', 'prediction_type', 'class', 'feature_cols'], axis =1 ).\
#             rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak']]
    
#     display(metrics_clf)
    
#     print('Metricas regressao - label diff_close_mean_z_score')
    
    
#     metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
#         drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
#             rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
#     display(metrics_reg)