In [24]:

# import external libs
import pandas as pd
import warnings
from tqdm import tqdm
import os
import sys
sys.path.append('../src/')
import re
import plotly.express as px

# import internal libs
from model.evaluation import classification_report, regression_metrics, get_classification_report
from model.config import create_experiment_configs_dummy, create_experiment_configs_tf
from data.preparation import load_dataset

In [25]:
# remove warning
warnings.filterwarnings('ignore')

In [26]:
# define paths
PATH_REPORTS = '../reports/'
lstm_results_path = 'test_results/LSTM_with_Attention_{asset}_test_results.csv'
mlp_results_path =  'test_results/MLP_{asset}_test_results.csv'
DATA_DIR = '../data/'

In [27]:

# define experiments
ASSETS = [
    "PETR3.SA", 
    "PRIO3.SA", 
    "VALE3.SA", 
    "GGBR3.SA", 
    "ABCB4.SA", 
    "ITUB3.SA", 
    "FLRY3.SA", 
    "RADL3.SA"
    ]

seq_len_list = [1,2,3,4,5,6,7,14,21,28,35,42
                ,49,56,63,70
                ]

moving_windows = [7,14,21]

algorithms=[
    'LSTM_with_Attention', 
    'MLP',
    # 'KAN'
    ]

dict_experiments_dummy = create_experiment_configs_dummy(ASSETS, moving_windows)
dict_experiments_tf = create_experiment_configs_tf(ASSETS, seq_len_list, moving_windows,algorithms=algorithms)

## General results

### Create table

In [28]:
list_results_clf = []
list_results_reg = []
# list_results = []

for name, dict_experiments, path_results in [
    ("tf", dict_experiments_tf, PATH_REPORTS + 'test_results/{algorithm}_{asset}_features={features}__label={label_col}__sql_len={seq_len}__scaling_method={scaling_method}_test_results.csv'),
    ('dummy', dict_experiments_dummy, PATH_REPORTS + "test_results/Dummy_model_{asset}_features={feature_col}__label={label_col}_test_results.csv")
]:
    
    for exp_name, config in tqdm(dict_experiments.items()):
        
        if name == "tf":
            
            feature_cols = config['feature_cols']
            label_col = config['label_col']
            seq_len = config['seq_len']
            asset = config['asset']
            scaling_method = config['scaling_method']
            algorithm = config['algorithm']
            asset = config['asset']
            prediction_type = config['prediction_type']
            
            filepath = path_results.format(
                algorithm = algorithm,
                features = "_".join(feature_cols),
                label_col = label_col,
                asset = asset.replace(".", "_"),
                scaling_method = scaling_method.__str__(),
                seq_len = seq_len
            )
            
            if not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue

            results = pd.read_csv(filepath)
            
        elif name == 'dummy':
            feature_cols = [config['feature_col']]
            label_col = config['label_col']
            asset = config['asset']
            algorithm = "Dummy_model"
            seq_len = 1
            scaling_method = None
            prediction_type = 'dummy'
            asset_formated = asset.replace(".", "_")
            
            filepath = path_results.format(
                algorithm = algorithm,
                feature_col = feature_cols[0],
                label_col = label_col,
                asset = asset_formated
            )
            
            if  not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue
            
            results = pd.read_csv(filepath)
        
        # concat with the test dataset
        full_test = load_dataset(asset=asset, data_dir=DATA_DIR,dataset_split='test')
        results = pd.concat([results,full_test], axis =1)
        
        # find the window for label calculation (get the number of the string)
        window = int(re.findall(r'\d+', label_col)[0])

        # results without leak
        results_wo_leak = results.iloc[max(seq_len, window):]
        
        # remove invalid days
        results = results[results.Invalid_Days == 0]
        
        # raise error if there is nan values
        if results.isna().sum().sum() > 0: 
            raise ValueError('Há dados nulos no dataframe de resultados')
                
                
        new_coluns = [asset, str(feature_cols), str(label_col), seq_len, algorithm, scaling_method, prediction_type]
        new_columns_nms = ['asset','feature_cols','label_col','seq_len','model','scaling_method','prediction_type', 'leak']
                
        # regression metrics
        reg_metrics = regression_metrics(results.y_test, results.y_pred)
        reg_metrics_wo_leak = regression_metrics(results_wo_leak.y_test, results_wo_leak.y_pred)
        
    
        # add columns with experiment config 

        reg_metrics[
            new_columns_nms
            ] = new_coluns + [True]
        reg_metrics_wo_leak[
            new_columns_nms
            ] = new_coluns+ [False]
        
        list_results_reg.append(reg_metrics)
        list_results_reg.append(reg_metrics_wo_leak)
    
    
        # obtem metricas de classificacao, truncando os valores (se o valor já for a meta, não terá diferenca)
        y_test_trunc = [int(i) for i in results.y_test]
        y_pred_trunc = [int(i) for i in results.y_pred]
        df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        
        y_test_trunc_woleak = [int(i) for i in results_wo_leak.y_test]
        y_pred_trunc_woleak = [int(i) for i in results_wo_leak.y_pred]
        df_cr_wo_leak = get_classification_report(y_test_trunc_woleak, y_pred_trunc_woleak)
        
        df_cr[new_columns_nms] = new_coluns + [True]
        df_cr_wo_leak[new_columns_nms] = new_coluns + [False]
        
        list_results_clf.append(df_cr)
        list_results_clf.append(df_cr_wo_leak)

100%|██████████| 1536/1536 [00:52<00:00, 29.02it/s]
100%|██████████| 48/48 [00:01<00:00, 31.65it/s]


In [29]:
# concat results
final_results_reg = pd.concat(list_results_reg).reset_index(drop=True)
final_results_clf = pd.concat(list_results_clf).reset_index(drop=True)

In [30]:
# only the "right" labels
final_results_clf = final_results_clf[final_results_clf.label_col.str.contains('meta')]
final_results_reg = final_results_reg[final_results_reg.label_col.str.contains('diff_close_mean_z_score')]

### Results

In [31]:
assets_domain = [(["PETR3.SA","PRIO3.SA"], 'Petróleo'), (["VALE3.SA", "GGBR3.SA"], "Mineração"), (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'), (["FLRY3.SA", "RADL3.SA"], 'Saúde')]

# Configuração para não omitir linhas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


#### Dummy results

In [32]:
assets_domain = [(["PETR3.SA","PRIO3.SA"], 'Petróleo'), (["VALE3.SA", "GGBR3.SA"], "Mineração"), (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'), (["FLRY3.SA", "RADL3.SA"], 'Saúde')]

asset_to_domain = lambda x: 'Petróleo' if x in ["PETR3.SA","PRIO3.SA"] else "Mineração" if x in ["VALE3.SA", "GGBR3.SA"] else 'Financeiro' if x in ["ABCB4.SA", "ITUB3.SA"] else 'Saúde' if x in ["FLRY3.SA", "RADL3.SA"] else None

In [33]:
# general 
macro_dummy = final_results_clf[
    (final_results_clf['class'] == 'macro avg') &
    (final_results_clf['prediction_type'] == 'dummy')
    
    ].round(2)
macro_dummy['domain'] = macro_dummy.asset.apply(asset_to_domain)

reg_dummy = final_results_reg[
    (final_results_reg['prediction_type'] == 'dummy')
    ].round(2)
reg_dummy['domain'] = reg_dummy.asset.apply(asset_to_domain)

In [34]:
# for label in macro_dummy.label_col.unique():
#     for model in macro_dummy.model.unique():

#         print(label, model)
        
#         fig = px.box(macro_dummy, x="model", y="f1-score", points="all")
#         fig.show()

In [35]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_dummy[macro_dummy.label_col.str.contains('meta') & macro_dummy.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'seq_len', 'class', 'model'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)
        
        
    display(metrics_clf.sort_values(['alvo', 'leak', 'ativo']))
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    metrics_reg = reg_dummy[reg_dummy.label_col.str.contains('diff_close_mean_z_score') & reg_dummy.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'seq_len', 'model', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)
    display(metrics_reg)


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
26328,0.67,0.67,0.67,PETR3.SA,['past_meta_14'],meta_14,False,Petróleo
26429,0.65,0.65,0.65,PRIO3.SA,['past_meta_14'],meta_14,False,Petróleo
26319,0.43,0.43,0.43,PETR3.SA,['past_meta_14'],meta_14,True,Petróleo
26421,0.49,0.49,0.49,PRIO3.SA,['past_meta_14'],meta_14,True,Petróleo
26364,0.71,0.7,0.7,PETR3.SA,['past_meta_21'],meta_21,False,Petróleo
26460,0.74,0.74,0.74,PRIO3.SA,['past_meta_21'],meta_21,False,Petróleo
26355,0.49,0.49,0.49,PETR3.SA,['past_meta_21'],meta_21,True,Petróleo
26452,0.62,0.62,0.62,PRIO3.SA,['past_meta_21'],meta_21,True,Petróleo
26293,0.54,0.54,0.54,PETR3.SA,['past_meta_7'],meta_7,False,Petróleo
26397,0.48,0.48,0.48,PRIO3.SA,['past_meta_7'],meta_7,False,Petróleo


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
3072,0.71,0.92,0.96,0.26,2.07,PETR3.SA,diff_close_mean_z_score_7,True,Petróleo
3073,0.49,0.63,0.79,0.48,1.42,PETR3.SA,diff_close_mean_z_score_7,False,Petróleo
3076,0.57,0.65,0.81,0.58,1.78,PETR3.SA,diff_close_mean_z_score_14,True,Petróleo
3077,0.39,0.45,0.67,0.71,1.23,PETR3.SA,diff_close_mean_z_score_14,False,Petróleo
3080,0.48,0.48,0.69,0.7,1.08,PETR3.SA,diff_close_mean_z_score_21,True,Petróleo
3081,0.33,0.33,0.58,0.79,0.74,PETR3.SA,diff_close_mean_z_score_21,False,Petróleo
3084,0.69,0.84,0.92,0.33,2.15,PRIO3.SA,diff_close_mean_z_score_7,True,Petróleo
3085,0.48,0.58,0.76,0.53,1.48,PRIO3.SA,diff_close_mean_z_score_7,False,Petróleo
3088,0.56,0.58,0.76,0.61,1.58,PRIO3.SA,diff_close_mean_z_score_14,True,Petróleo
3089,0.38,0.4,0.63,0.73,1.09,PRIO3.SA,diff_close_mean_z_score_14,False,Petróleo



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
26628,0.69,0.69,0.69,GGBR3.SA,['past_meta_14'],meta_14,False,Mineração
26525,0.7,0.7,0.7,VALE3.SA,['past_meta_14'],meta_14,False,Mineração
26621,0.53,0.53,0.53,GGBR3.SA,['past_meta_14'],meta_14,True,Mineração
26517,0.56,0.56,0.56,VALE3.SA,['past_meta_14'],meta_14,True,Mineração
26669,0.66,0.66,0.66,GGBR3.SA,['past_meta_21'],meta_21,False,Mineração
26564,0.71,0.71,0.71,VALE3.SA,['past_meta_21'],meta_21,False,Mineração
26659,0.42,0.42,0.42,GGBR3.SA,['past_meta_21'],meta_21,True,Mineração
26555,0.45,0.45,0.45,VALE3.SA,['past_meta_21'],meta_21,True,Mineração
26597,0.52,0.52,0.52,GGBR3.SA,['past_meta_7'],meta_7,False,Mineração
26493,0.6,0.6,0.6,VALE3.SA,['past_meta_7'],meta_7,False,Mineração


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
3096,0.66,0.76,0.87,0.41,264745000000.0,VALE3.SA,diff_close_mean_z_score_7,True,Mineração
3097,0.45,0.51,0.72,0.61,182029500000.0,VALE3.SA,diff_close_mean_z_score_7,False,Mineração
3100,0.52,0.52,0.72,0.67,1.19,VALE3.SA,diff_close_mean_z_score_14,True,Mineração
3101,0.35,0.35,0.59,0.78,0.78,VALE3.SA,diff_close_mean_z_score_14,False,Mineração
3104,0.43,0.37,0.61,0.79,1.37,VALE3.SA,diff_close_mean_z_score_21,True,Mineração
3105,0.29,0.23,0.48,0.87,0.92,VALE3.SA,diff_close_mean_z_score_21,False,Mineração
3108,0.71,0.88,0.94,0.27,1.76,GGBR3.SA,diff_close_mean_z_score_7,True,Mineração
3109,0.48,0.6,0.78,0.49,1.2,GGBR3.SA,diff_close_mean_z_score_7,False,Mineração
3112,0.56,0.57,0.76,0.62,3.22,GGBR3.SA,diff_close_mean_z_score_14,True,Mineração
3113,0.38,0.39,0.62,0.75,2.2,GGBR3.SA,diff_close_mean_z_score_14,False,Mineração



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
26733,0.65,0.65,0.65,ABCB4.SA,['past_meta_14'],meta_14,False,Financeiro
26837,0.58,0.58,0.58,ITUB3.SA,['past_meta_14'],meta_14,False,Financeiro
26725,0.47,0.47,0.47,ABCB4.SA,['past_meta_14'],meta_14,True,Financeiro
26828,0.46,0.46,0.46,ITUB3.SA,['past_meta_14'],meta_14,True,Financeiro
26770,0.57,0.57,0.57,ABCB4.SA,['past_meta_21'],meta_21,False,Financeiro
26877,0.54,0.54,0.54,ITUB3.SA,['past_meta_21'],meta_21,False,Financeiro
26760,0.45,0.45,0.45,ABCB4.SA,['past_meta_21'],meta_21,True,Financeiro
26867,0.46,0.46,0.46,ITUB3.SA,['past_meta_21'],meta_21,True,Financeiro
26701,0.51,0.51,0.51,ABCB4.SA,['past_meta_7'],meta_7,False,Financeiro
26801,0.43,0.43,0.43,ITUB3.SA,['past_meta_7'],meta_7,False,Financeiro


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
3120,0.76,1.0,1.0,0.17,265.8,ABCB4.SA,diff_close_mean_z_score_7,True,Financeiro
3121,0.52,0.68,0.82,0.44,182.74,ABCB4.SA,diff_close_mean_z_score_7,False,Financeiro
3124,0.62,0.71,0.84,0.5,2.74,ABCB4.SA,diff_close_mean_z_score_14,True,Financeiro
3125,0.43,0.49,0.7,0.66,1.9,ABCB4.SA,diff_close_mean_z_score_14,False,Financeiro
3128,0.54,0.52,0.72,0.66,1.69,ABCB4.SA,diff_close_mean_z_score_21,True,Financeiro
3129,0.37,0.36,0.6,0.77,1.17,ABCB4.SA,diff_close_mean_z_score_21,False,Financeiro
3132,0.68,0.88,0.94,0.33,4948931000000.0,ITUB3.SA,diff_close_mean_z_score_7,True,Financeiro
3133,0.46,0.59,0.77,0.54,3402713000000.0,ITUB3.SA,diff_close_mean_z_score_7,False,Financeiro
3136,0.52,0.55,0.74,0.67,167.98,ITUB3.SA,diff_close_mean_z_score_14,True,Financeiro
3137,0.36,0.37,0.61,0.78,116.34,ITUB3.SA,diff_close_mean_z_score_14,False,Financeiro



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
26945,0.63,0.63,0.63,FLRY3.SA,['past_meta_14'],meta_14,False,Saúde
27045,0.66,0.66,0.66,RADL3.SA,['past_meta_14'],meta_14,False,Saúde
26935,0.38,0.38,0.38,FLRY3.SA,['past_meta_14'],meta_14,True,Saúde
27037,0.5,0.5,0.5,RADL3.SA,['past_meta_14'],meta_14,True,Saúde
26980,0.7,0.7,0.7,FLRY3.SA,['past_meta_21'],meta_21,False,Saúde
27081,0.61,0.61,0.61,RADL3.SA,['past_meta_21'],meta_21,False,Saúde
26972,0.48,0.48,0.48,FLRY3.SA,['past_meta_21'],meta_21,True,Saúde
27072,0.52,0.52,0.52,RADL3.SA,['past_meta_21'],meta_21,True,Saúde
26909,0.51,0.51,0.51,FLRY3.SA,['past_meta_7'],meta_7,False,Saúde
27013,0.5,0.5,0.5,RADL3.SA,['past_meta_7'],meta_7,False,Saúde


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
3144,0.72,0.91,0.95,0.25,2.83,FLRY3.SA,diff_close_mean_z_score_7,True,Saúde
3145,0.49,0.62,0.79,0.48,1.94,FLRY3.SA,diff_close_mean_z_score_7,False,Saúde
3148,0.58,0.56,0.75,0.61,2.25,FLRY3.SA,diff_close_mean_z_score_14,True,Saúde
3149,0.39,0.39,0.62,0.74,1.56,FLRY3.SA,diff_close_mean_z_score_14,False,Saúde
3152,0.49,0.42,0.65,0.73,203.79,FLRY3.SA,diff_close_mean_z_score_21,True,Saúde
3153,0.34,0.29,0.54,0.81,142.19,FLRY3.SA,diff_close_mean_z_score_21,False,Saúde
3156,0.68,0.82,0.91,0.36,2.85,RADL3.SA,diff_close_mean_z_score_7,True,Saúde
3157,0.47,0.56,0.75,0.57,1.96,RADL3.SA,diff_close_mean_z_score_7,False,Saúde
3160,0.56,0.58,0.76,0.62,3164.58,RADL3.SA,diff_close_mean_z_score_14,True,Saúde
3161,0.39,0.4,0.63,0.74,2191.88,RADL3.SA,diff_close_mean_z_score_14,False,Saúde


In [36]:
for leak in [True, False]:
        
    fig = px.box(
        macro_dummy[macro_dummy.leak == leak], x="label_col", 
        y="f1-score", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak} | model=dummy')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_dummy_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg_dummy[reg_dummy.leak == leak], x="label_col", 
        y="R-squared (R2)", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak} | model=dummy')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_dummy_leak={leak}_reg.png")
    fig.show()

In [37]:
for leak in [True, False]:
        
    fig = px.box(
        macro_dummy[macro_dummy.leak == leak], x="asset", 
        y="f1-score", 
        points="all",
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_dummy_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg_dummy[reg_dummy.leak == leak], x="asset", 
        y="R-squared (R2)", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_dummy_leak={leak}_ref.png")
    fig.show()

#### NN results

In [38]:
# general 
macro_clf = final_results_clf[(final_results_clf['class'] == 'macro avg')].round(5)
reg= final_results_reg.round(5)

In [39]:
for label in reg.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        
        if 'meta' in label: label_formated = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label_formated = label.replace('diff_close_mean_z_score', 'z')
        
        df_plt = reg[(reg.label_col == label) & (reg.leak == leak)]
        
        fig = px.box(
            df_plt, 
            x="model", 
            y="R-squared (R2)", 
            points="all",
            title = f'Distribuição de f1 para os experimentos \n label:{label_formated}|leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label_formated}__leak={leak}_reg.png")
        # fig.update_traces(boxpoints=False) 
        fig.show()

label: diff_close_mean_z_score_7 - leak: False


label: diff_close_mean_z_score_14 - leak: False


label: diff_close_mean_z_score_21 - leak: False


In [40]:
for label in macro_clf.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        if 'meta' in label: label_col = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label_col = label.replace('diff_close_mean_z_score', 'z')
        
        df_plt = macro_clf[(macro_clf.label_col == label) & (macro_clf.leak == leak)]
        
        fig = px.box(
            df_plt, 
            x="model", 
            y="f1-score", 
            points="all",
            title = f'Distribuição dos resultados de f1 para os experimentos - label: {label} | leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label}__leak={leak}_clf.png")
        fig.show()

label: meta_7 - leak: False


label: meta_14 - leak: False


label: meta_21 - leak: False


In [41]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak', 'feature_cols']]
    
    
    display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()])
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
    
    display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
26328,PETR3.SA,1,meta_14,0.66893,0.66658,0.66773,Dummy_model,False,['past_meta_14']
69,PETR3.SA,1,meta_14,0.43912,0.44128,0.44018,LSTM_with_Attention,True,['meta_14']
1925,PRIO3.SA,2,meta_14,0.65536,0.65245,0.65388,LSTM_with_Attention,False,['meta_14']
1917,PRIO3.SA,2,meta_14,0.49313,0.49096,0.49201,LSTM_with_Attention,True,['meta_14']
26364,PETR3.SA,1,meta_21,0.70554,0.70281,0.70415,Dummy_model,False,['past_meta_21']
8360,PETR3.SA,6,meta_21,0.51016,0.48722,0.4961,MLP,True,['meta_21']
348,PRIO3.SA,1,meta_21,0.74257,0.74321,0.74289,LSTM_with_Attention,False,['meta_21']
340,PRIO3.SA,1,meta_21,0.62055,0.62055,0.62055,LSTM_with_Attention,True,['meta_21']
26293,PETR3.SA,1,meta_7,0.53916,0.53916,0.53916,Dummy_model,False,['past_meta_7']
21351,PETR3.SA,56,meta_7,0.34403,0.34541,0.34411,MLP,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1741,0.40917,0.39888,0.63157,0.74236,1.22579,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,False
1740,0.53155,0.58508,0.7649,0.6192,1.63756,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,True
37,0.39672,0.37316,0.61087,0.74965,1.04252,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
422,0.52949,0.52175,0.72232,0.6485,1.47742,PRIO3.SA,diff_close_mean_z_score_14,3,MLP,True
2325,0.34849,0.30688,0.55397,0.80636,0.81748,PETR3.SA,diff_close_mean_z_score_21,49,LSTM_with_Attention,False
22,0.45565,0.44693,0.66853,0.72377,1.0043,PETR3.SA,diff_close_mean_z_score_21,1,MLP,True
45,0.33884,0.28417,0.53308,0.82774,0.87883,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
46,0.45927,0.40109,0.63332,0.75412,1.22303,PRIO3.SA,diff_close_mean_z_score_21,1,MLP,True
1925,0.5082,0.5447,0.73803,0.54928,1.47531,PETR3.SA,diff_close_mean_z_score_7,35,LSTM_with_Attention,False
390,0.64021,0.75411,0.86839,0.39069,1.5897,PETR3.SA,diff_close_mean_z_score_7,3,MLP,True



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
684,GGBR3.SA,1,meta_14,0.69012,0.69012,0.69012,LSTM_with_Attention,False,['meta_14']
677,GGBR3.SA,1,meta_14,0.52911,0.52871,0.52891,LSTM_with_Attention,True,['meta_14']
493,VALE3.SA,1,meta_14,0.70389,0.70389,0.70389,MLP,False,['meta_14']
485,VALE3.SA,1,meta_14,0.55919,0.55919,0.55919,MLP,True,['meta_14']
26669,GGBR3.SA,1,meta_21,0.66197,0.66161,0.66179,Dummy_model,False,['past_meta_21']
4023,GGBR3.SA,3,meta_21,0.4433,0.44646,0.44481,LSTM_with_Attention,True,['meta_21']
26564,VALE3.SA,1,meta_21,0.70587,0.70695,0.70641,Dummy_model,False,['past_meta_21']
10395,VALE3.SA,7,meta_21,0.47487,0.45946,0.46606,MLP,True,['meta_21']
26597,GGBR3.SA,1,meta_7,0.52066,0.52066,0.52066,Dummy_model,False,['past_meta_7']
10469,GGBR3.SA,7,meta_7,0.34916,0.36261,0.35571,MLP,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
2005,0.40989,0.36746,0.60619,0.76451,0.84724,GGBR3.SA,diff_close_mean_z_score_14,35,LSTM_with_Attention,False
86,0.53134,0.52181,0.72237,0.6555,2.78051,GGBR3.SA,diff_close_mean_z_score_14,1,MLP,True
1597,0.37996,0.32356,0.56882,0.79711,0.83628,VALE3.SA,diff_close_mean_z_score_14,21,LSTM_with_Attention,False
62,0.49532,0.47391,0.68841,0.69767,1.11082,VALE3.SA,diff_close_mean_z_score_14,1,MLP,True
93,0.33911,0.27677,0.52609,0.83352,1.16459,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
94,0.45775,0.40481,0.63624,0.75272,1.67667,GGBR3.SA,diff_close_mean_z_score_21,1,MLP,True
69,0.30513,0.22024,0.4693,0.87814,0.92156,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
68,0.41497,0.34561,0.58789,0.80277,1.32215,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
269,0.51107,0.52467,0.72434,0.55798,1.03901,GGBR3.SA,diff_close_mean_z_score_7,2,LSTM_with_Attention,False
846,0.64936,0.72312,0.85036,0.40114,1.29583,GGBR3.SA,diff_close_mean_z_score_7,5,MLP,True



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
10749,ABCB4.SA,7,meta_14,0.68297,0.63865,0.65634,MLP,False,['meta_14']
2541,ABCB4.SA,2,meta_14,0.47473,0.47122,0.47291,MLP,True,['meta_14']
1113,ITUB3.SA,1,meta_14,0.57765,0.57957,0.5786,MLP,False,['meta_14']
1104,ITUB3.SA,1,meta_14,0.46318,0.46499,0.46407,MLP,True,['meta_14']
19007,ABCB4.SA,42,meta_21,0.63415,0.68286,0.64127,LSTM_with_Attention,False,['meta_21']
18997,ABCB4.SA,42,meta_21,0.52425,0.57626,0.53126,LSTM_with_Attention,True,['meta_21']
7729,ITUB3.SA,5,meta_21,0.54364,0.54783,0.5457,LSTM_with_Attention,False,['meta_21']
7719,ITUB3.SA,5,meta_21,0.46626,0.47092,0.46854,LSTM_with_Attention,True,['meta_21']
26701,ABCB4.SA,1,meta_7,0.51475,0.51433,0.51454,Dummy_model,False,['past_meta_7']
26693,ABCB4.SA,1,meta_7,0.35303,0.35303,0.35303,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
2221,0.44426,0.44645,0.66817,0.68949,1.66644,ABCB4.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,False
1262,0.58524,0.623,0.7893,0.56312,2.3357,ABCB4.SA,diff_close_mean_z_score_14,7,MLP,True
2629,0.39029,0.35529,0.59606,0.78755,96.51893,ITUB3.SA,diff_close_mean_z_score_14,56,LSTM_with_Attention,False
134,0.50583,0.49834,0.70593,0.69914,115.5282,ITUB3.SA,diff_close_mean_z_score_14,1,MLP,True
117,0.38132,0.3399,0.58301,0.78049,1.14116,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
118,0.51289,0.48443,0.69601,0.68543,1.64118,ABCB4.SA,diff_close_mean_z_score_21,1,MLP,True
141,0.31693,0.25537,0.50534,0.86047,1.8393,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
140,0.41671,0.36802,0.60665,0.79849,2.60857,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
1061,0.53444,0.57412,0.75771,0.52215,141.5387,ABCB4.SA,diff_close_mean_z_score_7,6,LSTM_with_Attention,False
1446,0.67864,0.79093,0.88934,0.3454,168.8229,ABCB4.SA,diff_close_mean_z_score_7,14,MLP,True



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
26945,FLRY3.SA,1,meta_14,0.6327,0.63292,0.63281,Dummy_model,False,['past_meta_14']
16067,FLRY3.SA,28,meta_14,0.42686,0.38835,0.40081,LSTM_with_Attention,True,['meta_14']
8077,RADL3.SA,5,meta_14,0.68384,0.65816,0.66948,LSTM_with_Attention,False,['meta_14']
16275,RADL3.SA,28,meta_14,0.56683,0.50456,0.52399,LSTM_with_Attention,True,['meta_14']
26980,FLRY3.SA,1,meta_21,0.69998,0.70159,0.70078,Dummy_model,False,['past_meta_21']
21082,FLRY3.SA,49,meta_21,0.52527,0.47728,0.49524,MLP,True,['meta_21']
6522,RADL3.SA,4,meta_21,0.62119,0.62136,0.62118,MLP,False,['meta_21']
16359,RADL3.SA,28,meta_21,0.53996,0.52258,0.53007,MLP,True,['meta_21']
26909,FLRY3.SA,1,meta_7,0.51439,0.51439,0.51439,Dummy_model,False,['past_meta_7']
1237,FLRY3.SA,1,meta_7,0.33146,0.34015,0.33571,LSTM_with_Attention,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
157,0.41483,0.36029,0.60024,0.75498,1.47864,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
1118,0.54608,0.50481,0.7105,0.65373,1.98169,FLRY3.SA,diff_close_mean_z_score_14,6,MLP,True
2869,0.40769,0.36152,0.60127,0.77028,2062.56823,RADL3.SA,diff_close_mean_z_score_14,63,LSTM_with_Attention,False
2292,0.53194,0.52185,0.72239,0.66045,2778.68656,RADL3.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,True
165,0.35726,0.27871,0.52793,0.82232,191.80007,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
934,0.47407,0.39039,0.62481,0.74729,115.23838,FLRY3.SA,diff_close_mean_z_score_21,5,MLP,True
2877,0.34623,0.27141,0.52097,0.83629,1.99102,RADL3.SA,diff_close_mean_z_score_21,63,LSTM_with_Attention,False
190,0.46261,0.39758,0.63054,0.75313,3.94839,RADL3.SA,diff_close_mean_z_score_21,1,MLP,True
3029,0.53074,0.5395,0.73451,0.55568,1.68942,FLRY3.SA,diff_close_mean_z_score_7,70,LSTM_with_Attention,False
1494,0.66858,0.73456,0.85707,0.39369,2.00286,FLRY3.SA,diff_close_mean_z_score_7,14,MLP,True


In [42]:

print('Metricas macro - label meta')
metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta')].\
drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
        rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo', 'f1-score', 'model', 'leak']]

metrics_clf = metrics_clf[metrics_clf.leak == False]

metrics_clf.alvo = metrics_clf.alvo.apply(lambda x: x.replace('meta', 'k'))

display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()].drop('leak', axis = 1).round(2))

print('Metricas regressao - label diff_close_mean_z_score')


metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score')].\
    drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
        rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)[['ativo','seq_len','alvo', 'R-squared (R2)', 'model', 'leak']].round(2)
        
metrics_reg = metrics_reg[metrics_reg.leak == False]

metrics_reg.alvo = metrics_reg.alvo.apply(lambda x: x.replace('diff_close_mean_z_score', 'z'))

display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])

Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,f1-score,model
10749,ABCB4.SA,7,k_14,0.66,MLP
26945,FLRY3.SA,1,k_14,0.63,Dummy_model
684,GGBR3.SA,1,k_14,0.69,LSTM_with_Attention
1113,ITUB3.SA,1,k_14,0.58,MLP
26328,PETR3.SA,1,k_14,0.67,Dummy_model
1925,PRIO3.SA,2,k_14,0.65,LSTM_with_Attention
8077,RADL3.SA,5,k_14,0.67,LSTM_with_Attention
493,VALE3.SA,1,k_14,0.7,MLP
19007,ABCB4.SA,42,k_21,0.64,LSTM_with_Attention
26980,FLRY3.SA,1,k_21,0.7,Dummy_model


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,ativo,seq_len,alvo,R-squared (R2),model,leak
109,ABCB4.SA,1,z_14,0.69,LSTM_with_Attention,False
157,FLRY3.SA,1,z_14,0.75,LSTM_with_Attention,False
85,GGBR3.SA,1,z_14,0.76,LSTM_with_Attention,False
133,ITUB3.SA,1,z_14,0.79,LSTM_with_Attention,False
1741,PETR3.SA,28,z_14,0.74,LSTM_with_Attention,False
37,PRIO3.SA,1,z_14,0.75,LSTM_with_Attention,False
2869,RADL3.SA,63,z_14,0.77,LSTM_with_Attention,False
1597,VALE3.SA,21,z_14,0.8,LSTM_with_Attention,False
117,ABCB4.SA,1,z_21,0.78,LSTM_with_Attention,False
165,FLRY3.SA,1,z_21,0.82,LSTM_with_Attention,False


In [43]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak', 'feature_cols']]
    
    display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()])
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    
    metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
    
    display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
26328,PETR3.SA,1,meta_14,0.66893,0.66658,0.66773,Dummy_model,False,['past_meta_14']
69,PETR3.SA,1,meta_14,0.43912,0.44128,0.44018,LSTM_with_Attention,True,['meta_14']
1925,PRIO3.SA,2,meta_14,0.65536,0.65245,0.65388,LSTM_with_Attention,False,['meta_14']
1917,PRIO3.SA,2,meta_14,0.49313,0.49096,0.49201,LSTM_with_Attention,True,['meta_14']
26364,PETR3.SA,1,meta_21,0.70554,0.70281,0.70415,Dummy_model,False,['past_meta_21']
8360,PETR3.SA,6,meta_21,0.51016,0.48722,0.4961,MLP,True,['meta_21']
348,PRIO3.SA,1,meta_21,0.74257,0.74321,0.74289,LSTM_with_Attention,False,['meta_21']
340,PRIO3.SA,1,meta_21,0.62055,0.62055,0.62055,LSTM_with_Attention,True,['meta_21']
26293,PETR3.SA,1,meta_7,0.53916,0.53916,0.53916,Dummy_model,False,['past_meta_7']
21351,PETR3.SA,56,meta_7,0.34403,0.34541,0.34411,MLP,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1741,0.40917,0.39888,0.63157,0.74236,1.22579,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,False
1740,0.53155,0.58508,0.7649,0.6192,1.63756,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,True
37,0.39672,0.37316,0.61087,0.74965,1.04252,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
422,0.52949,0.52175,0.72232,0.6485,1.47742,PRIO3.SA,diff_close_mean_z_score_14,3,MLP,True
2325,0.34849,0.30688,0.55397,0.80636,0.81748,PETR3.SA,diff_close_mean_z_score_21,49,LSTM_with_Attention,False
22,0.45565,0.44693,0.66853,0.72377,1.0043,PETR3.SA,diff_close_mean_z_score_21,1,MLP,True
45,0.33884,0.28417,0.53308,0.82774,0.87883,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
46,0.45927,0.40109,0.63332,0.75412,1.22303,PRIO3.SA,diff_close_mean_z_score_21,1,MLP,True
1925,0.5082,0.5447,0.73803,0.54928,1.47531,PETR3.SA,diff_close_mean_z_score_7,35,LSTM_with_Attention,False
390,0.64021,0.75411,0.86839,0.39069,1.5897,PETR3.SA,diff_close_mean_z_score_7,3,MLP,True



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
684,GGBR3.SA,1,meta_14,0.69012,0.69012,0.69012,LSTM_with_Attention,False,['meta_14']
677,GGBR3.SA,1,meta_14,0.52911,0.52871,0.52891,LSTM_with_Attention,True,['meta_14']
493,VALE3.SA,1,meta_14,0.70389,0.70389,0.70389,MLP,False,['meta_14']
485,VALE3.SA,1,meta_14,0.55919,0.55919,0.55919,MLP,True,['meta_14']
26669,GGBR3.SA,1,meta_21,0.66197,0.66161,0.66179,Dummy_model,False,['past_meta_21']
4023,GGBR3.SA,3,meta_21,0.4433,0.44646,0.44481,LSTM_with_Attention,True,['meta_21']
26564,VALE3.SA,1,meta_21,0.70587,0.70695,0.70641,Dummy_model,False,['past_meta_21']
10395,VALE3.SA,7,meta_21,0.47487,0.45946,0.46606,MLP,True,['meta_21']
26597,GGBR3.SA,1,meta_7,0.52066,0.52066,0.52066,Dummy_model,False,['past_meta_7']
10469,GGBR3.SA,7,meta_7,0.34916,0.36261,0.35571,MLP,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
2005,0.40989,0.36746,0.60619,0.76451,0.84724,GGBR3.SA,diff_close_mean_z_score_14,35,LSTM_with_Attention,False
86,0.53134,0.52181,0.72237,0.6555,2.78051,GGBR3.SA,diff_close_mean_z_score_14,1,MLP,True
1597,0.37996,0.32356,0.56882,0.79711,0.83628,VALE3.SA,diff_close_mean_z_score_14,21,LSTM_with_Attention,False
62,0.49532,0.47391,0.68841,0.69767,1.11082,VALE3.SA,diff_close_mean_z_score_14,1,MLP,True
93,0.33911,0.27677,0.52609,0.83352,1.16459,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
94,0.45775,0.40481,0.63624,0.75272,1.67667,GGBR3.SA,diff_close_mean_z_score_21,1,MLP,True
69,0.30513,0.22024,0.4693,0.87814,0.92156,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
68,0.41497,0.34561,0.58789,0.80277,1.32215,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
269,0.51107,0.52467,0.72434,0.55798,1.03901,GGBR3.SA,diff_close_mean_z_score_7,2,LSTM_with_Attention,False
846,0.64936,0.72312,0.85036,0.40114,1.29583,GGBR3.SA,diff_close_mean_z_score_7,5,MLP,True



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
10749,ABCB4.SA,7,meta_14,0.68297,0.63865,0.65634,MLP,False,['meta_14']
2541,ABCB4.SA,2,meta_14,0.47473,0.47122,0.47291,MLP,True,['meta_14']
1113,ITUB3.SA,1,meta_14,0.57765,0.57957,0.5786,MLP,False,['meta_14']
1104,ITUB3.SA,1,meta_14,0.46318,0.46499,0.46407,MLP,True,['meta_14']
19007,ABCB4.SA,42,meta_21,0.63415,0.68286,0.64127,LSTM_with_Attention,False,['meta_21']
18997,ABCB4.SA,42,meta_21,0.52425,0.57626,0.53126,LSTM_with_Attention,True,['meta_21']
7729,ITUB3.SA,5,meta_21,0.54364,0.54783,0.5457,LSTM_with_Attention,False,['meta_21']
7719,ITUB3.SA,5,meta_21,0.46626,0.47092,0.46854,LSTM_with_Attention,True,['meta_21']
26701,ABCB4.SA,1,meta_7,0.51475,0.51433,0.51454,Dummy_model,False,['past_meta_7']
26693,ABCB4.SA,1,meta_7,0.35303,0.35303,0.35303,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
2221,0.44426,0.44645,0.66817,0.68949,1.66644,ABCB4.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,False
1262,0.58524,0.623,0.7893,0.56312,2.3357,ABCB4.SA,diff_close_mean_z_score_14,7,MLP,True
2629,0.39029,0.35529,0.59606,0.78755,96.51893,ITUB3.SA,diff_close_mean_z_score_14,56,LSTM_with_Attention,False
134,0.50583,0.49834,0.70593,0.69914,115.5282,ITUB3.SA,diff_close_mean_z_score_14,1,MLP,True
117,0.38132,0.3399,0.58301,0.78049,1.14116,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
118,0.51289,0.48443,0.69601,0.68543,1.64118,ABCB4.SA,diff_close_mean_z_score_21,1,MLP,True
141,0.31693,0.25537,0.50534,0.86047,1.8393,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
140,0.41671,0.36802,0.60665,0.79849,2.60857,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
1061,0.53444,0.57412,0.75771,0.52215,141.5387,ABCB4.SA,diff_close_mean_z_score_7,6,LSTM_with_Attention,False
1446,0.67864,0.79093,0.88934,0.3454,168.8229,ABCB4.SA,diff_close_mean_z_score_7,14,MLP,True



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
26945,FLRY3.SA,1,meta_14,0.6327,0.63292,0.63281,Dummy_model,False,['past_meta_14']
16067,FLRY3.SA,28,meta_14,0.42686,0.38835,0.40081,LSTM_with_Attention,True,['meta_14']
8077,RADL3.SA,5,meta_14,0.68384,0.65816,0.66948,LSTM_with_Attention,False,['meta_14']
16275,RADL3.SA,28,meta_14,0.56683,0.50456,0.52399,LSTM_with_Attention,True,['meta_14']
26980,FLRY3.SA,1,meta_21,0.69998,0.70159,0.70078,Dummy_model,False,['past_meta_21']
21082,FLRY3.SA,49,meta_21,0.52527,0.47728,0.49524,MLP,True,['meta_21']
6522,RADL3.SA,4,meta_21,0.62119,0.62136,0.62118,MLP,False,['meta_21']
16359,RADL3.SA,28,meta_21,0.53996,0.52258,0.53007,MLP,True,['meta_21']
26909,FLRY3.SA,1,meta_7,0.51439,0.51439,0.51439,Dummy_model,False,['past_meta_7']
1237,FLRY3.SA,1,meta_7,0.33146,0.34015,0.33571,LSTM_with_Attention,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
157,0.41483,0.36029,0.60024,0.75498,1.47864,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
1118,0.54608,0.50481,0.7105,0.65373,1.98169,FLRY3.SA,diff_close_mean_z_score_14,6,MLP,True
2869,0.40769,0.36152,0.60127,0.77028,2062.56823,RADL3.SA,diff_close_mean_z_score_14,63,LSTM_with_Attention,False
2292,0.53194,0.52185,0.72239,0.66045,2778.68656,RADL3.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,True
165,0.35726,0.27871,0.52793,0.82232,191.80007,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
934,0.47407,0.39039,0.62481,0.74729,115.23838,FLRY3.SA,diff_close_mean_z_score_21,5,MLP,True
2877,0.34623,0.27141,0.52097,0.83629,1.99102,RADL3.SA,diff_close_mean_z_score_21,63,LSTM_with_Attention,False
190,0.46261,0.39758,0.63054,0.75313,3.94839,RADL3.SA,diff_close_mean_z_score_21,1,MLP,True
3029,0.53074,0.5395,0.73451,0.55568,1.68942,FLRY3.SA,diff_close_mean_z_score_7,70,LSTM_with_Attention,False
1494,0.66858,0.73456,0.85707,0.39369,2.00286,FLRY3.SA,diff_close_mean_z_score_7,14,MLP,True


In [44]:
for leak in [True, False]:
        
    fig = px.box(
        macro_clf[macro_clf.leak == leak], x="label_col", 
        y="f1-score", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_models_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg[reg.leak == leak], x="label_col", 
        y="R-squared (R2)", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_models_leak={leak}_reg.png")
    fig.show()

In [45]:

macro_clf['domain'] = macro_clf.asset.apply(asset_to_domain)
reg['domain'] = reg.asset.apply(asset_to_domain)

for leak in [True, False]:
        
    fig = px.box(
        macro_clf[macro_clf.leak == leak], x="asset", 
        y="f1-score", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_models_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
    
    fig = px.box(
        reg[reg.leak == leak], x="asset", 
        y="R-squared (R2)", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_models_leak={leak}_reg.png")
    fig.show()

In [46]:
# for assets, domain in assets_domain:
#     print(f'''
# ##############################################
# # DOMAIN: {domain}
# # ASSETS: {assets}
# ##############################################
#           ''')
    
#     print('Metricas macro - label meta')
#     metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
#     drop(['support', 'scaling_method', 'prediction_type', 'class', 'feature_cols'], axis =1 ).\
#             rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak']]
    
#     display(metrics_clf)
    
#     print('Metricas regressao - label diff_close_mean_z_score')
    
    
#     metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
#         drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
#             rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
#     display(metrics_reg)