In [1]:

# import external libs
import pandas as pd
import warnings
from tqdm import tqdm
import os
import sys
sys.path.append('../src/')
import re
import plotly.express as px

# import internal libs
from model.evaluation import classification_report, regression_metrics, get_classification_report
from model.config import create_experiment_configs_dummy, create_experiment_configs_tf
from data.preparation import load_dataset

In [2]:
# remove warning
warnings.filterwarnings('ignore')

In [3]:
# define paths
PATH_REPORTS = '../reports/'
lstm_results_path = 'test_results/LSTM_with_Attention_{asset}_test_results.csv'
mlp_results_path =  'test_results/MLP_{asset}_test_results.csv'
DATA_DIR = '../data/'

In [4]:
# define experiments
ASSETS = [
    "PETR3.SA", 
    "PRIO3.SA", 
    "VALE3.SA", 
    "GGBR3.SA", 
    "ABCB4.SA", 
    "ITUB3.SA", 
    "FLRY3.SA", 
    "RADL3.SA"
    ]

seq_len_list = [1,2,3,4,5,6,7,14,21,28,35,42,49
                # ,56,63,70
                ]

moving_windows = [7,14,21]

algorithms=[
    'LSTM_with_Attention', 
    # 'MLP',
    # 'KAN'
    ]

dict_experiments_dummy = create_experiment_configs_dummy(ASSETS, moving_windows)
dict_experiments_tf = create_experiment_configs_tf(ASSETS, seq_len_list, moving_windows,algorithms=algorithms)

## General results

### Create table

In [5]:
list_results_clf = []
list_results_reg = []
# list_results = []

for name, dict_experiments, path_results in [
    ("tf", dict_experiments_tf, PATH_REPORTS + 'test_results/{algorithm}_{asset}_features={features}__label={label_col}__sql_len={seq_len}__scaling_method={scaling_method}_test_results.csv'),
    ('dummy', dict_experiments_dummy, PATH_REPORTS + "test_results/Dummy_model_{asset}_features={feature_col}__label={label_col}_test_results.csv")
]:
    
    for exp_name, config in tqdm(dict_experiments.items()):
        
        if name == "tf":
            
            feature_cols = config['feature_cols']
            label_col = config['label_col']
            seq_len = config['seq_len']
            asset = config['asset']
            scaling_method = config['scaling_method']
            algorithm = config['algorithm']
            asset = config['asset']
            prediction_type = config['prediction_type']
            
            filepath = path_results.format(
                algorithm = algorithm,
                features = "_".join(feature_cols),
                label_col = label_col,
                asset = asset.replace(".", "_"),
                scaling_method = scaling_method.__str__(),
                seq_len = seq_len
            )
            
            if not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue

            results = pd.read_csv(filepath)
            
        elif name == 'dummy':
            feature_cols = [config['feature_col']]
            label_col = config['label_col']
            asset = config['asset']
            algorithm = "Dummy_model"
            seq_len = 1
            scaling_method = None
            prediction_type = 'dummy'
            asset_formated = asset.replace(".", "_")
            
            filepath = path_results.format(
                algorithm = algorithm,
                feature_col = feature_cols[0],
                label_col = label_col,
                asset = asset_formated
            )
            
            if  not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue
            
            results = pd.read_csv(filepath)
        
        # concat with the test dataset
        full_test = load_dataset(asset=asset, data_dir=DATA_DIR,dataset_split='test')
        results = pd.concat([results,full_test], axis =1)
        
        # find the window for label calculation (get the number of the string)
        window = int(re.findall(r'\d+', label_col)[0])

        # results without leak
        results_wo_leak = results.iloc[max(seq_len, window):]
        
        # remove invalid days
        results = results[results.Invalid_Days == 0]
        
        # raise error if there is nan values
        if results.isna().sum().sum() > 0: 
            raise ValueError('Há dados nulos no dataframe de resultados')
                
                
        new_coluns = [asset, str(feature_cols), str(label_col), seq_len, algorithm, scaling_method, prediction_type]
        new_columns_nms = ['asset','feature_cols','label_col','seq_len','model','scaling_method','prediction_type', 'leak']
                
        # regression metrics
        reg_metrics = regression_metrics(results.y_test, results.y_pred)
        reg_metrics_wo_leak = regression_metrics(results_wo_leak.y_test, results_wo_leak.y_pred)
        
    
        # add columns with experiment config 

        reg_metrics[
            new_columns_nms
            ] = new_coluns + [True]
        reg_metrics_wo_leak[
            new_columns_nms
            ] = new_coluns+ [False]
        
        list_results_reg.append(reg_metrics)
        list_results_reg.append(reg_metrics_wo_leak)
    
    
        # obtem metricas de classificacao, truncando os valores (se o valor já for a meta, não terá diferenca)
        y_test_trunc = [int(i) for i in results.y_test]
        y_pred_trunc = [int(i) for i in results.y_pred]
        df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        
        y_test_trunc_woleak = [int(i) for i in results_wo_leak.y_test]
        y_pred_trunc_woleak = [int(i) for i in results_wo_leak.y_pred]
        df_cr_wo_leak = get_classification_report(y_test_trunc_woleak, y_pred_trunc_woleak)
        
        df_cr[new_columns_nms] = new_coluns + [True]
        df_cr_wo_leak[new_columns_nms] = new_coluns + [False]
        
        list_results_clf.append(df_cr)
        list_results_clf.append(df_cr_wo_leak)

100%|██████████| 624/624 [00:22<00:00, 27.78it/s]
100%|██████████| 48/48 [00:01<00:00, 30.31it/s]


In [6]:
# concat results
final_results_reg = pd.concat(list_results_reg).reset_index(drop=True)
final_results_clf = pd.concat(list_results_clf).reset_index(drop=True)

In [7]:
# only the "right" labels
final_results_clf = final_results_clf[final_results_clf.label_col.str.contains('meta')]
final_results_reg = final_results_reg[final_results_reg.label_col.str.contains('diff_close_mean_z_score')]

### Results

In [8]:
assets_domain = [(["PETR3.SA","PRIO3.SA"], 'Petróleo'), (["VALE3.SA", "GGBR3.SA"], "Mineração"), (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'), (["FLRY3.SA", "RADL3.SA"], 'Saúde')]

# Configuração para não omitir linhas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


#### Dummy results

In [9]:
assets_domain = [(["PETR3.SA","PRIO3.SA"], 'Petróleo'), (["VALE3.SA", "GGBR3.SA"], "Mineração"), (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'), (["FLRY3.SA", "RADL3.SA"], 'Saúde')]

asset_to_domain = lambda x: 'Petróleo' if x in ["PETR3.SA","PRIO3.SA"] else "Mineração" if x in ["VALE3.SA", "GGBR3.SA"] else 'Financeiro' if x in ["ABCB4.SA", "ITUB3.SA"] else 'Saúde' if x in ["FLRY3.SA", "RADL3.SA"] else None

In [10]:
# general 
macro_dummy = final_results_clf[
    (final_results_clf['class'] == 'macro avg') &
    (final_results_clf['prediction_type'] == 'dummy')
    
    ].round(2)
macro_dummy['domain'] = macro_dummy.asset.apply(asset_to_domain)

reg_dummy = final_results_reg[
    (final_results_reg['prediction_type'] == 'dummy')
    ].round(2)
reg_dummy['domain'] = reg_dummy.asset.apply(asset_to_domain)

In [11]:
# for label in macro_dummy.label_col.unique():
#     for model in macro_dummy.model.unique():

#         print(label, model)
        
#         fig = px.box(macro_dummy, x="model", y="f1-score", points="all")
#         fig.show()

In [12]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_dummy[macro_dummy.label_col.str.contains('meta') & macro_dummy.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'seq_len', 'class', 'model'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)
        
        
    display(metrics_clf.sort_values(['alvo', 'leak', 'ativo']))
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    metrics_reg = reg_dummy[reg_dummy.label_col.str.contains('diff_close_mean_z_score') & reg_dummy.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'seq_len', 'model', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)
    display(metrics_reg)


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
11805,0.67,0.67,0.67,PETR3.SA,['past_meta_14'],meta_14,False,Petróleo
11906,0.65,0.65,0.65,PRIO3.SA,['past_meta_14'],meta_14,False,Petróleo
11796,0.43,0.43,0.43,PETR3.SA,['past_meta_14'],meta_14,True,Petróleo
11898,0.49,0.49,0.49,PRIO3.SA,['past_meta_14'],meta_14,True,Petróleo
11841,0.71,0.7,0.7,PETR3.SA,['past_meta_21'],meta_21,False,Petróleo
11937,0.74,0.74,0.74,PRIO3.SA,['past_meta_21'],meta_21,False,Petróleo
11832,0.49,0.49,0.49,PETR3.SA,['past_meta_21'],meta_21,True,Petróleo
11929,0.62,0.62,0.62,PRIO3.SA,['past_meta_21'],meta_21,True,Petróleo
11770,0.54,0.54,0.54,PETR3.SA,['past_meta_7'],meta_7,False,Petróleo
11874,0.48,0.48,0.48,PRIO3.SA,['past_meta_7'],meta_7,False,Petróleo


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
1248,0.71,0.92,0.96,0.26,2.07,PETR3.SA,diff_close_mean_z_score_7,True,Petróleo
1249,0.49,0.63,0.79,0.48,1.42,PETR3.SA,diff_close_mean_z_score_7,False,Petróleo
1252,0.57,0.65,0.81,0.58,1.78,PETR3.SA,diff_close_mean_z_score_14,True,Petróleo
1253,0.39,0.45,0.67,0.71,1.23,PETR3.SA,diff_close_mean_z_score_14,False,Petróleo
1256,0.48,0.48,0.69,0.7,1.08,PETR3.SA,diff_close_mean_z_score_21,True,Petróleo
1257,0.33,0.33,0.58,0.79,0.74,PETR3.SA,diff_close_mean_z_score_21,False,Petróleo
1260,0.69,0.84,0.92,0.33,2.15,PRIO3.SA,diff_close_mean_z_score_7,True,Petróleo
1261,0.48,0.58,0.76,0.53,1.48,PRIO3.SA,diff_close_mean_z_score_7,False,Petróleo
1264,0.56,0.58,0.76,0.61,1.58,PRIO3.SA,diff_close_mean_z_score_14,True,Petróleo
1265,0.38,0.4,0.63,0.73,1.09,PRIO3.SA,diff_close_mean_z_score_14,False,Petróleo



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
12105,0.69,0.69,0.69,GGBR3.SA,['past_meta_14'],meta_14,False,Mineração
12002,0.7,0.7,0.7,VALE3.SA,['past_meta_14'],meta_14,False,Mineração
12098,0.53,0.53,0.53,GGBR3.SA,['past_meta_14'],meta_14,True,Mineração
11994,0.56,0.56,0.56,VALE3.SA,['past_meta_14'],meta_14,True,Mineração
12146,0.66,0.66,0.66,GGBR3.SA,['past_meta_21'],meta_21,False,Mineração
12041,0.71,0.71,0.71,VALE3.SA,['past_meta_21'],meta_21,False,Mineração
12136,0.42,0.42,0.42,GGBR3.SA,['past_meta_21'],meta_21,True,Mineração
12032,0.45,0.45,0.45,VALE3.SA,['past_meta_21'],meta_21,True,Mineração
12074,0.52,0.52,0.52,GGBR3.SA,['past_meta_7'],meta_7,False,Mineração
11970,0.6,0.6,0.6,VALE3.SA,['past_meta_7'],meta_7,False,Mineração


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
1272,0.66,0.76,0.87,0.41,264745000000.0,VALE3.SA,diff_close_mean_z_score_7,True,Mineração
1273,0.45,0.51,0.72,0.61,182029500000.0,VALE3.SA,diff_close_mean_z_score_7,False,Mineração
1276,0.52,0.52,0.72,0.67,1.19,VALE3.SA,diff_close_mean_z_score_14,True,Mineração
1277,0.35,0.35,0.59,0.78,0.78,VALE3.SA,diff_close_mean_z_score_14,False,Mineração
1280,0.43,0.37,0.61,0.79,1.37,VALE3.SA,diff_close_mean_z_score_21,True,Mineração
1281,0.29,0.23,0.48,0.87,0.92,VALE3.SA,diff_close_mean_z_score_21,False,Mineração
1284,0.71,0.88,0.94,0.27,1.76,GGBR3.SA,diff_close_mean_z_score_7,True,Mineração
1285,0.48,0.6,0.78,0.49,1.2,GGBR3.SA,diff_close_mean_z_score_7,False,Mineração
1288,0.56,0.57,0.76,0.62,3.22,GGBR3.SA,diff_close_mean_z_score_14,True,Mineração
1289,0.38,0.39,0.62,0.75,2.2,GGBR3.SA,diff_close_mean_z_score_14,False,Mineração



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
12210,0.65,0.65,0.65,ABCB4.SA,['past_meta_14'],meta_14,False,Financeiro
12314,0.58,0.58,0.58,ITUB3.SA,['past_meta_14'],meta_14,False,Financeiro
12202,0.47,0.47,0.47,ABCB4.SA,['past_meta_14'],meta_14,True,Financeiro
12305,0.46,0.46,0.46,ITUB3.SA,['past_meta_14'],meta_14,True,Financeiro
12247,0.57,0.57,0.57,ABCB4.SA,['past_meta_21'],meta_21,False,Financeiro
12354,0.54,0.54,0.54,ITUB3.SA,['past_meta_21'],meta_21,False,Financeiro
12237,0.45,0.45,0.45,ABCB4.SA,['past_meta_21'],meta_21,True,Financeiro
12344,0.46,0.46,0.46,ITUB3.SA,['past_meta_21'],meta_21,True,Financeiro
12178,0.51,0.51,0.51,ABCB4.SA,['past_meta_7'],meta_7,False,Financeiro
12278,0.43,0.43,0.43,ITUB3.SA,['past_meta_7'],meta_7,False,Financeiro


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
1296,0.76,1.0,1.0,0.17,265.8,ABCB4.SA,diff_close_mean_z_score_7,True,Financeiro
1297,0.52,0.68,0.82,0.44,182.74,ABCB4.SA,diff_close_mean_z_score_7,False,Financeiro
1300,0.62,0.71,0.84,0.5,2.74,ABCB4.SA,diff_close_mean_z_score_14,True,Financeiro
1301,0.43,0.49,0.7,0.66,1.9,ABCB4.SA,diff_close_mean_z_score_14,False,Financeiro
1304,0.54,0.52,0.72,0.66,1.69,ABCB4.SA,diff_close_mean_z_score_21,True,Financeiro
1305,0.37,0.36,0.6,0.77,1.17,ABCB4.SA,diff_close_mean_z_score_21,False,Financeiro
1308,0.68,0.88,0.94,0.33,4948931000000.0,ITUB3.SA,diff_close_mean_z_score_7,True,Financeiro
1309,0.46,0.59,0.77,0.54,3402713000000.0,ITUB3.SA,diff_close_mean_z_score_7,False,Financeiro
1312,0.52,0.55,0.74,0.67,167.98,ITUB3.SA,diff_close_mean_z_score_14,True,Financeiro
1313,0.36,0.37,0.61,0.78,116.34,ITUB3.SA,diff_close_mean_z_score_14,False,Financeiro



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
12422,0.63,0.63,0.63,FLRY3.SA,['past_meta_14'],meta_14,False,Saúde
12522,0.66,0.66,0.66,RADL3.SA,['past_meta_14'],meta_14,False,Saúde
12412,0.38,0.38,0.38,FLRY3.SA,['past_meta_14'],meta_14,True,Saúde
12514,0.5,0.5,0.5,RADL3.SA,['past_meta_14'],meta_14,True,Saúde
12457,0.7,0.7,0.7,FLRY3.SA,['past_meta_21'],meta_21,False,Saúde
12558,0.61,0.61,0.61,RADL3.SA,['past_meta_21'],meta_21,False,Saúde
12449,0.48,0.48,0.48,FLRY3.SA,['past_meta_21'],meta_21,True,Saúde
12549,0.52,0.52,0.52,RADL3.SA,['past_meta_21'],meta_21,True,Saúde
12386,0.51,0.51,0.51,FLRY3.SA,['past_meta_7'],meta_7,False,Saúde
12490,0.5,0.5,0.5,RADL3.SA,['past_meta_7'],meta_7,False,Saúde


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
1320,0.72,0.91,0.95,0.25,2.83,FLRY3.SA,diff_close_mean_z_score_7,True,Saúde
1321,0.49,0.62,0.79,0.48,1.94,FLRY3.SA,diff_close_mean_z_score_7,False,Saúde
1324,0.58,0.56,0.75,0.61,2.25,FLRY3.SA,diff_close_mean_z_score_14,True,Saúde
1325,0.39,0.39,0.62,0.74,1.56,FLRY3.SA,diff_close_mean_z_score_14,False,Saúde
1328,0.49,0.42,0.65,0.73,203.79,FLRY3.SA,diff_close_mean_z_score_21,True,Saúde
1329,0.34,0.29,0.54,0.81,142.19,FLRY3.SA,diff_close_mean_z_score_21,False,Saúde
1332,0.68,0.82,0.91,0.36,2.85,RADL3.SA,diff_close_mean_z_score_7,True,Saúde
1333,0.47,0.56,0.75,0.57,1.96,RADL3.SA,diff_close_mean_z_score_7,False,Saúde
1336,0.56,0.58,0.76,0.62,3164.58,RADL3.SA,diff_close_mean_z_score_14,True,Saúde
1337,0.39,0.4,0.63,0.74,2191.88,RADL3.SA,diff_close_mean_z_score_14,False,Saúde


In [13]:
for leak in [True, False]:
        
    fig = px.box(
        macro_dummy[macro_dummy.leak == leak], x="label_col", 
        y="f1-score", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak} | model=dummy')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_dummy_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg_dummy[reg_dummy.leak == leak], x="label_col", 
        y="R-squared (R2)", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak} | model=dummy')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_dummy_leak={leak}_reg.png")
    fig.show()

In [14]:
for leak in [True, False]:
        
    fig = px.box(
        macro_dummy[macro_dummy.leak == leak], x="asset", 
        y="f1-score", 
        points="all",
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_dummy_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg_dummy[reg_dummy.leak == leak], x="asset", 
        y="R-squared (R2)", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_dummy_leak={leak}_ref.png")
    fig.show()

#### NN results

In [15]:
# general 
macro_clf = final_results_clf[(final_results_clf['class'] == 'macro avg')].round(5)
reg= final_results_reg.round(5)

In [16]:
for label in reg.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        
        if 'meta' in label: label_formated = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label_formated = label.replace('diff_close_mean_z_score', 'z')
        
        df_plt = reg[(reg.label_col == label) & (reg.leak == leak)]
        
        fig = px.box(
            df_plt, 
            x="model", 
            y="R-squared (R2)", 
            points="all",
            title = f'Distribuição de f1 para os experimentos \n label:{label_formated}|leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label_formated}__leak={leak}_reg.png")
        # fig.update_traces(boxpoints=False) 
        fig.show()

label: diff_close_mean_z_score_7 - leak: False


label: diff_close_mean_z_score_14 - leak: False


label: diff_close_mean_z_score_21 - leak: False


In [17]:
for label in macro_clf.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        if 'meta' in label: label_col = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label_col = label.replace('diff_close_mean_z_score', 'z')
        
        df_plt = macro_clf[(macro_clf.label_col == label) & (macro_clf.leak == leak)]
        
        fig = px.box(
            df_plt, 
            x="model", 
            y="f1-score", 
            points="all",
            title = f'Distribuição dos resultados de f1 para os experimentos - label: {label} | leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label}__leak={leak}_clf.png")
        fig.show()

label: meta_7 - leak: False


label: meta_14 - leak: False


label: meta_21 - leak: False


In [18]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak', 'feature_cols']]
    
    
    display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()])
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
    
    display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
11805,PETR3.SA,1,meta_14,0.66893,0.66658,0.66773,Dummy_model,False,['past_meta_14']
11796,PETR3.SA,1,meta_14,0.43335,0.43335,0.43335,Dummy_model,True,['past_meta_14']
11906,PRIO3.SA,1,meta_14,0.65342,0.65342,0.65342,Dummy_model,False,['past_meta_14']
11898,PRIO3.SA,1,meta_14,0.49031,0.49031,0.49031,Dummy_model,True,['past_meta_14']
11841,PETR3.SA,1,meta_21,0.70554,0.70281,0.70415,Dummy_model,False,['past_meta_21']
11832,PETR3.SA,1,meta_21,0.4854,0.4854,0.4854,Dummy_model,True,['past_meta_21']
11937,PRIO3.SA,1,meta_21,0.74257,0.74321,0.74289,Dummy_model,False,['past_meta_21']
11929,PRIO3.SA,1,meta_21,0.62055,0.62055,0.62055,Dummy_model,True,['past_meta_21']
11770,PETR3.SA,1,meta_7,0.53916,0.53916,0.53916,Dummy_model,False,['past_meta_7']
11762,PETR3.SA,1,meta_7,0.32921,0.32921,0.32921,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
871,0.40917,0.39888,0.63157,0.74236,1.22579,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,False
870,0.53155,0.58508,0.7649,0.6192,1.63756,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,True
19,0.39672,0.37316,0.61087,0.74965,1.04252,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
18,0.52957,0.53137,0.72895,0.64202,1.46902,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
1163,0.34849,0.30688,0.55397,0.80636,0.81748,PETR3.SA,diff_close_mean_z_score_21,49,LSTM_with_Attention,False
202,0.45591,0.44734,0.66883,0.72352,1.00518,PETR3.SA,diff_close_mean_z_score_21,3,LSTM_with_Attention,True
23,0.33884,0.28417,0.53308,0.82774,0.87883,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
22,0.46105,0.40547,0.63676,0.75144,1.23122,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
963,0.5082,0.5447,0.73803,0.54928,1.47531,PETR3.SA,diff_close_mean_z_score_7,35,LSTM_with_Attention,False
386,0.63986,0.76313,0.87357,0.3834,1.66932,PETR3.SA,diff_close_mean_z_score_7,5,LSTM_with_Attention,True



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
12105,GGBR3.SA,1,meta_14,0.69012,0.69012,0.69012,Dummy_model,False,['past_meta_14']
12098,GGBR3.SA,1,meta_14,0.52911,0.52871,0.52891,Dummy_model,True,['past_meta_14']
12002,VALE3.SA,1,meta_14,0.70389,0.70389,0.70389,Dummy_model,False,['past_meta_14']
11994,VALE3.SA,1,meta_14,0.55919,0.55919,0.55919,Dummy_model,True,['past_meta_14']
12146,GGBR3.SA,1,meta_21,0.66197,0.66161,0.66179,Dummy_model,False,['past_meta_21']
12136,GGBR3.SA,1,meta_21,0.42447,0.42408,0.42427,Dummy_model,True,['past_meta_21']
12041,VALE3.SA,1,meta_21,0.70587,0.70695,0.70641,Dummy_model,False,['past_meta_21']
12032,VALE3.SA,1,meta_21,0.45227,0.45227,0.45227,Dummy_model,True,['past_meta_21']
12074,GGBR3.SA,1,meta_7,0.52066,0.52066,0.52066,Dummy_model,False,['past_meta_7']
12066,GGBR3.SA,1,meta_7,0.34135,0.34101,0.34118,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1003,0.40989,0.36746,0.60619,0.76451,0.84724,GGBR3.SA,diff_close_mean_z_score_14,35,LSTM_with_Attention,False
42,0.53267,0.52314,0.72328,0.65462,2.87121,GGBR3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
799,0.37996,0.32356,0.56882,0.79711,0.83628,VALE3.SA,diff_close_mean_z_score_14,21,LSTM_with_Attention,False
30,0.49339,0.47421,0.68863,0.69748,1.09357,VALE3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
47,0.33911,0.27677,0.52609,0.83352,1.16459,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
46,0.46047,0.40752,0.63837,0.75106,1.66385,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
35,0.30513,0.22024,0.4693,0.87814,0.92156,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
34,0.41497,0.34561,0.58789,0.80277,1.32215,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
135,0.51107,0.52467,0.72434,0.55798,1.03901,GGBR3.SA,diff_close_mean_z_score_7,2,LSTM_with_Attention,False
422,0.64527,0.73248,0.85585,0.39339,1.38284,GGBR3.SA,diff_close_mean_z_score_7,5,LSTM_with_Attention,True



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
12210,ABCB4.SA,1,meta_14,0.64893,0.64893,0.64893,Dummy_model,False,['past_meta_14']
12202,ABCB4.SA,1,meta_14,0.47122,0.47122,0.47122,Dummy_model,True,['past_meta_14']
12314,ITUB3.SA,1,meta_14,0.57602,0.5763,0.57616,Dummy_model,False,['past_meta_14']
12305,ITUB3.SA,1,meta_14,0.4604,0.46071,0.46056,Dummy_model,True,['past_meta_14']
12247,ABCB4.SA,1,meta_21,0.5693,0.56882,0.56906,Dummy_model,False,['past_meta_21']
12237,ABCB4.SA,1,meta_21,0.44868,0.44868,0.44868,Dummy_model,True,['past_meta_21']
12354,ITUB3.SA,1,meta_21,0.53849,0.53866,0.53858,Dummy_model,False,['past_meta_21']
12344,ITUB3.SA,1,meta_21,0.45801,0.45834,0.45817,Dummy_model,True,['past_meta_21']
12178,ABCB4.SA,1,meta_7,0.51475,0.51433,0.51454,Dummy_model,False,['past_meta_7']
12170,ABCB4.SA,1,meta_7,0.35303,0.35303,0.35303,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1111,0.44426,0.44645,0.66817,0.68949,1.66644,ABCB4.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,False
438,0.59116,0.63933,0.79958,0.55167,2.45978,ABCB4.SA,diff_close_mean_z_score_14,5,LSTM_with_Attention,True
67,0.38814,0.353,0.59414,0.78711,110.8771,ITUB3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
66,0.50067,0.50085,0.70771,0.69763,160.031,ITUB3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
59,0.38132,0.3399,0.58301,0.78049,1.14116,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
58,0.51659,0.48855,0.69896,0.68276,1.60116,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
71,0.31693,0.25537,0.50534,0.86047,1.8393,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
70,0.41671,0.36802,0.60665,0.79849,2.60857,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
531,0.53444,0.57412,0.75771,0.52215,141.5387,ABCB4.SA,diff_close_mean_z_score_7,6,LSTM_with_Attention,False
626,0.68179,0.81188,0.90104,0.32806,216.755,ABCB4.SA,diff_close_mean_z_score_7,7,LSTM_with_Attention,True



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
12422,FLRY3.SA,1,meta_14,0.6327,0.63292,0.63281,Dummy_model,False,['past_meta_14']
12412,FLRY3.SA,1,meta_14,0.37578,0.37601,0.3759,Dummy_model,True,['past_meta_14']
12522,RADL3.SA,1,meta_14,0.66057,0.6603,0.66044,Dummy_model,False,['past_meta_14']
12514,RADL3.SA,1,meta_14,0.50464,0.50435,0.5045,Dummy_model,True,['past_meta_14']
12457,FLRY3.SA,1,meta_21,0.69998,0.70159,0.70078,Dummy_model,False,['past_meta_21']
12449,FLRY3.SA,1,meta_21,0.47878,0.48026,0.47951,Dummy_model,True,['past_meta_21']
12558,RADL3.SA,1,meta_21,0.61151,0.6113,0.6114,Dummy_model,False,['past_meta_21']
12549,RADL3.SA,1,meta_21,0.51669,0.51643,0.51656,Dummy_model,True,['past_meta_21']
12386,FLRY3.SA,1,meta_7,0.51439,0.51439,0.51439,Dummy_model,False,['past_meta_7']
12378,FLRY3.SA,1,meta_7,0.32899,0.3293,0.32914,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
79,0.41483,0.36029,0.60024,0.75498,1.47864,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
78,0.54324,0.51046,0.71447,0.64985,2.07223,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
1243,0.39686,0.36853,0.60707,0.76366,2109.77658,RADL3.SA,diff_close_mean_z_score_14,49,LSTM_with_Attention,False
1146,0.53194,0.52185,0.72239,0.66045,2778.68656,RADL3.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,True
83,0.35726,0.27871,0.52793,0.82232,191.80007,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
82,0.47139,0.39468,0.62824,0.74451,274.83695,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
1247,0.34443,0.27498,0.52439,0.83237,2.88478,RADL3.SA,diff_close_mean_z_score_21,49,LSTM_with_Attention,False
1054,0.46803,0.40183,0.6339,0.75049,3.82217,RADL3.SA,diff_close_mean_z_score_21,35,LSTM_with_Attention,True
1131,0.52491,0.54195,0.73617,0.5509,1.71437,FLRY3.SA,diff_close_mean_z_score_7,42,LSTM_with_Attention,False
74,0.65786,0.75791,0.87058,0.37442,2.27334,FLRY3.SA,diff_close_mean_z_score_7,1,LSTM_with_Attention,True


In [19]:

print('Metricas macro - label meta')
metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta')].\
drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
        rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo', 'f1-score', 'model', 'leak']]

metrics_clf = metrics_clf[metrics_clf.leak == False]

metrics_clf.alvo = metrics_clf.alvo.apply(lambda x: x.replace('meta', 'k'))

display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()].drop('leak', axis = 1).round(2))

print('Metricas regressao - label diff_close_mean_z_score')


metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score')].\
    drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
        rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)[['ativo','seq_len','alvo', 'R-squared (R2)', 'model', 'leak']].round(2)
        
metrics_reg = metrics_reg[metrics_reg.leak == False]

metrics_reg.alvo = metrics_reg.alvo.apply(lambda x: x.replace('diff_close_mean_z_score', 'z'))

display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])

Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,f1-score,model
12210,ABCB4.SA,1,k_14,0.65,Dummy_model
12422,FLRY3.SA,1,k_14,0.63,Dummy_model
12105,GGBR3.SA,1,k_14,0.69,Dummy_model
12314,ITUB3.SA,1,k_14,0.58,Dummy_model
11805,PETR3.SA,1,k_14,0.67,Dummy_model
11906,PRIO3.SA,1,k_14,0.65,Dummy_model
12522,RADL3.SA,1,k_14,0.66,Dummy_model
12002,VALE3.SA,1,k_14,0.7,Dummy_model
12247,ABCB4.SA,1,k_21,0.57,Dummy_model
12457,FLRY3.SA,1,k_21,0.7,Dummy_model


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,ativo,seq_len,alvo,R-squared (R2),model,leak
55,ABCB4.SA,1,z_14,0.69,LSTM_with_Attention,False
79,FLRY3.SA,1,z_14,0.75,LSTM_with_Attention,False
43,GGBR3.SA,1,z_14,0.76,LSTM_with_Attention,False
67,ITUB3.SA,1,z_14,0.79,LSTM_with_Attention,False
871,PETR3.SA,28,z_14,0.74,LSTM_with_Attention,False
19,PRIO3.SA,1,z_14,0.75,LSTM_with_Attention,False
91,RADL3.SA,1,z_14,0.76,LSTM_with_Attention,False
799,VALE3.SA,21,z_14,0.8,LSTM_with_Attention,False
59,ABCB4.SA,1,z_21,0.78,LSTM_with_Attention,False
83,FLRY3.SA,1,z_21,0.82,LSTM_with_Attention,False


In [20]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak', 'feature_cols']]
    
    display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()])
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    
    metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
    
    display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
11805,PETR3.SA,1,meta_14,0.66893,0.66658,0.66773,Dummy_model,False,['past_meta_14']
11796,PETR3.SA,1,meta_14,0.43335,0.43335,0.43335,Dummy_model,True,['past_meta_14']
11906,PRIO3.SA,1,meta_14,0.65342,0.65342,0.65342,Dummy_model,False,['past_meta_14']
11898,PRIO3.SA,1,meta_14,0.49031,0.49031,0.49031,Dummy_model,True,['past_meta_14']
11841,PETR3.SA,1,meta_21,0.70554,0.70281,0.70415,Dummy_model,False,['past_meta_21']
11832,PETR3.SA,1,meta_21,0.4854,0.4854,0.4854,Dummy_model,True,['past_meta_21']
11937,PRIO3.SA,1,meta_21,0.74257,0.74321,0.74289,Dummy_model,False,['past_meta_21']
11929,PRIO3.SA,1,meta_21,0.62055,0.62055,0.62055,Dummy_model,True,['past_meta_21']
11770,PETR3.SA,1,meta_7,0.53916,0.53916,0.53916,Dummy_model,False,['past_meta_7']
11762,PETR3.SA,1,meta_7,0.32921,0.32921,0.32921,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
871,0.40917,0.39888,0.63157,0.74236,1.22579,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,False
870,0.53155,0.58508,0.7649,0.6192,1.63756,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,True
19,0.39672,0.37316,0.61087,0.74965,1.04252,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
18,0.52957,0.53137,0.72895,0.64202,1.46902,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
1163,0.34849,0.30688,0.55397,0.80636,0.81748,PETR3.SA,diff_close_mean_z_score_21,49,LSTM_with_Attention,False
202,0.45591,0.44734,0.66883,0.72352,1.00518,PETR3.SA,diff_close_mean_z_score_21,3,LSTM_with_Attention,True
23,0.33884,0.28417,0.53308,0.82774,0.87883,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
22,0.46105,0.40547,0.63676,0.75144,1.23122,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
963,0.5082,0.5447,0.73803,0.54928,1.47531,PETR3.SA,diff_close_mean_z_score_7,35,LSTM_with_Attention,False
386,0.63986,0.76313,0.87357,0.3834,1.66932,PETR3.SA,diff_close_mean_z_score_7,5,LSTM_with_Attention,True



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
12105,GGBR3.SA,1,meta_14,0.69012,0.69012,0.69012,Dummy_model,False,['past_meta_14']
12098,GGBR3.SA,1,meta_14,0.52911,0.52871,0.52891,Dummy_model,True,['past_meta_14']
12002,VALE3.SA,1,meta_14,0.70389,0.70389,0.70389,Dummy_model,False,['past_meta_14']
11994,VALE3.SA,1,meta_14,0.55919,0.55919,0.55919,Dummy_model,True,['past_meta_14']
12146,GGBR3.SA,1,meta_21,0.66197,0.66161,0.66179,Dummy_model,False,['past_meta_21']
12136,GGBR3.SA,1,meta_21,0.42447,0.42408,0.42427,Dummy_model,True,['past_meta_21']
12041,VALE3.SA,1,meta_21,0.70587,0.70695,0.70641,Dummy_model,False,['past_meta_21']
12032,VALE3.SA,1,meta_21,0.45227,0.45227,0.45227,Dummy_model,True,['past_meta_21']
12074,GGBR3.SA,1,meta_7,0.52066,0.52066,0.52066,Dummy_model,False,['past_meta_7']
12066,GGBR3.SA,1,meta_7,0.34135,0.34101,0.34118,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1003,0.40989,0.36746,0.60619,0.76451,0.84724,GGBR3.SA,diff_close_mean_z_score_14,35,LSTM_with_Attention,False
42,0.53267,0.52314,0.72328,0.65462,2.87121,GGBR3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
799,0.37996,0.32356,0.56882,0.79711,0.83628,VALE3.SA,diff_close_mean_z_score_14,21,LSTM_with_Attention,False
30,0.49339,0.47421,0.68863,0.69748,1.09357,VALE3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
47,0.33911,0.27677,0.52609,0.83352,1.16459,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
46,0.46047,0.40752,0.63837,0.75106,1.66385,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
35,0.30513,0.22024,0.4693,0.87814,0.92156,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
34,0.41497,0.34561,0.58789,0.80277,1.32215,VALE3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
135,0.51107,0.52467,0.72434,0.55798,1.03901,GGBR3.SA,diff_close_mean_z_score_7,2,LSTM_with_Attention,False
422,0.64527,0.73248,0.85585,0.39339,1.38284,GGBR3.SA,diff_close_mean_z_score_7,5,LSTM_with_Attention,True



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
12210,ABCB4.SA,1,meta_14,0.64893,0.64893,0.64893,Dummy_model,False,['past_meta_14']
12202,ABCB4.SA,1,meta_14,0.47122,0.47122,0.47122,Dummy_model,True,['past_meta_14']
12314,ITUB3.SA,1,meta_14,0.57602,0.5763,0.57616,Dummy_model,False,['past_meta_14']
12305,ITUB3.SA,1,meta_14,0.4604,0.46071,0.46056,Dummy_model,True,['past_meta_14']
12247,ABCB4.SA,1,meta_21,0.5693,0.56882,0.56906,Dummy_model,False,['past_meta_21']
12237,ABCB4.SA,1,meta_21,0.44868,0.44868,0.44868,Dummy_model,True,['past_meta_21']
12354,ITUB3.SA,1,meta_21,0.53849,0.53866,0.53858,Dummy_model,False,['past_meta_21']
12344,ITUB3.SA,1,meta_21,0.45801,0.45834,0.45817,Dummy_model,True,['past_meta_21']
12178,ABCB4.SA,1,meta_7,0.51475,0.51433,0.51454,Dummy_model,False,['past_meta_7']
12170,ABCB4.SA,1,meta_7,0.35303,0.35303,0.35303,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1111,0.44426,0.44645,0.66817,0.68949,1.66644,ABCB4.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,False
438,0.59116,0.63933,0.79958,0.55167,2.45978,ABCB4.SA,diff_close_mean_z_score_14,5,LSTM_with_Attention,True
67,0.38814,0.353,0.59414,0.78711,110.8771,ITUB3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
66,0.50067,0.50085,0.70771,0.69763,160.031,ITUB3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
59,0.38132,0.3399,0.58301,0.78049,1.14116,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
58,0.51659,0.48855,0.69896,0.68276,1.60116,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
71,0.31693,0.25537,0.50534,0.86047,1.8393,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
70,0.41671,0.36802,0.60665,0.79849,2.60857,ITUB3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
531,0.53444,0.57412,0.75771,0.52215,141.5387,ABCB4.SA,diff_close_mean_z_score_7,6,LSTM_with_Attention,False
626,0.68179,0.81188,0.90104,0.32806,216.755,ABCB4.SA,diff_close_mean_z_score_7,7,LSTM_with_Attention,True



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
12422,FLRY3.SA,1,meta_14,0.6327,0.63292,0.63281,Dummy_model,False,['past_meta_14']
12412,FLRY3.SA,1,meta_14,0.37578,0.37601,0.3759,Dummy_model,True,['past_meta_14']
12522,RADL3.SA,1,meta_14,0.66057,0.6603,0.66044,Dummy_model,False,['past_meta_14']
12514,RADL3.SA,1,meta_14,0.50464,0.50435,0.5045,Dummy_model,True,['past_meta_14']
12457,FLRY3.SA,1,meta_21,0.69998,0.70159,0.70078,Dummy_model,False,['past_meta_21']
12449,FLRY3.SA,1,meta_21,0.47878,0.48026,0.47951,Dummy_model,True,['past_meta_21']
12558,RADL3.SA,1,meta_21,0.61151,0.6113,0.6114,Dummy_model,False,['past_meta_21']
12549,RADL3.SA,1,meta_21,0.51669,0.51643,0.51656,Dummy_model,True,['past_meta_21']
12386,FLRY3.SA,1,meta_7,0.51439,0.51439,0.51439,Dummy_model,False,['past_meta_7']
12378,FLRY3.SA,1,meta_7,0.32899,0.3293,0.32914,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
79,0.41483,0.36029,0.60024,0.75498,1.47864,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
78,0.54324,0.51046,0.71447,0.64985,2.07223,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,True
1243,0.39686,0.36853,0.60707,0.76366,2109.77658,RADL3.SA,diff_close_mean_z_score_14,49,LSTM_with_Attention,False
1146,0.53194,0.52185,0.72239,0.66045,2778.68656,RADL3.SA,diff_close_mean_z_score_14,42,LSTM_with_Attention,True
83,0.35726,0.27871,0.52793,0.82232,191.80007,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
82,0.47139,0.39468,0.62824,0.74451,274.83695,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,True
1247,0.34443,0.27498,0.52439,0.83237,2.88478,RADL3.SA,diff_close_mean_z_score_21,49,LSTM_with_Attention,False
1054,0.46803,0.40183,0.6339,0.75049,3.82217,RADL3.SA,diff_close_mean_z_score_21,35,LSTM_with_Attention,True
1131,0.52491,0.54195,0.73617,0.5509,1.71437,FLRY3.SA,diff_close_mean_z_score_7,42,LSTM_with_Attention,False
74,0.65786,0.75791,0.87058,0.37442,2.27334,FLRY3.SA,diff_close_mean_z_score_7,1,LSTM_with_Attention,True


In [21]:
for leak in [True, False]:
        
    fig = px.box(
        macro_clf[macro_clf.leak == leak], x="label_col", 
        y="f1-score", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_models_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg[reg.leak == leak], x="label_col", 
        y="R-squared (R2)", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_models_leak={leak}_reg.png")
    fig.show()

In [22]:

macro_clf['domain'] = macro_clf.asset.apply(asset_to_domain)
reg['domain'] = reg.asset.apply(asset_to_domain)

for leak in [True, False]:
        
    fig = px.box(
        macro_clf[macro_clf.leak == leak], x="asset", 
        y="f1-score", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_models_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
    
    fig = px.box(
        reg[reg.leak == leak], x="asset", 
        y="R-squared (R2)", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_models_leak={leak}_reg.png")
    fig.show()

In [23]:
# for assets, domain in assets_domain:
#     print(f'''
# ##############################################
# # DOMAIN: {domain}
# # ASSETS: {assets}
# ##############################################
#           ''')
    
#     print('Metricas macro - label meta')
#     metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
#     drop(['support', 'scaling_method', 'prediction_type', 'class', 'feature_cols'], axis =1 ).\
#             rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak']]
    
#     display(metrics_clf)
    
#     print('Metricas regressao - label diff_close_mean_z_score')
    
    
#     metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
#         drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
#             rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
#     display(metrics_reg)