In [1]:

# import external libs
import pandas as pd
import warnings
from tqdm import tqdm
import os
import sys
sys.path.append('../src/')
import re
import plotly.express as px

# import internal libs
from model.evaluation import classification_report, regression_metrics, get_classification_report
from model.config import create_experiment_configs_dummy, create_experiment_configs_tf
from data.preparation import load_dataset

In [2]:
# remove warning
warnings.filterwarnings('ignore')

In [3]:
# define paths
PATH_REPORTS = '../reports/'
lstm_results_path = 'test_results/LSTM_with_Attention_{asset}_test_results.csv'
mlp_results_path =  'test_results/MLP_{asset}_test_results.csv'
DATA_DIR = '../data/'

In [4]:
# define experiments
ASSETS = [
    "PETR3.SA", 
    "PRIO3.SA", 
    "VALE3.SA", 
    "GGBR3.SA", 
    "ABCB4.SA", 
    "ITUB3.SA", 
    "FLRY3.SA", 
    "RADL3.SA"
    ]

seq_len_list = [1,2,3,4,5,6,7,14,21,28,35,42,49,56,63,70]

moving_windows = [7,14,21]

algorithms=[
    'LSTM_with_Attention', 
    'MLP',
    'KAN'
    ]

dict_experiments_dummy = create_experiment_configs_dummy(ASSETS, moving_windows)
dict_experiments_tf = create_experiment_configs_tf(ASSETS, seq_len_list, moving_windows,algorithms=algorithms)

## General results

### Create table

In [5]:
list_results_clf = []
list_results_reg = []
# list_results = []

for name, dict_experiments, path_results in [
    ("tf", dict_experiments_tf, PATH_REPORTS + 'test_results/{algorithm}_{asset}_features={features}__label={label_col}__sql_len={seq_len}__scaling_method={scaling_method}_test_results.csv'),
    ('dummy', dict_experiments_dummy, PATH_REPORTS + "test_results/Dummy_model_{asset}_features={feature_col}__label={label_col}_test_results.csv")
]:
    
    for exp_name, config in tqdm(dict_experiments.items()):
        
        if name == "tf":
            
            feature_cols = config['feature_cols']
            label_col = config['label_col']
            seq_len = config['seq_len']
            asset = config['asset']
            scaling_method = config['scaling_method']
            algorithm = config['algorithm']
            asset = config['asset']
            prediction_type = config['prediction_type']
            
            filepath = path_results.format(
                algorithm = algorithm,
                features = "_".join(feature_cols),
                label_col = label_col,
                asset = asset.replace(".", "_"),
                scaling_method = scaling_method.__str__(),
                seq_len = seq_len
            )
            
            if not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue

            results = pd.read_csv(filepath)
            
        elif name == 'dummy':
            feature_cols = [config['feature_col']]
            label_col = config['label_col']
            asset = config['asset']
            algorithm = "Dummy_model"
            seq_len = 1
            scaling_method = None
            prediction_type = 'dummy'
            asset_formated = asset.replace(".", "_")
            
            filepath = path_results.format(
                algorithm = algorithm,
                feature_col = feature_cols[0],
                label_col = label_col,
                asset = asset_formated
            )
            
            if  not os.path.exists(filepath): 
                print(f"The file {filepath} dont't exists")
                continue
            
            results = pd.read_csv(filepath)
        
        # concat with the test dataset
        full_test = load_dataset(asset=asset, data_dir=DATA_DIR,dataset_split='test')
        results = pd.concat([results,full_test], axis =1)
        
        # find the window for label calculation (get the number of the string)
        window = int(re.findall(r'\d+', label_col)[0])

        # results without leak
        results_wo_leak = results.iloc[max(seq_len, window):]
        
        # remove invalid days
        results = results[results.Invalid_Days == 0]
        
        # raise error if there is nan values
        if results.isna().sum().sum() > 0: 
            raise ValueError('Há dados nulos no dataframe de resultados')
                
                
        new_coluns = [asset, str(feature_cols), str(label_col), seq_len, algorithm, scaling_method, prediction_type]
        new_columns_nms = ['asset','feature_cols','label_col','seq_len','model','scaling_method','prediction_type', 'leak']
                
        # regression metrics
        reg_metrics = regression_metrics(results.y_test, results.y_pred)
        reg_metrics_wo_leak = regression_metrics(results_wo_leak.y_test, results_wo_leak.y_pred)
        
    
        # add columns with experiment config 

        reg_metrics[
            new_columns_nms
            ] = new_coluns + [True]
        reg_metrics_wo_leak[
            new_columns_nms
            ] = new_coluns+ [False]
        
        list_results_reg.append(reg_metrics)
        list_results_reg.append(reg_metrics_wo_leak)
    
    
        # obtem metricas de classificacao, truncando os valores (se o valor já for a meta, não terá diferenca)
        y_test_trunc = [int(i) for i in results.y_test]
        y_pred_trunc = [int(i) for i in results.y_pred]
        df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        
        y_test_trunc_woleak = [int(i) for i in results_wo_leak.y_test]
        y_pred_trunc_woleak = [int(i) for i in results_wo_leak.y_pred]
        df_cr_wo_leak = get_classification_report(y_test_trunc_woleak, y_pred_trunc_woleak)
        
        df_cr[new_columns_nms] = new_coluns + [True]
        df_cr_wo_leak[new_columns_nms] = new_coluns + [False]
        
        list_results_clf.append(df_cr)
        list_results_clf.append(df_cr_wo_leak)

100%|██████████| 2304/2304 [01:41<00:00, 22.80it/s]
100%|██████████| 48/48 [00:02<00:00, 23.29it/s]


In [6]:
# concat results
final_results_reg = pd.concat(list_results_reg).reset_index(drop=True)
final_results_clf = pd.concat(list_results_clf).reset_index(drop=True)

In [7]:
# only the "right" labels
final_results_clf = final_results_clf[final_results_clf.label_col.str.contains('meta')]
final_results_reg = final_results_reg[final_results_reg.label_col.str.contains('diff_close_mean_z_score')]

### Results

In [8]:
assets_domain = [(["PETR3.SA","PRIO3.SA"], 'Petróleo'), (["VALE3.SA", "GGBR3.SA"], "Mineração"), (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'), (["FLRY3.SA", "RADL3.SA"], 'Saúde')]

# Configuração para não omitir linhas
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)


#### Dummy results

In [9]:
assets_domain = [(["PETR3.SA","PRIO3.SA"], 'Petróleo'), (["VALE3.SA", "GGBR3.SA"], "Mineração"), (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'), (["FLRY3.SA", "RADL3.SA"], 'Saúde')]

asset_to_domain = lambda x: 'Petróleo' if x in ["PETR3.SA","PRIO3.SA"] else "Mineração" if x in ["VALE3.SA", "GGBR3.SA"] else 'Financeiro' if x in ["ABCB4.SA", "ITUB3.SA"] else 'Saúde' if x in ["FLRY3.SA", "RADL3.SA"] else None

In [10]:
# general 
macro_dummy = final_results_clf[
    (final_results_clf['class'] == 'macro avg') &
    (final_results_clf['prediction_type'] == 'dummy')
    
    ].round(2)
macro_dummy['domain'] = macro_dummy.asset.apply(asset_to_domain)

reg_dummy = final_results_reg[
    (final_results_reg['prediction_type'] == 'dummy')
    ].round(2)
reg_dummy['domain'] = reg_dummy.asset.apply(asset_to_domain)

In [11]:
# for label in macro_dummy.label_col.unique():
#     for model in macro_dummy.model.unique():

#         print(label, model)
        
#         fig = px.box(macro_dummy, x="model", y="f1-score", points="all")
#         fig.show()

In [12]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_dummy[macro_dummy.label_col.str.contains('meta') & macro_dummy.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'seq_len', 'class', 'model'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)
        
        
    display(metrics_clf.sort_values(['alvo', 'leak', 'ativo']))
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    metrics_reg = reg_dummy[reg_dummy.label_col.str.contains('diff_close_mean_z_score') & reg_dummy.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'seq_len', 'model', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)
    display(metrics_reg)


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
42114,0.67,0.67,0.67,PETR3.SA,['past_meta_14'],meta_14,False,Petróleo
42215,0.65,0.65,0.65,PRIO3.SA,['past_meta_14'],meta_14,False,Petróleo
42105,0.43,0.43,0.43,PETR3.SA,['past_meta_14'],meta_14,True,Petróleo
42207,0.49,0.49,0.49,PRIO3.SA,['past_meta_14'],meta_14,True,Petróleo
42150,0.71,0.7,0.7,PETR3.SA,['past_meta_21'],meta_21,False,Petróleo
42246,0.74,0.74,0.74,PRIO3.SA,['past_meta_21'],meta_21,False,Petróleo
42141,0.49,0.49,0.49,PETR3.SA,['past_meta_21'],meta_21,True,Petróleo
42238,0.62,0.62,0.62,PRIO3.SA,['past_meta_21'],meta_21,True,Petróleo
42079,0.54,0.54,0.54,PETR3.SA,['past_meta_7'],meta_7,False,Petróleo
42183,0.48,0.48,0.48,PRIO3.SA,['past_meta_7'],meta_7,False,Petróleo


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
4608,0.71,0.92,0.96,0.26,2.07,PETR3.SA,diff_close_mean_z_score_7,True,Petróleo
4609,0.49,0.63,0.79,0.48,1.42,PETR3.SA,diff_close_mean_z_score_7,False,Petróleo
4612,0.57,0.65,0.81,0.58,1.78,PETR3.SA,diff_close_mean_z_score_14,True,Petróleo
4613,0.39,0.45,0.67,0.71,1.23,PETR3.SA,diff_close_mean_z_score_14,False,Petróleo
4616,0.48,0.48,0.69,0.7,1.08,PETR3.SA,diff_close_mean_z_score_21,True,Petróleo
4617,0.33,0.33,0.58,0.79,0.74,PETR3.SA,diff_close_mean_z_score_21,False,Petróleo
4620,0.69,0.84,0.92,0.33,2.15,PRIO3.SA,diff_close_mean_z_score_7,True,Petróleo
4621,0.48,0.58,0.76,0.53,1.48,PRIO3.SA,diff_close_mean_z_score_7,False,Petróleo
4624,0.56,0.58,0.76,0.61,1.58,PRIO3.SA,diff_close_mean_z_score_14,True,Petróleo
4625,0.38,0.4,0.63,0.73,1.09,PRIO3.SA,diff_close_mean_z_score_14,False,Petróleo



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
42414,0.69,0.69,0.69,GGBR3.SA,['past_meta_14'],meta_14,False,Mineração
42311,0.7,0.7,0.7,VALE3.SA,['past_meta_14'],meta_14,False,Mineração
42407,0.53,0.53,0.53,GGBR3.SA,['past_meta_14'],meta_14,True,Mineração
42303,0.56,0.56,0.56,VALE3.SA,['past_meta_14'],meta_14,True,Mineração
42455,0.66,0.66,0.66,GGBR3.SA,['past_meta_21'],meta_21,False,Mineração
42350,0.71,0.71,0.71,VALE3.SA,['past_meta_21'],meta_21,False,Mineração
42445,0.42,0.42,0.42,GGBR3.SA,['past_meta_21'],meta_21,True,Mineração
42341,0.45,0.45,0.45,VALE3.SA,['past_meta_21'],meta_21,True,Mineração
42383,0.52,0.52,0.52,GGBR3.SA,['past_meta_7'],meta_7,False,Mineração
42279,0.6,0.6,0.6,VALE3.SA,['past_meta_7'],meta_7,False,Mineração


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
4632,0.66,0.76,0.87,0.41,264745000000.0,VALE3.SA,diff_close_mean_z_score_7,True,Mineração
4633,0.45,0.51,0.72,0.61,182029500000.0,VALE3.SA,diff_close_mean_z_score_7,False,Mineração
4636,0.52,0.52,0.72,0.67,1.19,VALE3.SA,diff_close_mean_z_score_14,True,Mineração
4637,0.35,0.35,0.59,0.78,0.78,VALE3.SA,diff_close_mean_z_score_14,False,Mineração
4640,0.43,0.37,0.61,0.79,1.37,VALE3.SA,diff_close_mean_z_score_21,True,Mineração
4641,0.29,0.23,0.48,0.87,0.92,VALE3.SA,diff_close_mean_z_score_21,False,Mineração
4644,0.71,0.88,0.94,0.27,1.76,GGBR3.SA,diff_close_mean_z_score_7,True,Mineração
4645,0.48,0.6,0.78,0.49,1.2,GGBR3.SA,diff_close_mean_z_score_7,False,Mineração
4648,0.56,0.57,0.76,0.62,3.22,GGBR3.SA,diff_close_mean_z_score_14,True,Mineração
4649,0.38,0.39,0.62,0.75,2.2,GGBR3.SA,diff_close_mean_z_score_14,False,Mineração



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
42519,0.65,0.65,0.65,ABCB4.SA,['past_meta_14'],meta_14,False,Financeiro
42623,0.58,0.58,0.58,ITUB3.SA,['past_meta_14'],meta_14,False,Financeiro
42511,0.47,0.47,0.47,ABCB4.SA,['past_meta_14'],meta_14,True,Financeiro
42614,0.46,0.46,0.46,ITUB3.SA,['past_meta_14'],meta_14,True,Financeiro
42556,0.57,0.57,0.57,ABCB4.SA,['past_meta_21'],meta_21,False,Financeiro
42663,0.54,0.54,0.54,ITUB3.SA,['past_meta_21'],meta_21,False,Financeiro
42546,0.45,0.45,0.45,ABCB4.SA,['past_meta_21'],meta_21,True,Financeiro
42653,0.46,0.46,0.46,ITUB3.SA,['past_meta_21'],meta_21,True,Financeiro
42487,0.51,0.51,0.51,ABCB4.SA,['past_meta_7'],meta_7,False,Financeiro
42587,0.43,0.43,0.43,ITUB3.SA,['past_meta_7'],meta_7,False,Financeiro


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
4656,0.76,1.0,1.0,0.17,265.8,ABCB4.SA,diff_close_mean_z_score_7,True,Financeiro
4657,0.52,0.68,0.82,0.44,182.74,ABCB4.SA,diff_close_mean_z_score_7,False,Financeiro
4660,0.62,0.71,0.84,0.5,2.74,ABCB4.SA,diff_close_mean_z_score_14,True,Financeiro
4661,0.43,0.49,0.7,0.66,1.9,ABCB4.SA,diff_close_mean_z_score_14,False,Financeiro
4664,0.54,0.52,0.72,0.66,1.69,ABCB4.SA,diff_close_mean_z_score_21,True,Financeiro
4665,0.37,0.36,0.6,0.77,1.17,ABCB4.SA,diff_close_mean_z_score_21,False,Financeiro
4668,0.68,0.88,0.94,0.33,4948931000000.0,ITUB3.SA,diff_close_mean_z_score_7,True,Financeiro
4669,0.46,0.59,0.77,0.54,3402713000000.0,ITUB3.SA,diff_close_mean_z_score_7,False,Financeiro
4672,0.52,0.55,0.74,0.67,167.98,ITUB3.SA,diff_close_mean_z_score_14,True,Financeiro
4673,0.36,0.37,0.61,0.78,116.34,ITUB3.SA,diff_close_mean_z_score_14,False,Financeiro



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,precision,recall,f1-score,ativo,feature_cols,alvo,leak,domain
42731,0.63,0.63,0.63,FLRY3.SA,['past_meta_14'],meta_14,False,Saúde
42831,0.66,0.66,0.66,RADL3.SA,['past_meta_14'],meta_14,False,Saúde
42721,0.38,0.38,0.38,FLRY3.SA,['past_meta_14'],meta_14,True,Saúde
42823,0.5,0.5,0.5,RADL3.SA,['past_meta_14'],meta_14,True,Saúde
42766,0.7,0.7,0.7,FLRY3.SA,['past_meta_21'],meta_21,False,Saúde
42867,0.61,0.61,0.61,RADL3.SA,['past_meta_21'],meta_21,False,Saúde
42758,0.48,0.48,0.48,FLRY3.SA,['past_meta_21'],meta_21,True,Saúde
42858,0.52,0.52,0.52,RADL3.SA,['past_meta_21'],meta_21,True,Saúde
42695,0.51,0.51,0.51,FLRY3.SA,['past_meta_7'],meta_7,False,Saúde
42799,0.5,0.5,0.5,RADL3.SA,['past_meta_7'],meta_7,False,Saúde


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,leak,domain
4680,0.72,0.91,0.95,0.25,2.83,FLRY3.SA,diff_close_mean_z_score_7,True,Saúde
4681,0.49,0.62,0.79,0.48,1.94,FLRY3.SA,diff_close_mean_z_score_7,False,Saúde
4684,0.58,0.56,0.75,0.61,2.25,FLRY3.SA,diff_close_mean_z_score_14,True,Saúde
4685,0.39,0.39,0.62,0.74,1.56,FLRY3.SA,diff_close_mean_z_score_14,False,Saúde
4688,0.49,0.42,0.65,0.73,203.79,FLRY3.SA,diff_close_mean_z_score_21,True,Saúde
4689,0.34,0.29,0.54,0.81,142.19,FLRY3.SA,diff_close_mean_z_score_21,False,Saúde
4692,0.68,0.82,0.91,0.36,2.85,RADL3.SA,diff_close_mean_z_score_7,True,Saúde
4693,0.47,0.56,0.75,0.57,1.96,RADL3.SA,diff_close_mean_z_score_7,False,Saúde
4696,0.56,0.58,0.76,0.62,3164.58,RADL3.SA,diff_close_mean_z_score_14,True,Saúde
4697,0.39,0.4,0.63,0.74,2191.88,RADL3.SA,diff_close_mean_z_score_14,False,Saúde


In [13]:
for leak in [True, False]:
        
    fig = px.box(
        macro_dummy[macro_dummy.leak == leak], x="label_col", 
        y="f1-score", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak} | model=dummy')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_dummy_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg_dummy[reg_dummy.leak == leak], x="label_col", 
        y="R-squared (R2)", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak} | model=dummy')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_dummy_leak={leak}_reg.png")
    fig.show()

In [14]:
for leak in [True, False]:
        
    fig = px.box(
        macro_dummy[macro_dummy.leak == leak], x="asset", 
        y="f1-score", 
        points="all",
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_dummy_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg_dummy[reg_dummy.leak == leak], x="asset", 
        y="R-squared (R2)", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_dummy_leak={leak}_ref.png")
    fig.show()

#### NN results

In [15]:
# general 
macro_clf = final_results_clf[(final_results_clf['class'] == 'macro avg')].round(5)
reg= final_results_reg.round(5)

In [16]:
for label in reg.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        
        if 'meta' in label: label_formated = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label_formated = label.replace('diff_close_mean_z_score', 'z')
        
        df_plt = reg[(reg.label_col == label) & (reg.leak == leak)]
        
        fig = px.box(
            df_plt, 
            x="model", 
            y="R-squared (R2)", 
            points="all",
            title = f'Distribuição de f1 para os experimentos \n label:{label_formated}|leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label_formated}__leak={leak}_reg.png")
        # fig.update_traces(boxpoints=False) 
        fig.show()

label: diff_close_mean_z_score_7 - leak: False


label: diff_close_mean_z_score_14 - leak: False


label: diff_close_mean_z_score_21 - leak: False


In [17]:
for label in macro_clf.label_col.unique():

    for leak in [False]:

        print(f"label: {label} - leak: {leak}")
        
        if 'meta' in label: label_col = label.replace('meta', 'k')
        if 'diff_close_mean_z_score' in label: label_col = label.replace('diff_close_mean_z_score', 'z')
        
        df_plt = macro_clf[(macro_clf.label_col == label) & (macro_clf.leak == leak)]
        
        fig = px.box(
            df_plt, 
            x="model", 
            y="f1-score", 
            points="all",
            title = f'Distribuição dos resultados de f1 para os experimentos - label: {label} | leak={leak}'
            )
        
        fig.write_image(PATH_REPORTS + f"/images/box_plot_exp_dist_label={label}__leak={leak}_clf.png")
        fig.show()

label: meta_7 - leak: False


label: meta_14 - leak: False


label: meta_21 - leak: False


In [18]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak', 'feature_cols']]
    
    
    display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()])
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
    
    display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42114,PETR3.SA,1,meta_14,0.66893,0.66658,0.66773,Dummy_model,False,['past_meta_14']
42105,PETR3.SA,1,meta_14,0.43335,0.43335,0.43335,Dummy_model,True,['past_meta_14']
42215,PRIO3.SA,1,meta_14,0.65342,0.65342,0.65342,Dummy_model,False,['past_meta_14']
42207,PRIO3.SA,1,meta_14,0.49031,0.49031,0.49031,Dummy_model,True,['past_meta_14']
42150,PETR3.SA,1,meta_21,0.70554,0.70281,0.70415,Dummy_model,False,['past_meta_21']
42141,PETR3.SA,1,meta_21,0.4854,0.4854,0.4854,Dummy_model,True,['past_meta_21']
3218,PRIO3.SA,2,meta_21,0.74257,0.74321,0.74289,KAN,False,['meta_21']
3210,PRIO3.SA,2,meta_21,0.62055,0.62055,0.62055,KAN,True,['meta_21']
42079,PETR3.SA,1,meta_7,0.53916,0.53916,0.53916,Dummy_model,False,['past_meta_7']
5292,PETR3.SA,3,meta_7,0.33,0.34189,0.33578,KAN,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
2611,0.41673,0.40954,0.63995,0.73547,1.20534,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,False
2324,0.53667,0.58711,0.76623,0.61788,1.50186,PETR3.SA,diff_close_mean_z_score_14,21,MLP,True
55,0.39605,0.37282,0.61059,0.74988,1.041,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
632,0.52812,0.52064,0.72155,0.64925,1.45624,PRIO3.SA,diff_close_mean_z_score_14,3,MLP,True
2623,0.35278,0.30969,0.55649,0.80414,0.83491,PETR3.SA,diff_close_mean_z_score_21,28,LSTM_with_Attention,False
32,0.45541,0.4464,0.66813,0.7241,0.9954,PETR3.SA,diff_close_mean_z_score_21,1,MLP,True
67,0.33896,0.28422,0.53312,0.82772,0.87365,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
68,0.45955,0.40064,0.63297,0.7544,1.20705,PRIO3.SA,diff_close_mean_z_score_21,1,MLP,True
585,0.51608,0.54786,0.74018,0.54704,1.26817,PETR3.SA,diff_close_mean_z_score_7,3,MLP,False
1448,0.64145,0.75639,0.86971,0.38884,1.60165,PETR3.SA,diff_close_mean_z_score_7,6,MLP,True



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42414,GGBR3.SA,1,meta_14,0.69012,0.69012,0.69012,Dummy_model,False,['past_meta_14']
42407,GGBR3.SA,1,meta_14,0.52911,0.52871,0.52891,Dummy_model,True,['past_meta_14']
42311,VALE3.SA,1,meta_14,0.70389,0.70389,0.70389,Dummy_model,False,['past_meta_14']
42303,VALE3.SA,1,meta_14,0.55919,0.55919,0.55919,Dummy_model,True,['past_meta_14']
42455,GGBR3.SA,1,meta_21,0.66197,0.66161,0.66179,Dummy_model,False,['past_meta_21']
42445,GGBR3.SA,1,meta_21,0.42447,0.42408,0.42427,Dummy_model,True,['past_meta_21']
42350,VALE3.SA,1,meta_21,0.70587,0.70695,0.70641,Dummy_model,False,['past_meta_21']
42341,VALE3.SA,1,meta_21,0.45227,0.45227,0.45227,Dummy_model,True,['past_meta_21']
42383,GGBR3.SA,1,meta_7,0.52066,0.52066,0.52066,Dummy_model,False,['past_meta_7']
6262,GGBR3.SA,3,meta_7,0.34742,0.35858,0.3529,KAN,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
127,0.40508,0.36376,0.60312,0.7638,2.02734,GGBR3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
1278,0.5335,0.52207,0.72255,0.65533,2.7918,GGBR3.SA,diff_close_mean_z_score_14,5,LSTM_with_Attention,True
2395,0.38416,0.32073,0.56633,0.79888,0.80308,VALE3.SA,diff_close_mean_z_score_14,21,LSTM_with_Attention,False
1530,0.49259,0.47155,0.68669,0.69917,1.06718,VALE3.SA,diff_close_mean_z_score_14,6,LSTM_with_Attention,True
139,0.3418,0.27724,0.52654,0.83324,1.16223,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
140,0.45813,0.4041,0.63569,0.75315,1.67891,GGBR3.SA,diff_close_mean_z_score_21,1,MLP,True
1543,0.30609,0.21836,0.4673,0.87918,0.92926,VALE3.SA,diff_close_mean_z_score_21,6,LSTM_with_Attention,False
1542,0.4141,0.34257,0.5853,0.8045,1.30509,VALE3.SA,diff_close_mean_z_score_21,6,LSTM_with_Attention,True
403,0.51008,0.52115,0.72191,0.56095,1.04879,GGBR3.SA,diff_close_mean_z_score_7,2,LSTM_with_Attention,False
1556,0.64689,0.72321,0.85042,0.40107,1.32073,GGBR3.SA,diff_close_mean_z_score_7,6,MLP,True



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42519,ABCB4.SA,1,meta_14,0.64893,0.64893,0.64893,Dummy_model,False,['past_meta_14']
42511,ABCB4.SA,1,meta_14,0.47122,0.47122,0.47122,Dummy_model,True,['past_meta_14']
42623,ITUB3.SA,1,meta_14,0.57602,0.5763,0.57616,Dummy_model,False,['past_meta_14']
42614,ITUB3.SA,1,meta_14,0.4604,0.46071,0.46056,Dummy_model,True,['past_meta_14']
22589,ABCB4.SA,21,meta_21,0.62418,0.61196,0.61751,KAN,False,['meta_21']
22580,ABCB4.SA,21,meta_21,0.54597,0.53944,0.54235,KAN,True,['meta_21']
4533,ITUB3.SA,2,meta_21,0.53843,0.53902,0.53872,KAN,False,['meta_21']
4523,ITUB3.SA,2,meta_21,0.45799,0.45884,0.45841,KAN,True,['meta_21']
42487,ABCB4.SA,1,meta_7,0.51475,0.51433,0.51454,Dummy_model,False,['past_meta_7']
42479,ABCB4.SA,1,meta_7,0.35303,0.35303,0.35303,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1891,0.44324,0.4462,0.66798,0.68718,1.75463,ABCB4.SA,diff_close_mean_z_score_14,7,LSTM_with_Attention,False
1892,0.58421,0.61869,0.78657,0.56614,2.32065,ABCB4.SA,diff_close_mean_z_score_14,7,MLP,True
199,0.38996,0.35298,0.59412,0.78712,119.7278,ITUB3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
200,0.5051,0.49831,0.70591,0.69916,110.9771,ITUB3.SA,diff_close_mean_z_score_14,1,MLP,True
175,0.38084,0.34007,0.58315,0.78038,1.15552,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
176,0.51269,0.48344,0.6953,0.68608,1.63351,ABCB4.SA,diff_close_mean_z_score_21,1,MLP,True
1363,0.31635,0.25526,0.50523,0.86053,1.86865,ITUB3.SA,diff_close_mean_z_score_21,5,LSTM_with_Attention,False
1362,0.41752,0.36798,0.60662,0.79851,2.56526,ITUB3.SA,diff_close_mean_z_score_21,5,LSTM_with_Attention,True
1879,0.53721,0.57495,0.75826,0.52145,134.768,ABCB4.SA,diff_close_mean_z_score_7,7,LSTM_with_Attention,False
1592,0.68404,0.8002,0.89454,0.33772,178.5707,ABCB4.SA,diff_close_mean_z_score_7,6,MLP,True



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42731,FLRY3.SA,1,meta_14,0.6327,0.63292,0.63281,Dummy_model,False,['past_meta_14']
42721,FLRY3.SA,1,meta_14,0.37578,0.37601,0.3759,Dummy_model,True,['past_meta_14']
42831,RADL3.SA,1,meta_14,0.66057,0.6603,0.66044,Dummy_model,False,['past_meta_14']
42823,RADL3.SA,1,meta_14,0.50464,0.50435,0.5045,Dummy_model,True,['past_meta_14']
42766,FLRY3.SA,1,meta_21,0.69998,0.70159,0.70078,Dummy_model,False,['past_meta_21']
42758,FLRY3.SA,1,meta_21,0.47878,0.48026,0.47951,Dummy_model,True,['past_meta_21']
42867,RADL3.SA,1,meta_21,0.61151,0.6113,0.6114,Dummy_model,False,['past_meta_21']
42858,RADL3.SA,1,meta_21,0.51669,0.51643,0.51656,Dummy_model,True,['past_meta_21']
9896,FLRY3.SA,4,meta_7,0.70839,0.48968,0.52261,KAN,False,['meta_7']
4637,FLRY3.SA,2,meta_7,0.33019,0.33744,0.33374,KAN,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
235,0.41775,0.36041,0.60034,0.75489,1.48075,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
236,0.54325,0.50617,0.71146,0.65279,2.02491,FLRY3.SA,diff_close_mean_z_score_14,1,MLP,True
847,0.40684,0.37265,0.61045,0.76,1803.80895,RADL3.SA,diff_close_mean_z_score_14,3,LSTM_with_Attention,False
846,0.53146,0.528,0.72664,0.65644,2604.21232,RADL3.SA,diff_close_mean_z_score_14,3,LSTM_with_Attention,True
247,0.35758,0.27842,0.52765,0.8225,191.76303,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
536,0.4725,0.39121,0.62547,0.74676,71.88471,FLRY3.SA,diff_close_mean_z_score_21,2,MLP,True
283,0.34432,0.27977,0.52894,0.82705,2.84413,RADL3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
284,0.46131,0.39785,0.63075,0.75296,3.97417,RADL3.SA,diff_close_mean_z_score_21,1,MLP,True
2239,0.53219,0.5436,0.73729,0.55135,1.60333,FLRY3.SA,diff_close_mean_z_score_7,14,LSTM_with_Attention,False
2240,0.66556,0.73428,0.8569,0.39393,1.98737,FLRY3.SA,diff_close_mean_z_score_7,14,MLP,True


In [19]:

print('Metricas macro - label meta')
metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta')].\
drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
        rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo', 'f1-score', 'model', 'leak']]

metrics_clf = metrics_clf[metrics_clf.leak == False]

metrics_clf.alvo = metrics_clf.alvo.apply(lambda x: x.replace('meta', 'k'))

display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()].drop('leak', axis = 1).round(2))

print('Metricas regressao - label diff_close_mean_z_score')


metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score')].\
    drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
        rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)[['ativo','seq_len','alvo', 'R-squared (R2)', 'model', 'leak']].round(2)
        
metrics_reg = metrics_reg[metrics_reg.leak == False]

metrics_reg.alvo = metrics_reg.alvo.apply(lambda x: x.replace('diff_close_mean_z_score', 'z'))

display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])

Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,f1-score,model
42519,ABCB4.SA,1,k_14,0.65,Dummy_model
42731,FLRY3.SA,1,k_14,0.63,Dummy_model
42414,GGBR3.SA,1,k_14,0.69,Dummy_model
42623,ITUB3.SA,1,k_14,0.58,Dummy_model
42114,PETR3.SA,1,k_14,0.67,Dummy_model
42215,PRIO3.SA,1,k_14,0.65,Dummy_model
42831,RADL3.SA,1,k_14,0.66,Dummy_model
42311,VALE3.SA,1,k_14,0.7,Dummy_model
22589,ABCB4.SA,21,k_21,0.62,KAN
42766,FLRY3.SA,1,k_21,0.7,Dummy_model


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,ativo,seq_len,alvo,R-squared (R2),model,leak
165,ABCB4.SA,1,z_14,0.69,MLP,False
235,FLRY3.SA,1,z_14,0.75,LSTM_with_Attention,False
127,GGBR3.SA,1,z_14,0.76,LSTM_with_Attention,False
199,ITUB3.SA,1,z_14,0.79,LSTM_with_Attention,False
2611,PETR3.SA,28,z_14,0.74,LSTM_with_Attention,False
55,PRIO3.SA,1,z_14,0.75,LSTM_with_Attention,False
271,RADL3.SA,1,z_14,0.76,LSTM_with_Attention,False
2395,VALE3.SA,21,z_14,0.8,LSTM_with_Attention,False
175,ABCB4.SA,1,z_21,0.78,LSTM_with_Attention,False
247,FLRY3.SA,1,z_21,0.82,LSTM_with_Attention,False


In [20]:
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')
    
    print('Metricas macro - label meta')
    metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
    drop(['support', 'scaling_method', 'prediction_type', 'class'], axis =1 ).\
            rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak', 'feature_cols']]
    
    display(metrics_clf.loc[metrics_clf.groupby(['alvo','ativo', 'leak'])['f1-score'].idxmax()])
    
    print('Metricas regressao - label diff_close_mean_z_score')
    
    
    metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
        drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
            rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
    
    display(metrics_reg.loc[metrics_reg.groupby(['alvo','ativo', 'leak'])['R-squared (R2)'].idxmax()])


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42114,PETR3.SA,1,meta_14,0.66893,0.66658,0.66773,Dummy_model,False,['past_meta_14']
42105,PETR3.SA,1,meta_14,0.43335,0.43335,0.43335,Dummy_model,True,['past_meta_14']
42215,PRIO3.SA,1,meta_14,0.65342,0.65342,0.65342,Dummy_model,False,['past_meta_14']
42207,PRIO3.SA,1,meta_14,0.49031,0.49031,0.49031,Dummy_model,True,['past_meta_14']
42150,PETR3.SA,1,meta_21,0.70554,0.70281,0.70415,Dummy_model,False,['past_meta_21']
42141,PETR3.SA,1,meta_21,0.4854,0.4854,0.4854,Dummy_model,True,['past_meta_21']
3218,PRIO3.SA,2,meta_21,0.74257,0.74321,0.74289,KAN,False,['meta_21']
3210,PRIO3.SA,2,meta_21,0.62055,0.62055,0.62055,KAN,True,['meta_21']
42079,PETR3.SA,1,meta_7,0.53916,0.53916,0.53916,Dummy_model,False,['past_meta_7']
5292,PETR3.SA,3,meta_7,0.33,0.34189,0.33578,KAN,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
2611,0.41673,0.40954,0.63995,0.73547,1.20534,PETR3.SA,diff_close_mean_z_score_14,28,LSTM_with_Attention,False
2324,0.53667,0.58711,0.76623,0.61788,1.50186,PETR3.SA,diff_close_mean_z_score_14,21,MLP,True
55,0.39605,0.37282,0.61059,0.74988,1.041,PRIO3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
632,0.52812,0.52064,0.72155,0.64925,1.45624,PRIO3.SA,diff_close_mean_z_score_14,3,MLP,True
2623,0.35278,0.30969,0.55649,0.80414,0.83491,PETR3.SA,diff_close_mean_z_score_21,28,LSTM_with_Attention,False
32,0.45541,0.4464,0.66813,0.7241,0.9954,PETR3.SA,diff_close_mean_z_score_21,1,MLP,True
67,0.33896,0.28422,0.53312,0.82772,0.87365,PRIO3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
68,0.45955,0.40064,0.63297,0.7544,1.20705,PRIO3.SA,diff_close_mean_z_score_21,1,MLP,True
585,0.51608,0.54786,0.74018,0.54704,1.26817,PETR3.SA,diff_close_mean_z_score_7,3,MLP,False
1448,0.64145,0.75639,0.86971,0.38884,1.60165,PETR3.SA,diff_close_mean_z_score_7,6,MLP,True



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42414,GGBR3.SA,1,meta_14,0.69012,0.69012,0.69012,Dummy_model,False,['past_meta_14']
42407,GGBR3.SA,1,meta_14,0.52911,0.52871,0.52891,Dummy_model,True,['past_meta_14']
42311,VALE3.SA,1,meta_14,0.70389,0.70389,0.70389,Dummy_model,False,['past_meta_14']
42303,VALE3.SA,1,meta_14,0.55919,0.55919,0.55919,Dummy_model,True,['past_meta_14']
42455,GGBR3.SA,1,meta_21,0.66197,0.66161,0.66179,Dummy_model,False,['past_meta_21']
42445,GGBR3.SA,1,meta_21,0.42447,0.42408,0.42427,Dummy_model,True,['past_meta_21']
42350,VALE3.SA,1,meta_21,0.70587,0.70695,0.70641,Dummy_model,False,['past_meta_21']
42341,VALE3.SA,1,meta_21,0.45227,0.45227,0.45227,Dummy_model,True,['past_meta_21']
42383,GGBR3.SA,1,meta_7,0.52066,0.52066,0.52066,Dummy_model,False,['past_meta_7']
6262,GGBR3.SA,3,meta_7,0.34742,0.35858,0.3529,KAN,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
127,0.40508,0.36376,0.60312,0.7638,2.02734,GGBR3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
1278,0.5335,0.52207,0.72255,0.65533,2.7918,GGBR3.SA,diff_close_mean_z_score_14,5,LSTM_with_Attention,True
2395,0.38416,0.32073,0.56633,0.79888,0.80308,VALE3.SA,diff_close_mean_z_score_14,21,LSTM_with_Attention,False
1530,0.49259,0.47155,0.68669,0.69917,1.06718,VALE3.SA,diff_close_mean_z_score_14,6,LSTM_with_Attention,True
139,0.3418,0.27724,0.52654,0.83324,1.16223,GGBR3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
140,0.45813,0.4041,0.63569,0.75315,1.67891,GGBR3.SA,diff_close_mean_z_score_21,1,MLP,True
1543,0.30609,0.21836,0.4673,0.87918,0.92926,VALE3.SA,diff_close_mean_z_score_21,6,LSTM_with_Attention,False
1542,0.4141,0.34257,0.5853,0.8045,1.30509,VALE3.SA,diff_close_mean_z_score_21,6,LSTM_with_Attention,True
403,0.51008,0.52115,0.72191,0.56095,1.04879,GGBR3.SA,diff_close_mean_z_score_7,2,LSTM_with_Attention,False
1556,0.64689,0.72321,0.85042,0.40107,1.32073,GGBR3.SA,diff_close_mean_z_score_7,6,MLP,True



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42519,ABCB4.SA,1,meta_14,0.64893,0.64893,0.64893,Dummy_model,False,['past_meta_14']
42511,ABCB4.SA,1,meta_14,0.47122,0.47122,0.47122,Dummy_model,True,['past_meta_14']
42623,ITUB3.SA,1,meta_14,0.57602,0.5763,0.57616,Dummy_model,False,['past_meta_14']
42614,ITUB3.SA,1,meta_14,0.4604,0.46071,0.46056,Dummy_model,True,['past_meta_14']
22589,ABCB4.SA,21,meta_21,0.62418,0.61196,0.61751,KAN,False,['meta_21']
22580,ABCB4.SA,21,meta_21,0.54597,0.53944,0.54235,KAN,True,['meta_21']
4533,ITUB3.SA,2,meta_21,0.53843,0.53902,0.53872,KAN,False,['meta_21']
4523,ITUB3.SA,2,meta_21,0.45799,0.45884,0.45841,KAN,True,['meta_21']
42487,ABCB4.SA,1,meta_7,0.51475,0.51433,0.51454,Dummy_model,False,['past_meta_7']
42479,ABCB4.SA,1,meta_7,0.35303,0.35303,0.35303,Dummy_model,True,['past_meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
1891,0.44324,0.4462,0.66798,0.68718,1.75463,ABCB4.SA,diff_close_mean_z_score_14,7,LSTM_with_Attention,False
1892,0.58421,0.61869,0.78657,0.56614,2.32065,ABCB4.SA,diff_close_mean_z_score_14,7,MLP,True
199,0.38996,0.35298,0.59412,0.78712,119.7278,ITUB3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
200,0.5051,0.49831,0.70591,0.69916,110.9771,ITUB3.SA,diff_close_mean_z_score_14,1,MLP,True
175,0.38084,0.34007,0.58315,0.78038,1.15552,ABCB4.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
176,0.51269,0.48344,0.6953,0.68608,1.63351,ABCB4.SA,diff_close_mean_z_score_21,1,MLP,True
1363,0.31635,0.25526,0.50523,0.86053,1.86865,ITUB3.SA,diff_close_mean_z_score_21,5,LSTM_with_Attention,False
1362,0.41752,0.36798,0.60662,0.79851,2.56526,ITUB3.SA,diff_close_mean_z_score_21,5,LSTM_with_Attention,True
1879,0.53721,0.57495,0.75826,0.52145,134.768,ABCB4.SA,diff_close_mean_z_score_7,7,LSTM_with_Attention,False
1592,0.68404,0.8002,0.89454,0.33772,178.5707,ABCB4.SA,diff_close_mean_z_score_7,6,MLP,True



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro - label meta


Unnamed: 0,ativo,seq_len,alvo,precision,recall,f1-score,model,leak,feature_cols
42731,FLRY3.SA,1,meta_14,0.6327,0.63292,0.63281,Dummy_model,False,['past_meta_14']
42721,FLRY3.SA,1,meta_14,0.37578,0.37601,0.3759,Dummy_model,True,['past_meta_14']
42831,RADL3.SA,1,meta_14,0.66057,0.6603,0.66044,Dummy_model,False,['past_meta_14']
42823,RADL3.SA,1,meta_14,0.50464,0.50435,0.5045,Dummy_model,True,['past_meta_14']
42766,FLRY3.SA,1,meta_21,0.69998,0.70159,0.70078,Dummy_model,False,['past_meta_21']
42758,FLRY3.SA,1,meta_21,0.47878,0.48026,0.47951,Dummy_model,True,['past_meta_21']
42867,RADL3.SA,1,meta_21,0.61151,0.6113,0.6114,Dummy_model,False,['past_meta_21']
42858,RADL3.SA,1,meta_21,0.51669,0.51643,0.51656,Dummy_model,True,['past_meta_21']
9896,FLRY3.SA,4,meta_7,0.70839,0.48968,0.52261,KAN,False,['meta_7']
4637,FLRY3.SA,2,meta_7,0.33019,0.33744,0.33374,KAN,True,['meta_7']


Metricas regressao - label diff_close_mean_z_score


Unnamed: 0,Mean Absolute Error (MAE),Mean Squared Error (MSE),Root Mean Squared Error (RMSE),R-squared (R2),Mean Absolute Percentage Error (MAPE),ativo,alvo,seq_len,model,leak
235,0.41775,0.36041,0.60034,0.75489,1.48075,FLRY3.SA,diff_close_mean_z_score_14,1,LSTM_with_Attention,False
236,0.54325,0.50617,0.71146,0.65279,2.02491,FLRY3.SA,diff_close_mean_z_score_14,1,MLP,True
847,0.40684,0.37265,0.61045,0.76,1803.80895,RADL3.SA,diff_close_mean_z_score_14,3,LSTM_with_Attention,False
846,0.53146,0.528,0.72664,0.65644,2604.21232,RADL3.SA,diff_close_mean_z_score_14,3,LSTM_with_Attention,True
247,0.35758,0.27842,0.52765,0.8225,191.76303,FLRY3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
536,0.4725,0.39121,0.62547,0.74676,71.88471,FLRY3.SA,diff_close_mean_z_score_21,2,MLP,True
283,0.34432,0.27977,0.52894,0.82705,2.84413,RADL3.SA,diff_close_mean_z_score_21,1,LSTM_with_Attention,False
284,0.46131,0.39785,0.63075,0.75296,3.97417,RADL3.SA,diff_close_mean_z_score_21,1,MLP,True
2239,0.53219,0.5436,0.73729,0.55135,1.60333,FLRY3.SA,diff_close_mean_z_score_7,14,LSTM_with_Attention,False
2240,0.66556,0.73428,0.8569,0.39393,1.98737,FLRY3.SA,diff_close_mean_z_score_7,14,MLP,True


In [21]:
for leak in [True, False]:
        
    fig = px.box(
        macro_clf[macro_clf.leak == leak], x="label_col", 
        y="f1-score", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_models_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
        
    fig = px.box(
        reg[reg.leak == leak], x="label_col", 
        y="R-squared (R2)", 
        points="all", 
        title = f'Distribuição de resultados para cada alvo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_alvo_models_leak={leak}_reg.png")
    fig.show()

In [22]:

macro_clf['domain'] = macro_clf.asset.apply(asset_to_domain)
reg['domain'] = reg.asset.apply(asset_to_domain)

for leak in [True, False]:
        
    fig = px.box(
        macro_clf[macro_clf.leak == leak], x="asset", 
        y="f1-score", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_models_leak={leak}_clf.png")
    fig.show()

for leak in [True, False]:
    
    fig = px.box(
        reg[reg.leak == leak], x="asset", 
        y="R-squared (R2)", 
        points="all", 
        color = 'domain',
        title = f'Distribuição de resultados para cada ativo com leak={leak}')
    
    fig.write_image(PATH_REPORTS + f"/images/box_plot_asset_models_leak={leak}_reg.png")
    fig.show()

In [23]:
# for assets, domain in assets_domain:
#     print(f'''
# ##############################################
# # DOMAIN: {domain}
# # ASSETS: {assets}
# ##############################################
#           ''')
    
#     print('Metricas macro - label meta')
#     metrics_clf = macro_clf[macro_clf.label_col.str.contains('meta') & macro_clf.asset.isin(assets)].\
#     drop(['support', 'scaling_method', 'prediction_type', 'class', 'feature_cols'], axis =1 ).\
#             rename({'asset': 'ativo', 'label_col': 'alvo'},axis=1)[['ativo','seq_len','alvo','precision','recall', 'f1-score', 'model', 'leak']]
    
#     display(metrics_clf)
    
#     print('Metricas regressao - label diff_close_mean_z_score')
    
    
#     metrics_reg = reg[reg.label_col.str.contains('diff_close_mean_z_score') & reg.asset.isin(assets)].\
#         drop(['scaling_method', 'prediction_type', 'feature_cols'], axis = 1).\
#             rename({'asset': 'ativo', 'label_col': 'alvo','f1-score':'valor'},axis=1)
#     display(metrics_reg)