In [59]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
# Root folder of the generated reports; the results-loading loop prefixes its
# CSV path templates with it.
PATH_REPORTS = '../reports/'
# Per-asset CSV path templates ({asset} is a str.format placeholder).
# NOTE(review): neither template is referenced in the cells visible in this
# part of the notebook — confirm they are still needed.
lstm_results_path = 'test_results/LSTM_with_Attention_{asset}_test_results.csv'
mlp_results_path = 'test_results/MLP_{asset}_test_results.csv'
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append('../src/')
from model.evaluation import classification_report, regression_metrics, get_classification_report
from model.config import create_experiment_configs_dummy, create_experiment_configs_tf

In [60]:
# Tickers under analysis, two per sector (same grouping as `assets_domain`).
ASSETS = [
    "PETR3.SA", "PRIO3.SA",  # Petróleo
    "VALE3.SA", "GGBR3.SA",  # Mineração
    "ABCB4.SA", "ITUB3.SA",  # Financeiro
    "FLRY3.SA", "RADL3.SA",  # Saúde
]

# Sequence lengths handed to the "tf" experiment builder:
# 1..7, then every multiple of 7 from 14 up to 70.
seq_len_list = [*range(1, 8), *range(14, 71, 7)]

# Moving-window sizes handed to both experiment builders.
moving_windows = [7, 14, 21]

# Build the experiment-configuration mappings consumed by the results loop.
# Each one is iterated there with .items(), yielding (experiment name, config)
# pairs; the config is indexed by keys such as 'asset' and 'label_col'.
dict_experiments_dummy = create_experiment_configs_dummy(ASSETS, moving_windows)
dict_experiments_tf = create_experiment_configs_tf(ASSETS, seq_len_list, moving_windows)


In [61]:
# NOTE(review): both branches of the results loop reassign `algorithm` on every
# iteration, so this initial value looks unused — confirm before removing.
algorithm = "Dummy_model"

## General results

### Create table

In [62]:
# Build one long-format metrics table per experiment and collect them in
# `list_results` (they are concatenated into a single frame in a later cell).
#
# For every experiment config of both families ("tf" and "dummy") this loop:
#   1. formats the path of that experiment's test-results CSV and loads it;
#   2. fails fast if the CSV contains nulls, naming the offending file;
#   3. computes regression metrics and a classification report from the
#      `y_test` / `y_pred` columns;
#   4. tags the resulting rows with the experiment's metadata.
#
# Raises ValueError on null data or on an unrecognised experiment family.
list_results = []

for name, dict_experiments, path_results in [
    ("tf", dict_experiments_tf, PATH_REPORTS + 'test_results/{algorithm}_{asset}_features={features}__label={label_col}__sql_len={seq_len}__scaling_method={scaling_method}_test_results.csv'),
    ('dummy', dict_experiments_dummy, PATH_REPORTS + "test_results/Dummy_model_{asset}_features={feature_col}__label={label_col}_test_results.csv")
]:

    for exp_name, config in tqdm(dict_experiments.items(), desc=name):

        if name == "tf":
            feature_cols = config['feature_cols']
            label_col = config['label_col']
            seq_len = config['seq_len']
            asset = config['asset']
            scaling_method = config['scaling_method']
            algorithm = config['algorithm']
            prediction_type = config['prediction_type']
            results_file = path_results.format(
                algorithm=algorithm,
                features="_".join(feature_cols),
                label_col=label_col,
                asset=asset.replace(".", "_"),
                scaling_method=str(scaling_method),
                seq_len=seq_len,
            )

        elif name == 'dummy':
            # The dummy baseline has a single feature and no sequence/scaling
            # settings; fixed placeholders keep the output schema uniform.
            feature_cols = [config['feature_col']]
            label_col = config['label_col']
            asset = config['asset']
            algorithm = "Dummy_model"
            seq_len = 1
            scaling_method = None
            prediction_type = 'dummy'
            asset_formated = asset.replace(".", "_")
            results_file = path_results.format(
                feature_col=feature_cols[0],
                label_col=label_col,
                asset=asset_formated,
            )

        else:
            # Without this guard an unknown family would silently reuse the
            # `results` frame loaded in the previous iteration.
            raise ValueError(f'Tipo de experimento desconhecido: {name!r}')

        results = pd.read_csv(results_file)

        if results.isna().any().any():
            raise ValueError(f'Há dados nulos no dataframe de resultados: {results_file}')

        # Regression metrics on the raw values. Despite the `_lstm` suffix this
        # holds the metrics of whichever model the current experiment used.
        reg_metrics_lstm = regression_metrics(results.y_test, results.y_pred)

        # Classification metrics on truncated values (if the value is already
        # the target class, truncation makes no difference).
        y_test = results.y_test
        y_pred = results.y_pred
        y_test_trunc = [int(i) for i in y_test]
        y_pred_trunc = [int(i) for i in y_pred]
        df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        df_cr = df_cr.reset_index(drop=False).rename({'index': 'class'}, axis=1)

        # Append each regression metric as one extra row. The value is repeated
        # in every numeric column so the row fits the classification-report
        # layout; the index is 0..n-1 after reset_index, so len(df_cr) is the
        # next free label.
        for metric, value in reg_metrics_lstm.squeeze().to_dict().items():
            new_row = {
                'class': metric,
                'precision': value,
                'recall': value,
                'f1-score': value,
                'support': value,
            }
            df_cr.loc[len(df_cr)] = new_row

        # Tag every row with the experiment it came from.
        df_cr['asset'] = asset
        df_cr['feature_cols'] = str(feature_cols)
        df_cr['label_col'] = str(label_col)
        df_cr['seq_len'] = seq_len
        df_cr['model'] = algorithm
        df_cr['scaling_method'] = scaling_method
        df_cr['prediction_type'] = prediction_type

        list_results.append(df_cr)

  0%|          | 0/1536 [00:00<?, ?it/s]

100%|██████████| 1536/1536 [00:28<00:00, 53.37it/s]
100%|██████████| 48/48 [00:00<00:00, 61.61it/s]


In [63]:
# Stack every per-experiment table into one frame. The per-table row labels are
# kept (no ignore_index), so index values repeat across experiments.
final_results = pd.concat(list_results)

In [64]:
# Number of result rows contributed by each model.
final_results['model'].value_counts()

model
LSTM_with_Attention    10410
MLP                    10400
Dummy_model              650
Name: count, dtype: int64

In [65]:

# Number of result rows per prediction type.
final_results['prediction_type'].value_counts()

prediction_type
classification    10410
regression        10400
dummy               650
Name: count, dtype: int64

In [66]:
# Sanity check: list rows whose prediction type is missing (expected: none).
# `== None` is an element-wise comparison that is False for every row, even
# the missing ones, so the null test has to go through `.isna()`.
final_results[final_results.prediction_type.isna()]

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method,prediction_type


In [67]:
# Split the combined table into one frame per prediction type.
final_results_dummy = final_results.loc[final_results['prediction_type'].eq('dummy')]
final_results_reg = final_results.loc[final_results['prediction_type'].eq('regression')]
final_results_clf = final_results.loc[final_results['prediction_type'].eq('classification')]

final_results_clf

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method,prediction_type
0,0,7.421260e-01,7.421260e-01,7.421260e-01,5.080000e+02,PETR3.SA,['meta_7'],meta_7,1,LSTM_with_Attention,,classification
1,accuracy,7.002075e-01,7.002075e-01,7.002075e-01,7.002075e-01,PETR3.SA,['meta_7'],meta_7,1,LSTM_with_Attention,,classification
2,1,6.807692e-01,7.137097e-01,6.968504e-01,2.480000e+02,PETR3.SA,['meta_7'],meta_7,1,LSTM_with_Attention,,classification
3,weighted avg,6.821271e-01,7.002075e-01,6.908761e-01,9.640000e+02,PETR3.SA,['meta_7'],meta_7,1,LSTM_with_Attention,,classification
4,4,6.173469e-01,6.685083e-01,6.419098e-01,1.810000e+02,PETR3.SA,['meta_7'],meta_7,1,LSTM_with_Attention,,classification
...,...,...,...,...,...,...,...,...,...,...,...,...
9,Mean Absolute Error (MAE),6.379668e-01,6.379668e-01,6.379668e-01,6.379668e-01,RADL3.SA,['meta_21'],meta_21,70,MLP,,classification
10,Mean Squared Error (MSE),2.978216e+00,2.978216e+00,2.978216e+00,2.978216e+00,RADL3.SA,['meta_21'],meta_21,70,MLP,,classification
11,Root Mean Squared Error (RMSE),1.725751e+00,1.725751e+00,1.725751e+00,1.725751e+00,RADL3.SA,['meta_21'],meta_21,70,MLP,,classification
12,R-squared (R2),4.438631e-01,4.438631e-01,4.438631e-01,4.438631e-01,RADL3.SA,['meta_21'],meta_21,70,MLP,,classification


### Results


#### Dummy results

In [68]:
# general 
macro_dummy = final_results_dummy[final_results_dummy['class'] == 'macro avg']
macro_dummy

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method,prediction_type
5,macro avg,0.53916,0.53916,0.53916,963.0,PETR3.SA,['past_diff_close_mean_z_score_7'],diff_close_mean_z_score_7,1,Dummy_model,,dummy
5,macro avg,0.53916,0.53916,0.53916,963.0,PETR3.SA,['past_meta_7'],meta_7,1,Dummy_model,,dummy
5,macro avg,0.667813,0.667813,0.667813,963.0,PETR3.SA,['past_diff_close_mean_z_score_14'],diff_close_mean_z_score_14,1,Dummy_model,,dummy
5,macro avg,0.667813,0.667813,0.667813,963.0,PETR3.SA,['past_meta_14'],meta_14,1,Dummy_model,,dummy
5,macro avg,0.707253,0.707253,0.707253,963.0,PETR3.SA,['past_diff_close_mean_z_score_21'],diff_close_mean_z_score_21,1,Dummy_model,,dummy
5,macro avg,0.707253,0.707253,0.707253,963.0,PETR3.SA,['past_meta_21'],meta_21,1,Dummy_model,,dummy
5,macro avg,0.482951,0.482595,0.482772,963.0,PRIO3.SA,['past_diff_close_mean_z_score_7'],diff_close_mean_z_score_7,1,Dummy_model,,dummy
5,macro avg,0.482951,0.482595,0.482772,963.0,PRIO3.SA,['past_meta_7'],meta_7,1,Dummy_model,,dummy
5,macro avg,0.654128,0.654128,0.654128,963.0,PRIO3.SA,['past_diff_close_mean_z_score_14'],diff_close_mean_z_score_14,1,Dummy_model,,dummy
5,macro avg,0.654128,0.654128,0.654128,963.0,PRIO3.SA,['past_meta_14'],meta_14,1,Dummy_model,,dummy


In [69]:
# (tickers, sector label) pairs used to group the per-domain report tables.
assets_domain = [
    (["PETR3.SA", "PRIO3.SA"], 'Petróleo'),
    (["VALE3.SA", "GGBR3.SA"], "Mineração"),
    (["ABCB4.SA", "ITUB3.SA"], 'Financeiro'),
    (["FLRY3.SA", "RADL3.SA"], 'Saúde'),
]

In [70]:
# For each sector, print a banner and show the dummy baseline's macro-averaged
# precision / recall / f1-score for the `meta` targets of that sector's assets.
for assets, domain in assets_domain:
    print(f'''
##############################################
# DOMAIN: {domain}
# ASSETS: {assets}
##############################################
          ''')

    print('Metricas macro')
    is_meta_label = macro_dummy['label_col'].str.contains('meta')
    in_domain = macro_dummy['asset'].isin(assets)
    metrics_clf = (
        macro_dummy.loc[is_meta_label & in_domain]
        .drop(columns=['support', 'scaling_method', 'prediction_type', 'seq_len', 'class', 'model', 'feature_cols'])
        .round(2)
        .rename(columns={'asset': 'ativo', 'label_col': 'alvo'})[['ativo', 'alvo', 'precision', 'recall', 'f1-score']]
    )

    display(metrics_clf)


##############################################
# DOMAIN: Petróleo
# ASSETS: ['PETR3.SA', 'PRIO3.SA']
##############################################
          
Metricas macro


Unnamed: 0,ativo,alvo,precision,recall,f1-score
5,PETR3.SA,meta_7,0.54,0.54,0.54
5,PETR3.SA,meta_14,0.67,0.67,0.67
5,PETR3.SA,meta_21,0.71,0.71,0.71
5,PRIO3.SA,meta_7,0.48,0.48,0.48
5,PRIO3.SA,meta_14,0.65,0.65,0.65
4,PRIO3.SA,meta_21,0.74,0.74,0.74



##############################################
# DOMAIN: Mineração
# ASSETS: ['VALE3.SA', 'GGBR3.SA']
##############################################
          
Metricas macro


Unnamed: 0,ativo,alvo,precision,recall,f1-score
5,VALE3.SA,meta_7,0.6,0.6,0.6
5,VALE3.SA,meta_14,0.7,0.7,0.7
6,VALE3.SA,meta_21,0.7,0.7,0.7
5,GGBR3.SA,meta_7,0.52,0.52,0.52
4,GGBR3.SA,meta_14,0.69,0.69,0.69
7,GGBR3.SA,meta_21,0.66,0.66,0.66



##############################################
# DOMAIN: Financeiro
# ASSETS: ['ABCB4.SA', 'ITUB3.SA']
##############################################
          
Metricas macro


Unnamed: 0,ativo,alvo,precision,recall,f1-score
5,ABCB4.SA,meta_7,0.51,0.51,0.51
5,ABCB4.SA,meta_14,0.64,0.64,0.64
7,ABCB4.SA,meta_21,0.57,0.57,0.57
5,ITUB3.SA,meta_7,0.43,0.43,0.43
6,ITUB3.SA,meta_14,0.57,0.57,0.57
7,ITUB3.SA,meta_21,0.54,0.54,0.54



##############################################
# DOMAIN: Saúde
# ASSETS: ['FLRY3.SA', 'RADL3.SA']
##############################################
          
Metricas macro


Unnamed: 0,ativo,alvo,precision,recall,f1-score
5,FLRY3.SA,meta_7,0.51,0.51,0.51
6,FLRY3.SA,meta_14,0.63,0.63,0.63
5,FLRY3.SA,meta_21,0.7,0.7,0.7
5,RADL3.SA,meta_7,0.5,0.5,0.5
5,RADL3.SA,meta_14,0.66,0.66,0.66
6,RADL3.SA,meta_21,0.61,0.61,0.61


In [None]:
# NOTE(review): this cell has no execution count and reads `assets`, which is
# only bound as the loop variable of the per-domain loop; run on its own it
# uses whatever value that loop left behind. It also assigns `metrics_clf`
# without displaying it — looks like a leftover copy of the loop body.
print('Metricas macro')
metrics_clf = (
    macro_dummy.loc[macro_dummy['label_col'].str.contains('meta') & macro_dummy['asset'].isin(assets)]
    .drop(columns=['support', 'scaling_method', 'prediction_type', 'seq_len', 'class', 'model', 'feature_cols'])
    .round(2)
    .rename(columns={'asset': 'ativo', 'label_col': 'alvo'})[['ativo', 'alvo', 'precision', 'recall', 'f1-score']]
)

In [72]:
# Regression-metric rows of the dummy baseline, rounded to two decimals.
final_results_dummy.loc[
    final_results_dummy['class'].isin([
        'Mean Absolute Error (MAE)',
        'Mean Squared Error (MSE)',
        'Root Mean Squared Error (RMSE)',
        'R-squared (R2)',
        'Mean Absolute Percentage Error (MAPE)',
    ])
].round(2)

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method,prediction_type
8,Mean Absolute Error (MAE),6.300000e-01,6.300000e-01,6.300000e-01,6.300000e-01,PETR3.SA,['past_diff_close_mean_z_score_7'],diff_close_mean_z_score_7,1,Dummy_model,,dummy
9,Mean Squared Error (MSE),6.800000e-01,6.800000e-01,6.800000e-01,6.800000e-01,PETR3.SA,['past_diff_close_mean_z_score_7'],diff_close_mean_z_score_7,1,Dummy_model,,dummy
10,Root Mean Squared Error (RMSE),8.300000e-01,8.300000e-01,8.300000e-01,8.300000e-01,PETR3.SA,['past_diff_close_mean_z_score_7'],diff_close_mean_z_score_7,1,Dummy_model,,dummy
11,R-squared (R2),4.400000e-01,4.400000e-01,4.400000e-01,4.400000e-01,PETR3.SA,['past_diff_close_mean_z_score_7'],diff_close_mean_z_score_7,1,Dummy_model,,dummy
12,Mean Absolute Percentage Error (MAPE),9.300000e-01,9.300000e-01,9.300000e-01,9.300000e-01,PETR3.SA,['past_diff_close_mean_z_score_7'],diff_close_mean_z_score_7,1,Dummy_model,,dummy
...,...,...,...,...,...,...,...,...,...,...,...,...
9,Mean Absolute Error (MAE),2.300000e-01,2.300000e-01,2.300000e-01,2.300000e-01,RADL3.SA,['past_meta_21'],meta_21,1,Dummy_model,,dummy
10,Mean Squared Error (MSE),2.600000e-01,2.600000e-01,2.600000e-01,2.600000e-01,RADL3.SA,['past_meta_21'],meta_21,1,Dummy_model,,dummy
11,Root Mean Squared Error (RMSE),5.100000e-01,5.100000e-01,5.100000e-01,5.100000e-01,RADL3.SA,['past_meta_21'],meta_21,1,Dummy_model,,dummy
12,R-squared (R2),6.700000e-01,6.700000e-01,6.700000e-01,6.700000e-01,RADL3.SA,['past_meta_21'],meta_21,1,Dummy_model,,dummy
