In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
# Relative path to the reports directory; used as the prefix of the results-file template in the aggregation loop.
PATH_REPORTS = '../reports/'
# Per-model result-file templates containing an {asset} placeholder.
# NOTE(review): neither template is referenced in the cells visible here — confirm they are still needed.
lstm_results_path = 'test_results/LSTM_with_Attention_{asset}_test_results.csv'
mlp_results_path =  'test_results/MLP_{asset}_test_results.csv'
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append('../src/')
from model.evaluation import classification_report, regression_metrics, get_classification_report
from model.config import create_experiment_configs_dummy, create_experiment_configs_tf

In [2]:
# Tickers included in this run; commented-out entries are currently excluded.
ASSETS = [
    # "PETR3.SA",
    # "PRIO3.SA",
    # "VALE3.SA",
    # "GGBR3.SA",
    # "ABCB4.SA",
    "ITUB3.SA",
    # "FLRY3.SA",
    # "RADL3.SA"
]

# Sequence lengths: 1 through 7, then every multiple of 7 from 14 up to 70.
seq_len_list = [*range(1, 8), *range(14, 71, 7)]

# Moving-window sizes passed to both experiment-config builders.
moving_windows = [7, 14, 21]

# Experiment configurations, built by the project helpers imported from model.config.
dict_experiments_dummy = create_experiment_configs_dummy(ASSETS, moving_windows)
dict_experiments_tf = create_experiment_configs_tf(ASSETS, seq_len_list, moving_windows)


In [3]:
# NOTE(review): this value is reassigned inside the results loop (from config['algorithm'] or
# "Dummy_model") before it is read in the cells visible here — confirm this cell is still needed.
algorithm = "Dummy_model"

In [6]:
# One tidy report frame per experiment; they are concatenated in a later cell.
list_results = []
# Names of experiments dropped because their results file contains missing values.
skipped_experiments = []

# Each run is (kind, experiment configs, results-path template).
# Only the "tf" experiments are listed here, so the 'dummy' branch below is not
# exercised by this cell as written (dict_experiments_dummy is not in the list).
for name, dict_experiments, path_results in [
    ("tf", dict_experiments_tf, PATH_REPORTS + 'test_results/{algorithm}_{asset}_features={features}__label={label_col}__sql_len={seq_len}__scaling_method={scaling_method}_test_results.csv'),
]:
    for exp_name, config in tqdm(dict_experiments.items()):

        if name == "tf":
            feature_cols = config['feature_cols']
            label_col = config['label_col']
            seq_len = config['seq_len']
            asset = config['asset']
            scaling_method = config['scaling_method']
            algorithm = config['algorithm']
            results = pd.read_csv(path_results.format(
                algorithm=algorithm,
                features="_".join(feature_cols),
                label_col=label_col,
                # File names use underscores where the ticker has dots.
                asset=asset.replace(".", "_"),
                scaling_method=str(scaling_method),
                seq_len=seq_len,
            ))

        elif name == 'dummy':
            feature_cols = [config['feature_col']]
            label_col = config['label_col']
            asset = config['asset']
            algorithm = "Dummy_model"
            seq_len = 1
            scaling_method = None
            # NOTE(review): this branch supplies `feature_col` but not `features`,
            # `seq_len` or `scaling_method`, so it needs its own path template —
            # the "tf" template above would raise KeyError here.
            results = pd.read_csv(path_results.format(
                algorithm=algorithm,
                feature_col=feature_cols[0],
                label_col=label_col,
                asset=asset.replace(".", "_"),
            ))

        else:
            # Without this guard an unknown kind would silently reuse `results`
            # from the previous iteration (or hit a NameError on the first one).
            raise ValueError(f"Unknown experiment kind: {name!r}")

        # Experiments with any missing value are excluded; record which ones.
        if results.isna().any().any():
            skipped_experiments.append(exp_name)
            continue

        y_test = results.y_test
        y_pred = results.y_pred

        reg_metrics = regression_metrics(y_test, y_pred)

        # int() truncates toward zero, so every value in (-1, 1) lands in class 0.
        y_test_trunc = [int(value) for value in y_test]
        y_pred_trunc = [int(value) for value in y_pred]

        df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        df_cr = df_cr.reset_index(drop=False).rename({'index': 'class'}, axis=1)

        # Append each regression metric as an extra row; the value is repeated in
        # every score column so the frame keeps a single schema.
        for metric, value in reg_metrics.squeeze().to_dict().items():
            df_cr.loc[len(df_cr)] = {
                'class': metric,
                'precision': value,
                'recall': value,
                'f1-score': value,
                'support': value,
            }

        # Tag every row with the experiment it came from.
        df_cr['asset'] = asset
        df_cr['feature_cols'] = str(feature_cols)
        df_cr['label_col'] = str(label_col)
        df_cr['seq_len'] = seq_len
        df_cr['model'] = algorithm
        df_cr['scaling_method'] = scaling_method

        list_results.append(df_cr)

100%|██████████| 192/192 [00:04<00:00, 41.54it/s]


In [8]:
# Stack the per-experiment report frames row-wise.
# Each frame keeps its own 0..n-1 index, so index labels repeat across experiments.
final_results = pd.concat(list_results, axis=0, ignore_index=False)

In [10]:
# Display the combined report table.
final_results

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method
0,0,9.418484e-01,9.988987e-01,9.695350e-01,9.080000e+02,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,MLP,StandardScaler()
1,accuracy,9.408714e-01,9.408714e-01,9.408714e-01,9.408714e-01,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,MLP,StandardScaler()
2,weighted avg,8.871352e-01,9.408714e-01,9.132135e-01,9.640000e+02,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,MLP,StandardScaler()
3,macro avg,1.569747e-01,1.664831e-01,1.615892e-01,9.640000e+02,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,MLP,StandardScaler()
4,-4,0.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,MLP,StandardScaler()
...,...,...,...,...,...,...,...,...,...,...,...
9,Mean Absolute Error (MAE),3.138942e-01,3.138942e-01,3.138942e-01,3.138942e-01,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,70,MLP,
10,Mean Squared Error (MSE),2.624346e-01,2.624346e-01,2.624346e-01,2.624346e-01,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,70,MLP,
11,Root Mean Squared Error (RMSE),5.122837e-01,5.122837e-01,5.122837e-01,5.122837e-01,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,70,MLP,
12,R-squared (R2),1.544079e-02,1.544079e-02,1.544079e-02,1.544079e-02,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,70,MLP,


In [14]:
# R-squared rows for the 'diff_close_mean_z_score_21_diff' label, in ascending order.
# Regression-metric rows hold the same value in every score column, so
# 'f1-score' works as the sort key.
(
    final_results
    .loc[lambda df: df['class'].eq('R-squared (R2)')
         & df['label_col'].eq('diff_close_mean_z_score_21_diff')]
    .sort_values(by='f1-score')
)

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method
12,R-squared (R2),-0.001672,-0.001672,-0.001672,-0.001672,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,MLP,StandardScaler()
12,R-squared (R2),-0.000959,-0.000959,-0.000959,-0.000959,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,6,MLP,
12,R-squared (R2),-0.000900,-0.000900,-0.000900,-0.000900,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,MLP,
12,R-squared (R2),0.000153,0.000153,0.000153,0.000153,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,6,LSTM_with_Attention,
12,R-squared (R2),0.000157,0.000157,0.000157,0.000157,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,1,LSTM_with_Attention,
...,...,...,...,...,...,...,...,...,...,...,...
12,R-squared (R2),0.017385,0.017385,0.017385,0.017385,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,56,MLP,
12,R-squared (R2),0.017506,0.017506,0.017506,0.017506,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,63,LSTM_with_Attention,StandardScaler()
12,R-squared (R2),0.017901,0.017901,0.017901,0.017901,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,42,LSTM_with_Attention,
12,R-squared (R2),0.019147,0.019147,0.019147,0.019147,ITUB3.SA,['diff_close_mean_z_score_21_diff'],diff_close_mean_z_score_21_diff,63,MLP,


In [None]:
# Five highest R-squared rows for the 'diff_close_mean_z_score_21' label.
# Regression-metric rows hold the same value in every score column, so
# 'f1-score' works as the sort key.
(
    final_results
    .loc[lambda df: df['class'].eq('R-squared (R2)')
         & df['label_col'].eq('diff_close_mean_z_score_21')]
    .sort_values(by='f1-score')
    .tail(5)
)

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method
13,R-squared (R2),0.724818,0.724818,0.724818,0.724818,ITUB3.SA,['past_diff_close_mean_z_score_21'],diff_close_mean_z_score_21,1,Dummy_model,


In [None]:
# Macro-average rows of the Dummy_model for the 'diff_close_mean_z_score_21'
# label, sorted by f1-score; tail(199) keeps at most the last 199 matches.
(
    final_results
    .loc[lambda df: df['class'].eq('macro avg')
         & df['label_col'].eq('diff_close_mean_z_score_21')
         & df['model'].eq('Dummy_model')]
    .sort_values(by='f1-score')
    .tail(199)
)

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method
7,macro avg,0.536742,0.536995,0.536868,963.0,ITUB3.SA,['past_diff_close_mean_z_score_21'],diff_close_mean_z_score_21,1,Dummy_model,


In [None]:
# Five best macro-average f1-scores across all models for the
# 'diff_close_mean_z_score_21' label.
(
    final_results
    .loc[lambda df: df['class'].eq('macro avg')
         & df['label_col'].eq('diff_close_mean_z_score_21')]
    .sort_values(by='f1-score')
    .tail(5)
)

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model,scaling_method
5,macro avg,0.567579,0.310273,0.312683,964.0,ITUB3.SA,['diff_close_mean_z_score_21'],diff_close_mean_z_score_21,7,MLP,StandardScaler()
5,macro avg,0.540619,0.31328,0.315079,964.0,ITUB3.SA,['diff_close_mean_z_score_21'],diff_close_mean_z_score_21,3,MLP,
5,macro avg,0.567776,0.313739,0.318082,964.0,ITUB3.SA,['diff_close_mean_z_score_21'],diff_close_mean_z_score_21,3,MLP,StandardScaler()
5,macro avg,0.56796,0.313852,0.318276,964.0,ITUB3.SA,['diff_close_mean_z_score_21'],diff_close_mean_z_score_21,4,LSTM_with_Attention,StandardScaler()
7,macro avg,0.536742,0.536995,0.536868,963.0,ITUB3.SA,['past_diff_close_mean_z_score_21'],diff_close_mean_z_score_21,1,Dummy_model,
