In [20]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
PATH_REPORTS = '../reports/'
lstm_results_path = 'test_results/LSTM_with_Attention_{asset}_test_results.csv'
mlp_results_path =  'test_results/MLP_{asset}_test_results.csv'


import sys
sys.path.append('../src/')
from model.evaluation import classification_report, regression_metrics, get_classification_report

In [21]:
ASSETS = [
    # "PETR3.SA", 
    # "PRIO3.SA", 
    # "VALE3.SA", 
    # "GGBR3.SA", 
    # "ABCB4.SA", 
    "ITUB3.SA", 
    # "FLRY3.SA", 
    # "RADL3.SA"
    ]

seq_len_list = [1,2,3,4,5,6,7,14,21,28,35,42,49,56,63,70]

dict_experiments = {}
from sklearn.preprocessing import StandardScaler
exp_id = 0
for seq_len in seq_len_list:
    
    for asset in ASSETS:
        
        for sub_conj_feats in [
            ['diff_close_mean_z_score'], 
            ["Close","Volume","SMA_21","MSTD_21","Day_of_week","diff_close_mean","diff_close_mean_z_score"],
            ["Close","Volume","diff_close_mean_z_score"],
            ['Close'],
            ['Close', 'Volume', 'SMA_21']
            ]:
            
            
            for label_col in ['diff_close_mean_z_score', 'meta', 'Close']:
                
                for scaling_method in [StandardScaler(), None]:
                    
                    for algorithm in ['LSTM_with_Attention', 'MLP']:
    
                        exp_id +=1
                        
                        dict_experiments.update({
                            exp_id:{
                                "feature_cols": sub_conj_feats,
                                "label_col": label_col,
                                "seq_len": seq_len,
                                'asset': asset,
                                'scaling_method': scaling_method,
                                'algorithm': algorithm
                            }
                        })

In [22]:
list_results = []

for exp_name, config in tqdm(dict_experiments.items()):
    
    feature_cols = config['feature_cols']
    label_col = config['label_col']
    seq_len = config['seq_len']
    asset = config['asset']
    scaling_method = config['scaling_method']
    algorithm = config['algorithm']

    for asset in ASSETS:
        
        lstm_results = pd.read_csv(PATH_REPORTS + f'test_results/{algorithm}_{asset.replace(".", "_")}_features={"_".join(feature_cols)}__label={label_col}__sql_len={seq_len}__scaling_method={scaling_method.__str__()}_test_results.csv')
        
        reg_metrics_lstm = regression_metrics(lstm_results.y_test, lstm_results.y_pred)
        
        y_test =  lstm_results.y_test
        y_pred = lstm_results.y_pred
        
        y_test_trunc = [int(i) for i in y_test]
        y_pred_trunc = [int(i) for i in y_pred]
        
        df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        
        df_cr = df_cr.reset_index(drop=False).rename({'index': 'class'}, axis =1 )
        
        for metric, value in reg_metrics_lstm.squeeze().to_dict().items():
            new_row = {
                'class': metric,
                'precision': value,
                'recall': value,
                'f1-score': value,
                'support': value, 
            }
            df_cr.loc[len(df_cr)] = new_row
        
        df_cr['asset'] = asset
        df_cr['feature_cols'] = str(feature_cols)
        df_cr['label_col'] = str(label_col)
        df_cr['seq_len'] = seq_len
        df_cr['model'] = 'LSTM'
        
        list_results.append(df_cr)
        
        # mlp_results = pd.read_csv(PATH_REPORTS + f'test_results/MLP_{asset.replace(".", "_")}_features={"_".join(feature_cols)}__label={"_".join(label_col)}__sql_len={seq_len}_test_results.csv')

        # reg_metrics_mlp = regression_metrics(mlp_results.y_test, mlp_results.y_pred)

        # y_test =  mlp_results.y_test
        # y_pred = mlp_results.y_pred
        
        # y_test_trunc = [int(i) for i in y_test]
        # y_pred_trunc = [int(i) for i in y_pred]
        
        # df_cr = get_classification_report(y_test_trunc, y_pred_trunc)
        # df_cr = df_cr.reset_index(drop=False).rename({'index': 'class'}, axis =1 )
        
        # for metric, value in reg_metrics_mlp.squeeze().to_dict().items():
        #     new_row = {
        #         'class': metric,
        #         'precision': value,
        #         'recall': value,
        #         'f1-score': value,
        #         'support': value, 
        #     }
        #     df_cr.loc[len(df_cr)] = new_row
        
        # df_cr['asset'] = asset
        # df_cr['feature_cols'] = str(feature_cols)
        # df_cr['label_col'] = str(label_col)
        # df_cr['seq_len'] = seq_len
        # df_cr['model'] = 'MLP'
        
        # list_results.append(df_cr)


  0%|          | 0/960 [00:00<?, ?it/s]

100%|██████████| 960/960 [00:18<00:00, 52.37it/s]


In [23]:
final_results = pd.concat(list_results)

In [24]:
final_results

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model
0,0,0.660177,0.925558,0.770661,403.000000,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
1,1,0.711864,0.691358,0.701461,243.000000,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
2,accuracy,0.663900,0.663900,0.663900,0.663900,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
3,weighted avg,0.697546,0.663900,0.616664,964.000000,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
4,-1,0.600000,0.470588,0.527473,204.000000,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
...,...,...,...,...,...,...,...,...,...,...
16,Mean Absolute Error (MAE),22.638399,22.638399,22.638399,22.638399,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",Close,70,LSTM
17,Mean Squared Error (MSE),519.428153,519.428153,519.428153,519.428153,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",Close,70,LSTM
18,Root Mean Squared Error (RMSE),22.790966,22.790966,22.790966,22.790966,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",Close,70,LSTM
19,R-squared (R2),-73.942277,-73.942277,-73.942277,-73.942277,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",Close,70,LSTM


In [25]:
None.__str__()

'None'

In [26]:
final_results[final_results['class'] == 'R-squared (R2)'].groupby(['feature_cols', 'label_col', 'seq_len', 'model']).mean('mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,precision,recall,f1-score,support
feature_cols,label_col,seq_len,model,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,1,LSTM,-72.946070,-72.946070,-72.946070,-72.946070
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,2,LSTM,-72.786682,-72.786682,-72.786682,-72.786682
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,3,LSTM,-72.273983,-72.273983,-72.273983,-72.273983
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,4,LSTM,-72.658764,-72.658764,-72.658764,-72.658764
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,5,LSTM,-72.194954,-72.194954,-72.194954,-72.194954
...,...,...,...,...,...,...,...
['diff_close_mean_z_score'],meta,42,LSTM,0.685306,0.685306,0.685306,0.685306
['diff_close_mean_z_score'],meta,49,LSTM,0.683496,0.683496,0.683496,0.683496
['diff_close_mean_z_score'],meta,56,LSTM,0.683124,0.683124,0.683124,0.683124
['diff_close_mean_z_score'],meta,63,LSTM,0.685106,0.685106,0.685106,0.685106


In [27]:
final_results[final_results['class'] == 'macro avg'].groupby(['feature_cols', 'label_col', 'seq_len', 'model']).mean('mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,precision,recall,f1-score,support
feature_cols,label_col,seq_len,model,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,1,LSTM,0.000000,0.000000,0.000000,964.0
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,2,LSTM,0.000000,0.000000,0.000000,964.0
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,3,LSTM,0.000000,0.000000,0.000000,964.0
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,4,LSTM,0.000000,0.000000,0.000000,964.0
"['Close', 'Volume', 'SMA_21', 'MSTD_21', 'Day_of_week', 'diff_close_mean', 'diff_close_mean_z_score']",Close,5,LSTM,0.000000,0.000000,0.000000,964.0
...,...,...,...,...,...,...,...
['diff_close_mean_z_score'],meta,42,LSTM,0.153531,0.160116,0.120067,964.0
['diff_close_mean_z_score'],meta,49,LSTM,0.147952,0.159366,0.118604,964.0
['diff_close_mean_z_score'],meta,56,LSTM,0.146622,0.159398,0.118510,964.0
['diff_close_mean_z_score'],meta,63,LSTM,0.149230,0.159688,0.118923,964.0


In [28]:
for asset in ASSETS:

    display(final_results[
        (final_results['feature_cols'] == "['diff_close_mean_z_score']") &
        (final_results['label_col'] == "['diff_close_mean_z_score']") &
        (final_results['model'] == "MLP") &
        (final_results['seq_len'] == 15) &
        (final_results['asset'] == 'PETR3.SA')
        ])

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model


In [29]:
final_results[final_results['class'] == 'R-squared (R2)']

Unnamed: 0,class,precision,recall,f1-score,support,asset,feature_cols,label_col,seq_len,model
13,R-squared (R2),0.816779,0.816779,0.816779,0.816779,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
13,R-squared (R2),0.814224,0.814224,0.814224,0.814224,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
13,R-squared (R2),0.816157,0.816157,0.816157,0.816157,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
13,R-squared (R2),0.814361,0.814361,0.814361,0.814361,ITUB3.SA,['diff_close_mean_z_score'],diff_close_mean_z_score,1,LSTM
13,R-squared (R2),0.708306,0.708306,0.708306,0.708306,ITUB3.SA,['diff_close_mean_z_score'],meta,1,LSTM
...,...,...,...,...,...,...,...,...,...,...
13,R-squared (R2),-0.000445,-0.000445,-0.000445,-0.000445,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",meta,70,LSTM
21,R-squared (R2),-69.653233,-69.653233,-69.653233,-69.653233,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",Close,70,LSTM
22,R-squared (R2),-68.371770,-68.371770,-68.371770,-68.371770,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",Close,70,LSTM
19,R-squared (R2),-74.522803,-74.522803,-74.522803,-74.522803,ITUB3.SA,"['Close', 'Volume', 'SMA_21']",Close,70,LSTM
