In [1]:
import pandas as pd
import re
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error

MAPE

In [2]:
tmp_df = pd.DataFrame(columns=['type','features','fold','MAPE'])
type_list = ['mlp','rf','svm','gp']
feat_list = ['all','ou','classic','pupil']
for fold in range(5):
    for type in type_list:
        for feat in feat_list:

            if type=='mlp':
                path='MLP/MLPRegressor'
            if type=='rf':
                path='RF/RandomForestRegressor'
            if type=='gp':
                path='GP/SparseGPRegression'
            if type=='svm':
                path='SVR/SVR'

            RF_pred = np.load('results/trial_based/'+path+'_y_pred_'+feat+'_features_'+str(fold)+'.npy', allow_pickle=True)
            RF_test = np.load('results/trial_based/'+path+'_y_test_'+feat+'_features_'+str(fold)+'.npy', allow_pickle=True)
            mape = (mean_absolute_percentage_error(RF_test, RF_pred))
            dct = ({ 'type': type,
                     'features': feat,
                     'fold': fold+1,
                     'index': 1,
                     'MAPE': mape
                   })
            dct = {k:[v] for k,v in dct.items()}  # WORKAROUND
            df = pd.DataFrame(dct)
            tmp_df = pd.concat((tmp_df,df))

In [3]:
MAPE_df = tmp_df.drop(columns=['index'])
MAPE_df

Unnamed: 0,type,features,fold,MAPE
0,mlp,all,1,0.261308
0,mlp,ou,1,0.309057
0,mlp,classic,1,0.351389
0,mlp,pupil,1,0.298471
0,rf,all,1,0.256900
...,...,...,...,...
0,svm,pupil,5,0.292694
0,gp,all,5,0.278523
0,gp,ou,5,0.331144
0,gp,classic,5,0.337592


In [4]:
def parse_file(namefile):
    
    df_tmp = pd.DataFrame(columns = ['type','fold','RMSE','MAE','R2','features'])

    with open(namefile, "r") as f:
        file_contents = f.read()

    rmse = re.findall(r"RMSE test (-?[\d.]+)", file_contents)
    mae = re.findall(r"MAE test (-?[\d.]+)", file_contents)
    r2 = re.findall(r"R2 test (-?[\d.]+)", file_contents)
    
    pattern = r"_(.*?)\.log"
    match = re.search(pattern, namefile)
    if match:
        type_ = match.group(1)

    df_tmp['RMSE'] = pd.to_numeric(pd.Series(rmse))
    df_tmp['MAE'] = pd.to_numeric(pd.Series(mae))
    df_tmp['R2'] = pd.to_numeric(pd.Series(r2))
    df_tmp['type'] = pd.Series([type_]*len(mae))
    df_tmp['fold'] = pd.to_numeric(pd.Series([1,2,3,4,5]*(4)))
    
    series_values = ['all'] * 5 + ['classic'] * 5 + ['pupil'] * 5 + ['ou'] * 5
    series_index = ['A', 'B', 'C', 'D', 'E'] * 4

    df_tmp['features'] = pd.Series(series_values)

    return df_tmp

In [5]:
df_mlp = parse_file("results/output_mlp.log")

In [6]:
df_gp = parse_file("results/output_gp.log")

In [7]:
df_rf = parse_file("results/output_rf.log")

In [8]:
df_svm = parse_file("results/output_svm.log")

In [9]:
df_values = pd.concat((df_mlp,df_gp,df_rf,df_svm))

In [10]:
df_values

Unnamed: 0,type,fold,RMSE,MAE,R2,features
0,mlp,1,6.645393,4.734410,0.299397,all
1,mlp,2,6.241332,4.531311,0.382342,all
2,mlp,3,6.652367,4.861051,0.298304,all
3,mlp,4,6.237107,4.401830,0.382868,all
4,mlp,5,7.016061,4.540525,0.220856,all
...,...,...,...,...,...,...
15,svm,1,7.958160,5.568466,-0.004745,ou
16,svm,2,7.877014,5.514766,0.016178,ou
17,svm,3,7.917876,5.545049,0.005937,ou
18,svm,4,7.953239,5.544676,-0.003459,ou


In [11]:
df_values['features'].unique()

array(['all', 'classic', 'pupil', 'ou'], dtype=object)

In [12]:
MAPE_df['features'].unique()

array(['all', 'ou', 'classic', 'pupil'], dtype=object)

In [13]:
MAPE_df.reset_index()

Unnamed: 0,index,type,features,fold,MAPE
0,0,mlp,all,1,0.261308
1,0,mlp,ou,1,0.309057
2,0,mlp,classic,1,0.351389
3,0,mlp,pupil,1,0.298471
4,0,rf,all,1,0.256900
...,...,...,...,...,...
75,0,svm,pupil,5,0.292694
76,0,gp,all,5,0.278523
77,0,gp,ou,5,0.331144
78,0,gp,classic,5,0.337592


In [14]:
result = pd.merge(df_values,MAPE_df,on=['fold','features','type'],how='inner')
result

Unnamed: 0,type,fold,RMSE,MAE,R2,features,MAPE
0,mlp,1,6.645393,4.734410,0.299397,all,0.261308
1,mlp,2,6.241332,4.531311,0.382342,all,0.253590
2,mlp,3,6.652367,4.861051,0.298304,all,0.266866
3,mlp,4,6.237107,4.401830,0.382868,all,0.237622
4,mlp,5,7.016061,4.540525,0.220856,all,0.248136
...,...,...,...,...,...,...,...
75,svm,1,7.958160,5.568466,-0.004745,ou,0.276135
76,svm,2,7.877014,5.514766,0.016178,ou,0.275526
77,svm,3,7.917876,5.545049,0.005937,ou,0.276901
78,svm,4,7.953239,5.544676,-0.003459,ou,0.275710


In [15]:
result.to_csv('metrics_results.csv', index=False)

In [16]:
df_mlp = df[df['type']=='mlp']

In [17]:
df_mlp[['RMSE','MAE','R2','features']].groupby(['features']).mean().sort_values('RMSE')

KeyError: "['RMSE', 'MAE', 'R2'] not in index"

In [None]:
df_gp = df[df['type']=='gp']

In [None]:
df_gp[['RMSE','MAE','R2','features']].groupby(['features']).mean().sort_values('RMSE')

In [None]:
df_rf = df[df['type']=='rf']

In [None]:
df_rf[['RMSE','MAE','R2','features']].groupby(['features']).mean().sort_values('RMSE')

In [None]:
df_svm = df[df['type']=='svm']

In [None]:
df_svm[['RMSE','MAE','R2','features']].groupby(['features']).mean().sort_values('RMSE')