In [23]:
import pandas as pd
import re
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error

MAPE

In [24]:
tmp_df = pd.DataFrame(columns=['type','features','fold','MAPE'])
type_list = ['mlp','rf'] #,'svm','gp'
feat_list = ['all','ou','classic','pupil']
for fold in range(5):
    for type in type_list:
        for feat in feat_list:

            if type=='mlp':
                path='MLP/MLPRegressor'
            if type=='rf':
                path='RF/RandomForestRegressor'
            if type=='gp':
                path='GP/SparseGPRegression'
            if type=='svm':
                path='SVR/SVR'

            RF_pred = np.load('results/kfold_subject/subject_based/'+path+'_y_pred_'+feat+'_features_'+str(fold)+'.npy', allow_pickle=True)
            RF_test = np.load('results/kfold_subject/subject_based/'+path+'_y_test_'+feat+'_features_'+str(fold)+'.npy', allow_pickle=True)
            mape = (mean_absolute_percentage_error(RF_test, RF_pred))
            dct = ({ 'type': type,
                     'features': feat,
                     'fold': fold+1,
                     'index': 1,
                     'MAPE': mape
                   })
            dct = {k:[v] for k,v in dct.items()}  # WORKAROUND
            df = pd.DataFrame(dct)
            tmp_df = pd.concat((tmp_df,df))

In [25]:
MAPE_df = tmp_df.drop(columns=['index'])
MAPE_df

Unnamed: 0,type,features,fold,MAPE
0,mlp,all,1,0.307251
0,mlp,ou,1,0.397121
0,mlp,classic,1,0.340453
0,mlp,pupil,1,0.315663
0,rf,all,1,0.34372
0,rf,ou,1,0.346047
0,rf,classic,1,0.338887
0,rf,pupil,1,0.323192
0,mlp,all,2,0.259517
0,mlp,ou,2,0.283344


In [26]:
def parse_file(namefile):
    
    df_tmp = pd.DataFrame(columns = ['type','fold','RMSE','MAE','R2','features'])

    with open(namefile, "r") as f:
        file_contents = f.read()

    rmse = re.findall(r"RMSE test (-?[\d.]+)", file_contents)
    mae = re.findall(r"MAE test (-?[\d.]+)", file_contents)
    r2 = re.findall(r"R2 test (-?[\d.]+)", file_contents)
    
    pattern = r"_(.*?)\.log"
    match = re.search(pattern, namefile)
    print(match.group(1))
    if match:
        type_ = match.group(1)

    df_tmp['RMSE'] = pd.to_numeric(pd.Series(rmse))
    df_tmp['MAE'] = pd.to_numeric(pd.Series(mae))
    df_tmp['R2'] = pd.to_numeric(pd.Series(r2))
    df_tmp['type'] = pd.Series([type_]*len(mae))
    df_tmp['fold'] = pd.to_numeric(pd.Series([1,2,3,4,5]*(4)))
    
    series_values = ['all'] * 5 + ['classic'] * 5 + ['pupil'] * 5 + ['ou'] * 5
    series_index = ['A', 'B', 'C', 'D', 'E'] * 4

    df_tmp['features'] = pd.Series(series_values)

    return df_tmp

In [27]:
df_mlp = parse_file("results/kfold_subject/output_mlp.log")

subject/output_mlp


In [28]:
df_rf = parse_file("results/kfold_subject/output_rf.log")

subject/output_rf


In [29]:
df_values = pd.concat((df_mlp,df_rf))

In [30]:
df_values

Unnamed: 0,type,fold,RMSE,MAE,R2,features
0,subject/output_mlp,1,5.943081,5.188627,0.069311,all
1,subject/output_mlp,2,10.161729,6.064146,-0.058481,all
2,subject/output_mlp,3,7.13702,5.723141,-0.101415,all
3,subject/output_mlp,4,8.141195,6.987332,-0.064724,all
4,subject/output_mlp,5,8.214942,6.589985,0.039356,all
5,subject/output_mlp,1,6.291195,5.489279,-0.042912,classic
6,subject/output_mlp,2,9.857382,6.211742,0.003973,classic
7,subject/output_mlp,3,7.009607,5.380381,-0.06244,classic
8,subject/output_mlp,4,7.916204,6.806904,-0.006687,classic
9,subject/output_mlp,5,8.318495,6.942868,0.014984,classic


In [31]:
df_values['features'].unique()

array(['all', 'classic', 'pupil', 'ou'], dtype=object)

In [32]:
MAPE_df['features'].unique()

array(['all', 'ou', 'classic', 'pupil'], dtype=object)

In [33]:
MAPE_df.reset_index()

Unnamed: 0,index,type,features,fold,MAPE
0,0,mlp,all,1,0.307251
1,0,mlp,ou,1,0.397121
2,0,mlp,classic,1,0.340453
3,0,mlp,pupil,1,0.315663
4,0,rf,all,1,0.34372
5,0,rf,ou,1,0.346047
6,0,rf,classic,1,0.338887
7,0,rf,pupil,1,0.323192
8,0,mlp,all,2,0.259517
9,0,mlp,ou,2,0.283344


In [34]:
result = pd.merge(df_values,MAPE_df,on=['fold','features','type'],how='inner')
result

Unnamed: 0,type,fold,RMSE,MAE,R2,features,MAPE


In [35]:
result.to_csv('metrics_results.csv', index=False)

In [36]:
df_mlp = result[result['type']=='mlp']

In [37]:
df_mlp[['RMSE','MAE','MAPE','R2','features']].groupby(['features']).mean().sort_values('RMSE')

Unnamed: 0_level_0,RMSE,MAE,MAPE,R2
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [38]:
df_gp = result[result['type']=='gp']

In [39]:
df_gp[['RMSE','MAE','MAPE','R2','features']].groupby(['features']).mean().sort_values('RMSE')

Unnamed: 0_level_0,RMSE,MAE,MAPE,R2
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [40]:
df_rf = result[result['type']=='rf']

In [41]:
df_rf[['RMSE','MAE','MAPE','R2','features']].groupby(['features']).mean().sort_values('RMSE')

Unnamed: 0_level_0,RMSE,MAE,MAPE,R2
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [42]:
df_svm = result[result['type']=='svm']

In [43]:
df_svm[['RMSE','MAE','R2','MAPE','features']].groupby(['features']).mean().sort_values('RMSE')

Unnamed: 0_level_0,RMSE,MAE,R2,MAPE
features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
