**Questions:**

- Which model has the highest weighted precision?
- Which model has the highest precision by classification type?
- Which model has the highest f1 value (microavg)?

In [250]:
import pandas as pd
import numpy as np
import glob, os

# Let pandas render up to 100 columns and use up to 1000 characters of width,
# so the wide sweep-result tables below are not elided.
pd.options.display.max_columns = 100
pd.options.display.width = 1000

In [251]:
# Directories that hold the sweep result CSVs; add more paths to pool several runs.
list_path = ['./models_out_inverted/']

# One list of matching files per directory. glob.glob() makes no ordering
# guarantee, so sort each listing: with ignore_index=True below, the row labels
# of `df` depend on the order in which the files are concatenated.
all_files = [sorted(glob.glob(os.path.join(path, 'sweeps_*.csv'))) for path in list_path]

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when the pattern matches nothing.
if not any(all_files):
    raise FileNotFoundError(
        "No 'sweeps_*.csv' files found in: {}".format(', '.join(list_path))
    )

# Read the files lazily and stack them into a single frame with a fresh 0..n-1 index.
df_from_each_file = (pd.read_csv(file) for list_files in all_files for file in list_files)
df = pd.concat(df_from_each_file, ignore_index=True)


In [252]:
# Preview the first three rows of the combined sweep results.
df.iloc[:3]

Unnamed: 0.1,Unnamed: 0,model_type,model,sweeps_market_variables,sweep_news_agg,sweep_buy_sell,before_data,sweep_grid,best_score,best_params,f1_microavg,precision_weighted,precision_EUR_down,precision_EUR_same,precision_EUR_up,support_EUR_down,support_EUR_same,support_EUR_up,clf_report,elapsed_time
0,0,clf,kn,basic,ALL_NO_1_1_1,0_60,included,basic,0.51,{'n_neighbors': 161},0.53,0.88,0.05,0.96,0.11,32.0,1178.0,82.0,"{'0': {'precision': 0.05144694533762058, 'reca...",1.86
1,1,clf,svc-rbf,basic,ALL_NO_1_1_1,0_60,included,basic,0.5,"{'C': 1, 'gamma': 1}",0.51,1.0,0.0,1.0,0.0,0.0,1292.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",5.56
2,2,clf,dtree,basic,ALL_NO_1_1_1,0_60,included,basic,0.51,"{'max_depth': 7, 'min_samples_leaf': 100}",0.51,0.63,0.26,0.8,0.15,244.0,908.0,140.0,"{'0': {'precision': 0.2572347266881029, 'recal...",0.26


We split the dataframe into classification (`clf`) and regression (`reg`) models.

In [258]:
# Partition the sweep results by the value of the `model_type` column.
df_clf = df.loc[df['model_type'].eq('clf')]
df_reg = df.loc[df['model_type'].eq('reg')]

**Which model has the highest weighted precision?**

In [259]:
# Five classifier runs with the highest weighted precision, best first.
# NOTE(review): in the stored output every top row has zero support for
# EUR_down and EUR_up, so read the score together with the support_* columns.
(
    df_clf
    .sort_values('precision_weighted', ascending=False)
    .head(5)
)

Unnamed: 0.1,Unnamed: 0,model_type,model,sweeps_market_variables,sweep_news_agg,sweep_buy_sell,before_data,sweep_grid,best_score,best_params,f1_microavg,precision_weighted,precision_EUR_down,precision_EUR_same,precision_EUR_up,support_EUR_down,support_EUR_same,support_EUR_up,clf_report,elapsed_time
155,1,clf,svc-rbf,basic,High_YES_1_1_1,30_60,included,basic,0.46,"{'C': 1, 'gamma': 1}",0.42,1.0,0.0,1.0,0.0,0.0,389.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",0.42
1,1,clf,svc-rbf,basic,ALL_NO_1_1_1,0_60,included,basic,0.5,"{'C': 1, 'gamma': 1}",0.51,1.0,0.0,1.0,0.0,0.0,1292.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",5.56
407,1,clf,svc-rbf,basic,High_YES_1_1_1,60_180,included,basic,0.45,"{'C': 1, 'gamma': 1}",0.47,1.0,0.0,1.0,0.0,0.0,389.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",0.45
414,1,clf,svc-rbf,all,High_YES_1_1_1,60_180,included,basic,0.45,"{'C': 1, 'gamma': 1}",0.47,1.0,0.0,1.0,0.0,0.0,389.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",0.34
421,1,clf,svc-rbf,basic,ALL_NO_1_1_1,60_240,included,basic,0.5,"{'C': 1, 'gamma': 1}",0.48,1.0,0.0,1.0,0.0,0.0,1292.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",7.13


**Which model has the highest precision by classification type?**

In [261]:
# Five classifier runs with the highest precision on the EUR_down class, best first.
(
    df_clf
    .sort_values('precision_EUR_down', ascending=False)
    .head(5)
)

Unnamed: 0.1,Unnamed: 0,model_type,model,sweeps_market_variables,sweep_news_agg,sweep_buy_sell,before_data,sweep_grid,best_score,best_params,f1_microavg,precision_weighted,precision_EUR_down,precision_EUR_same,precision_EUR_up,support_EUR_down,support_EUR_same,support_EUR_up,clf_report,elapsed_time
160,6,clf,ada,basic,High_YES_1_1_1,30_60,included,basic,0.68,{'n_estimators': 10},0.67,0.67,0.78,0.64,0.59,127.0,167.0,95.0,"{'0': {'precision': 0.7758620689655172, 'recal...",0.21
142,2,clf,dtree,basic,High_NO_1_1_1,30_60,included,basic,0.7,"{'max_depth': 2, 'min_samples_leaf': 10}",0.68,0.68,0.76,0.64,0.64,172.0,208.0,124.0,"{'0': {'precision': 0.757396449704142, 'recall...",0.13
149,2,clf,dtree,all,High_NO_1_1_1,30_60,included,basic,0.7,"{'max_depth': 2, 'min_samples_leaf': 10}",0.68,0.68,0.76,0.64,0.64,172.0,208.0,124.0,"{'0': {'precision': 0.757396449704142, 'recall...",0.14
163,2,clf,dtree,all,High_YES_1_1_1,30_60,included,basic,0.68,"{'max_depth': 2, 'min_samples_leaf': 200}",0.68,0.68,0.75,0.63,0.69,122.0,161.0,106.0,"{'0': {'precision': 0.75, 'recall': 0.71311475...",0.11
167,6,clf,ada,all,High_YES_1_1_1,30_60,included,basic,0.68,{'n_estimators': 10},0.65,0.66,0.75,0.64,0.57,124.0,175.0,90.0,"{'0': {'precision': 0.75, 'recall': 0.70161290...",0.23


In [262]:
# Five classifier runs with the highest precision on the EUR_same class, best first.
(
    df_clf
    .sort_values('precision_EUR_same', ascending=False)
    .head(5)
)

Unnamed: 0.1,Unnamed: 0,model_type,model,sweeps_market_variables,sweep_news_agg,sweep_buy_sell,before_data,sweep_grid,best_score,best_params,f1_microavg,precision_weighted,precision_EUR_down,precision_EUR_same,precision_EUR_up,support_EUR_down,support_EUR_same,support_EUR_up,clf_report,elapsed_time
148,1,clf,svc-rbf,all,High_NO_1_1_1,30_60,included,basic,0.42,"{'C': 1, 'gamma': 1}",0.41,1.0,0.0,1.0,0.0,0.0,504.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",0.75
36,1,clf,svc-rbf,all,ALL_YES_1_1_1,0_60,included,basic,0.53,"{'C': 1, 'gamma': 1}",0.51,1.0,0.0,1.0,0.0,0.0,917.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",2.79
449,1,clf,svc-rbf,basic,ALL_YES_1_1_1,60_240,included,basic,0.53,"{'C': 1, 'gamma': 1}",0.54,1.0,0.0,1.0,0.0,0.0,917.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",3.38
337,1,clf,svc-rbf,basic,ALL_NO_1_1_1,60_180,included,basic,0.5,"{'C': 1, 'gamma': 1}",0.48,1.0,0.0,1.0,0.0,0.0,1292.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",7.19
85,1,clf,svc-rbf,basic,ALL_NO_1_1_1,30_60,included,basic,0.5,"{'C': 1, 'gamma': 1}",0.51,1.0,0.0,1.0,0.0,0.0,1292.0,0.0,"{'0': {'precision': 0.0, 'recall': 0.0, 'f1-sc...",6.6


In [263]:
# Five classifier runs with the highest precision on the EUR_up class, best first.
(
    df_clf
    .sort_values('precision_EUR_up', ascending=False)
    .head(5)
)

Unnamed: 0.1,Unnamed: 0,model_type,model,sweeps_market_variables,sweep_news_agg,sweep_buy_sell,before_data,sweep_grid,best_score,best_params,f1_microavg,precision_weighted,precision_EUR_down,precision_EUR_same,precision_EUR_up,support_EUR_down,support_EUR_same,support_EUR_up,clf_report,elapsed_time
156,2,clf,dtree,basic,High_YES_1_1_1,30_60,included,basic,0.69,"{'max_depth': 4, 'min_samples_leaf': 20}",0.69,0.72,0.54,0.79,0.69,71.0,212.0,106.0,"{'0': {'precision': 0.5431034482758621, 'recal...",0.09
163,2,clf,dtree,all,High_YES_1_1_1,30_60,included,basic,0.68,"{'max_depth': 2, 'min_samples_leaf': 200}",0.68,0.68,0.75,0.63,0.69,122.0,161.0,106.0,"{'0': {'precision': 0.75, 'recall': 0.71311475...",0.11
328,6,clf,ada,basic,High_YES_1_1_1,60_120,included,basic,0.62,{'n_estimators': 10},0.62,0.63,0.57,0.62,0.68,97.0,177.0,115.0,"{'0': {'precision': 0.5714285714285714, 'recal...",0.15
317,2,clf,dtree,all,High_NO_1_1_1,60_120,included,basic,0.65,"{'max_depth': 2, 'min_samples_leaf': 10}",0.62,0.64,0.48,0.69,0.68,113.0,255.0,136.0,"{'0': {'precision': 0.48255813953488375, 'reca...",0.16
279,6,clf,ada,all,ALL_NO_3_2_1,60_120,included,basic,0.67,{'n_estimators': 10},0.69,0.69,0.61,0.73,0.68,277.0,664.0,351.0,"{'0': {'precision': 0.6114457831325302, 'recal...",0.69


**Which model has the highest f1 value (microavg)?**

In [264]:
# Five classifier runs with the highest micro-averaged F1 (`f1_microavg`), best first.
(
    df_clf
    .sort_values('f1_microavg', ascending=False)
    .head(5)
)

Unnamed: 0.1,Unnamed: 0,model_type,model,sweeps_market_variables,sweep_news_agg,sweep_buy_sell,before_data,sweep_grid,best_score,best_params,f1_microavg,precision_weighted,precision_EUR_down,precision_EUR_same,precision_EUR_up,support_EUR_down,support_EUR_same,support_EUR_up,clf_report,elapsed_time
102,4,clf,xgb,basic,ALL_NO_3_2_1,30_60,included,basic,0.71,{'n_estimators': 10},0.71,0.72,0.63,0.79,0.61,255.0,751.0,286.0,"{'0': {'precision': 0.6270096463022508, 'recal...",0.61
109,4,clf,xgb,all,ALL_NO_3_2_1,30_60,included,basic,0.71,{'n_estimators': 10},0.71,0.72,0.63,0.79,0.61,255.0,751.0,286.0,"{'0': {'precision': 0.6270096463022508, 'recal...",0.7
87,3,clf,rforest,basic,ALL_NO_1_1_1,30_60,included,basic,0.7,"{'max_depth': 7, 'min_samples_leaf': 10, 'n_es...",0.7,0.72,0.62,0.8,0.59,258.0,762.0,272.0,"{'0': {'precision': 0.617363344051447, 'recall...",14.27
88,4,clf,xgb,basic,ALL_NO_1_1_1,30_60,included,basic,0.71,{'n_estimators': 10},0.7,0.71,0.63,0.78,0.62,259.0,740.0,293.0,"{'0': {'precision': 0.6270096463022508, 'recal...",0.6
101,3,clf,rforest,basic,ALL_NO_3_2_1,30_60,included,basic,0.7,"{'max_depth': 7, 'min_samples_leaf': 10, 'n_es...",0.7,0.72,0.62,0.8,0.59,258.0,763.0,271.0,"{'0': {'precision': 0.6237942122186495, 'recal...",15.53
