In [1]:
import pandas as pd
import numpy as np
import eval_analysis as ea

In [2]:
# Show full cell contents (e.g. the long `parameters` dict strings) without truncation.
# `None` is the supported "no limit" value; the old `-1` sentinel was deprecated
# in pandas 1.0 and is no longer accepted by newer releases.
pd.set_option('display.max_colwidth', None)

In [3]:
# Names of the metrics to rank on: timing and overall scores first, then
# precision ("p@k") and recall ("r@k") at each cutoff k, precision block first.
METRICS = (
    ["train_time", "test_time", "accuracy", "F1_score", "auc"]
    + [
        "{}@{}".format(prefix, k)
        for prefix in ("p", "r")
        for k in (1, 2, 5, 10, 20, 30, 50)
    ]
)

In [4]:
# Load the saved results table; the CSV's first column becomes the index.
result_df = pd.read_csv(
    "eval_results/classifiers_eval.csv",
    index_col=0,
)

In [5]:
# Build the rankings from the results table for the names in METRICS.
# NOTE(review): presumably one ranking per metric name -- confirm in eval_analysis.
ranking_dict = ea.all_rankings(
    result_df,
    METRICS,
)

In [6]:
# Training time: look up the 'train_time' ranking and show each model with
# its parameters and train_time value.
train_time_ranking = ea.get_info(
    ranking_dict,
    "train_time",
    ["model", "parameters", "train_time"],
)
train_time_ranking

Unnamed: 0,index,model,parameters,train_time
0,2,DT,"{'max_depth': 1, 'criterion': 'gini', 'min_samples_split': 10, 'max_features': 'sqrt'}",0.747062
1,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'log2'}",0.771065
2,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",1.032019
3,5,NB,{},1.115967
4,4,LR,"{'C': 0.01, 'penalty': 'l1'}",2.501115
5,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 25}",3.132684
6,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",3.989697


In [7]:
# Testing time: look up the 'test_time' ranking and show each model with
# its parameters and test_time value.
test_time_ranking = ea.get_info(
    ranking_dict,
    "test_time",
    ["model", "parameters", "test_time"],
)
test_time_ranking

Unnamed: 0,index,model,parameters,test_time
0,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'log2'}",0.215229
1,2,DT,"{'max_depth': 1, 'criterion': 'entropy', 'min_samples_split': 5, 'max_features': 'log2'}",0.219448
2,4,LR,"{'C': 0.1, 'penalty': 'l1'}",0.248512
3,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.266158
4,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.306303
5,5,NB,{},0.431144
6,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 1}",4.689851


In [8]:
# Accuracy: look up the 'accuracy' ranking and show each model with its
# parameters and accuracy value.
acc_ranking = ea.get_info(
    ranking_dict,
    "accuracy",
    ["model", "parameters", "accuracy"],
)
acc_ranking

Unnamed: 0,index,model,parameters,accuracy
0,6,RF,"{'max_depth': 50, 'min_samples_split': 10, 'n_estimators': 100, 'max_features': 'log2'}",0.941578
1,2,DT,"{'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",0.924967
2,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 100}",0.913609
3,4,LR,"{'C': 0.01, 'penalty': 'l2'}",0.911669
4,5,NB,{},0.856185
5,3,KNN,"{'algorithm': 'ball_tree', 'weights': 'distance', 'n_neighbors': 10}",0.767127
6,0,BAG,"{'n_estimators': 5, 'max_samples': 0.35}",0.712914


In [9]:
# F1 score: look up the 'F1_score' ranking and show each model with its
# parameters and F1_score value.
f1_ranking = ea.get_info(
    ranking_dict,
    "F1_score",
    ["model", "parameters", "F1_score"],
)
f1_ranking

Unnamed: 0,index,model,parameters,F1_score
0,6,RF,"{'max_depth': 50, 'min_samples_split': 10, 'n_estimators': 100, 'max_features': 'log2'}",0.959939
1,2,DT,"{'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",0.948369
2,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 100}",0.94049
3,4,LR,"{'C': 0.01, 'penalty': 'l2'}",0.939481
4,5,NB,{},0.903468
5,3,KNN,"{'algorithm': 'ball_tree', 'weights': 'distance', 'n_neighbors': 10}",0.851536
6,0,BAG,"{'n_estimators': 10, 'max_samples': 0.35}",0.832399


In [11]:
# AUC-ROC: look up the 'auc' ranking and show each model with its
# parameters and auc value.
auc_ranking = ea.get_info(
    ranking_dict,
    "auc",
    ["model", "parameters", "auc"],
)
auc_ranking

Unnamed: 0,model,parameters,auc
6,RF,"{'max_depth': 50, 'min_samples_split': 10, 'n_estimators': 100, 'max_features': 'log2'}",0.911729
2,DT,"{'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 5, 'max_features': 'log2'}",0.894092
1,BST,"{'algorithm': 'SAMME', 'n_estimators': 200}",0.882421
4,LR,"{'C': 0.01, 'penalty': 'l1'}",0.877199
5,NB,{},0.791051
3,KNN,"{'algorithm': 'ball_tree', 'weights': 'uniform', 'n_neighbors': 1}",0.659994
0,BAG,"{'n_estimators': 10, 'max_samples': 0.35}",0.5


In [13]:
# Precision at 50%: look up the 'p@50' ranking and show each model with its
# parameters and p@50 value, alongside train_time for comparison.
p_at_50_ranking = ea.get_info(
    ranking_dict,
    "p@50",
    ["model", "parameters", "p@50", "train_time"],
)
p_at_50_ranking

Unnamed: 0,model,parameters,p@50,train_time
2,DT,"{'max_depth': 1, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",1.0,0.772367
3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 1}",1.0,3.34056
6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112
1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.960401,3.989697
4,LR,"{'C': 10, 'penalty': 'l1'}",0.957792,2.851313
0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.937595,1.032019
5,NB,{},0.918079,1.115967


In [14]:
# Recall at 50%: look up the 'r@50' ranking and show each model with its
# parameters and r@50 value, alongside train_time for comparison.
r_at_50_ranking = ea.get_info(
    ranking_dict,
    "r@50",
    ["model", "parameters", "r@50", "train_time"],
)
r_at_50_ranking

Unnamed: 0,model,parameters,r@50,train_time
2,DT,"{'max_depth': 1, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",0.701346,0.772367
3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 1}",0.701346,3.34056
6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.701346,0.801112
1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.673574,3.989697
4,LR,"{'C': 10, 'penalty': 'l1'}",0.671744,2.851313
0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.657579,1.032019
5,NB,{},0.643892,1.115967
