Results for test window = 6 months

In [1]:
import pandas as pd
import numpy as np
import eval_analysis as ea

In [2]:
# Show full cell contents (e.g. the long `parameters` dicts) without truncation.
# `None` is the supported "no limit" value: the old `-1` sentinel was deprecated
# in pandas 1.0 and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

In [3]:
# Metric columns to rank on: timings, threshold-based scores, then precision
# ("p@k") and recall ("r@k") at each top-k% cutoff.
_TOP_K_PERCENTS = (1, 2, 5, 10, 20, 30, 50)
METRICS = (
    ["train_time", "test_time", "accuracy", "F1_score", "auc"]
    + ["p@{}".format(pct) for pct in _TOP_K_PERCENTS]
    + ["r@{}".format(pct) for pct in _TOP_K_PERCENTS]
)

In [4]:
# Load the saved evaluation results; column 0 of the CSV becomes the index.
results_csv_path = "eval_results/classifiers_eval.csv"
result_df = pd.read_csv(results_csv_path, index_col=0)

In [5]:
# Build the rankings for every metric in METRICS from the loaded results.
# (Return structure is defined in eval_analysis; it is later queried by metric
# name through ea.get_info.)
ranking_dict = ea.all_rankings(
    result_df,
    METRICS,
)

In [6]:
# Training-time ranking.
# Saved output: one row per model, sorted ascending by train_time.
train_time_ranking = ea.get_info(
    ranking_dict,
    'train_time',
    ['model', 'parameters', 'train_time'],
)
train_time_ranking

Unnamed: 0,index,model,parameters,train_time
0,2,DT,"{'max_depth': 1, 'criterion': 'gini', 'min_samples_split': 10, 'max_features': 'sqrt'}",0.747062
1,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'log2'}",0.771065
2,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",1.032019
3,5,NB,{},1.115967
4,4,LR,"{'C': 0.01, 'penalty': 'l1'}",2.501115
5,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 25}",3.132684
6,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",3.989697


In [7]:
# Testing-time ranking.
# Saved output: one row per model, sorted ascending by test_time.
test_time_ranking = ea.get_info(
    ranking_dict,
    'test_time',
    ['model', 'parameters', 'test_time'],
)
test_time_ranking

Unnamed: 0,index,model,parameters,test_time
0,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'log2'}",0.215229
1,2,DT,"{'max_depth': 1, 'criterion': 'entropy', 'min_samples_split': 5, 'max_features': 'log2'}",0.219448
2,4,LR,"{'C': 0.1, 'penalty': 'l1'}",0.248512
3,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.266158
4,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.306303
5,5,NB,{},0.431144
6,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 1}",4.689851


In [8]:
# Accuracy ranking.
# Saved output: one row per model, sorted descending by accuracy.
acc_ranking = ea.get_info(
    ranking_dict,
    'accuracy',
    ['model', 'parameters', 'accuracy'],
)
acc_ranking

Unnamed: 0,index,model,parameters,accuracy
0,6,RF,"{'max_depth': 50, 'min_samples_split': 10, 'n_estimators': 100, 'max_features': 'log2'}",0.941578
1,2,DT,"{'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",0.924967
2,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 100}",0.913609
3,4,LR,"{'C': 0.01, 'penalty': 'l2'}",0.911669
4,5,NB,{},0.856185
5,3,KNN,"{'algorithm': 'ball_tree', 'weights': 'distance', 'n_neighbors': 10}",0.767127
6,0,BAG,"{'n_estimators': 5, 'max_samples': 0.35}",0.712914


In [9]:
# F1-score ranking.
# Saved output: one row per model, sorted descending by F1_score.
f1_ranking = ea.get_info(
    ranking_dict,
    'F1_score',
    ['model', 'parameters', 'F1_score'],
)
f1_ranking

Unnamed: 0,index,model,parameters,F1_score
0,6,RF,"{'max_depth': 50, 'min_samples_split': 10, 'n_estimators': 100, 'max_features': 'log2'}",0.959939
1,2,DT,"{'max_depth': 10, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",0.948369
2,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 100}",0.94049
3,4,LR,"{'C': 0.01, 'penalty': 'l2'}",0.939481
4,5,NB,{},0.903468
5,3,KNN,"{'algorithm': 'ball_tree', 'weights': 'distance', 'n_neighbors': 10}",0.851536
6,0,BAG,"{'n_estimators': 10, 'max_samples': 0.35}",0.832399


In [10]:
# AUC-ROC ranking.
# Saved output: one row per model, sorted descending by auc.
# NOTE(review): BAG sits at exactly 0.5 in the saved output; worth confirming
# its predicted scores are not constant.
auc_ranking = ea.get_info(
    ranking_dict,
    'auc',
    ['model', 'parameters', 'auc'],
)
auc_ranking

Unnamed: 0,index,model,parameters,auc
0,6,RF,"{'max_depth': 50, 'min_samples_split': 10, 'n_estimators': 100, 'max_features': 'log2'}",0.911729
1,2,DT,"{'max_depth': 20, 'criterion': 'entropy', 'min_samples_split': 5, 'max_features': 'log2'}",0.894092
2,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 200}",0.882421
3,4,LR,"{'C': 0.01, 'penalty': 'l1'}",0.877199
4,5,NB,{},0.791051
5,3,KNN,"{'algorithm': 'ball_tree', 'weights': 'uniform', 'n_neighbors': 1}",0.659994
6,0,BAG,"{'n_estimators': 10, 'max_samples': 0.35}",0.5


In [30]:
# Precision at the top 50%, with train_time and r@50 shown alongside.
# NOTE(review): DT, KNN and RF tie at exactly 1.0 in the saved output; check
# how tied scores are ordered before trusting the top ranks.
p_at_50_ranking = ea.get_info(
    ranking_dict,
    'p@50',
    ['model', 'parameters', 'p@50', 'train_time', 'r@50'],
)
p_at_50_ranking

Unnamed: 0,index,model,parameters,p@50,train_time,r@50
0,2,DT,"{'max_depth': 1, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",1.0,0.772367,0.701346
1,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 1}",1.0,3.34056,0.701346
2,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112,0.701346
3,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.960401,3.989697,0.673574
4,4,LR,"{'C': 10, 'penalty': 'l1'}",0.957792,2.851313,0.671744
5,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.937595,1.032019,0.657579
6,5,NB,{},0.918079,1.115967,0.643892


In [13]:
# Precision at the top 30%, with train_time shown alongside.
p_at_30_ranking = ea.get_info(
    ranking_dict,
    'p@30',
    ['model', 'parameters', 'p@30', 'train_time'],
)
p_at_30_ranking

Unnamed: 0,index,model,parameters,p@30,train_time
0,2,DT,"{'max_depth': 100, 'criterion': 'gini', 'min_samples_split': 10, 'max_features': 'log2'}",1.0,1.350665
1,3,KNN,"{'algorithm': 'auto', 'weights': 'uniform', 'n_neighbors': 1}",1.0,3.779852
2,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112
3,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.974735,3.989697
4,4,LR,"{'C': 10, 'penalty': 'l1'}",0.97466,2.851313
5,5,NB,{},0.951324,1.115967
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.916452,1.814827


In [14]:
# Recall at the top 30%, with train_time shown alongside.
# NOTE(review): an identical r@30 cell appears again in the recall section
# further down; consider keeping only one of them.
r_at_30_ranking = ea.get_info(
    ranking_dict,
    'r@30',
    ['model', 'parameters', 'r@30', 'train_time'],
)
r_at_30_ranking

Unnamed: 0,index,model,parameters,r@30,train_time
0,2,DT,"{'max_depth': 100, 'criterion': 'gini', 'min_samples_split': 10, 'max_features': 'log2'}",0.420805,1.350665
1,3,KNN,"{'algorithm': 'auto', 'weights': 'uniform', 'n_neighbors': 1}",0.420805,3.779852
2,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.420805,0.801112
3,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.410173,3.989697
4,4,LR,"{'C': 10, 'penalty': 'l1'}",0.410141,2.851313
5,5,NB,{},0.400321,1.115967
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.385647,1.814827


In [15]:
# Precision at the top 20%, with train_time shown alongside.
p_at_20_ranking = ea.get_info(
    ranking_dict,
    'p@20',
    ['model', 'parameters', 'p@20', 'train_time'],
)
p_at_20_ranking

Unnamed: 0,index,model,parameters,p@20,train_time
0,2,DT,"{'max_depth': 100, 'criterion': 'gini', 'min_samples_split': 5, 'max_features': 'sqrt'}",1.0,1.339354
1,3,KNN,"{'algorithm': 'auto', 'weights': 'uniform', 'n_neighbors': 1}",1.0,3.779852
2,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112
3,4,LR,"{'C': 10, 'penalty': 'l1'}",0.988426,2.851313
4,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 200}",0.985022,63.412135
5,5,NB,{},0.977079,1.115967
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.939521,1.814827


In [18]:
# Precision at the top 10%, with train_time shown alongside.
p_at_10_ranking = ea.get_info(
    ranking_dict,
    'p@10',
    ['model', 'parameters', 'p@10', 'train_time'],
)
p_at_10_ranking

Unnamed: 0,index,model,parameters,p@10,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",1.0,3.989697
1,2,DT,"{'max_depth': 5, 'criterion': 'gini', 'min_samples_split': 5, 'max_features': 'log2'}",1.0,0.949104
2,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'distance', 'n_neighbors': 10}",1.0,3.271407
3,4,LR,"{'C': 1, 'penalty': 'l1'}",1.0,6.005412
4,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112
5,5,NB,{},0.996256,1.115967
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.957676,1.814827


In [19]:
# Precision at the top 5%, with train_time shown alongside.
p_at_5_ranking = ea.get_info(
    ranking_dict,
    'p@5',
    ['model', 'parameters', 'p@5', 'train_time'],
)
p_at_5_ranking

Unnamed: 0,index,model,parameters,p@5,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 200}",1.0,63.412135
1,2,DT,"{'max_depth': 10, 'criterion': 'gini', 'min_samples_split': 10, 'max_features': 'sqrt'}",1.0,1.098358
2,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'distance', 'n_neighbors': 10}",1.0,3.271407
3,4,LR,"{'C': 1, 'penalty': 'l1'}",1.0,6.005412
4,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112
5,5,NB,{},0.992964,1.115967
6,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.97231,1.032019


In [25]:
# Precision at the top 2%, with train_time shown alongside.
p_at_2_ranking = ea.get_info(
    ranking_dict,
    'p@2',
    ['model', 'parameters', 'p@2', 'train_time'],
)
p_at_2_ranking

Unnamed: 0,index,model,parameters,p@2,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",1.0,3.989697
1,2,DT,"{'max_depth': 10, 'criterion': 'gini', 'min_samples_split': 5, 'max_features': 'sqrt'}",1.0,1.141779
2,3,KNN,"{'algorithm': 'auto', 'weights': 'uniform', 'n_neighbors': 1}",1.0,3.779852
3,4,LR,"{'C': 10, 'penalty': 'l1'}",1.0,2.851313
4,5,NB,{},1.0,1.115967
5,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.974461,1.814827


In [26]:
# Precision at the top 1%, with train_time shown alongside.
p_at_1_ranking = ea.get_info(
    ranking_dict,
    'p@1',
    ['model', 'parameters', 'p@1', 'train_time'],
)
p_at_1_ranking

Unnamed: 0,index,model,parameters,p@1,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",1.0,3.989697
1,2,DT,"{'max_depth': 5, 'criterion': 'gini', 'min_samples_split': 2, 'max_features': 'log2'}",1.0,1.082848
2,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'distance', 'n_neighbors': 25}",1.0,3.197221
3,4,LR,"{'C': 0.01, 'penalty': 'l1'}",1.0,2.501115
4,5,NB,{},1.0,1.115967
5,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",1.0,0.801112
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.981839,1.814827


In [32]:
# Recall at the top 50%, with train_time and p@50 requested alongside.
# NOTE(review): the saved output for this cell has no p@50 column even though
# it is requested here; the cell looks edited after its last run, so re-run
# it to refresh the table.
r_at_50_ranking = ea.get_info(
    ranking_dict,
    'r@50',
    ['model', 'parameters', 'r@50', 'train_time', 'p@50'],
)
r_at_50_ranking

Unnamed: 0,index,model,parameters,r@50,train_time
0,2,DT,"{'max_depth': 1, 'criterion': 'entropy', 'min_samples_split': 2, 'max_features': 'sqrt'}",0.701346,0.772367
1,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'uniform', 'n_neighbors': 1}",0.701346,3.34056
2,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.701346,0.801112
3,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.673574,3.989697
4,4,LR,"{'C': 10, 'penalty': 'l1'}",0.671744,2.851313
5,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.657579,1.032019
6,5,NB,{},0.643892,1.115967


In [21]:
# Recall at the top 30%, with train_time shown alongside.
# NOTE(review): this repeats an identical r@30 cell that appears earlier,
# right after the p@30 table; consider keeping only one of them.
r_at_30_ranking = ea.get_info(
    ranking_dict,
    'r@30',
    ['model', 'parameters', 'r@30', 'train_time'],
)
r_at_30_ranking

Unnamed: 0,index,model,parameters,r@30,train_time
0,2,DT,"{'max_depth': 100, 'criterion': 'gini', 'min_samples_split': 10, 'max_features': 'log2'}",0.420805,1.350665
1,3,KNN,"{'algorithm': 'auto', 'weights': 'uniform', 'n_neighbors': 1}",0.420805,3.779852
2,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.420805,0.801112
3,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.410173,3.989697
4,4,LR,"{'C': 10, 'penalty': 'l1'}",0.410141,2.851313
5,5,NB,{},0.400321,1.115967
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.385647,1.814827


In [22]:
# Recall at the top 20%, with train_time shown alongside.
r_at_20_ranking = ea.get_info(
    ranking_dict,
    'r@20',
    ['model', 'parameters', 'r@20', 'train_time'],
)
r_at_20_ranking

Unnamed: 0,index,model,parameters,r@20,train_time
0,2,DT,"{'max_depth': 100, 'criterion': 'gini', 'min_samples_split': 5, 'max_features': 'sqrt'}",0.280526,1.339354
1,3,KNN,"{'algorithm': 'auto', 'weights': 'uniform', 'n_neighbors': 1}",0.280526,3.779852
2,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.280526,0.801112
3,4,LR,"{'C': 10, 'penalty': 'l1'}",0.277279,2.851313
4,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 200}",0.276324,63.412135
5,5,NB,{},0.274096,1.115967
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.26356,1.814827


In [23]:
# Recall at the top 10%, with train_time shown alongside.
r_at_10_ranking = ea.get_info(
    ranking_dict,
    'r@10',
    ['model', 'parameters', 'r@10', 'train_time'],
)
r_at_10_ranking

Unnamed: 0,index,model,parameters,r@10,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.140263,3.989697
1,2,DT,"{'max_depth': 5, 'criterion': 'gini', 'min_samples_split': 5, 'max_features': 'log2'}",0.140263,0.949104
2,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'distance', 'n_neighbors': 10}",0.140263,3.271407
3,4,LR,"{'C': 1, 'penalty': 'l1'}",0.140263,6.005412
4,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.140263,0.801112
5,5,NB,{},0.139738,1.115967
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.134326,1.814827


In [24]:
# Recall at the top 5%, with train_time shown alongside.
r_at_5_ranking = ea.get_info(
    ranking_dict,
    'r@5',
    ['model', 'parameters', 'r@5', 'train_time'],
)
r_at_5_ranking

Unnamed: 0,index,model,parameters,r@5,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 200}",0.070124,63.412135
1,2,DT,"{'max_depth': 10, 'criterion': 'gini', 'min_samples_split': 10, 'max_features': 'sqrt'}",0.070124,1.098358
2,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'distance', 'n_neighbors': 10}",0.070124,3.271407
3,4,LR,"{'C': 1, 'penalty': 'l1'}",0.070124,6.005412
4,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.070124,0.801112
5,5,NB,{},0.06963,1.115967
6,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.068182,1.032019


In [27]:
# Recall at the top 2%, with train_time shown alongside.
r_at_2_ranking = ea.get_info(
    ranking_dict,
    'r@2',
    ['model', 'parameters', 'r@2', 'train_time'],
)
r_at_2_ranking

Unnamed: 0,index,model,parameters,r@2,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.028043,3.989697
1,2,DT,"{'max_depth': 10, 'criterion': 'gini', 'min_samples_split': 5, 'max_features': 'sqrt'}",0.028043,1.141779
2,3,KNN,"{'algorithm': 'auto', 'weights': 'uniform', 'n_neighbors': 1}",0.028043,3.779852
3,4,LR,"{'C': 10, 'penalty': 'l1'}",0.028043,2.851313
4,5,NB,{},0.028043,1.115967
5,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.028043,0.801112
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.027327,1.814827


In [28]:
# Recall at the top 1%, with train_time shown alongside.
r_at_1_ranking = ea.get_info(
    ranking_dict,
    'r@1',
    ['model', 'parameters', 'r@1', 'train_time'],
)
r_at_1_ranking

Unnamed: 0,index,model,parameters,r@1,train_time
0,1,BST,"{'algorithm': 'SAMME', 'n_estimators': 10}",0.014022,3.989697
1,2,DT,"{'max_depth': 5, 'criterion': 'gini', 'min_samples_split': 2, 'max_features': 'log2'}",0.014022,1.082848
2,3,KNN,"{'algorithm': 'kd_tree', 'weights': 'distance', 'n_neighbors': 25}",0.014022,3.197221
3,4,LR,"{'C': 0.01, 'penalty': 'l1'}",0.014022,2.501115
4,5,NB,{},0.014022,1.115967
5,6,RF,"{'max_depth': 1, 'min_samples_split': 2, 'n_estimators': 1, 'max_features': 'sqrt'}",0.014022,0.801112
6,0,BAG,"{'n_estimators': 20, 'max_samples': 0.5}",0.013767,1.814827
