Results for test window = 12 months

In [12]:
import pandas as pd
import numpy as np
import eval_analysis as ea

In [3]:
# Show full cell contents (e.g. the long 'parameters' strings) instead of
# truncating them. None is the "no limit" value accepted by pandas >= 1.0; the
# legacy -1 sentinel was deprecated there and is rejected by newer releases, so
# it is used only as a fallback when an older pandas refuses None.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [4]:
# Columns to rank the classifiers on: timing and overall scores first, then
# precision ("p@k") and recall ("r@k") at each cutoff k (the cell comments
# below read these cutoffs as percentages, e.g. "precision at 50%").
METRICS = (
    ["train_time", "test_time", "accuracy", "F1_score", "auc"]
    + ["p@{}".format(k) for k in (1, 2, 5, 10, 20, 30, 50)]
    + ["r@{}".format(k) for k in (1, 2, 5, 10, 20, 30, 50)]
)

In [6]:
# Load the evaluation results; the first CSV column is used as the row index.
# NOTE(review): the path is relative to the notebook's working directory.
result_df = pd.read_csv(
    "eval_results_1/classifiers_eval.csv",
    index_col=0,
)

In [7]:
# Build the rankings for every metric in METRICS from the loaded results.
# NOTE(review): presumably returns a mapping keyed by metric name (it is
# looked up by metric name in the cells below) -- confirm in eval_analysis.
ranking_dict = ea.all_rankings(
    result_df,
    METRICS,
)

In [8]:
# Training time ranking: show each model, its parameters and its train_time.
train_time_ranking = ea.get_info(
    ranking_dict,
    'train_time',
    ['model', 'parameters', 'train_time'],
)
train_time_ranking

Unnamed: 0,index,model,parameters,train_time
0,2,DT,"{'criterion': 'entropy', 'max_depth': 1, 'min_samples_split': 10, 'max_features': 'sqrt'}",0.586452
1,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.66046
2,0,BAG,"{'n_estimators': 5, 'max_samples': 0.65}",0.812236
3,5,NB,{},0.941155
4,4,LR,"{'penalty': 'l1', 'C': 1}",1.932698
5,3,KNN,"{'n_neighbors': 1, 'weights': 'distance', 'algorithm': 'kd_tree'}",2.242482
6,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",3.223455


In [9]:
# Testing (prediction) time ranking: show each model, its parameters and its test_time.
test_time_ranking = ea.get_info(
    ranking_dict,
    'test_time',
    ['model', 'parameters', 'test_time'],
)
test_time_ranking

Unnamed: 0,index,model,parameters,test_time
0,2,DT,"{'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'sqrt'}",0.299813
1,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 10}",0.307295
2,4,LR,"{'penalty': 'l1', 'C': 1}",0.326775
3,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.380141
4,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",0.433016
5,5,NB,{},0.517187
6,3,KNN,"{'n_neighbors': 1, 'weights': 'distance', 'algorithm': 'kd_tree'}",6.434297


In [10]:
# Accuracy ranking: show each model, its parameters and its accuracy.
acc_ranking = ea.get_info(
    ranking_dict,
    'accuracy',
    ['model', 'parameters', 'accuracy'],
)
acc_ranking

Unnamed: 0,index,model,parameters,accuracy
0,6,RF,"{'max_features': 'log2', 'n_estimators': 100, 'max_depth': 50, 'min_samples_split': 10}",0.936914
1,2,DT,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_split': 2, 'max_features': 'sqrt'}",0.918449
2,4,LR,"{'penalty': 'l2', 'C': 0.1}",0.90989
3,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.906502
4,5,NB,{},0.853018
5,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.827631
6,3,KNN,"{'n_neighbors': 10, 'weights': 'distance', 'algorithm': 'ball_tree'}",0.754175


In [11]:
# F1 score ranking: show each model, its parameters and its F1_score.
f1_ranking = ea.get_info(
    ranking_dict,
    'F1_score',
    ['model', 'parameters', 'F1_score'],
)
f1_ranking

Unnamed: 0,index,model,parameters,F1_score
0,6,RF,"{'max_features': 'log2', 'n_estimators': 100, 'max_depth': 50, 'min_samples_split': 10}",0.956272
1,2,DT,"{'criterion': 'entropy', 'max_depth': 10, 'min_samples_split': 2, 'max_features': 'sqrt'}",0.943606
2,4,LR,"{'penalty': 'l2', 'C': 0.1}",0.937462
3,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.934127
4,5,NB,{},0.900198
5,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.890862
6,3,KNN,"{'n_neighbors': 10, 'weights': 'distance', 'algorithm': 'ball_tree'}",0.842532


In [13]:
# AUC-ROC ranking: show each model, its parameters and its auc.
auc_ranking = ea.get_info(
    ranking_dict,
    'auc',
    ['model', 'parameters', 'auc'],
)
auc_ranking

Unnamed: 0,index,model,parameters,auc
0,6,RF,"{'max_features': 'sqrt', 'n_estimators': 100, 'max_depth': 50, 'min_samples_split': 2}",0.907289
1,2,DT,"{'criterion': 'entropy', 'max_depth': 20, 'min_samples_split': 10, 'max_features': 'log2'}",0.892452
2,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.881952
3,4,LR,"{'penalty': 'l1', 'C': 0.01}",0.879197
4,5,NB,{},0.791799
5,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.709327
6,3,KNN,"{'n_neighbors': 1, 'weights': 'distance', 'algorithm': 'ball_tree'}",0.652945


In [14]:
# Precision at 50% ranking, with train_time and the matching recall (r@50)
# shown alongside for comparison.
# NOTE(review): in the recorded output a depth-1 DT, a single-tree RF and 1-NN
# all reach p@50 == 1.0 -- possibly an artefact of tied scores; confirm how
# ties are ordered before relying on this ranking.
p_at_50_ranking = ea.get_info(
    ranking_dict,
    'p@50',
    ['model', 'parameters', 'p@50', 'train_time', 'r@50'],
)
p_at_50_ranking

Unnamed: 0,index,model,parameters,p@50,train_time,r@50
0,2,DT,"{'criterion': 'entropy', 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'sqrt'}",1.0,0.624636,0.710657
1,3,KNN,"{'n_neighbors': 1, 'weights': 'uniform', 'algorithm': 'ball_tree'}",1.0,2.376567,0.710657
2,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",1.0,0.66046,0.710657
3,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",0.961927,3.223455,0.6836
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.958059,2.06702,0.680852
5,0,BAG,"{'n_estimators': 10, 'max_samples': 0.5}",0.943348,0.951025,0.670397
6,5,NB,{},0.918464,0.941155,0.652713


In [15]:
# Precision at 30% ranking, with train_time shown alongside.
p_at_30_ranking = ea.get_info(
    ranking_dict,
    'p@30',
    ['model', 'parameters', 'p@30', 'train_time'],
)
p_at_30_ranking

Unnamed: 0,index,model,parameters,p@30,train_time
0,2,DT,"{'criterion': 'entropy', 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'sqrt'}",1.0,0.624636
1,3,KNN,"{'n_neighbors': 1, 'weights': 'distance', 'algorithm': 'auto'}",1.0,2.310801
2,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",1.0,0.66046
3,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.975989,49.490264
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.973425,2.06702
5,5,NB,{},0.949388,0.941155
6,0,BAG,"{'n_estimators': 10, 'max_samples': 0.5}",0.930555,0.951025


In [16]:
# Precision at 20% ranking, with train_time shown alongside.
p_at_20_ranking = ea.get_info(
    ranking_dict,
    'p@20',
    ['model', 'parameters', 'p@20', 'train_time'],
)
p_at_20_ranking

Unnamed: 0,index,model,parameters,p@20,train_time
0,2,DT,"{'criterion': 'gini', 'max_depth': 100, 'min_samples_split': 5, 'max_features': 'sqrt'}",1.0,1.126271
1,3,KNN,"{'n_neighbors': 1, 'weights': 'uniform', 'algorithm': 'auto'}",1.0,2.799279
2,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",1.0,0.66046
3,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.985418,2.06702
4,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.981915,49.490264
5,5,NB,{},0.976699,0.941155
6,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.970188,0.831823


In [17]:
# Precision at 10% ranking, with train_time shown alongside.
p_at_10_ranking = ea.get_info(
    ranking_dict,
    'p@10',
    ['model', 'parameters', 'p@10', 'train_time'],
)
p_at_10_ranking

Unnamed: 0,index,model,parameters,p@10,train_time
0,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",1.0,3.223455
1,2,DT,"{'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'max_features': 'sqrt'}",1.0,0.665905
2,3,KNN,"{'n_neighbors': 1, 'weights': 'distance', 'algorithm': 'kd_tree'}",1.0,2.242482
3,4,LR,"{'penalty': 'l1', 'C': 10}",1.0,5.092103
4,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",1.0,0.66046
5,5,NB,{},0.99467,0.941155
6,0,BAG,"{'n_estimators': 10, 'max_samples': 0.65}",0.993147,1.022354


In [18]:
# Precision at 5% ranking, with train_time shown alongside.
p_at_5_ranking = ea.get_info(
    ranking_dict,
    'p@5',
    ['model', 'parameters', 'p@5', 'train_time'],
)
p_at_5_ranking

Unnamed: 0,index,model,parameters,p@5,train_time
0,0,BAG,"{'n_estimators': 10, 'max_samples': 0.65}",1.0,1.022354
1,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",1.0,49.490264
2,2,DT,"{'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'max_features': 'sqrt'}",1.0,0.665905
3,3,KNN,"{'n_neighbors': 10, 'weights': 'distance', 'algorithm': 'kd_tree'}",1.0,2.284315
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",1.0,2.06702
5,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",1.0,0.66046
6,5,NB,{},0.989644,0.941155


In [19]:
# Precision at 2% ranking, with train_time shown alongside.
# NOTE(review): every model scores 1.0 in the recorded output, so the row
# order here does not discriminate between models.
p_at_2_ranking = ea.get_info(
    ranking_dict,
    'p@2',
    ['model', 'parameters', 'p@2', 'train_time'],
)
p_at_2_ranking

Unnamed: 0,index,model,parameters,p@2,train_time
0,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",1.0,0.831823
1,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",1.0,49.490264
2,2,DT,"{'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'max_features': 'sqrt'}",1.0,0.839643
3,3,KNN,"{'n_neighbors': 10, 'weights': 'distance', 'algorithm': 'kd_tree'}",1.0,2.284315
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",1.0,2.06702
5,5,NB,{},1.0,0.941155
6,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",1.0,0.66046


In [20]:
# Precision at 1% ranking, with train_time shown alongside.
# NOTE(review): every model scores 1.0 in the recorded output, so the row
# order here does not discriminate between models.
p_at_1_ranking = ea.get_info(
    ranking_dict,
    'p@1',
    ['model', 'parameters', 'p@1', 'train_time'],
)
p_at_1_ranking

Unnamed: 0,index,model,parameters,p@1,train_time
0,0,BAG,"{'n_estimators': 20, 'max_samples': 0.65}",1.0,1.631213
1,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",1.0,3.223455
2,2,DT,"{'criterion': 'gini', 'max_depth': 5, 'min_samples_split': 10, 'max_features': 'sqrt'}",1.0,0.708742
3,3,KNN,"{'n_neighbors': 25, 'weights': 'uniform', 'algorithm': 'kd_tree'}",1.0,2.32462
4,4,LR,"{'penalty': 'l1', 'C': 1}",1.0,1.932698
5,5,NB,{},1.0,0.941155
6,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",1.0,0.66046


In [21]:
# Recall at 50% ranking, with train_time and the matching precision (p@50)
# shown alongside for comparison.
# NOTE(review): the top three rows share the same r@50 (and p@50 == 1.0) in
# the recorded output -- possibly tied scores; confirm the tie handling.
r_at_50_ranking = ea.get_info(
    ranking_dict,
    'r@50',
    ['model', 'parameters', 'r@50', 'train_time', 'p@50'],
)
r_at_50_ranking

Unnamed: 0,index,model,parameters,r@50,train_time,p@50
0,2,DT,"{'criterion': 'entropy', 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'sqrt'}",0.710657,0.624636,1.0
1,3,KNN,"{'n_neighbors': 1, 'weights': 'uniform', 'algorithm': 'ball_tree'}",0.710657,2.376567,1.0
2,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.710657,0.66046,1.0
3,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",0.6836,3.223455,0.961927
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.680852,2.06702,0.958059
5,0,BAG,"{'n_estimators': 10, 'max_samples': 0.5}",0.670397,0.951025,0.943348
6,5,NB,{},0.652713,0.941155,0.918464


In [22]:
# Recall at 30% ranking, with train_time shown alongside.
r_at_30_ranking = ea.get_info(
    ranking_dict,
    'r@30',
    ['model', 'parameters', 'r@30', 'train_time'],
)
r_at_30_ranking

Unnamed: 0,index,model,parameters,r@30,train_time
0,2,DT,"{'criterion': 'entropy', 'max_depth': 1, 'min_samples_split': 5, 'max_features': 'sqrt'}",0.42639,0.624636
1,3,KNN,"{'n_neighbors': 1, 'weights': 'distance', 'algorithm': 'auto'}",0.42639,2.310801
2,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.42639,0.66046
3,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.416152,49.490264
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.415059,2.06702
5,5,NB,{},0.40481,0.941155
6,0,BAG,"{'n_estimators': 10, 'max_samples': 0.5}",0.396779,0.951025


In [23]:
# Recall at 20% ranking, with train_time shown alongside.
r_at_20_ranking = ea.get_info(
    ranking_dict,
    'r@20',
    ['model', 'parameters', 'r@20', 'train_time'],
)
r_at_20_ranking

Unnamed: 0,index,model,parameters,r@20,train_time
0,2,DT,"{'criterion': 'gini', 'max_depth': 100, 'min_samples_split': 5, 'max_features': 'sqrt'}",0.284256,1.126271
1,3,KNN,"{'n_neighbors': 1, 'weights': 'uniform', 'algorithm': 'auto'}",0.284256,2.799279
2,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.284256,0.66046
3,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.280111,2.06702
4,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.279116,49.490264
5,5,NB,{},0.277633,0.941155
6,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.275782,0.831823


In [24]:
# Recall at 10% ranking, with train_time shown alongside.
r_at_10_ranking = ea.get_info(
    ranking_dict,
    'r@10',
    ['model', 'parameters', 'r@10', 'train_time'],
)
r_at_10_ranking

Unnamed: 0,index,model,parameters,r@10,train_time
0,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",0.142123,3.223455
1,2,DT,"{'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'max_features': 'sqrt'}",0.142123,0.665905
2,3,KNN,"{'n_neighbors': 1, 'weights': 'distance', 'algorithm': 'kd_tree'}",0.142123,2.242482
3,4,LR,"{'penalty': 'l1', 'C': 10}",0.142123,5.092103
4,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.142123,0.66046
5,5,NB,{},0.141365,0.941155
6,0,BAG,"{'n_estimators': 10, 'max_samples': 0.65}",0.141149,1.022354


In [25]:
# Recall at 5% ranking, with train_time shown alongside.
r_at_5_ranking = ea.get_info(
    ranking_dict,
    'r@5',
    ['model', 'parameters', 'r@5', 'train_time'],
)
r_at_5_ranking

Unnamed: 0,index,model,parameters,r@5,train_time
0,0,BAG,"{'n_estimators': 10, 'max_samples': 0.65}",0.071061,1.022354
1,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.071061,49.490264
2,2,DT,"{'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 2, 'max_features': 'sqrt'}",0.071061,0.665905
3,3,KNN,"{'n_neighbors': 10, 'weights': 'distance', 'algorithm': 'kd_tree'}",0.071061,2.284315
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.071061,2.06702
5,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.071061,0.66046
6,5,NB,{},0.070325,0.941155


In [26]:
# Recall at 2% ranking, with train_time shown alongside.
# NOTE(review): every model has the same r@2 in the recorded output, so the
# row order here does not discriminate between models.
r_at_2_ranking = ea.get_info(
    ranking_dict,
    'r@2',
    ['model', 'parameters', 'r@2', 'train_time'],
)
r_at_2_ranking

Unnamed: 0,index,model,parameters,r@2,train_time
0,0,BAG,"{'n_estimators': 5, 'max_samples': 0.5}",0.02842,0.831823
1,1,BST,"{'n_estimators': 200, 'algorithm': 'SAMME'}",0.02842,49.490264
2,2,DT,"{'criterion': 'gini', 'max_depth': 10, 'min_samples_split': 2, 'max_features': 'sqrt'}",0.02842,0.839643
3,3,KNN,"{'n_neighbors': 10, 'weights': 'distance', 'algorithm': 'kd_tree'}",0.02842,2.284315
4,4,LR,"{'penalty': 'l1', 'C': 0.1}",0.02842,2.06702
5,5,NB,{},0.02842,0.941155
6,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.02842,0.66046


In [27]:
# Recall at 1% ranking, with train_time shown alongside.
# NOTE(review): every model has the same r@1 in the recorded output, so the
# row order here does not discriminate between models.
r_at_1_ranking = ea.get_info(
    ranking_dict,
    'r@1',
    ['model', 'parameters', 'r@1', 'train_time'],
)
r_at_1_ranking

Unnamed: 0,index,model,parameters,r@1,train_time
0,0,BAG,"{'n_estimators': 20, 'max_samples': 0.65}",0.01421,1.631213
1,1,BST,"{'n_estimators': 10, 'algorithm': 'SAMME'}",0.01421,3.223455
2,2,DT,"{'criterion': 'gini', 'max_depth': 5, 'min_samples_split': 10, 'max_features': 'sqrt'}",0.01421,0.708742
3,3,KNN,"{'n_neighbors': 25, 'weights': 'uniform', 'algorithm': 'kd_tree'}",0.01421,2.32462
4,4,LR,"{'penalty': 'l1', 'C': 1}",0.01421,1.932698
5,5,NB,{},0.01421,0.941155
6,6,RF,"{'max_features': 'sqrt', 'n_estimators': 1, 'max_depth': 1, 'min_samples_split': 2}",0.01421,0.66046
