In [1]:
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import numpy as np
from src.data import X_train, y_train, X_test, y_test

from src.feature_engineering import FeatureEngineering

models = [RandomForestClassifier(),
          XGBClassifier()
          ]
for model in range(len(models)):
    models[model] = make_pipeline(FeatureEngineering(), StandardScaler(), models[model])
    models[model].fit(X_train, y_train)


In [2]:
def precision_at_k(y_true, y_score, k):
    top_k_indices = np.argsort(y_score)[::-1][:k]
    relevant = y_true[top_k_indices].sum()
    return relevant / k


scores = []
for model in range(len(models)):
    y_true = y_test.values
    y_score_proba = models[model].predict_proba(X_test)[:, 0]
    y_score = y_score_proba >= 0.5
    auc_score = roc_auc_score(y_true, y_score)
    precision_at_k_score = precision_at_k(y_true, y_score_proba, 100)
    scores.append((models[model].steps, auc_score, precision_at_k_score))

In [3]:
scores.sort(key=lambda x: x[2], reverse=True)
scores

[([('featureengineering',
    <src.feature_engineering.FeatureEngineering at 0x2679c616860>),
   ('standardscaler', StandardScaler()),
   ('randomforestclassifier', RandomForestClassifier())],
  0.5,
  np.float64(0.18)),
 ([('featureengineering',
    <src.feature_engineering.FeatureEngineering at 0x2679e277790>),
   ('standardscaler', StandardScaler()),
   ('xgbclassifier',
    XGBClassifier(base_score=None, booster=None, callbacks=None,
                  colsample_bylevel=None, colsample_bynode=None,
                  colsample_bytree=None, device=None, early_stopping_rounds=None,
                  enable_categorical=False, eval_metric=None, feature_types=None,
                  feature_weights=None, gamma=None, grow_policy=None,
                  importance_type=None, interaction_constraints=None,
                  learning_rate=None, max_bin=None, max_cat_threshold=None,
                  max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
                  max_leaves=None,