In [1]:
import pickle
import pandas as pd
import numpy as np
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score, roc_auc_score ,classification_report
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier

import warnings
# Installs a blanket "ignore" filter: every warning category from every
# library is silenced for the rest of the kernel session.
# NOTE(review): this also hides deprecation and convergence warnings that may
# matter when re-running the models below; consider narrowing the filter.
warnings.filterwarnings('ignore')

In [2]:
def find_best_threshold_f1_score(train_labels, oofs, average='macro', verbose=True):
    """Grid-search the probability cut-off that maximises the F1 score.

    Candidate thresholds come from ``np.arange(0.1, 0.9, 0.01)``. A sample is
    labelled positive when its score is ``>=`` the threshold, which is the
    same rule ``scoring`` uses when the chosen threshold is applied.

    Parameters
    ----------
    train_labels : array-like
        Ground-truth binary labels.
    oofs : array-like
        Predicted scores/probabilities, one per label.
    average : str, default 'macro'
        Forwarded to ``sklearn.metrics.f1_score``.
    verbose : bool, default True
        When True, print each candidate threshold as it is evaluated
        (the original progress output). Pass False to keep cell output short.

    Returns
    -------
    (best_threshold, best_score)
        The first threshold that reaches the highest F1, and that F1.
        ``(0, 0)`` when no candidate yields an F1 above zero.
    """
    scores = np.asarray(oofs)  # accept lists / Series as well as ndarrays
    best_score = 0
    best_threshold = 0
    for threshold in np.arange(0.1, 0.9, 0.01):
        if verbose:
            print(f'{threshold:.02f}, ', end='')
        # ">=" (not ">") so tuning and scoring() agree on boundary values.
        preds = (scores >= threshold).astype('int')
        m = f1_score(train_labels, preds, average=average)
        # Strict ">" keeps the lowest threshold among ties.
        if m > best_score:
            best_score = m
            best_threshold = threshold
    return best_threshold, best_score

In [3]:
# Directories holding the tuned stage-1 checkpoints and the train/test splits.
# NOTE(review): these are absolute paths on one machine; change them here
# (single place) when running the notebook elsewhere.
CHECKPOINT_DIR = "/Users/natalie/Desktop/DS Thesis/user-churn-prediction/checkpoints"
DATA_DIR = "/Users/natalie/Desktop/DS Thesis/Code/data"


def _load_checkpoint(name):
    """Return the object pickled in ``<CHECKPOINT_DIR>/<name>_model_after_tune.pkl``."""
    # pickle.load can execute arbitrary code: only point this at checkpoint
    # files produced by this project.
    with open(f"{CHECKPOINT_DIR}/{name}_model_after_tune.pkl", "rb") as f:
        return pickle.load(f)


# One checkpoint per imbalance-handling variant; the variable names are kept
# as-is because later cells read them.
nothing_loadded = _load_checkpoint("nothing")
smote_loadded = _load_checkpoint("smote")
class_weight_loadded = _load_checkpoint("class_weight")
under_sampling_loadded = _load_checkpoint("under_sampling")

test = pd.read_parquet(f"{DATA_DIR}/test.parquet")
train = pd.read_parquet(f"{DATA_DIR}/train.parquet")

In [None]:
# Drop the `province` column from both splits.
# `errors='ignore'` keeps this cell idempotent: because `train`/`test` are
# rebound in place, a second run would otherwise raise KeyError once the
# column is already gone.
test = test.drop(columns=['province'], errors='ignore')
train = train.drop(columns=['province'], errors='ignore')

In [4]:
# Model names are read from the "nothing" checkpoint only.
MODEL_NAMES = nothing_loadded['model_names']

# Each checkpoint dict is split into its score table, out-of-fold
# predictions, fitted models and test-set predictions.
nothing_score_df, nothing_oofs, nothing_models, nothing_predictions = (
    nothing_loadded['score_df'],
    nothing_loadded['oofs'],
    nothing_loadded['models'],
    nothing_loadded['predictions'],
)

smote_score_df, smote_oofs, smote_models, smote_predictions = (
    smote_loadded['score_df'],
    smote_loadded['oofs'],
    smote_loadded['models'],
    smote_loadded['predictions'],
)

# Note the models variable here is `class_weight_df_models` (not
# `class_weight_models`); the name is preserved for any cell that reads it.
class_weight_score_df, class_weight_oofs, class_weight_df_models, class_weight_predictions = (
    class_weight_loadded['score_df'],
    class_weight_loadded['oofs'],
    class_weight_loadded['models'],
    class_weight_loadded['predictions'],
)

under_sampling_score_df, under_sampling_oofs, under_sampling_models, under_sampling_predictions = (
    under_sampling_loadded['score_df'],
    under_sampling_loadded['oofs'],
    under_sampling_loadded['models'],
    under_sampling_loadded['predictions'],
)

In [31]:
def scoring(y_test,y_pred_proba, best_threshold):
    """Print and return classification metrics for thresholded probabilities.

    Parameters
    ----------
    y_test : array-like
        Ground-truth binary labels (0/1).
    y_pred_proba : array-like
        Predicted scores for the positive class, one per label.
    best_threshold : float
        Scores ``>=`` this value are labelled 1, all others 0.

    Returns
    -------
    (acc, f1, auc, specificity, sensitivity)
        Accuracy, macro-averaged F1, ROC AUC (computed on the raw scores, so
        it does not depend on the threshold), TN/(TN+FP) and TP/(TP+FN).
    """
    # Vectorised thresholding instead of a Python-level loop over every row.
    y_pred = (np.asarray(y_pred_proba) >= best_threshold).astype(int)
    acc = accuracy_score(y_test, y_pred)
    _f1_score = f1_score(y_test, y_pred, average='macro')
    auc_score = roc_auc_score(y_test, y_pred_proba)
    # Pin the label order so the matrix is always 2x2 and the unpack below
    # cannot fail when a class is absent from both y_test and y_pred.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
    specificity = tn / (tn+fp)
    sensitivity = tp / (tp+fn)
    print("accuracy", acc)
    print("f1_score", _f1_score)
    print("auc", auc_score)
    print("sensitivity", sensitivity, "specificity", specificity)
    print(classification_report(y_test, y_pred, digits=4))
    return acc, _f1_score, auc_score, specificity, sensitivity

In [6]:
# Number of boosting rounds and the random seed used by the XGBoost models.
N_ESTIMATORS = 200
SEED = 42

# Keyword arguments passed to XGBClassifier.
# Options that were tried and are currently disabled:
#   scale_pos_weight=3
#   tree_method='gpu_hist'
XGBoost_Hyperparameters = dict(
    objective='binary:logistic',
    eval_metric=['logloss', 'auc'],
    n_estimators=N_ESTIMATORS,
    learning_rate=0.034630277480196384,
    max_depth=9,
    colsample_bytree=0.8,
    subsample=0.30000000000000004,
    reg_alpha=0.0020136244579038245,
    reg_lambda=1.3270228907353322e-06,
    seed=SEED,
    enable_categorical=True,
    early_stopping_rounds=50,
)

In [20]:
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

def stacking(step_1_oofs,step_1_predictions):
    """Stack stage-1 predictions with an XGBoost meta-model and score on test.

    Steps:
      1. For each ``kfold`` split, fit an XGBClassifier (with early stopping)
         on the stage-1 out-of-fold scores, record its out-of-fold
         probabilities, and print that fold's metrics.
      2. Tune the decision threshold on the full out-of-fold vector.
      3. Refit on all training rows without early stopping (there is no
         validation set for it) and score the test set with that threshold.

    Parameters
    ----------
    step_1_oofs : numpy.ndarray
        Stage-1 out-of-fold scores, row-aligned with the global ``train``.
        Assumed to be 2-D (n_train, n_models) since it is used directly as
        the meta-model feature matrix -- TODO confirm against the checkpoint.
    step_1_predictions : sequence of array-like
        Stage-1 test-set scores; stacked with ``np.vstack`` and transposed to
        form the test feature matrix. Presumably one vector per stage-1 model.

    Returns
    -------
    (acc, f1, auc, specificity, sensitivity, y_pred_proba)
        Test-set metrics from ``scoring`` plus the test-set probabilities.

    Reads the module-level ``train``, ``test``, ``kfold`` and
    ``XGBoost_Hyperparameters``.
    """
    y = train['churn_user']
    oofs = np.zeros(step_1_oofs.shape[0])
    # Split on the matrix that was actually passed in, not on the unrelated
    # global `class_weight_oofs` the original referenced.
    for train_index, valid_index in kfold.split(step_1_oofs, y):
        X_train = step_1_oofs[train_index]
        X_valid = step_1_oofs[valid_index]
        y_train = y.iloc[train_index]
        y_valid = y.iloc[valid_index]
        xgb_model = XGBClassifier(**XGBoost_Hyperparameters)
        xgb_model.fit(X_train, y_train,
                    eval_set=[(X_train, y_train), (X_valid, y_valid)],
                    verbose=50)
        oofs[valid_index] = xgb_model.predict_proba(X_valid)[:,1]
        # Per-fold diagnostics only: the threshold is tuned and evaluated on
        # the same fold, so these numbers are optimistic.
        fold_threshold, _fold_score = find_best_threshold_f1_score(y_valid, oofs[valid_index])
        scoring(y_valid, oofs[valid_index], fold_threshold)
    best_threshold, _best_score = find_best_threshold_f1_score(y, oofs)
    # The final fit has no eval_set, so early stopping must be dropped.
    final_params = {name: value for name, value in XGBoost_Hyperparameters.items()
                    if name != 'early_stopping_rounds'}
    xgb_model = XGBClassifier(**final_params)
    xgb_model.fit(step_1_oofs, y)
    X_test = np.vstack(step_1_predictions).transpose()
    y_test = test['churn_user']
    y_pred_proba = xgb_model.predict_proba(X_test)[:,1]
    acc, _f1_score, auc_score, specificity, sensitivity = scoring(y_test,y_pred_proba,best_threshold)
    return acc, _f1_score, auc_score, specificity, sensitivity, y_pred_proba


In [23]:
def blend(stage_1_oofs, stage_1_predictions):
    """Average the stage-1 scores and evaluate the average on the test set.

    The decision threshold is tuned on the row-wise mean of ``stage_1_oofs``
    against ``train['churn_user']``; the test score is the row-wise mean of
    the stacked-and-transposed ``stage_1_predictions``.

    Returns ``(acc, f1, auc, specificity, sensitivity, y_pred_proba)`` where
    ``y_pred_proba`` is the averaged test-set score vector.

    Reads the module-level ``train`` and ``test`` frames.
    """
    oof_mean = stage_1_oofs.mean(axis=1)
    best_threshold, _best_score = find_best_threshold_f1_score(train['churn_user'], oof_mean)

    test_matrix = np.vstack(stage_1_predictions).transpose()
    y_pred_proba = test_matrix.mean(axis=1)

    metrics = scoring(test['churn_user'], y_pred_proba, best_threshold)
    acc, _f1_score, auc_score, specificity, sensitivity = metrics

    # scoring() has already printed these values; they are echoed again here.
    for label, value in (("accuracy", acc),
                         ("f1_score", _f1_score),
                         ("auc", auc_score),
                         ("specificity", specificity),
                         ("sensitivity", sensitivity)):
        print(label, value)
    return acc, _f1_score, auc_score, specificity, sensitivity, y_pred_proba

In [41]:
def ensemble(stage_1_oofs, stage_1_predictions):
    """Run ``blend`` then ``stacking`` on the same stage-1 outputs.

    Each method's five metrics are printed after it finishes. Returns
    ``(score_df, predictions)``: a two-row DataFrame (``'blend'``, ``'stack'``)
    of the metrics, and a list with each method's test-set probabilities in
    that same order.
    """
    metric_labels = ("accuracy", "f1_score", "auc", "specificity", "sensitivity")
    metric_rows = []
    predictions = []

    # Order matters: row 0 of the table is the blend, row 1 the stack.
    for combine in (blend, stacking):
        *metrics, y_pred_proba = combine(stage_1_oofs, stage_1_predictions)
        for label, value in zip(metric_labels, metrics):
            print(label, value)
        metric_rows.append(metrics)
        predictions.append(y_pred_proba)

    # Transpose the per-method rows into one list per metric column.
    (accuracy_scores, f1_scores, auc_scores,
     specificity_scores, sensitivity_scores) = (list(column) for column in zip(*metric_rows))

    score_df = pd.DataFrame({'model_name': ['blend', 'stack'],
                             'accuracy_score': accuracy_scores,
                             'f1_score': f1_scores,
                             'auc_score': auc_scores,
                             'specificity_score': specificity_scores,
                             'sensitivity_score': sensitivity_scores})
    return score_df, predictions

In [42]:
# Blend and stack the stage-1 outputs loaded from the "nothing" checkpoint
# (presumably the baseline with no imbalance handling -- confirm).
nothing_ensemble_df, nothing_ensemble_predictions = ensemble(nothing_oofs, nothing_predictions)

0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.30, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.40, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.60, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.80, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, accuracy 0.82144
f1_score 0.7728461745986004
auc 0.88954296234857
sensitivity 0.7335976781261497 specificity 0.849888134291804
              precision    recall  f1-score   support

           0     0.9078    0.8499    0.8779    151074
           1     0.6128    0.7336    0.6678     48926

    accuracy                         0.8214    200000
   macro avg     0.7603    0.7917    0.7728    200000
weighted avg     0.8357    0.8214    0.8265    200000

accuracy 0.82144
f1_score 0.7728461745986004
auc 0.88954296234857


In [43]:
# Blend and stack the stage-1 outputs loaded from the SMOTE checkpoint.
smote_ensemble_df, smote_ensemble_predictions = ensemble(smote_oofs, smote_predictions)

0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.30, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.40, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.60, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.80, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, accuracy 0.817555
f1_score 0.7693196381548502
auc 0.8920577932331126
sensitivity 0.7363773862567959 specificity 0.8438447383401512
              precision    recall  f1-score   support

           0     0.9081    0.8438    0.8748    151074
           1     0.6043    0.7364    0.6638     48926

    accuracy                         0.8176    200000
   macro avg     0.7562    0.7901    0.7693    200000
weighted avg     0.8338    0.8176    0.8232    200000

accuracy 0.817555
f1_score 0.7693196381548502
auc 0.8920577932

In [44]:
# Blend and stack the stage-1 outputs loaded from the under-sampling checkpoint.
# NOTE(review): the class_weight checkpoint is loaded earlier but is not
# ensembled in the cells shown here -- confirm whether that is intentional.
under_sampling_ensemble_df, under_sampling_ensemble_predictions = ensemble(under_sampling_oofs, under_sampling_predictions)

0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.30, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.40, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.50, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.60, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.70, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.80, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, accuracy 0.82601
f1_score 0.7717170619527024
auc 0.8909411367067884
sensitivity 0.6915137145893799 specificity 0.869567231952553
              precision    recall  f1-score   support

           0     0.8969    0.8696    0.8830    151074
           1     0.6319    0.6915    0.6604     48926

    accuracy                         0.8260    200000
   macro avg     0.7644    0.7805    0.7717    200000
weighted avg     0.8321    0.8260    0.8286    200000

accuracy 0.82601
f1_score 0.7717170619527024
auc 0.8909411367067

In [45]:
# Persist each ensemble's score table together with its test-set predictions.
# Pairing every name with its own (score_df, predictions) tuple fixes the
# original bug where the smote checkpoint stored `smote_ensemble_df` under
# the "predictions" key instead of `smote_ensemble_predictions`.
# NOTE(review): the output path is relative to the kernel's working
# directory, whereas the input checkpoints are read from an absolute path.
_ensemble_results = {
    "nothing": (nothing_ensemble_df, nothing_ensemble_predictions),
    "smote": (smote_ensemble_df, smote_ensemble_predictions),
    "under_sampling": (under_sampling_ensemble_df, under_sampling_ensemble_predictions),
}
for _name, (_score_df, _predictions) in _ensemble_results.items():
    with open(f"../checkpoints/{_name}_ensemble.pkl", "wb") as f:
        pickle.dump({
                "score_df": _score_df,
                "predictions": _predictions}, f, protocol=pickle.HIGHEST_PROTOCOL)

In [46]:
# Display the blend vs. stack metric table for the under-sampling variant.
under_sampling_ensemble_df

Unnamed: 0,model_name,accuracy_score,f1_score,auc_score,specificity_score,sensitivity_score
0,blend,0.82601,0.771717,0.890941,0.869567,0.691514
1,stack,0.83393,0.779842,0.899027,0.880092,0.691391
