In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

import sys
import os

from pathlib import Path

# Make the repository root (the parent of the notebook's working directory)
# importable so that the project-local `ml` package resolves.
parent = Path(os.path.abspath("")).resolve().parents[0]
# sys.path holds plain strings, so the membership test must compare the string
# form; comparing the Path object never matches and re-inserts on every run.
if str(parent) not in sys.path:
    sys.path.insert(0, str(parent))
    
from sklearn.model_selection import StratifiedKFold

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB, BernoulliNB, GaussianNB
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from sklearn.preprocessing import MinMaxScaler
from ml.utils.metrics import get_classification_metrics, get_probability_measures, get_lift_demotion_scores, find_all

In [2]:
def merge_strings_and_integers(val):
    """Coerce a raw category value to ``int``, bucketing everything else.

    Parameters
    ----------
    val : object
        Any value accepted (or rejected) by the built-in ``int``.

    Returns
    -------
    int or str
        ``int(val)`` when the conversion succeeds, otherwise the literal
        string ``'other'``.
    """
    try:
        return int(val)
    # Only conversion failures are treated as "not an integer": TypeError for
    # unsupported types such as None, ValueError for non-numeric text and NaN,
    # OverflowError for infinities. Interrupts and exits propagate normally.
    except (TypeError, ValueError, OverflowError):
        return 'other'

In [3]:
# Load the feature table and turn it into an all-numeric modelling frame.
# Every step returns a new frame, so re-running the cell always starts from
# the CSV on disk.
bs_rate_col = 'MODE(consumptions.BS_RATE)'
request_type_col = 'MODE(requests.REQUEST_TYPE)'

df = (
    pd.read_csv("final_autof.csv")
    # Mode features that are not used for modelling.
    .drop(columns=[
        'MODE(consumptions.MS_METER_NBR)',
        'MODE(representations.SUPPLIER)',
        'MODE(representations.SUPPLIER_TO)',
    ])
    # Rows without a zero count are discarded.
    .dropna(subset=['number_of_zeros'])
    # Integer-like rate codes become ints; anything else is bucketed as 'other'.
    .assign(**{bs_rate_col: lambda frame: frame[bs_rate_col].apply(merge_strings_and_integers)})
    .drop(columns=['rec_id'])
    .drop_duplicates()
    # A request type of 0 is relabelled before one-hot encoding.
    .assign(**{request_type_col: lambda frame: frame[request_type_col].replace(0, 'unknown')})
    .pipe(pd.get_dummies, columns=[request_type_col], prefix=request_type_col)
    .pipe(pd.get_dummies, columns=[bs_rate_col], prefix=bs_rate_col)
    .drop(columns=['voltage'])
    # Remaining gaps are filled with 0.
    .fillna(0)
)
df.head()

  df = pd.read_csv("final_autof.csv")


Unnamed: 0,COUNT(consumptions),MAX(consumptions.CSS_MS_HS_USE),MEAN(consumptions.CSS_MS_HS_USE),MIN(consumptions.CSS_MS_HS_USE),NUM_UNIQUE(consumptions.BS_RATE),NUM_UNIQUE(consumptions.MS_METER_NBR),SKEW(consumptions.CSS_MS_HS_USE),STD(consumptions.CSS_MS_HS_USE),SUM(consumptions.CSS_MS_HS_USE),MODE(consumptions.DAY(MEASUREMENT_DATE)),...,MODE(consumptions.BS_RATE)_40,MODE(consumptions.BS_RATE)_41,MODE(consumptions.BS_RATE)_42,MODE(consumptions.BS_RATE)_43,MODE(consumptions.BS_RATE)_44,MODE(consumptions.BS_RATE)_52,MODE(consumptions.BS_RATE)_53,MODE(consumptions.BS_RATE)_54,MODE(consumptions.BS_RATE)_55,MODE(consumptions.BS_RATE)_other
0,4.0,135.0,64.25,12.0,1.0,1.0,0.758461,54.389797,257.0,2.0,...,0,0,0,0,0,0,0,0,0,0
1,10.0,127.0,51.9,0.0,1.0,2.0,0.783315,43.072162,519.0,2.0,...,0,0,0,0,0,0,0,0,0,0
2,12.0,6068.0,3924.166667,2446.0,1.0,1.0,0.574519,1012.516472,47090.0,28.0,...,0,0,0,0,0,0,0,0,0,0
3,6.0,1061.0,479.333333,7.0,2.0,1.0,0.020392,409.340282,2876.0,4.0,...,0,0,0,0,0,0,0,0,0,0
4,24.0,1247.0,536.833333,0.0,2.0,1.0,0.537625,335.364392,12884.0,4.0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
def _default_classifiers():
    """Return the default ``name -> unfitted estimator`` mapping used by ``train``.

    The Naive Bayes baselines are not part of the default set.

    NOTE(review): LogisticRegression keeps its default iteration budget; the
    recorded runs emitted "TOTAL NO. of ITERATIONS REACHED LIMIT" warnings.
    Pass a custom mapping to ``train`` to evaluate a differently configured
    model.
    """
    return {
        "KNN": KNeighborsClassifier(),
        "DT": DecisionTreeClassifier(random_state=0),
        "RF": RandomForestClassifier(random_state=0),
        "LR": LogisticRegression(random_state=0),
        "XGB": XGBClassifier(random_state=0),
        "CatBoost": CatBoostClassifier(random_state=0),
    }


def _mean(values):
    """Arithmetic mean of a non-empty sequence of per-fold scores."""
    return sum(values) / len(values)


def train(X, y, classifiers=None, cv=None):
    """Cross-validate several classifiers and summarise their scores.

    For every classifier and every fold the features are min-max scaled (the
    scaler is fitted on the training fold only), the model is fitted, and the
    classification, probability and lift/demotion metrics are collected. The
    lift/demotion metrics are evaluated at ten cut-offs: 10%, 20%, ..., 100%
    of the number of positive samples in the test fold.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; rows are selected with ``.iloc``.
    y : pandas.Series
        Labels; positives are the samples equal to 1. Must have as many
        entries as ``X`` has rows.
    classifiers : dict, optional
        Mapping ``name -> unfitted estimator`` exposing ``fit``, ``predict``
        and ``predict_proba``. Defaults to KNN, DT, RF, LR, XGB and CatBoost.
    cv : cross-validation splitter, optional
        Object with a ``split(X, y)`` method. Defaults to a 5-fold shuffled
        ``StratifiedKFold`` with ``random_state=42``.

    Returns
    -------
    pandas.DataFrame
        One row per classifier. For each metric there is a column holding
        the mean over folds and a ``<metric>_std`` column holding the
        standard deviation over folds.

    Raises
    ------
    AssertionError
        If ``X`` and ``y`` differ in length.
    """
    assert len(X) == len(y)

    if classifiers is None:
        classifiers = _default_classifiers()
    if cv is None:
        # Explicit default instead of relying on a module-level splitter that
        # only exists once a later notebook cell has been executed.
        cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    base_metrics = ("Accuracy", "Precision", "Recall", "F1",
                    "Brier", "AUC-ROC", "PR-AUC")
    decile_metrics = ("Lift", "Demotion", "Weighted")
    deciles = range(1, 11)

    averaged_scores = dict()

    print("Number of samples:", len(X))
    print("Data bins:", np.bincount(y))

    for clf_name, clf in classifiers.items():
        print("Classifier:", clf_name)

        # Insertion order here fixes the column order of the returned frame.
        fold_scores = {metric: [] for metric in base_metrics}
        for k in deciles:
            for metric in decile_metrics:
                fold_scores[metric + str(k)] = []

        for train_index, test_index in cv.split(X, y):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            y_true = y_test.values

            # Fit the scaler on the training fold only so that no statistics
            # of the test fold leak into training.
            scaler = MinMaxScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            if isinstance(clf, CatBoostClassifier):
                # CatBoost logs every iteration unless silenced.
                clf.fit(X_train, y_train, verbose=False)
            else:
                clf.fit(X_train, y_train)

            y_pred = clf.predict(X_test)
            acc, precision, recall, f1 = get_classification_metrics(y_true, y_pred)

            # Score of the positive class (second probability column). Reading
            # it directly avoids reconstructing it as 1 - P(class 0), which is
            # mathematically the same but can lose low-order bits (and create
            # rank ties) when the probabilities are very small.
            y_scores = clf.predict_proba(X_test)[:, 1]
            brier, roc_auc, pr_auc = get_probability_measures(y_true, y_scores)

            for metric, value in zip(base_metrics,
                                     (acc, precision, recall, f1, brier, roc_auc, pr_auc)):
                fold_scores[metric].append(value)

            num_ones = np.sum(y_test == 1)
            for k in deciles:
                # k / 10 is the same double as the literal 0.1, 0.2, ..., so
                # the truncated cut-offs match the hand-written ones exactly.
                num_samples = int(k / 10 * num_ones)
                lift, demotion, weighted_score = get_lift_demotion_scores(
                    y_true, y_scores, num_samples=num_samples)
                fold_scores["Lift" + str(k)].append(lift)
                fold_scores["Demotion" + str(k)].append(demotion)
                fold_scores["Weighted" + str(k)].append(weighted_score)

        summary = dict()
        for metric, values in fold_scores.items():
            summary[metric] = _mean(values)
            summary[metric + "_std"] = np.std(values)
        averaged_scores[clf_name] = summary

        # The console summary reports the lift metrics at the 40% cut-off.
        print("Average - Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, F1: {:.4f}, Brier: {:.4f}, AUC-ROC: {:.4f}, PR-AUC: {:.4f}, Lift: {:.4f}, Demotion: {:.4f}, Weighted: {:.4f}\n"
                .format(summary["Accuracy"], summary["Precision"],
                        summary["Recall"], summary["F1"],
                        summary["Brier"], summary["AUC-ROC"], summary["PR-AUC"],
                        summary["Lift4"], summary["Demotion4"], summary["Weighted4"]))
        print("\n\n")
    return pd.DataFrame(averaged_scores).T

In [5]:
# 5-fold stratified, shuffled splitter with a fixed seed.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Row count before and after removing exact duplicate rows.
print(len(df))
df = df.drop_duplicates()
print(len(df))

# Separate the label column from the features and evaluate every classifier.
y = df['target']
X = df.drop(columns=['target'])
results = train(X, y)

700519
700519
Number of samples: 700519
Data bins: [698967   1552]
Classifier: GaussianNB
Average - Accuracy: 0.0270, Precision: 0.0022, Recall: 0.9897, F1: 0.0045, Brier: 0.9729, AUC-ROC: 0.5077, PR-AUC: 0.0022, Lift: 0.0016, Demotion: 0.9984, Weighted: 0.3006




Classifier: BernoulliNB
Average - Accuracy: 0.0270, Precision: 0.0022, Recall: 0.9897, F1: 0.0045, Brier: 0.9729, AUC-ROC: 0.5077, PR-AUC: 0.0022, Lift: 0.0016, Demotion: 0.9984, Weighted: 0.3006




Classifier: MultinomialNB
Average - Accuracy: 0.9977, Precision: 0.0250, Recall: 0.0006, F1: 0.0013, Brier: 0.0023, AUC-ROC: 0.8138, PR-AUC: 0.0106, Lift: 0.0194, Demotion: 1.0000, Weighted: 0.3135




Classifier: DT
Average - Accuracy: 0.9952, Precision: 0.0653, Recall: 0.0863, F1: 0.0743, Brier: 0.0048, AUC-ROC: 0.5418, PR-AUC: 0.0077, Lift: 0.0661, Demotion: 1.0000, Weighted: 0.3463




Classifier: RF
Average - Accuracy: 0.9978, Precision: 1.0000, Recall: 0.0090, F1: 0.0179, Brier: 0.0021, AUC-ROC: 0.7961, PR-AUC: 0.0952, Lif

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Average - Accuracy: 0.9978, Precision: 0.1533, Recall: 0.0026, F1: 0.0051, Brier: 0.0022, AUC-ROC: 0.8671, PR-AUC: 0.0464, Lift: 0.1661, Demotion: 1.0000, Weighted: 0.4163




Classifier: XGB
Average - Accuracy: 0.9978, Precision: 0.5883, Recall: 0.0406, F1: 0.0758, Brier: 0.0021, AUC-ROC: 0.8937, PR-AUC: 0.1005, Lift: 0.2548, Demotion: 1.0000, Weighted: 0.4784




Classifier: CatBoost
Average - Accuracy: 0.9978, Precision: 0.7208, Recall: 0.0483, F1: 0.0903, Brier: 0.0021, AUC-ROC: 0.8939, PR-AUC: 0.1157, Lift: 0.2903, Demotion: 1.0000, Weighted: 0.5032






In [6]:
# Show all metric columns and rank the classifiers by Weighted4
# (ties broken by its standard deviation), best first.
pd.options.display.max_columns = 500
results = results.sort_values(by=['Weighted4', 'Weighted4_std'], ascending=False)

In [8]:
# Persist the ranked table; the classifier names are written as the index column.
results.to_csv("classifier_res1.csv")

In [10]:
# Display the full ranked metrics table.
results

Unnamed: 0,Accuracy,Accuracy_std,Precision,Precision_std,Recall,Recall_std,F1,F1_std,Brier,Brier_std,AUC-ROC,AUC-ROC_std,PR-AUC,PR-AUC_std,Lift1,Lift1_std,Demotion1,Demotion1_std,Weighted1,Weighted1_std,Lift2,Lift2_std,Demotion2,Demotion2_std,Weighted2,Weighted2_std,Lift3,Lift3_std,Demotion3,Demotion3_std,Weighted3,Weighted3_std,Lift4,Lift4_std,Demotion4,Demotion4_std,Weighted4,Weighted4_std,Lift5,Lift5_std,Demotion5,Demotion5_std,Weighted5,Weighted5_std,Lift6,Lift6_std,Demotion6,Demotion6_std,Weighted6,Weighted6_std,Lift7,Lift7_std,Demotion7,Demotion7_std,Weighted7,Weighted7_std,Lift8,Lift8_std,Demotion8,Demotion8_std,Weighted8,Weighted8_std,Lift9,Lift9_std,Demotion9,Demotion9_std,Weighted9,Weighted9_std,Lift10,Lift10_std,Demotion10,Demotion10_std,Weighted10,Weighted10_std
CatBoost,0.997849,1.9e-05,0.720836,0.072434,0.048323,0.009321,0.090337,0.016369,0.00207,2.2e-05,0.893919,0.006897,0.115654,0.013396,0.606452,0.065794,1.0,0.0,0.724516,0.046056,0.409677,0.038978,1.0,0.0,0.586774,0.027284,0.337634,0.033037,1.0,0.0,0.536344,0.023126,0.290323,0.027467,1.0,0.0,0.503226,0.019227,0.256774,0.026569,1.0,0.0,0.479742,0.018599,0.225806,0.028449,1.0,0.0,0.458065,0.019914,0.208295,0.026776,1.0,0.0,0.445806,0.018743,0.195968,0.02414,1.0,0.0,0.437177,0.016898,0.182079,0.021071,1.0,0.0,0.427455,0.01475,0.1682,0.020452,1.0,0.0,0.41774,0.014317
XGB,0.997812,2.9e-05,0.588264,0.081948,0.040591,0.008551,0.075843,0.015521,0.002099,3.3e-05,0.893664,0.005076,0.100469,0.018125,0.509677,0.087514,1.0,0.0,0.656774,0.06126,0.390323,0.065635,1.0,0.0,0.573226,0.045945,0.303226,0.043225,1.0,0.0,0.512258,0.030258,0.254839,0.03871,1.0,0.0,0.478387,0.027097,0.229677,0.032796,1.0,0.0,0.460774,0.022957,0.204301,0.023558,1.0,0.0,0.443011,0.016491,0.188018,0.023207,1.0,0.0,0.431613,0.016245,0.174194,0.019983,1.0,0.0,0.421935,0.013988,0.164875,0.018276,1.0,0.0,0.415412,0.012793,0.157878,0.012438,1.0,0.0,0.410514,0.008707
RF,0.997804,6e-06,1.0,0.0,0.009022,0.001296,0.017879,0.00255,0.002115,1.8e-05,0.7961,0.011823,0.095216,0.011905,0.516129,0.079016,1.0,0.0,0.66129,0.055311,0.351613,0.04493,1.0,0.0,0.546129,0.031451,0.290323,0.02452,1.0,0.0,0.503226,0.017164,0.248387,0.021878,1.0,0.0,0.473871,0.015315,0.230968,0.023932,1.0,0.0,0.461677,0.016752,0.205376,0.017471,1.0,0.0,0.443763,0.01223,0.188018,0.011437,1.0,0.0,0.431613,0.008006,0.175806,0.010999,1.0,0.0,0.423065,0.007699,0.163441,0.014968,1.0,0.0,0.414409,0.010478,0.154656,0.014872,1.0,0.0,0.408259,0.01041
LR,0.997762,1.6e-05,0.153333,0.232475,0.002572,0.00375,0.005059,0.00738,0.002171,1.9e-05,0.867107,0.007248,0.046408,0.009956,0.206452,0.110998,1.0,0.0,0.444516,0.077698,0.190323,0.048279,1.0,0.0,0.433226,0.033796,0.184946,0.037496,1.0,0.0,0.429462,0.026247,0.166129,0.032499,1.0,0.0,0.41629,0.022749,0.156129,0.019738,1.0,0.0,0.40929,0.013817,0.149462,0.010425,1.0,0.0,0.404624,0.007298,0.136406,0.010748,1.0,0.0,0.395484,0.007524,0.125806,0.010008,1.0,0.0,0.388065,0.007005,0.124014,0.011012,1.0,0.0,0.38681,0.007709,0.117909,0.011962,1.0,0.0,0.382536,0.008373
DT,0.995236,0.000134,0.065333,0.006025,0.086338,0.008222,0.074335,0.006675,0.004764,0.000134,0.541796,0.004098,0.007707,0.000974,0.058065,0.02414,1.0,0.0,0.340645,0.016898,0.058065,0.016448,1.0,0.0,0.340645,0.011514,0.073118,0.01254,1.0,0.0,0.351183,0.008778,0.066129,0.006035,1.0,0.0,0.34629,0.004224,0.060645,0.009656,1.0,0.0,0.342452,0.006759,0.064516,0.008996,1.0,0.0,0.345161,0.006297,0.067281,0.009927,0.999078,0.001843,0.34682,0.007296,0.066129,0.009744,0.999194,0.001613,0.346048,0.007074,0.068817,0.012903,0.999283,0.001434,0.347957,0.00929,0.070876,0.012383,0.999355,0.00129,0.34942,0.008875
MultinomialNB,0.997699,7.8e-05,0.025,0.05,0.000643,0.001286,0.001254,0.002508,0.002315,6.9e-05,0.813825,0.014377,0.010584,0.001597,0.025806,0.02414,1.0,0.0,0.318065,0.016898,0.025806,0.021878,1.0,0.0,0.318065,0.015315,0.023656,0.01971,1.0,0.0,0.316559,0.013797,0.019355,0.013103,1.0,0.0,0.313548,0.009172,0.019355,0.011541,1.0,0.0,0.313548,0.008079,0.021505,0.01226,1.0,0.0,0.315054,0.008582,0.024885,0.013228,1.0,0.0,0.317419,0.00926,0.025806,0.013398,1.0,0.0,0.318065,0.009378,0.024373,0.013101,1.0,0.0,0.317061,0.009171,0.022552,0.011355,1.0,0.0,0.315786,0.007948
GaussianNB,0.027023,0.001393,0.002249,1.1e-05,0.989688,0.00474,0.004487,2.1e-05,0.972947,0.001394,0.507655,0.001576,0.00225,9e-06,0.006452,0.012903,1.0,0.0,0.304516,0.009032,0.003226,0.006452,1.0,0.0,0.302258,0.004516,0.002151,0.004301,0.997849,0.004301,0.30086,0.00172,0.001613,0.003226,0.998387,0.003226,0.300645,0.00129,0.003871,0.003161,0.99871,0.002581,0.302323,0.002024,0.003226,0.002634,0.998925,0.002151,0.301935,0.001686,0.002765,0.002258,0.999078,0.001843,0.301659,0.001446,0.002419,0.001975,0.999194,0.001613,0.301452,0.001265,0.002151,0.001756,0.999283,0.001434,0.30129,0.001124,0.001933,0.001579,0.999355,0.00129,0.30116,0.00101
BernoulliNB,0.027023,0.001393,0.002249,1.1e-05,0.989688,0.00474,0.004487,2.1e-05,0.972947,0.001394,0.507655,0.001576,0.00225,9e-06,0.006452,0.012903,1.0,0.0,0.304516,0.009032,0.003226,0.006452,1.0,0.0,0.302258,0.004516,0.002151,0.004301,0.997849,0.004301,0.30086,0.00172,0.001613,0.003226,0.998387,0.003226,0.300645,0.00129,0.003871,0.003161,0.99871,0.002581,0.302323,0.002024,0.003226,0.002634,0.998925,0.002151,0.301935,0.001686,0.002765,0.002258,0.999078,0.001843,0.301659,0.001446,0.002419,0.001975,0.999194,0.001613,0.301452,0.001265,0.002151,0.001756,0.999283,0.001434,0.30129,0.001124,0.001933,0.001579,0.999355,0.00129,0.30116,0.00101


In [5]:
# Row count before and after removing exact duplicate rows.
print(len(df))
df = df.drop_duplicates()
print(len(df))

# Load the encoded features; the 'auto_loss' column is dropped when the file
# contains it.
encodings = (
    pd.read_csv('autoencoder_classifier_loss_encoded.csv')
    .drop(columns=['auto_loss'], errors='ignore')
)
encodings.head()

700519
700519


Unnamed: 0,t_hidden_1,t_hidden_2,t_hidden_3,t_hidden_4,t_hidden_5,t_hidden_6,t_hidden_7,t_hidden_8,t_hidden_9,t_hidden_10,...,t_hidden_23,t_hidden_24,t_hidden_25,t_hidden_26,t_hidden_27,t_hidden_28,t_hidden_29,t_hidden_30,t_hidden_31,t_hidden_32
0,-0.408745,0.019283,0.875199,1.236878,-0.909931,1.124214,-0.485348,0.343595,0.063377,0.383679,...,0.227373,2.361559,1.938654,-0.574741,-1.06669,0.023123,-0.041047,0.218051,-1.160616,-1.216362
1,1.26145,0.385389,0.524206,0.335333,-1.855268,0.905791,0.01055,-0.675372,1.666657,-0.492278,...,0.844264,0.836465,1.691065,0.117468,-0.533461,-0.163281,0.55224,-0.471347,-0.875878,-0.59958
2,0.631862,1.284966,0.219037,1.735167,-0.802665,0.23424,-0.200826,0.336721,0.557764,-0.086913,...,-0.011472,2.336785,2.482246,-1.427288,-0.665344,-0.56608,-1.438796,0.712387,0.779548,-0.954446
3,-0.533879,-0.30028,0.737864,0.519675,-0.304382,1.147514,-0.642419,-0.139555,-0.96821,-0.300562,...,-0.675597,0.869136,1.958137,-0.50693,-1.264389,-0.298342,-0.625637,-0.40467,0.39997,-0.899167
4,-1.051911,0.350452,0.194913,0.542731,0.227543,0.751633,0.859556,-1.049925,-0.084372,-0.033673,...,-0.425382,0.602726,1.124325,0.165342,-0.799687,0.135509,-0.275075,0.080667,0.015887,0.094562


In [6]:
# Join the encodings to the features by row position: both frames get a fresh
# 0..n-1 index so that row i of one lines up with row i of the other.
df = df.reset_index(drop=True)
encodings = encodings.reset_index(drop=True)

# The join is a left join on that index. If the row counts differ it would
# silently leave NaNs in the encoding columns or ignore surplus encoding rows,
# so fail loudly instead.
assert len(df) == len(encodings), (
    "feature rows ({}) and encoding rows ({}) must match for a positional join"
    .format(len(df), len(encodings))
)

df = df.join(encodings)

# Row count before and after removing exact duplicate rows.
print(len(df))
df = df.drop_duplicates()
print(len(df))
df.head()

700519
700519


Unnamed: 0,COUNT(consumptions),MAX(consumptions.CSS_MS_HS_USE),MEAN(consumptions.CSS_MS_HS_USE),MIN(consumptions.CSS_MS_HS_USE),NUM_UNIQUE(consumptions.BS_RATE),NUM_UNIQUE(consumptions.MS_METER_NBR),SKEW(consumptions.CSS_MS_HS_USE),STD(consumptions.CSS_MS_HS_USE),SUM(consumptions.CSS_MS_HS_USE),MODE(consumptions.DAY(MEASUREMENT_DATE)),...,t_hidden_23,t_hidden_24,t_hidden_25,t_hidden_26,t_hidden_27,t_hidden_28,t_hidden_29,t_hidden_30,t_hidden_31,t_hidden_32
0,4.0,135.0,64.25,12.0,1.0,1.0,0.758461,54.389797,257.0,2.0,...,0.227373,2.361559,1.938654,-0.574741,-1.06669,0.023123,-0.041047,0.218051,-1.160616,-1.216362
1,10.0,127.0,51.9,0.0,1.0,2.0,0.783315,43.072162,519.0,2.0,...,0.844264,0.836465,1.691065,0.117468,-0.533461,-0.163281,0.55224,-0.471347,-0.875878,-0.59958
2,12.0,6068.0,3924.166667,2446.0,1.0,1.0,0.574519,1012.516472,47090.0,28.0,...,-0.011472,2.336785,2.482246,-1.427288,-0.665344,-0.56608,-1.438796,0.712387,0.779548,-0.954446
3,6.0,1061.0,479.333333,7.0,2.0,1.0,0.020392,409.340282,2876.0,4.0,...,-0.675597,0.869136,1.958137,-0.50693,-1.264389,-0.298342,-0.625637,-0.40467,0.39997,-0.899167
4,24.0,1247.0,536.833333,0.0,2.0,1.0,0.537625,335.364392,12884.0,4.0,...,-0.425382,0.602726,1.124325,0.165342,-0.799687,0.135509,-0.275075,0.080667,0.015887,0.094562


In [15]:
# Repeat the evaluation on the frame that now also holds the joined encodings.
y = df['target']
X = df.drop(columns=['target'])
results2 = train(X, y)

Number of samples: 700519
Data bins: [698967   1552]
Classifier: GaussianNB
Average - Accuracy: 0.0273, Precision: 0.0023, Recall: 0.9903, F1: 0.0045, Brier: 0.9726, AUC-ROC: 0.5084, PR-AUC: 0.0023, Lift: 0.0032, Demotion: 1.0000, Weighted: 0.3023




Classifier: BernoulliNB
Average - Accuracy: 0.0273, Precision: 0.0023, Recall: 0.9903, F1: 0.0045, Brier: 0.9726, AUC-ROC: 0.5084, PR-AUC: 0.0023, Lift: 0.0032, Demotion: 1.0000, Weighted: 0.3023




Classifier: MultinomialNB
Average - Accuracy: 0.9976, Precision: 0.0446, Recall: 0.0026, F1: 0.0049, Brier: 0.0024, AUC-ROC: 0.8229, PR-AUC: 0.0116, Lift: 0.0226, Demotion: 1.0000, Weighted: 0.3158




Classifier: DT
Average - Accuracy: 0.9954, Precision: 0.0782, Recall: 0.1012, F1: 0.0881, Brier: 0.0046, AUC-ROC: 0.5493, PR-AUC: 0.0099, Lift: 0.0694, Demotion: 1.0000, Weighted: 0.3485




Classifier: RF
Average - Accuracy: 0.9979, Precision: 0.8344, Recall: 0.0483, F1: 0.0909, Brier: 0.0021, AUC-ROC: 0.8002, PR-AUC: 0.1358, Lift: 0.3290, Dem

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Average - Accuracy: 0.9978, Precision: 0.4176, Recall: 0.0122, F1: 0.0237, Brier: 0.0021, AUC-ROC: 0.8876, PR-AUC: 0.0683, Lift: 0.1887, Demotion: 1.0000, Weighted: 0.4321




Classifier: CatBoost
Average - Accuracy: 0.9979, Precision: 0.6923, Recall: 0.0735, F1: 0.1321, Brier: 0.0020, AUC-ROC: 0.9038, PR-AUC: 0.1429, Lift: 0.3468, Demotion: 1.0000, Weighted: 0.5427






In [17]:
# Rank the results table: highest 'Weighted4' first; rows that tie on it are
# ordered by 'Weighted4_std', also from highest to lowest.
results2 = results2.sort_values(
    by=['Weighted4', 'Weighted4_std'],
    ascending=[False, False],
)
# Save the ranked table (row index included), then show it as the cell output.
results2.to_csv("classifier_res2.csv", index=True)
results2

Unnamed: 0,Accuracy,Accuracy_std,Precision,Precision_std,Recall,Recall_std,F1,F1_std,Brier,Brier_std,AUC-ROC,AUC-ROC_std,PR-AUC,PR-AUC_std,Lift1,Lift1_std,Demotion1,Demotion1_std,Weighted1,Weighted1_std,Lift2,Lift2_std,Demotion2,Demotion2_std,Weighted2,Weighted2_std,Lift3,Lift3_std,Demotion3,Demotion3_std,Weighted3,Weighted3_std,Lift4,Lift4_std,Demotion4,Demotion4_std,Weighted4,Weighted4_std,Lift5,Lift5_std,Demotion5,Demotion5_std,Weighted5,Weighted5_std,Lift6,Lift6_std,Demotion6,Demotion6_std,Weighted6,Weighted6_std,Lift7,Lift7_std,Demotion7,Demotion7_std,Weighted7,Weighted7_std,Lift8,Lift8_std,Demotion8,Demotion8_std,Weighted8,Weighted8_std,Lift9,Lift9_std,Demotion9,Demotion9_std,Weighted9,Weighted9_std,Lift10,Lift10_std,Demotion10,Demotion10_std,Weighted10,Weighted10_std
CatBoost,0.997876,5e-05,0.692295,0.098475,0.073457,0.021535,0.132131,0.035533,0.002032,3.2e-05,0.903834,0.007602,0.142906,0.016385,0.716129,0.116129,1.0,0.0,0.80129,0.08129,0.496774,0.064838,1.0,0.0,0.647742,0.045387,0.404301,0.049789,1.0,0.0,0.583011,0.034852,0.346774,0.026007,1.0,0.0,0.542742,0.018205,0.296774,0.029705,1.0,0.0,0.507742,0.020794,0.26129,0.02603,1.0,0.0,0.482903,0.018221,0.240553,0.021098,1.0,0.0,0.468387,0.014768,0.223387,0.018496,1.0,0.0,0.456371,0.012947,0.204301,0.015541,1.0,0.0,0.443011,0.010879,0.192015,0.01258,1.0,0.0,0.434411,0.008806
RF,0.997869,2.6e-05,0.83437,0.084856,0.048327,0.013992,0.090895,0.024528,0.002055,2.3e-05,0.800224,0.006673,0.135759,0.015638,0.748387,0.114323,1.0,0.0,0.823871,0.080026,0.519355,0.073134,1.0,0.0,0.663548,0.051194,0.404301,0.046919,1.0,0.0,0.583011,0.032843,0.329032,0.028122,1.0,0.0,0.530323,0.019685,0.285161,0.022124,1.0,0.0,0.499613,0.015487,0.256989,0.017137,0.998925,0.002151,0.47957,0.012194,0.23318,0.018108,0.999078,0.001843,0.462949,0.012869,0.220968,0.017183,0.999194,0.001613,0.454435,0.01214,0.201434,0.01805,0.999283,0.001434,0.440789,0.012736,0.192022,0.017591,0.999355,0.00129,0.434222,0.012409
LR,0.997773,2.2e-05,0.417599,0.197605,0.012237,0.005143,0.023743,0.01,0.00214,1.7e-05,0.887571,0.007754,0.06829,0.010508,0.283871,0.094379,1.0,0.0,0.49871,0.066065,0.254839,0.04493,1.0,0.0,0.478387,0.031451,0.232258,0.03233,1.0,0.0,0.462581,0.022631,0.18871,0.025806,1.0,0.0,0.432097,0.018065,0.179355,0.021359,1.0,0.0,0.425548,0.014951,0.172043,0.019827,1.0,0.0,0.42043,0.013879,0.160369,0.017094,1.0,0.0,0.412258,0.011966,0.157258,0.018033,1.0,0.0,0.410081,0.012623,0.151254,0.015441,1.0,0.0,0.405878,0.010809,0.144962,0.016582,1.0,0.0,0.401473,0.011608
DT,0.995363,0.000138,0.078195,0.005833,0.101158,0.007762,0.088144,0.006216,0.004637,0.000138,0.549253,0.003857,0.009935,0.001069,0.083871,0.069486,1.0,0.0,0.35871,0.04864,0.093548,0.035921,1.0,0.0,0.365484,0.025145,0.077419,0.02754,1.0,0.0,0.354194,0.019278,0.069355,0.021398,1.0,0.0,0.348548,0.014978,0.08129,0.019398,1.0,0.0,0.356903,0.013578,0.08172,0.014586,1.0,0.0,0.357204,0.01021,0.081106,0.011875,0.999078,0.001843,0.356498,0.00807,0.080645,0.008065,0.999194,0.001613,0.35621,0.005544,0.077419,0.007024,0.999283,0.001434,0.353978,0.004891,0.079898,0.002456,0.999355,0.00129,0.355735,0.001944
MultinomialNB,0.997613,0.000115,0.044551,0.036997,0.002579,0.002413,0.004857,0.0045,0.002444,9.5e-05,0.822901,0.014523,0.011648,0.00185,0.03871,0.037619,1.0,0.0,0.327097,0.026333,0.029032,0.023705,1.0,0.0,0.320323,0.016593,0.023656,0.015803,1.0,0.0,0.316559,0.011062,0.022581,0.017221,1.0,0.0,0.315806,0.012055,0.023226,0.011966,1.0,0.0,0.316258,0.008376,0.022581,0.012447,1.0,0.0,0.315806,0.008713,0.024885,0.011875,1.0,0.0,0.317419,0.008312,0.023387,0.011797,1.0,0.0,0.316371,0.008258,0.021505,0.010872,1.0,0.0,0.315054,0.00761,0.020618,0.009907,1.0,0.0,0.314433,0.006935
GaussianNB,0.02735,0.001425,0.002251,1e-05,0.990333,0.004077,0.004491,1.9e-05,0.972611,0.001417,0.508423,0.00205,0.002253,1.1e-05,0.006452,0.012903,1.0,0.0,0.304516,0.009032,0.006452,0.007902,1.0,0.0,0.304516,0.005531,0.004301,0.005268,1.0,0.0,0.303011,0.003687,0.003226,0.003951,1.0,0.0,0.302258,0.002766,0.002581,0.003161,1.0,0.0,0.301806,0.002212,0.002151,0.002634,0.998925,0.002151,0.301183,0.002188,0.002765,0.002258,0.999078,0.001843,0.301659,0.001968,0.002419,0.001975,0.999194,0.001613,0.301452,0.001722,0.002151,0.001756,0.999283,0.001434,0.30129,0.001531,0.002579,0.001289,0.999357,0.001286,0.301612,0.001288
BernoulliNB,0.02735,0.001425,0.002251,1e-05,0.990333,0.004077,0.004491,1.9e-05,0.972611,0.001417,0.508423,0.00205,0.002253,1.1e-05,0.006452,0.012903,1.0,0.0,0.304516,0.009032,0.006452,0.007902,1.0,0.0,0.304516,0.005531,0.004301,0.005268,1.0,0.0,0.303011,0.003687,0.003226,0.003951,1.0,0.0,0.302258,0.002766,0.002581,0.003161,1.0,0.0,0.301806,0.002212,0.002151,0.002634,0.998925,0.002151,0.301183,0.002188,0.002765,0.002258,0.999078,0.001843,0.301659,0.001968,0.002419,0.001975,0.999194,0.001613,0.301452,0.001722,0.002151,0.001756,0.999283,0.001434,0.30129,0.001531,0.002579,0.001289,0.999357,0.001286,0.301612,0.001288


In [7]:
# Build the modelling frame from the 32 't_hidden_*' columns plus the 'target' column.
#
# The previous version started with `tmp_df = df.copy()` and then immediately
# rebound `tmp_df` to a column selection of `df`, so the full-frame copy was
# allocated and thrown away. Only the selection is kept here.
hidden_cols = [f't_hidden_{i}' for i in range(1, 33)]  # 't_hidden_1' .. 't_hidden_32', in order

# .copy() on the narrow selection makes tmp_df explicitly independent of df,
# so any later edit to tmp_df cannot affect (or warn about) the source frame.
tmp_df = df[hidden_cols + ['target']].copy()

In [9]:
# Split the frame into the label column and the remaining feature columns.
y = tmp_df['target']
X = tmp_df.drop(columns=['target'])

# Shuffled 5-fold stratified splitter with a fixed seed.
# NOTE(review): skf is not passed to train(); presumably train() reads it as a
# notebook-level global - confirm against train()'s definition.
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Run the training/evaluation routine on this feature set and keep its result.
results3 = train(X, y)

Number of samples: 700519
Data bins: [698967   1552]
Classifier: GaussianNB
Average - Accuracy: 0.9250, Precision: 0.0175, Recall: 0.5967, F1: 0.0341, Brier: 0.0684, AUC-ROC: 0.8951, PR-AUC: 0.0293, Lift: 0.0839, Demotion: 1.0000, Weighted: 0.3587




Classifier: BernoulliNB
Average - Accuracy: 0.9250, Precision: 0.0175, Recall: 0.5967, F1: 0.0341, Brier: 0.0684, AUC-ROC: 0.8951, PR-AUC: 0.0293, Lift: 0.0839, Demotion: 1.0000, Weighted: 0.3587




Classifier: MultinomialNB
Average - Accuracy: 0.9978, Precision: 0.0000, Recall: 0.0000, F1: 0.0000, Brier: 0.0022, AUC-ROC: 0.8535, PR-AUC: 0.0148, Lift: 0.0306, Demotion: 1.0000, Weighted: 0.3215




Classifier: DT
Average - Accuracy: 0.9955, Precision: 0.0830, Recall: 0.1031, F1: 0.0919, Brier: 0.0045, AUC-ROC: 0.5503, PR-AUC: 0.0106, Lift: 0.0855, Demotion: 1.0000, Weighted: 0.3598




Classifier: RF
Average - Accuracy: 0.9979, Precision: 0.8259, Recall: 0.0612, F1: 0.1134, Brier: 0.0021, AUC-ROC: 0.7899, PR-AUC: 0.1356, Lift: 0.3323, Dem

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Average - Accuracy: 0.9978, Precision: 0.3500, Recall: 0.0045, F1: 0.0088, Brier: 0.0022, AUC-ROC: 0.8855, PR-AUC: 0.0585, Lift: 0.1710, Demotion: 1.0000, Weighted: 0.4197




Classifier: CatBoost
Average - Accuracy: 0.9979, Precision: 0.6864, Recall: 0.0709, F1: 0.1276, Brier: 0.0021, AUC-ROC: 0.8849, PR-AUC: 0.1274, Lift: 0.3081, Demotion: 1.0000, Weighted: 0.5156






In [10]:
# Let pandas render up to 500 columns so the wide results table is not elided.
pd.options.display.max_columns = 500

# Rank the results table: highest 'Weighted4' first; rows that tie on it are
# ordered by 'Weighted4_std', also from highest to lowest.
results3 = results3.sort_values(
    by=['Weighted4', 'Weighted4_std'],
    ascending=[False, False],
)
# Save the ranked table (row index included), then show it as the cell output.
results3.to_csv("classifier_res3.csv", index=True)
results3

Unnamed: 0,Accuracy,Accuracy_std,Precision,Precision_std,Recall,Recall_std,F1,F1_std,Brier,Brier_std,AUC-ROC,AUC-ROC_std,PR-AUC,PR-AUC_std,Lift1,Lift1_std,Demotion1,Demotion1_std,Weighted1,Weighted1_std,Lift2,Lift2_std,Demotion2,Demotion2_std,Weighted2,Weighted2_std,Lift3,Lift3_std,Demotion3,Demotion3_std,Weighted3,Weighted3_std,Lift4,Lift4_std,Demotion4,Demotion4_std,Weighted4,Weighted4_std,Lift5,Lift5_std,Demotion5,Demotion5_std,Weighted5,Weighted5_std,Lift6,Lift6_std,Demotion6,Demotion6_std,Weighted6,Weighted6_std,Lift7,Lift7_std,Demotion7,Demotion7_std,Weighted7,Weighted7_std,Lift8,Lift8_std,Demotion8,Demotion8_std,Weighted8,Weighted8_std,Lift9,Lift9_std,Demotion9,Demotion9_std,Weighted9,Weighted9_std,Lift10,Lift10_std,Demotion10,Demotion10_std,Weighted10,Weighted10_std
RF,0.99789,3.6e-05,0.825859,0.098582,0.061224,0.016101,0.113445,0.027377,0.002054,2.6e-05,0.789936,0.006956,0.135632,0.016663,0.748387,0.065794,1.0,0.0,0.823871,0.046056,0.541935,0.051613,1.0,0.0,0.679355,0.036129,0.397849,0.035985,1.0,0.0,0.578495,0.02519,0.332258,0.026698,1.0,0.0,0.532581,0.018689,0.294194,0.029593,1.0,0.0,0.505935,0.020715,0.265591,0.025806,1.0,0.0,0.485914,0.018065,0.237788,0.020526,0.999078,0.001843,0.466175,0.014601,0.216129,0.021579,0.999194,0.001613,0.451048,0.015364,0.201434,0.020199,0.999283,0.001434,0.440789,0.014381,0.187518,0.022242,0.999355,0.00129,0.431069,0.015773
CatBoost,0.997864,3.3e-05,0.686371,0.100081,0.070881,0.016336,0.1276,0.025991,0.002055,2.8e-05,0.884929,0.009276,0.127432,0.014062,0.683871,0.082621,1.0,0.0,0.77871,0.057835,0.5,0.054934,1.0,0.0,0.65,0.038453,0.391398,0.029329,1.0,0.0,0.573978,0.020531,0.308065,0.031606,1.0,0.0,0.515645,0.022124,0.265806,0.026881,1.0,0.0,0.486065,0.018817,0.229032,0.022961,1.0,0.0,0.460323,0.016073,0.21659,0.022387,1.0,0.0,0.451613,0.015671,0.197581,0.020242,1.0,0.0,0.438306,0.014169,0.181362,0.021577,1.0,0.0,0.426953,0.015104,0.169466,0.017201,1.0,0.0,0.418626,0.012041
LR,0.997776,7e-06,0.35,0.366667,0.004512,0.005984,0.008767,0.011505,0.002164,1.3e-05,0.885508,0.007375,0.058489,0.011243,0.225806,0.109867,1.0,0.0,0.458065,0.076907,0.219355,0.050595,1.0,0.0,0.453548,0.035416,0.182796,0.039066,1.0,0.0,0.427957,0.027347,0.170968,0.025705,1.0,0.0,0.419677,0.017994,0.165161,0.019822,1.0,0.0,0.415613,0.013876,0.153763,0.015054,1.0,0.0,0.407634,0.010538,0.152995,0.017824,1.0,0.0,0.407097,0.012477,0.143548,0.016645,1.0,0.0,0.400484,0.011651,0.134767,0.014621,1.0,0.0,0.394337,0.010235,0.126937,0.013942,1.0,0.0,0.388856,0.00976
DT,0.995485,0.000163,0.082957,0.008183,0.103087,0.010536,0.091851,0.008828,0.004515,0.000163,0.550277,0.005246,0.01061,0.00165,0.070968,0.055499,1.0,0.0,0.349677,0.038849,0.080645,0.039508,1.0,0.0,0.356452,0.027656,0.08172,0.027708,1.0,0.0,0.357204,0.019395,0.085484,0.028672,1.0,0.0,0.359839,0.02007,0.091613,0.018429,0.99871,0.002581,0.363742,0.013218,0.090323,0.018748,0.998925,0.002151,0.362903,0.013576,0.089401,0.014456,0.999078,0.001843,0.362304,0.010429,0.08629,0.010999,0.999194,0.001613,0.360161,0.007924,0.089606,0.009881,0.999283,0.001434,0.362509,0.00693,0.086342,0.007483,0.999355,0.00129,0.360246,0.005316
GaussianNB,0.92498,0.000976,0.017529,0.000936,0.596675,0.026771,0.034058,0.00181,0.068408,0.000874,0.895074,0.008292,0.029334,0.002678,0.103226,0.062551,1.0,0.0,0.372258,0.043785,0.090323,0.021878,1.0,0.0,0.363226,0.015315,0.094624,0.014265,1.0,0.0,0.366237,0.009986,0.083871,0.010939,1.0,0.0,0.35871,0.007657,0.082581,0.014368,1.0,0.0,0.357806,0.010058,0.080645,0.020402,1.0,0.0,0.356452,0.014281,0.078341,0.021017,1.0,0.0,0.354839,0.014712,0.079839,0.016008,1.0,0.0,0.355887,0.011205,0.076703,0.011691,1.0,0.0,0.353692,0.008184,0.076662,0.011675,1.0,0.0,0.353664,0.008172
BernoulliNB,0.92498,0.000976,0.017529,0.000936,0.596675,0.026771,0.034058,0.00181,0.068408,0.000874,0.895074,0.008292,0.029334,0.002678,0.103226,0.062551,1.0,0.0,0.372258,0.043785,0.090323,0.021878,1.0,0.0,0.363226,0.015315,0.094624,0.014265,1.0,0.0,0.366237,0.009986,0.083871,0.010939,1.0,0.0,0.35871,0.007657,0.082581,0.014368,1.0,0.0,0.357806,0.010058,0.080645,0.020402,1.0,0.0,0.356452,0.014281,0.078341,0.021017,1.0,0.0,0.354839,0.014712,0.079839,0.016008,1.0,0.0,0.355887,0.011205,0.076703,0.011691,1.0,0.0,0.353692,0.008184,0.076662,0.011675,1.0,0.0,0.353664,0.008172
MultinomialNB,0.997784,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.00221,4e-06,0.853524,0.02156,0.014751,0.00706,0.03871,0.037619,1.0,0.0,0.327097,0.026333,0.03871,0.04029,1.0,0.0,0.327097,0.028203,0.036559,0.036369,1.0,0.0,0.325591,0.025458,0.030645,0.030347,1.0,0.0,0.321452,0.021243,0.029677,0.026632,1.0,0.0,0.320774,0.018642,0.026882,0.020961,1.0,0.0,0.318817,0.014673,0.028571,0.018978,1.0,0.0,0.32,0.013285,0.029839,0.017221,1.0,0.0,0.320887,0.012055,0.030108,0.015475,1.0,0.0,0.321075,0.010832,0.030291,0.013012,1.0,0.0,0.321204,0.009108
