# Home Credit Baseline Model - Logistic Regression

In [1]:
import logreg_report_v2

In [11]:
# standard library imports
import gc
import os
import random
import sys
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

# third party imports
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import polars as pl
import pyarrow.parquet as pq
import joblib
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    roc_auc_score,
    f1_score,
    log_loss,
    SCORERS, 
    get_scorer,
    classification_report, 
    ConfusionMatrixDisplay, 
)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.svm import LinearSVC

## imputation on 100k rows train df

In [3]:
# Load the un-imputed training sample from parquet into a pandas DataFrame.
# NOTE(review): the file name is spelled "trian_sample..." — confirm this matches
# the file on disk before correcting the spelling here.
data = pd.read_parquet('data/train/trian_sample_no_imputation.parquet')

In [4]:
data

Unnamed: 0,case_id,date_decision,MONTH,WEEK_NUM,target,actualdpdtolerance_344P,amtinstpaidbefduel24m_4187115A,annuity_780A,annuitynextmonth_57A,applicationcnt_361L,...,pmts_month_706T,pmts_overdue_1140A,pmts_overdue_1152A,pmts_year_1139T,pmts_year_507T,subjectroles_name_541M,subjectroles_name_838M,pmts_date_1107D,pmts_dpdvalue_108P,pmts_pmtsoverdue_635A
0,12866,2019-04-24,201904,16,0,,,6064.6,0.0,0.0,...,,,,2018.0,,a55475b1,ab3c25cf,,,
1,12866,2019-04-24,201904,16,0,,,6064.6,0.0,0.0,...,,,,2018.0,,a55475b1,a55475b1,,,
2,12866,2019-04-24,201904,16,0,,,6064.6,0.0,0.0,...,,,,,,,,,,
3,12866,2019-04-24,201904,16,0,,,6064.6,0.0,0.0,...,,,,,,,,,,
4,12866,2019-04-24,201904,16,0,,,6064.6,0.0,0.0,...,,,,2017.0,,a55475b1,daf49a8a,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941052,168975,2019-10-21,201910,41,0,0.0,5072.0,6518.2,2536.0,0.0,...,2.0,,,,2010.0,ab3c25cf,a55475b1,,,
941053,168975,2019-10-21,201910,41,0,0.0,5072.0,6518.2,2536.0,0.0,...,3.0,,,,2010.0,a55475b1,a55475b1,,,
941054,168975,2019-10-21,201910,41,0,0.0,5072.0,6518.2,2536.0,0.0,...,4.0,,0.0,,2010.0,a55475b1,a55475b1,,,
941055,168975,2019-10-21,201910,41,0,0.0,5072.0,6518.2,2536.0,0.0,...,2.0,,,2019.0,2007.0,ab3c25cf,ab3c25cf,,,


In [5]:
# Columns kept alongside the features in every modelling frame:
# the case id, the week number and the target label.
base_cols = [
    'case_id',
    'WEEK_NUM',
    'target'
]

# Hand-picked feature columns for the baseline model.
# NOTE(review): despite the name, this list holds 19 column names, not 20
# (3 base columns + 19 features = the 22 columns reported by train_df.shape).
# Confirm whether a feature is missing.
top20_cols = [
    'actualdpd_943P',
    'purposeofcred_426M',
    'amount_1115A',
    'credacc_actualbalance_314A',
    'actualdpdtolerance_344P',
    'annuity_780A',
    'numinstpaidearly_338L',
    'empl_employedtotal_800L',
    'empl_industry_691L',
    'maininc_215A',
    'debtoverdue_47A',
    'totalsettled_863A',
    'totaloutstanddebtvalue_39A',
    'avgdbddpdlast24m_3658932P',
    'avgdbddpdlast3m_4187120P',
    'clientscnt12m_3712952L',
    'applicationscnt_1086L',
    'applicationcnt_361L',
    'applications30d_658L'
]

In [6]:
train_df = data[base_cols + top20_cols]
train_df.head()

Unnamed: 0,case_id,WEEK_NUM,target,actualdpd_943P,purposeofcred_426M,amount_1115A,credacc_actualbalance_314A,actualdpdtolerance_344P,annuity_780A,numinstpaidearly_338L,...,maininc_215A,debtoverdue_47A,totalsettled_863A,totaloutstanddebtvalue_39A,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,clientscnt12m_3712952L,applicationscnt_1086L,applicationcnt_361L,applications30d_658L
0,12866,16,0,0.0,96a8fdfe,,,,6064.6,,...,,,0.0,305398.22,,,0.0,8.0,0.0,0.0
1,12866,16,0,0.0,96a8fdfe,,,,6064.6,,...,,,0.0,305398.22,,,0.0,8.0,0.0,0.0
2,12866,16,0,0.0,a55475b1,,,,6064.6,,...,,,0.0,,,,0.0,8.0,0.0,0.0
3,12866,16,0,0.0,a55475b1,,,,6064.6,,...,,,0.0,,,,0.0,8.0,0.0,0.0
4,12866,16,0,0.0,96a8fdfe,,,,6064.6,,...,,0.0,0.0,3932.352,,,0.0,8.0,0.0,0.0


In [7]:
train_df.shape

(941057, 22)

In [8]:
def impute_default(df):
    """Return a copy of ``df`` with missing values filled by simple defaults.

    Only columns that contain at least one null are touched:

    * non-``object`` columns have their nulls replaced with ``0``;
    * ``object`` columns have their nulls replaced with the column's mode
      (the first value returned by ``Series.mode`` when several tie).

    An ``object`` column that is entirely null has no mode, so it is left
    unchanged instead of failing on the empty mode lookup.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to impute. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Imputed copy of ``df``.
    """
    # Work on a copy so the caller's frame (possibly a slice) is never mutated.
    imputed = df.copy()

    # Restrict the work to columns that actually contain missing values.
    null_df = imputed[imputed.columns[imputed.isnull().any()]]

    # Impute NaN values with 0 in every non-object column.
    zero_impute_columns = list(null_df.select_dtypes(exclude="object").columns)
    for column in zero_impute_columns:
        imputed[column] = imputed[column].fillna(0)

    # Impute NaN values with the mode in object (categorical) columns.
    mode_impute_columns = list(null_df.select_dtypes(include="object").columns)
    for column in mode_impute_columns:
        modes = imputed[column].mode()
        if modes.empty:
            # All values are null: there is nothing to impute from.
            continue
        imputed[column] = imputed[column].fillna(modes.iloc[0])

    return imputed

In [9]:
train_df = impute_default(train_df)

Unnamed: 0,case_id,WEEK_NUM,target,actualdpd_943P,purposeofcred_426M,amount_1115A,credacc_actualbalance_314A,actualdpdtolerance_344P,annuity_780A,numinstpaidearly_338L,...,maininc_215A,debtoverdue_47A,totalsettled_863A,totaloutstanddebtvalue_39A,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,clientscnt12m_3712952L,applicationscnt_1086L,applicationcnt_361L,applications30d_658L
0,12866,16,0,0.0,96a8fdfe,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,305398.220,0.0,0.0,0.0,8.0,0.0,0.0
1,12866,16,0,0.0,96a8fdfe,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,305398.220,0.0,0.0,0.0,8.0,0.0,0.0
2,12866,16,0,0.0,a55475b1,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,0.000,0.0,0.0,0.0,8.0,0.0,0.0
3,12866,16,0,0.0,a55475b1,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,0.000,0.0,0.0,0.0,8.0,0.0,0.0
4,12866,16,0,0.0,96a8fdfe,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,3932.352,0.0,0.0,0.0,8.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941052,168975,41,0,0.0,a55475b1,0.0,0.0,0.0,6518.2,0.0,...,44000.0,0.0,76831.6,0.000,0.0,0.0,0.0,0.0,0.0,3.0
941053,168975,41,0,0.0,a55475b1,0.0,0.0,0.0,6518.2,0.0,...,44000.0,0.0,76831.6,0.000,0.0,0.0,0.0,0.0,0.0,3.0
941054,168975,41,0,0.0,a55475b1,0.0,0.0,0.0,6518.2,0.0,...,44000.0,0.0,76831.6,0.000,0.0,0.0,0.0,0.0,0.0,3.0
941055,168975,41,0,0.0,60c73645,0.0,0.0,0.0,6518.2,0.0,...,44000.0,0.0,76831.6,0.000,0.0,0.0,0.0,0.0,0.0,3.0


In [9]:
train_df.head()

Unnamed: 0,case_id,WEEK_NUM,target,actualdpd_943P,purposeofcred_426M,amount_1115A,credacc_actualbalance_314A,actualdpdtolerance_344P,annuity_780A,numinstpaidearly_338L,...,maininc_215A,debtoverdue_47A,totalsettled_863A,totaloutstanddebtvalue_39A,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,clientscnt12m_3712952L,applicationscnt_1086L,applicationcnt_361L,applications30d_658L
0,12866,16,0,0.0,96a8fdfe,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,305398.22,0.0,0.0,0.0,8.0,0.0,0.0
1,12866,16,0,0.0,96a8fdfe,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,305398.22,0.0,0.0,0.0,8.0,0.0,0.0
2,12866,16,0,0.0,a55475b1,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0
3,12866,16,0,0.0,a55475b1,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0
4,12866,16,0,0.0,96a8fdfe,0.0,0.0,0.0,6064.6,0.0,...,0.0,0.0,0.0,3932.352,0.0,0.0,0.0,8.0,0.0,0.0


In [10]:
train_df.shape

(941057, 22)

## logistic regression model on 100k rows train df

In [30]:
def train_model(train_df, exclude_from_features=()):
    """Fit a scaled / one-hot-encoded logistic regression on ``train_df``.

    The rows are split into train and test sets by ``case_id`` (20% of the
    unique ids go to the test set, ``random_state=42``), so all rows that share
    a ``case_id`` end up on the same side of the split.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Must contain the columns ``case_id`` and ``target``; every other column
        is treated as a candidate feature.
    exclude_from_features : iterable of str, optional
        Column names to keep in the returned ``X_train`` / ``X_test`` frames but
        NOT feed to the model. Defaults to an empty tuple, i.e. every column
        except ``target`` is used as a feature.

    Returns
    -------
    tuple
        ``(pipe, X_train, y_train, X_test, y_test)`` where ``pipe`` is the
        fitted ``Pipeline``.
    """
    # Splitting by 'case_id'
    case_ids = train_df['case_id'].unique()
    train_case_ids, test_case_ids = train_test_split(case_ids, test_size=0.2, random_state=42)

    # Build each membership mask once and reuse it for features and labels.
    in_train = train_df['case_id'].isin(train_case_ids)
    in_test = train_df['case_id'].isin(test_case_ids)

    features = train_df.drop(columns=["target"])
    labels = train_df["target"]

    X_train, y_train = features.loc[in_train], labels.loc[in_train]
    X_test, y_test = features.loc[in_test], labels.loc[in_test]

    # NOTE(review): with the default (empty) exclusion, identifier-like columns
    # present in the frame (e.g. 'case_id', 'WEEK_NUM') are scaled and used as
    # ordinary numeric features. Pass them via `exclude_from_features` if that
    # is not intended.
    excluded = set(exclude_from_features)
    numeric_cols = [
        col for col in X_train.select_dtypes(exclude="object").columns
        if col not in excluded
    ]
    categorical_cols = [
        col for col in X_train.select_dtypes(include="object").columns
        if col not in excluded
    ]

    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())])

    # Categories unseen during fit are encoded as all-zeros at predict time.
    categorical_transformer = OneHotEncoder(handle_unknown='ignore')

    # Columns not listed in either transformer are dropped (remainder default).
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_cols),
            ('cat', categorical_transformer, categorical_cols)])

    pipe = Pipeline(
        [
            ('preprocessor', preprocessor),
            ("clf", LogisticRegression(random_state=0, max_iter=1000)),
        ]
    )

    pipe.fit(X_train, y_train)

    return pipe, X_train, y_train, X_test, y_test

In [None]:
# train_model returns five values; unpack all of them so the held-out split
# (X_test, y_test) is available to the evaluation cells below.
pipe, X_train, y_train, X_test, y_test = train_model(train_df)

In [12]:
logreg_report_v2.generate_report(pipe, X_train, y_train, list(X_train.columns), ['log_loss'])

Log Loss: 0.14580354890288633
AUC: 0.6546572002612966
Gini Stability Score: 0.24296974748302075


{'AUC': 0.6546572002612966,
 'Gini Stability': 0.24296974748302075,
 'log_loss': 0.14580354890288633}

In [13]:
logreg_report_v2.generate_report(pipe, X_test, y_test, list(X_test.columns), ['log_loss'])

Log Loss: 0.15032929353848243
AUC: 0.629782372873686
Gini Stability Score: 0.10315543470215377


{'AUC': 0.629782372873686,
 'Gini Stability': 0.10315543470215377,
 'log_loss': 0.15032929353848243}

In [12]:
def save_model(pipe, start, end):
    """Persist ``pipe`` with joblib under a name that encodes the column window.

    Parameters
    ----------
    pipe : object
        The fitted model / pipeline to serialize.
    start, end : int
        Bounds of the column window the model was trained on; they are embedded
        in the file name as ``logistic_regression_cols<start>-<end>.joblib``.
    """
    # Format the complete template in one call. Applying `.format` to only the
    # last fragment of a string concatenation leaves the {s}/{e} placeholders
    # unfilled, which would make every model overwrite the same file.
    filename = "logistic_regression_cols{s}-{e}.joblib".format(s=start, e=end)

    # save
    joblib.dump(pipe, filename)

#     # load
#     clf2 = joblib.load("logistic_regression.joblib")

#     clf2.predict(X_test)

In [31]:
def model_pipeline(data, cols):
    """Select columns, impute, fit and evaluate one logistic-regression model.

    Parameters
    ----------
    data : pandas.DataFrame
        Source frame; must contain 'case_id', 'WEEK_NUM', 'target' and every
        name in ``cols``.
    cols : list of str
        Feature columns to model on (a list, since it is concatenated with the
        list of base columns).

    Returns
    -------
    tuple
        ``(pipe, train_stats, test_stats)``: the fitted pipeline and the values
        returned by ``logreg_report_v2.generate_report`` for the train and the
        test split respectively.
    """
    required_cols = ['case_id', 'WEEK_NUM', 'target']

    # Keep only the base columns plus the requested features, then fill NaNs.
    imputed_df = impute_default(data[required_cols + cols])

    # Fit on the train split; the test split is held out for evaluation.
    pipe, X_train, y_train, X_test, y_test = train_model(imputed_df)

    def _report(X, y):
        # Same report configuration for both splits.
        return logreg_report_v2.generate_report(pipe, X, y, list(X.columns), ['log_loss'])

    train_stats = _report(X_train, y_train)
    test_stats = _report(X_test, y_test)

    return pipe, train_stats, test_stats

In [18]:
nonsense_cols = ['case_id', 'WEEK_NUM', 'target', 'date_decision', 'MONTH']

In [19]:
# Candidate feature columns: every column of `data` except those listed in
# `nonsense_cols`. The list is rebuilt from `data.columns` on each run, so the
# cell can be re-executed safely.
possible_cols = list(data.columns)
for col in nonsense_cols:
    # list.remove raises ValueError if the column is not present in `data`.
    possible_cols.remove(col)
possible_cols

['actualdpdtolerance_344P',
 'amtinstpaidbefduel24m_4187115A',
 'annuity_780A',
 'annuitynextmonth_57A',
 'applicationcnt_361L',
 'applications30d_658L',
 'applicationscnt_1086L',
 'applicationscnt_464L',
 'applicationscnt_629L',
 'applicationscnt_867L',
 'avgdbddpdlast24m_3658932P',
 'avgdbddpdlast3m_4187120P',
 'avgdbdtollast24m_4525197P',
 'avgdpdtolclosure24_3658938P',
 'avginstallast24m_3658937A',
 'avglnamtstart24m_4525187A',
 'avgmaxdpdlast9m_3716943P',
 'avgoutstandbalancel6m_4187114A',
 'avgpmtlast12m_4525200A',
 'bankacctype_710L',
 'cardtype_51L',
 'clientscnt12m_3712952L',
 'clientscnt3m_3712950L',
 'clientscnt6m_3712949L',
 'clientscnt_100L',
 'clientscnt_1022L',
 'clientscnt_1071L',
 'clientscnt_1130L',
 'clientscnt_136L',
 'clientscnt_157L',
 'clientscnt_257L',
 'clientscnt_304L',
 'clientscnt_360L',
 'clientscnt_493L',
 'clientscnt_533L',
 'clientscnt_887L',
 'clientscnt_946L',
 'cntincpaycont9m_3716944L',
 'cntpmts24_3658933L',
 'commnoinclast6m_3546845L',
 'credamount

In [20]:
len(possible_cols)

466

In [36]:
# Slide a fixed-width window over `possible_cols`, fit one model per window,
# record its metrics, and save the pipeline whenever it beats the best model
# seen so far on BOTH train Gini stability and train AUC.
models = []

# Train / test statistics of the best window so far (None until the first fit).
curr_train = None
curr_test = None

window_size = 20

for start in range(len(possible_cols) - window_size + 1):

    end = start + window_size
    cols = possible_cols[start:end]

    pipe, train_stat, test_stat = model_pipeline(data, cols)

    # Every window is recorded so the metrics table covers the whole sweep.
    model = {
        'columns': cols,
        'train AUC': train_stat['AUC'],
        'train gini': train_stat['Gini Stability'],
        'test AUC': test_stat['AUC'],
        'test gini': test_stat['Gini Stability'],
    }
    models.append(model)

    if curr_train is None:
        # The first window only establishes the baseline to compare against.
        curr_train, curr_test = train_stat, test_stat
        continue

    # NOTE(review): selection uses the TRAIN metrics; consider comparing the
    # held-out metrics in `test_stat` / `curr_test` instead.
    improved_gini = train_stat['Gini Stability'] > curr_train['Gini Stability']
    improved_auc = train_stat['AUC'] > curr_train['AUC']

    if improved_gini and improved_auc:
        save_model(pipe, start, end)
        # Advance the reference point so later windows must beat this one.
        curr_train, curr_test = train_stat, test_stat

Log Loss: 0.14418061851047806
AUC: 0.6802648542398241
Gini Stability Score: 0.2812477099159215
Log Loss: 0.14988634555007632
AUC: 0.655036687200115
Gini Stability Score: 0.11380017241457449
Log Loss: 0.14417613963330714
AUC: 0.6803049695930856
Gini Stability Score: 0.2814697091665604
Log Loss: 0.1498812070216303
AUC: 0.6553850489718098
Gini Stability Score: 0.1145481903626947
Log Loss: 0.14661297760479078
AUC: 0.6541205337440124
Gini Stability Score: 0.20912414605572102
Log Loss: 0.14980751259838596
AUC: 0.6419639109790893
Gini Stability Score: 0.10553943244839467
Log Loss: 0.14674252929375003
AUC: 0.6516712184442089
Gini Stability Score: 0.2038345129669154
Log Loss: 0.15008001408622204
AUC: 0.6348774625276684
Gini Stability Score: 0.0774569085740158
Log Loss: 0.14674852419482656
AUC: 0.6515578803096636
Gini Stability Score: 0.2046165350205812
Log Loss: 0.15023446273651833
AUC: 0.6317390833228519
Gini Stability Score: 0.0682138788798092
Log Loss: 0.146698348543211
AUC: 0.65270912286809

Log Loss: 0.04629158993700833
AUC: 0.9831506461029238
Gini Stability Score: 0.8994409094550861
Log Loss: 0.20778172368062953
AUC: 0.6079264057782955
Gini Stability Score: 0.055255698945064324
Log Loss: 0.0463810671410794
AUC: 0.9831621625781954
Gini Stability Score: 0.8992620094524149
Log Loss: 0.20820798149440442
AUC: 0.6087146196606837
Gini Stability Score: 0.060931496801613194
Log Loss: 0.049975783209814766
AUC: 0.9806842992975346
Gini Stability Score: 0.8942247386085264
Log Loss: 0.20847486670650972
AUC: 0.6001218126227859
Gini Stability Score: 0.054580741559019125
Log Loss: 0.058884192920579115
AUC: 0.9733931288340947
Gini Stability Score: 0.8789939727476547
Log Loss: 0.21131505513167098
AUC: 0.5867505826179051
Gini Stability Score: 0.015380290149082126
Log Loss: 0.06564346160882926
AUC: 0.968478852519013
Gini Stability Score: 0.8695742118932263
Log Loss: 0.22061048677780895
AUC: 0.5450706370982183
Gini Stability Score: -0.06013916158348151
Log Loss: 0.052121681049139494
AUC: 0.98

Gini Stability Score: -0.11396287186519474
Log Loss: 0.11588146355293397
AUC: 0.8611307956764729
Gini Stability Score: 0.6470356279733511
Log Loss: 0.17065225346590807
AUC: 0.568162170842858
Gini Stability Score: -0.10516641189229627
Log Loss: 0.1158909756388091
AUC: 0.8611370970359387
Gini Stability Score: 0.647337486110074
Log Loss: 0.17064049921870136
AUC: 0.5681800104214871
Gini Stability Score: -0.10440569837547549
Log Loss: 0.11585685690846216
AUC: 0.8611755860996211
Gini Stability Score: 0.6474100247927104
Log Loss: 0.17059188897201397
AUC: 0.5687699198211605
Gini Stability Score: -0.10455767669772162
Log Loss: 0.11618179104138256
AUC: 0.8605982920503887
Gini Stability Score: 0.6475361992006959
Log Loss: 0.17150607945919216
AUC: 0.5636975321309577
Gini Stability Score: -0.11441494161204963
Log Loss: 0.1152179604429067
AUC: 0.8605196282184033
Gini Stability Score: 0.642253033818455
Log Loss: 0.17085560570309394
AUC: 0.5730387914904117
Gini Stability Score: -0.10644280217139955
Lo

Gini Stability Score: 0.1373198500564095
Log Loss: 0.13896617345226198
AUC: 0.7281295099959739
Gini Stability Score: 0.3811949844439132
Log Loss: 0.1460248678986014
AUC: 0.6844418051512289
Gini Stability Score: 0.13355407130143626
Log Loss: 0.13913970835744188
AUC: 0.7249046136732227
Gini Stability Score: 0.3758591010559752
Log Loss: 0.14587862659772546
AUC: 0.6870698964090467
Gini Stability Score: 0.15903619491452628
Log Loss: 0.13915432763025443
AUC: 0.7242338895242991
Gini Stability Score: 0.3731470573842778
Log Loss: 0.1458795776175894
AUC: 0.6865408922374587
Gini Stability Score: 0.14832285419430763
Log Loss: 0.13876811924406232
AUC: 0.7276521043036706
Gini Stability Score: 0.3800285996611119
Log Loss: 0.14546051647978894
AUC: 0.6927303818750468
Gini Stability Score: 0.18788629177440172
Log Loss: 0.13882191707295163
AUC: 0.728123033809064
Gini Stability Score: 0.381285757487434
Log Loss: 0.14556929080538428
AUC: 0.6911428652055238
Gini Stability Score: 0.18427155494289355
Log Loss

Gini Stability Score: 0.0071094143406584265
Log Loss: 0.1323255189748554
AUC: 0.7606062838992254
Gini Stability Score: 0.4135416911346504
Log Loss: 0.15897327123480437
AUC: 0.5940542834378252
Gini Stability Score: -0.0010249672153274936
Log Loss: 0.14240801918761647
AUC: 0.6536062612289696
Gini Stability Score: 0.16438569702702796
Log Loss: 0.15143206764462977
AUC: 0.6101945155388763
Gini Stability Score: 0.012792430386317444
Log Loss: 0.14595548459002083
AUC: 0.6368939764648461
Gini Stability Score: 0.1257613223472253
Log Loss: 0.1497988955576803
AUC: 0.6268196445199965
Gini Stability Score: 0.049511515535360484
Log Loss: 0.14593971280208537
AUC: 0.6359446626540994
Gini Stability Score: 0.1301040950554112
Log Loss: 0.14979070950061163
AUC: 0.6255880402771553
Gini Stability Score: 0.04261593184129969
Log Loss: 0.14593976143237156
AUC: 0.6359933123207817
Gini Stability Score: 0.1302098940992646
Log Loss: 0.14978912590401453
AUC: 0.6255252450937109
Gini Stability Score: 0.042989803337130

Gini Stability Score: 0.03747229237952657
Log Loss: 0.1318144828763547
AUC: 0.7882858560393365
Gini Stability Score: 0.5078511413157402
Log Loss: 0.1576785745711297
AUC: 0.5779207120127822
Gini Stability Score: 0.02676525015658146
Log Loss: 0.12790747675621386
AUC: 0.814463296371976
Gini Stability Score: 0.5620402199702426
Log Loss: 0.16141179511777812
AUC: 0.5676718590906883
Gini Stability Score: 0.028042204886377445
Log Loss: 0.1253881732261728
AUC: 0.8261369808453561
Gini Stability Score: 0.5809987223907768
Log Loss: 0.1629894155856306
AUC: 0.5716290481218834
Gini Stability Score: 0.018811198383572744
Log Loss: 0.12566301082220124
AUC: 0.8249840215077149
Gini Stability Score: 0.5793854754811334
Log Loss: 0.1629854509172671
AUC: 0.5701720400364143
Gini Stability Score: 0.010801929801156879
Log Loss: 0.12303858315672622
AUC: 0.8396541195020254
Gini Stability Score: 0.6141923925311772
Log Loss: 0.16476455823764957
AUC: 0.5694631076147314
Gini Stability Score: 0.03372789487946064
Log Lo

Log Loss: 0.11114509818322262
AUC: 0.8247287891604957
Gini Stability Score: 0.5745273080607639
Log Loss: 0.15842369045505889
AUC: 0.5730642317228467
Gini Stability Score: -0.02228144437080118
Log Loss: 0.11121425928636586
AUC: 0.8235550618472932
Gini Stability Score: 0.5554195524768055
Log Loss: 0.1586845831379895
AUC: 0.5652236294178732
Gini Stability Score: -0.033024859085026934
Log Loss: 0.11121435461920484
AUC: 0.8235356371774596
Gini Stability Score: 0.5554363734556137
Log Loss: 0.15867634940185962
AUC: 0.5652587594214358
Gini Stability Score: -0.03303957801764554
Log Loss: 0.11119013957190436
AUC: 0.8236262113591177
Gini Stability Score: 0.5555371692990089
Log Loss: 0.15869642468074915
AUC: 0.5651644708151994
Gini Stability Score: -0.033377998826848734
Log Loss: 0.11121920365369015
AUC: 0.8235652001039278
Gini Stability Score: 0.5554210319930385
Log Loss: 0.15868564006239205
AUC: 0.5652090505955583
Gini Stability Score: -0.033799039665074665
Log Loss: 0.1092669188468929
AUC: 0.83

Log Loss: 0.13771249792726137
AUC: 0.7230129639539692
Gini Stability Score: 0.33496057725560713
Log Loss: 0.1530859406302444
AUC: 0.6042672622072667
Gini Stability Score: 0.07819940319058259
Log Loss: 0.13898677606814097
AUC: 0.7122272182899023
Gini Stability Score: 0.31429310349722267
Log Loss: 0.1532145777736355
AUC: 0.5985499964157514
Gini Stability Score: 0.08117063762834716
Log Loss: 0.14427761335189054
AUC: 0.6506014193952583
Gini Stability Score: 0.22683575262217276
Log Loss: 0.15241646172641005
AUC: 0.5837950965931518
Gini Stability Score: 0.0774346127832086
Log Loss: 0.14428316174776556
AUC: 0.6506050997101328
Gini Stability Score: 0.2267544729355342
Log Loss: 0.15240928883783708
AUC: 0.5838172444033539
Gini Stability Score: 0.07702256958083227
Log Loss: 0.14429127605029468
AUC: 0.6505297654339601
Gini Stability Score: 0.2261586244556035
Log Loss: 0.15239772769015486
AUC: 0.5840356117455172
Gini Stability Score: 0.07730012107922118
Log Loss: 0.14175945103387888
AUC: 0.66711615

Log Loss: 0.1493200501521801
AUC: 0.5511070314056424
Gini Stability Score: 0.0482952916472973
Log Loss: 0.153480529703017
AUC: 0.5372768970196924
Gini Stability Score: 0.000795290673844401
Log Loss: 0.14949266069240857
AUC: 0.523775637506146
Gini Stability Score: -0.038422099140893846
Log Loss: 0.15352032687424974
AUC: 0.5088850334688427
Gini Stability Score: -0.10516087528009219
Log Loss: 0.1495178703059865
AUC: 0.5205357739035628
Gini Stability Score: -0.046786865189073636
Log Loss: 0.1535675856654011
AUC: 0.5038607713085816
Gini Stability Score: -0.12712864946326308
Log Loss: 0.1495150619723338
AUC: 0.5205417555785592
Gini Stability Score: -0.04677723021379815
Log Loss: 0.15356626603058124
AUC: 0.503971236199401
Gini Stability Score: -0.12672394932297876
Log Loss: 0.14951667796625623
AUC: 0.5204543462731483
Gini Stability Score: -0.04718421419100984
Log Loss: 0.15357463781510403
AUC: 0.5036126138367278
Gini Stability Score: -0.12727080758670153
Log Loss: 0.14954300308455393
AUC: 0.5

Gini Stability Score: -0.08233408792381289
Log Loss: 0.1461860308662631
AUC: 0.5836495370102371
Gini Stability Score: 0.08435531975708822
Log Loss: 0.1539530585866984
AUC: 0.5222947233986335
Gini Stability Score: -0.08038755026301361
Log Loss: 0.14597575967098186
AUC: 0.5854469841291047
Gini Stability Score: 0.08659320376134763
Log Loss: 0.15395116819739485
AUC: 0.5243819824309083
Gini Stability Score: -0.07453159860020914
Log Loss: 0.14573692932214843
AUC: 0.5866449950005976
Gini Stability Score: 0.08890759850726844
Log Loss: 0.1540584238143416
AUC: 0.5247833371175772
Gini Stability Score: -0.06839777303618266
Log Loss: 0.14570340565665885
AUC: 0.5866199551887341
Gini Stability Score: 0.08895616028246407
Log Loss: 0.15402720359582447
AUC: 0.5260638618715826
Gini Stability Score: -0.06445458809769
Log Loss: 0.1457399502910894
AUC: 0.584771349458476
Gini Stability Score: 0.08583084573508762
Log Loss: 0.1541119181341773
AUC: 0.5228153252686838
Gini Stability Score: -0.07896967981971406
L

Log Loss: 0.15294717471394428
AUC: 0.5450942648734636
Gini Stability Score: 0.00045513235606610414
Log Loss: 0.14947175833313392
AUC: 0.5591281807963234
Gini Stability Score: 0.0883488602600748
Log Loss: 0.15292049607204675
AUC: 0.5456443402140174
Gini Stability Score: -0.029085807661641372
Log Loss: 0.14953898714654992
AUC: 0.5566503939665209
Gini Stability Score: 0.07803877848314096
Log Loss: 0.15319775194601368
AUC: 0.5415535385054205
Gini Stability Score: -0.004502036632021061
Log Loss: 0.1494014813847317
AUC: 0.5630408095859439
Gini Stability Score: 0.08769043874541577
Log Loss: 0.1530018376614842
AUC: 0.5486495300647665
Gini Stability Score: 0.03708187981465347
Log Loss: 0.14936200340514
AUC: 0.5635962740232469
Gini Stability Score: 0.0881781058391111
Log Loss: 0.1530000425447901
AUC: 0.5482719681494456
Gini Stability Score: 0.03606030506423391
Log Loss: 0.14936105498485838
AUC: 0.5636155534154176
Gini Stability Score: 0.08819656500389282
Log Loss: 0.15300601107742423
AUC: 0.5481

In [37]:
models

[{'columns': ['actualdpdtolerance_344P',
   'amtinstpaidbefduel24m_4187115A',
   'annuity_780A',
   'annuitynextmonth_57A',
   'applicationcnt_361L',
   'applications30d_658L',
   'applicationscnt_1086L',
   'applicationscnt_464L',
   'applicationscnt_629L',
   'applicationscnt_867L',
   'avgdbddpdlast24m_3658932P',
   'avgdbddpdlast3m_4187120P',
   'avgdbdtollast24m_4525197P',
   'avgdpdtolclosure24_3658938P',
   'avginstallast24m_3658937A',
   'avglnamtstart24m_4525187A',
   'avgmaxdpdlast9m_3716943P',
   'avgoutstandbalancel6m_4187114A',
   'avgpmtlast12m_4525200A',
   'bankacctype_710L'],
  'train AUC': 0.6802648542398241,
  'train gini': 0.2812477099159215,
  'test AUC': 0.655036687200115,
  'test gini': 0.11380017241457449},
 {'columns': ['amtinstpaidbefduel24m_4187115A',
   'annuity_780A',
   'annuitynextmonth_57A',
   'applicationcnt_361L',
   'applications30d_658L',
   'applicationscnt_1086L',
   'applicationscnt_464L',
   'applicationscnt_629L',
   'applicationscnt_867L',
   

In [38]:
import csv

# Dump the recorded model metrics to CSV, one row per entry of `models`.
# The header is taken from the keys of the first entry, so `models` must be
# non-empty. Values that are not strings (e.g. the 'columns' list) are written
# using their str() form.
keys = models[0].keys()

with open('model_metrics.csv', 'w', newline='') as output_file:
    dict_writer = csv.DictWriter(output_file, fieldnames=keys)
    dict_writer.writeheader()
    for model_row in models:
        dict_writer.writerow(model_row)

In [25]:
# for i in range(len(possible_cols) - 20 + 1):

#     print(i,i+20)
    
#     print(len(possible_cols[i:i+20]))

In [33]:
# m = []
# h = {'1': 1}
# m.append(h)
# m

[{'1': 1}]