# 1. SETTINGS

In [49]:
# libraries
import pandas as pd
import numpy as np
import scipy.stats
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

In [50]:
# pandas options
pd.set_option("display.max_columns", None)

In [51]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [52]:
# garbage collection
import gc
gc.enable()

# 2. FUNCTIONS

In [53]:
##### FUNCTION FOR COUNTING MISSINGS
def count_missings(data):
    total = data.isnull().sum().sort_values(ascending = False)
    percent = (data.isnull().sum() / data.isnull().count() * 100).sort_values(ascending = False)
    table = pd.concat([total, percent], axis = 1, keys = ["Total", "Percent"])
    table = table[table["Total"] > 0]
    return table

In [54]:
##### FUNCTION FOR CREATING LOGARITHMS
def convert_days(data, features, t = 12, rounding = True, replace = False):
    for var in features:
        if replace == True:
            if rounding == True:
                data[var] = round(-data[var]/t)
            else:
                data[var] = -data[var]/t
            data[var][data[var] < 0] = None
        else:
            if rounding == True:
                data["CONVETRTED_" + str(var)] = round(-data[var]/t)
            else:
                data["CONVETRTED_" + str(var)] = -data[var]/t
            data["CONVETRTED_" + str(var)][data["CONVETRTED_" + str(var)] < 0] = None
    return data

In [55]:
##### FUNCTION FOR CREATING LOGARITHMS
def create_logs(data, features, replace = False):
    for var in features:
        if replace == True:
            data[var] = np.log(data[var].abs() + 1)
        else:
            data["LOG_" + str(var)] = np.log(data[var].abs() + 1)      
    return data

In [56]:
##### FUNCTION FOR CREATING FLAGS FOR MISSINGS
def create_null_flags(data, features = None):
    if features == None:
        features = data.columns
    for var in features:
        num_null = data[var].isnull() + 0
        if num_null.sum() > 0:
            data["ISNULL_" + str(var)] = num_null
    return data

In [57]:
##### FUNCTION FOR TREATING FACTORS
def treat_factors(data, method = "label"):
    
    # label encoding
    if method == "label":
        factors = [f for f in data.columns if data[f].dtype == "object"]
        for var in factors:
            data[var], _ = pd.factorize(data[var])
        
    # dummy encoding
    if method == "dummy":
        data = pd.get_dummies(data, drop_first = True)
    
    # dataset
    return data

In [58]:
##### FUNCTION FOR COMPUTING ACCEPT/REJECT RATIOS
def compute_accept_reject_ratio(data, lags = [1, 3, 5]):
    
    # preparations
    dec_prev = data[["SK_ID_CURR", "SK_ID_PREV", "DAYS_DECISION", "NAME_CONTRACT_STATUS"]]
    dec_prev["DAYS_DECISION"] = -dec_prev["DAYS_DECISION"]
    dec_prev = dec_prev.sort_values(by = ["SK_ID_CURR", "DAYS_DECISION"])
    dec_prev = pd.get_dummies(dec_prev)
     
    # compuatation
    for t in lags:
        
        # acceptance ratios
        tmp = dec_prev[["SK_ID_CURR", "NAME_CONTRACT_STATUS_Approved"]].groupby(["SK_ID_CURR"]).head(1)
        tmp = tmp.groupby(["SK_ID_CURR"], as_index = False).mean()
        tmp.columns = ["SK_ID_CURR", "APPROVE_RATIO_" + str(t)]
        data = data.merge(tmp, how = "left", on = "SK_ID_CURR")
        
        # rejection ratios
        tmp = dec_prev[["SK_ID_CURR", "NAME_CONTRACT_STATUS_Refused"]].groupby(["SK_ID_CURR"]).head(1)
        tmp = tmp.groupby(["SK_ID_CURR"], as_index = False).mean()
        tmp.columns = ["SK_ID_CURR", "REJECT_RATIO_" + str(t)]
        data = data.merge(tmp, how = "left", on = "SK_ID_CURR")
        
    # dataset
    return data

In [59]:
##### FUNCTION FOR AGGREGATING DATA
def aggregate_data(data, id_var, label = None):
    
    
    ### SEPARATE FEATURES
  
    # display info
    print("- Preparing the dataset...")

    # find factors
    data_factors = [f for f in data.columns if data[f].dtype == "object"]
    
    # partition subsets
    num_data = data[list(set(data.columns) - set(data_factors))]
    fac_data = data[[id_var] + data_factors]
    
    # display info
    num_facs = fac_data.shape[1] - 1
    num_nums = num_data.shape[1] - 1
    print("- Extracted %.0f factors and %.0f numerics..." % (num_facs, num_nums))


    ##### AGGREGATION
 
    # aggregate numerics
    if (num_nums > 0):
        print("- Aggregating numeric features...")
        num_data = num_data.groupby(id_var).agg(["mean", "std", "min", "max"])
        num_data.columns = ["_".join(col).strip() for col in num_data.columns.values]
        num_data = num_data.sort_index()

    # aggregate factors
    if (num_facs > 0):
        print("- Aggregating factor features...")
        fac_data = fac_data.groupby(id_var).agg([("mode",   lambda x: scipy.stats.mode(x)[0][0]),
                                                 ("unique", lambda x: x.nunique())])
        fac_data.columns = ["_".join(col).strip() for col in fac_data.columns.values]
        fac_data = fac_data.sort_index()


    ##### MERGER

    # merge numerics and factors
    if ((num_facs > 0) & (num_nums > 0)):
        agg_data = pd.concat([num_data, fac_data], axis = 1)
    
    # use factors only
    if ((num_facs > 0) & (num_nums == 0)):
        agg_data = fac_data
        
    # use numerics only
    if ((num_facs == 0) & (num_nums > 0)):
        agg_data = num_data
        

    ##### LAST STEPS

    # update labels
    if label != None:
        agg_data.columns = [label + "_" + str(col) for col in agg_data.columns]
    
    # impute zeros for SD
    #stdevs = agg_data.filter(like = "_std").columns
    #for var in stdevs:
    #    agg_data[var].fillna(0, inplace = True)

    # display info
    print("- Final dimensions:", agg_data.shape)
    
    # return dataset
    return agg_data

# 3. DATA IMPORT

In [60]:
# import data
train = pd.read_csv("../data/raw/application_train.csv")
test  = pd.read_csv("../data/raw/application_test.csv")
buro  = pd.read_csv("../data/raw/bureau.csv")
bbal  = pd.read_csv("../data/raw/bureau_balance.csv")
prev  = pd.read_csv("../data/raw/previous_application.csv")
card  = pd.read_csv("../data/raw/credit_card_balance.csv")
poca  = pd.read_csv("../data/raw/POS_CASH_balance.csv")
inst  = pd.read_csv("../data/raw/installments_payments.csv")

In [61]:
# check dimensions
print("Application:", train.shape, test.shape)
print("Buro:", buro.shape)
print("Bbal:", bbal.shape)
print("Prev:", prev.shape)
print("Card:", card.shape)
print("Poca:", poca.shape)
print("Inst:", inst.shape)

Application: (307511, 122) (48744, 121)
Buro: (1716428, 17)
Bbal: (27299925, 3)
Prev: (1670214, 37)
Card: (3840312, 23)
Poca: (10001358, 8)
Inst: (13605401, 8)


In [62]:
# extract target
y = train[["SK_ID_CURR", "TARGET"]]
del train["TARGET"]

In [63]:
### CHECK LOAN ID DISTRIBUTION

# check unique IDs
print("IDs in TRAIN:", train.SK_ID_CURR.nunique())
print("IDs in TEST:",  test.SK_ID_CURR.nunique())
print("IDs in BURO:",  buro.SK_ID_CURR.nunique())
print("IDs in PREV:",  prev.SK_ID_CURR.nunique())
print("IDs in CARD:",  card.SK_ID_CURR.nunique())
print("IDs in POCA:",  poca.SK_ID_CURR.nunique())
print("IDs in INST:",  inst.SK_ID_CURR.nunique())
print("")

# check current loan differences
print("IDs in TRAIN but not in BURO:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(buro.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in PREV:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(prev.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in CARD:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(card.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in POCA:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in INST:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("")

# check current loan differences
print("IDs in TEST but not in BURO:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(buro.SK_ID_CURR.unique()))))
print("IDs in TEST but not in PREV:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(prev.SK_ID_CURR.unique()))))
print("IDs in TEST but not in CARD:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(card.SK_ID_CURR.unique()))))
print("IDs in TEST but not in POCA:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("IDs in TEST but not in INST:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("")

print("IDs in TRAIN and BURO:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()))))
print("IDs in TRAIN and PREV:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()))))
print("IDs in TRAIN and CARD:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(card.SK_ID_CURR.unique())))) 
print("IDs in TRAIN and POCA:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(poca.SK_ID_CURR.unique())))) 
print("IDs in TRAIN and INST:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique())))) 
print("IDs in TRAIN and BURO and PREV:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()), 
                            set(prev.SK_ID_CURR.unique()))))
print("IDs in TRAIN and PREV and POCA and INST:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()), 
                            set(poca.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique()))))
print("")

print("IDs in TEST and BURO:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()))))
print("IDs in TEST and PREV:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()))))
print("IDs in TEST and CARD:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(card.SK_ID_CURR.unique())))) 
print("IDs in TEST and POCA:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(poca.SK_ID_CURR.unique())))) 
print("IDs in TEST and INST:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique())))) 
print("IDs in TEST and BURO and PREV:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()), 
                            set(prev.SK_ID_CURR.unique()))))
print("IDs in TEST and PREV and POCA and INST:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()), 
                            set(poca.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique()))))

IDs in TRAIN: 307511
IDs in TEST: 48744
IDs in BURO: 305811
IDs in PREV: 338857
IDs in CARD: 103558
IDs in POCA: 337252
IDs in INST: 339587

IDs in TRAIN but not in BURO: 44020
IDs in TRAIN but not in PREV: 16454
IDs in TRAIN but not in CARD: 220606
IDs in TRAIN but not in POCA: 18067
IDs in TRAIN but not in INST: 18067

IDs in TEST but not in BURO: 6424
IDs in TEST but not in PREV: 944
IDs in TEST but not in CARD: 32091
IDs in TEST but not in POCA: 936
IDs in TEST but not in INST: 936

IDs in TRAIN and BURO: 263491
IDs in TRAIN and PREV: 291057
IDs in TRAIN and CARD: 86905
IDs in TRAIN and POCA: 289444
IDs in TRAIN and INST: 291643
IDs in TRAIN and BURO and PREV: 249507
IDs in TRAIN and PREV and POCA and INST: 288028

IDs in TEST and BURO: 42320
IDs in TEST and PREV: 47800
IDs in TEST and CARD: 16653
IDs in TEST and POCA: 47808
IDs in TEST and INST: 47944
IDs in TEST and BURO and PREV: 41584
IDs in TEST and PREV and POCA and INST: 47537


# 4. PREPROCESSING

## 4.1. APPLICATION DATA

In [64]:
# concatenate application data
appl = pd.concat([train, test])
del train, test

In [65]:
### FEATURE ENGINEERING

# income ratios
appl["CREDIT_BY_INCOME"]      = appl["AMT_CREDIT"]      / appl["AMT_INCOME_TOTAL"]
appl["ANNUITY_BY_INCOME"]     = appl["AMT_ANNUITY"]     / appl["AMT_INCOME_TOTAL"]
appl["GOODS_PRICE_BY_INCOME"] = appl["AMT_GOODS_PRICE"] / appl["AMT_INCOME_TOTAL"]

# career ratio
appl["PERCENT_WORKED"] = appl["DAYS_EMPLOYED"] / appl["DAYS_BIRTH"]
appl["PERCENT_WORKED"][appl["PERCENT_WORKED"] < 0] = None

# number of adults
appl["CNT_ADULTS"] = appl["CNT_FAM_MEMBERS"] - appl["CNT_CHILDREN"]

# external sources
#appl["EXT_SOURCE_MEAN"] = appl[["EXT_SOURCE_1", "EXT_SOURCE_1", "EXT_SOURCE_3"]].mean(axis = 1)
#appl["EXT_SOURCE_SD"]   = appl[["EXT_SOURCE_1", "EXT_SOURCE_1", "EXT_SOURCE_3"]].std(axis = 1)
appl["NUM_EXT_SOURCES"] = 3 - (appl["EXT_SOURCE_1"].isnull().astype(int) +
                               appl["EXT_SOURCE_2"].isnull().astype(int) +
                               appl["EXT_SOURCE_3"].isnull().astype(int))

# number of documents
doc_vars = ["FLAG_DOCUMENT_2",  "FLAG_DOCUMENT_3",  "FLAG_DOCUMENT_4",  "FLAG_DOCUMENT_5",  "FLAG_DOCUMENT_6",
            "FLAG_DOCUMENT_7",  "FLAG_DOCUMENT_8",  "FLAG_DOCUMENT_9",  "FLAG_DOCUMENT_10", "FLAG_DOCUMENT_11",
            "FLAG_DOCUMENT_12", "FLAG_DOCUMENT_13", "FLAG_DOCUMENT_14", "FLAG_DOCUMENT_15", "FLAG_DOCUMENT_16",
            "FLAG_DOCUMENT_17", "FLAG_DOCUMENT_18", "FLAG_DOCUMENT_19", "FLAG_DOCUMENT_20", "FLAG_DOCUMENT_21"]
appl["NUM_DOCUMENTS"] = appl[doc_vars].sum(axis = 1)

# application date
appl["DAY_APPR_PROCESS_START"] = "Working day"
appl["DAY_APPR_PROCESS_START"][(appl["WEEKDAY_APPR_PROCESS_START"] == "SATURDAY") |
                               (appl["WEEKDAY_APPR_PROCESS_START"] == "SUNDAY")] = "Weekend"

# logarithms
log_vars = ["AMT_CREDIT", "AMT_INCOME_TOTAL", "AMT_GOODS_PRICE", "AMT_ANNUITY"]
appl = create_logs(appl, log_vars, replace = True)

# convert days
day_vars = ["DAYS_BIRTH", "DAYS_REGISTRATION", "DAYS_ID_PUBLISH", "DAYS_EMPLOYED", "DAYS_LAST_PHONE_CHANGE"]
appl = convert_days(appl, day_vars, t = 30, rounding = True, replace = True)



##### FEATURE REMOVAL
#drops = ['APARTMENTS_MEDI', 'BASEMENTAREA_MEDI', 'COMMONAREA_MEDI', 'ELEVATORS_MEDI', 'ENTRANCES_MEDI', 
#         'FLOORSMAX_MEDI', 'FLOORSMIN_MEDI', 'LANDAREA_MEDI', 'LIVINGAPARTMENTS_MEDI', 'LIVINGAREA_MEDI',
#         'NONLIVINGAPARTMENTS_MEDI', 'NONLIVINGAREA_MEDI','YEARS_BEGINEXPLUATATION_MEDI', 'YEARS_BUILD_MEDI',
#         'APARTMENTS_MODE', 'BASEMENTAREA_MODE', 'COMMONAREA_MODE','ELEVATORS_MODE', 'ENTRANCES_MODE', 
#         'FLOORSMAX_MODE', 'FLOORSMIN_MODE', 'LANDAREA_MODE', 'LIVINGAPARTMENTS_MODE', 'LIVINGAREA_MODE', 
#         'NONLIVINGAPARTMENTS_MODE', 'NONLIVINGAREA_MODE', 'TOTALAREA_MODE',  'YEARS_BEGINEXPLUATATION_MODE']
#appl = appl.drop(columns = drops)

In [66]:
# rename features
appl.columns = ["SK_ID_CURR"] + ["app_" + str(col) for col in appl.columns if col not in "SK_ID_CURR"]

In [67]:
# check data
appl.head()

Unnamed: 0,SK_ID_CURR,app_NAME_CONTRACT_TYPE,app_CODE_GENDER,app_FLAG_OWN_CAR,app_FLAG_OWN_REALTY,app_CNT_CHILDREN,app_AMT_INCOME_TOTAL,app_AMT_CREDIT,app_AMT_ANNUITY,app_AMT_GOODS_PRICE,app_NAME_TYPE_SUITE,app_NAME_INCOME_TYPE,app_NAME_EDUCATION_TYPE,app_NAME_FAMILY_STATUS,app_NAME_HOUSING_TYPE,app_REGION_POPULATION_RELATIVE,app_DAYS_BIRTH,app_DAYS_EMPLOYED,app_DAYS_REGISTRATION,app_DAYS_ID_PUBLISH,app_OWN_CAR_AGE,app_FLAG_MOBIL,app_FLAG_EMP_PHONE,app_FLAG_WORK_PHONE,app_FLAG_CONT_MOBILE,app_FLAG_PHONE,app_FLAG_EMAIL,app_OCCUPATION_TYPE,app_CNT_FAM_MEMBERS,app_REGION_RATING_CLIENT,app_REGION_RATING_CLIENT_W_CITY,app_WEEKDAY_APPR_PROCESS_START,app_HOUR_APPR_PROCESS_START,app_REG_REGION_NOT_LIVE_REGION,app_REG_REGION_NOT_WORK_REGION,app_LIVE_REGION_NOT_WORK_REGION,app_REG_CITY_NOT_LIVE_CITY,app_REG_CITY_NOT_WORK_CITY,app_LIVE_CITY_NOT_WORK_CITY,app_ORGANIZATION_TYPE,app_EXT_SOURCE_1,app_EXT_SOURCE_2,app_EXT_SOURCE_3,app_APARTMENTS_AVG,app_BASEMENTAREA_AVG,app_YEARS_BEGINEXPLUATATION_AVG,app_YEARS_BUILD_AVG,app_COMMONAREA_AVG,app_ELEVATORS_AVG,app_ENTRANCES_AVG,app_FLOORSMAX_AVG,app_FLOORSMIN_AVG,app_LANDAREA_AVG,app_LIVINGAPARTMENTS_AVG,app_LIVINGAREA_AVG,app_NONLIVINGAPARTMENTS_AVG,app_NONLIVINGAREA_AVG,app_APARTMENTS_MODE,app_BASEMENTAREA_MODE,app_YEARS_BEGINEXPLUATATION_MODE,app_YEARS_BUILD_MODE,app_COMMONAREA_MODE,app_ELEVATORS_MODE,app_ENTRANCES_MODE,app_FLOORSMAX_MODE,app_FLOORSMIN_MODE,app_LANDAREA_MODE,app_LIVINGAPARTMENTS_MODE,app_LIVINGAREA_MODE,app_NONLIVINGAPARTMENTS_MODE,app_NONLIVINGAREA_MODE,app_APARTMENTS_MEDI,app_BASEMENTAREA_MEDI,app_YEARS_BEGINEXPLUATATION_MEDI,app_YEARS_BUILD_MEDI,app_COMMONAREA_MEDI,app_ELEVATORS_MEDI,app_ENTRANCES_MEDI,app_FLOORSMAX_MEDI,app_FLOORSMIN_MEDI,app_LANDAREA_MEDI,app_LIVINGAPARTMENTS_MEDI,app_LIVINGAREA_MEDI,app_NONLIVINGAPARTMENTS_MEDI,app_NONLIVINGAREA_MEDI,app_FONDKAPREMONT_MODE,app_HOUSETYPE_MODE,app_TOTALAREA_MODE,app_WALLSMATERIAL_MODE,app_EMERGENCYSTATE_MODE,app_OBS_30_CNT_SOCIAL_CIRCLE,app_DEF_30_CNT_SOCIAL_CIRCLE,app_OBS_60_CNT_SOCIAL_CIRCLE,app_DEF_60_CNT_SOCIAL_CIRCLE,app_DAYS_LAST_PHONE_CHANGE,app_FLAG_DOCUMENT_2,app_FLAG_DOCUMENT_3,app_FLAG_DOCUMENT_4,app_FLAG_DOCUMENT_5,app_FLAG_DOCUMENT_6,app_FLAG_DOCUMENT_7,app_FLAG_DOCUMENT_8,app_FLAG_DOCUMENT_9,app_FLAG_DOCUMENT_10,app_FLAG_DOCUMENT_11,app_FLAG_DOCUMENT_12,app_FLAG_DOCUMENT_13,app_FLAG_DOCUMENT_14,app_FLAG_DOCUMENT_15,app_FLAG_DOCUMENT_16,app_FLAG_DOCUMENT_17,app_FLAG_DOCUMENT_18,app_FLAG_DOCUMENT_19,app_FLAG_DOCUMENT_20,app_FLAG_DOCUMENT_21,app_AMT_REQ_CREDIT_BUREAU_HOUR,app_AMT_REQ_CREDIT_BUREAU_DAY,app_AMT_REQ_CREDIT_BUREAU_WEEK,app_AMT_REQ_CREDIT_BUREAU_MON,app_AMT_REQ_CREDIT_BUREAU_QRT,app_AMT_REQ_CREDIT_BUREAU_YEAR,app_CREDIT_BY_INCOME,app_ANNUITY_BY_INCOME,app_GOODS_PRICE_BY_INCOME,app_PERCENT_WORKED,app_CNT_ADULTS,app_NUM_EXT_SOURCES,app_NUM_DOCUMENTS,app_DAY_APPR_PROCESS_START
0,100002,Cash loans,M,N,Y,0,12.2185,12.915581,10.114619,12.768544,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,315.0,21.0,122.0,71.0,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.0252,0.0383,0.9722,0.6341,0.0144,0.0,0.069,0.0833,0.125,0.0377,0.022,0.0198,0.0,0.0,0.025,0.0369,0.9722,0.6243,0.0144,0.0,0.069,0.0833,0.125,0.0375,0.0205,0.0193,0.0,0.0,reg oper account,block of flats,0.0149,"Stone, brick",No,2.0,2.0,2.0,2.0,38.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,2.007889,0.121978,1.733333,0.067329,1.0,3,1,Working day
1,100003,Cash loans,F,N,N,0,12.506181,14.072865,10.482892,13.937287,Family,State servant,Higher education,Married,House / apartment,0.003541,559.0,40.0,40.0,10.0,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.804,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.079,0.0554,0.0,0.0,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.01,reg oper account,block of flats,0.0714,Block,No,1.0,0.0,1.0,0.0,28.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.79075,0.132217,4.183333,0.070862,2.0,2,1,Working day
2,100004,Revolving loans,M,Y,Y,0,11.119898,11.813037,8.817446,11.813037,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,635.0,8.0,142.0,84.0,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,0.555912,0.729567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,27.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.1,2.0,0.011814,1.0,2,0,Working day
3,100006,Cash loans,F,N,Y,0,11.813037,12.652947,10.298481,12.601491,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,634.0,101.0,328.0,81.0,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,0.650442,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,21.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,2.316167,0.2199,2.2,0.159905,2.0,1,1,Working day
4,100007,Cash loans,M,N,Y,0,11.707678,13.148033,9.992711,13.148033,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,664.0,101.0,144.0,115.0,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,0.322738,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,37.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.222222,0.179963,4.222222,0.152418,1.0,1,1,Working day


In [68]:
# count missings
nas = count_missings(appl)
nas

Unnamed: 0,Total,Percent
app_COMMONAREA_MEDI,248360,69.714109
app_COMMONAREA_MODE,248360,69.714109
app_COMMONAREA_AVG,248360,69.714109
app_NONLIVINGAPARTMENTS_AVG,246861,69.293343
app_NONLIVINGAPARTMENTS_MODE,246861,69.293343
app_NONLIVINGAPARTMENTS_MEDI,246861,69.293343
app_FONDKAPREMONT_MODE,243092,68.235393
app_LIVINGAPARTMENTS_AVG,242979,68.203674
app_LIVINGAPARTMENTS_MODE,242979,68.203674
app_LIVINGAPARTMENTS_MEDI,242979,68.203674


In [69]:
##### IMPUTE MISSINGS

### APARTMENT DATA

# find variables
living_nums = ["app_COMMONAREA_MODE", "app_COMMONAREA_AVG", "app_COMMONAREA_MEDI",
               "app_NONLIVINGAPARTMENTS_AVG", "app_NONLIVINGAPARTMENTS_MODE",
               "app_NONLIVINGAPARTMENTS_MEDI",
               "app_LIVINGAPARTMENTS_MODE", "app_LIVINGAPARTMENTS_AVG",
               "app_LIVINGAPARTMENTS_MEDI", "app_FLOORSMIN_MODE", "app_FLOORSMIN_AVG",
               "app_FLOORSMIN_MEDI", "app_YEARS_BUILD_AVG", "app_YEARS_BUILD_MEDI",
               "app_YEARS_BUILD_MODE", "app_LANDAREA_AVG",
               "app_LANDAREA_MEDI", "app_LANDAREA_MODE", "app_BASEMENTAREA_MEDI",
               "app_BASEMENTAREA_MODE", "app_BASEMENTAREA_AVG",
               "app_NONLIVINGAREA_MODE", "app_NONLIVINGAREA_AVG",
               "app_NONLIVINGAREA_MEDI","app_ELEVATORS_MODE",
               "app_ELEVATORS_AVG", "app_ELEVATORS_MEDI",
               "app_APARTMENTS_MEDI", "app_APARTMENTS_MODE", "app_APARTMENTS_AVG",
               "app_ENTRANCES_AVG", "app_ENTRANCES_MODE", "app_ENTRANCES_MEDI",
               "app_LIVINGAREA_MEDI", "app_LIVINGAREA_AVG",
               "app_LIVINGAREA_MODE", "app_FLOORSMAX_AVG", "app_FLOORSMAX_MODE",
               "app_FLOORSMAX_MEDI", "app_YEARS_BEGINEXPLUATATION_MEDI",
               "app_YEARS_BEGINEXPLUATATION_AVG", "app_YEARS_BEGINEXPLUATATION_MODE",
               "app_TOTALAREA_MODE"]
living_facs = ["app_FONDKAPREMONT_MODE", "app_WALLSMATERIAL_MODE", "app_HOUSETYPE_MODE", "app_EMERGENCYSTATE_MODE"]
living_vars = living_nums + living_facs

# dummy indicator
appl["app_isnull_HOUSE"] = (appl[living_vars].isnull().sum(axis = 1) == 0).astype(int)

# impute numerics
for var in living_nums:
    appl[var].fillna(-99, inplace = True)
    
# impute factors
for var in living_facs:
    appl[var].fillna("Unknown level", inplace = True)
    
    
### OWN CAR
appl["app_OWN_CAR_AGE"][appl["app_FLAG_OWN_CAR"] == "Y"].fillna(appl["app_OWN_CAR_AGE"].median(), inplace = True)
appl["app_OWN_CAR_AGE"].fillna(-9, inplace = True)


### EXTERNAL SCORES
for var in ["app_EXT_SOURCE_1", "app_EXT_SOURCE_2", "app_EXT_SOURCE_3"]:
    appl[var].fillna(-9, inplace = True)
    
    
### OCCUPATION
appl["app_OCCUPATION_TYPE"][appl["app_NAME_INCOME_TYPE"] == "Pensioner"].fillna("Retired", inplace = True)
appl["app_OCCUPATION_TYPE"][appl["app_NAME_INCOME_TYPE"] == "Student"].fillna("Student",   inplace = True)
appl["app_OCCUPATION_TYPE"].fillna("Unknown level", inplace = True)


### BURO ENQUIRIES

# find variables
buro_vars = ["app_AMT_REQ_CREDIT_BUREAU_YEAR", "app_AMT_REQ_CREDIT_BUREAU_QRT", 
             "app_AMT_REQ_CREDIT_BUREAU_MON",  "app_AMT_REQ_CREDIT_BUREAU_WEEK", 
             "app_AMT_REQ_CREDIT_BUREAU_DAY",  "app_AMT_REQ_CREDIT_BUREAU_HOUR"]

# dummy indicator
appl["app_isnull_BURO_ENQUIRIES"] = (appl[buro_vars].isnull().sum(axis = 1) == 0).astype(int)

# impute numerics
for var in buro_vars:
    appl[var].fillna(-99, inplace = True)
    
    
### COMPANY
appl["app_NAME_TYPE_SUITE"].fillna("Unknown level", inplace = True)


### SOCIAL CIRCLE

# find variables
social_vars = ["app_OBS_30_CNT_SOCIAL_CIRCLE", "app_DEF_30_CNT_SOCIAL_CIRCLE", "app_OBS_60_CNT_SOCIAL_CIRCLE", 
               "app_DEF_60_CNT_SOCIAL_CIRCLE", "app_AMT_GOODS_PRICE"]

# dummy indicator
appl["app_isnull_SOCIAL_CIRCLE"] = (appl[social_vars].isnull().sum(axis = 1) == 0).astype(int)

# impute numerics
for var in social_vars:
    appl[var].fillna(-99, inplace = True)
             
             
### OTHER FEATURES
             
# find variables
other_vars = ["app_AMT_GOODS_PRICE", "app_AMT_ANNUITY", "app_CNT_FAM_MEMBERS", "app_DAYS_LAST_PHONE_CHANGE"]
for var in other_vars:
    appl[var].fillna(-99, inplace = True)           

In [70]:
# count missings
nas = count_missings(appl)
nas

Unnamed: 0,Total,Percent
app_DAYS_EMPLOYED,64648,18.146552
app_PERCENT_WORKED,64648,18.146552
app_GOODS_PRICE_BY_INCOME,278,0.078034
app_ANNUITY_BY_INCOME,36,0.010105
app_CNT_ADULTS,2,0.000561


## 4.2. CREDIT BUREAU DATA

### 4.2.1. CHECKS

In [71]:
# check buro data
buro.head()

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,5714464,Active,currency 1,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,5714465,Active,currency 1,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,5714466,Active,currency 1,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,


In [72]:
# check bbal data
bbal.head()

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C
2,5715448,-2,C
3,5715448,-3,C
4,5715448,-4,C


### 4.2.2. BBAL DATA

In [73]:
### FEATURE ENGINEERING

# dummy encoding for STATUS
bbal = pd.get_dummies(bbal, columns = ["STATUS"], prefix = "STATUS")

In [74]:
# count missings
nas = count_missings(bbal)
nas

Unnamed: 0,Total,Percent


In [75]:
### AGGREGATIONS

# total month count
cnt_mon = bbal[["SK_ID_BUREAU", "MONTHS_BALANCE"]].groupby("SK_ID_BUREAU").count()
del bbal["MONTHS_BALANCE"]

# aggregate data
agg_bbal = bbal.groupby("SK_ID_BUREAU").mean()

# add total month count
agg_bbal["MONTH_COUNT"] = cnt_mon

In [76]:
# count missings
nas = count_missings(agg_bbal)
nas

Unnamed: 0,Total,Percent


In [77]:
# check data
agg_bbal.head()

Unnamed: 0_level_0,STATUS_0,STATUS_1,STATUS_2,STATUS_3,STATUS_4,STATUS_5,STATUS_C,STATUS_X,MONTH_COUNT
SK_ID_BUREAU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
5001709,0.0,0.0,0.0,0.0,0.0,0.0,0.886598,0.113402,97
5001710,0.060241,0.0,0.0,0.0,0.0,0.0,0.578313,0.361446,83
5001711,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.25,4
5001712,0.526316,0.0,0.0,0.0,0.0,0.0,0.473684,0.0,19
5001713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,22


In [78]:
# clear memory
del bbal

### 4.2.3. BURO DATA

In [79]:
### MERGE
buro = buro.merge(right = agg_bbal.reset_index(), how = "left", on = "SK_ID_BUREAU")

In [80]:
##### FEATURE ENGINEERING

# number of buro loans 
cnt_buro = buro[["SK_ID_CURR", "SK_ID_BUREAU"]].groupby(["SK_ID_CURR"], as_index = False).count()
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_LOANS"]
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")

# amount ratios
buro["AMT_SUM_OVERDUE_RATIO_1"] = buro["AMT_CREDIT_SUM_OVERDUE"] / buro["AMT_ANNUITY"]
buro["AMT_SUM_OVERDUE_RATIO_2"] = buro["AMT_CREDIT_SUM_OVERDUE"] / buro["AMT_CREDIT_SUM"]
buro["AMT_MAX_OVERDUE_RATIO_1"] = buro["AMT_CREDIT_MAX_OVERDUE"] / buro["AMT_ANNUITY"]
buro["AMT_MAX_OVERDUE_RATIO_2"] = buro["AMT_CREDIT_MAX_OVERDUE"] / buro["AMT_CREDIT_SUM"]
buro["AMT_SUM_DEBT_RATIO_1"]    = buro["AMT_CREDIT_SUM_DEBT"] / buro["AMT_CREDIT_SUM"]
buro["AMT_SUM_DEBT_RATIO_2"]    = buro["AMT_CREDIT_SUM_DEBT"] / buro["AMT_CREDIT_SUM_LIMIT"]

# logarithms
log_vars = ["AMT_CREDIT_SUM", "AMT_CREDIT_SUM_DEBT", "AMT_CREDIT_SUM_LIMIT", "AMT_CREDIT_SUM_OVERDUE", "AMT_ANNUITY"]
buro = create_logs(buro, log_vars, replace = True)

# convert days
day_vars = ["DAYS_CREDIT", "CREDIT_DAY_OVERDUE", "DAYS_CREDIT_ENDDATE", "DAYS_ENDDATE_FACT", "DAYS_CREDIT_UPDATE"]
buro = convert_days(buro, day_vars, t = 1, rounding = False, replace = True)

# day differences
buro["DAYS_END_DIFF_1"] = buro["DAYS_ENDDATE_FACT"]   - buro["DAYS_CREDIT_ENDDATE"]
buro["DAYS_END_DIFF_2"] = buro["DAYS_CREDIT_UPDATE"]  - buro["DAYS_CREDIT_ENDDATE"]
buro["DAYS_DURATION_1"] = buro["DAYS_CREDIT_ENDDATE"] - buro["DAYS_CREDIT"]
buro["DAYS_DURATION_2"] = buro["DAYS_ENDDATE_FACT"]   - buro["DAYS_CREDIT"]

# number of active buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_ACTIVE"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_ACTIVE"] == "Active"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_ACTIVE"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_ACTIVE"].fillna(0, inplace = True)

# number of closed buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_CLOSED"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_CLOSED"] == "Closed"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_CLOSED"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_CLOSED"].fillna(0, inplace = True)

# number of defaulted buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_BAD"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_BAD"] == "Bad debt"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_BAD"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_BAD"].fillna(0, inplace = True)

In [81]:
# count missings
nas = count_missings(buro)
nas

Unnamed: 0,Total,Percent
AMT_MAX_OVERDUE_RATIO_1,1629591,94.940831
AMT_SUM_OVERDUE_RATIO_1,1483326,86.419355
AMT_SUM_DEBT_RATIO_2,1336100,77.84189
AMT_ANNUITY,1226791,71.47349
AMT_MAX_OVERDUE_RATIO_2,1149800,66.987954
AMT_CREDIT_MAX_OVERDUE,1124488,65.513264
MONTH_COUNT,942074,54.885728
STATUS_0,942074,54.885728
STATUS_1,942074,54.885728
STATUS_3,942074,54.885728


In [82]:
##### IMPUTE MISSINGS

### STATUS

# find variables
stats_vars = ["STATUS_0", "STATUS_1", "STATUS_2", "STATUS_3", 
              "STATUS_4", "STATUS_5", "STATUS_C", "STATUS_X", "MONTH_COUNT"]

# dummy indicator
buro["isnull_STATUS"] = (buro[stats_vars].isnull().sum(axis = 1) == 0).astype(int)

# impute stats_vars
for var in stats_vars:
    buro[var].fillna((buro[var].median()), inplace = True)
         
        
### AMOUNTS
amnts = ["AMT_ANNUITY", "AMT_CREDIT_MAX_OVERDUE"] 
for var in amnts:
    buro["isnull_" + var] = buro[var].isnull() + 0

             
### OTHER FEATURES
             
# find variables
other_vars = ["DAYS_ENDDATE_FACT", "AMT_CREDIT_SUM_LIMIT", "AMT_CREDIT_SUM_DEBT", 
              "DAYS_CREDIT_ENDDATE", "AMT_CREDIT_SUM"]
for var in other_vars:
    buro[var].fillna((buro[var].median()), inplace = True)

In [83]:
### AGGREGATIONS

# count previous buro loans
cnt_buro = buro[["SK_ID_CURR", "SK_ID_BUREAU"]].groupby("SK_ID_CURR").count()
del buro["SK_ID_BUREAU"]

# aggregate data
agg_buro = aggregate_data(buro, id_var = "SK_ID_CURR", label = "buro")

# add buro loan count
agg_buro["buro_BURO_COUNT"] = cnt_buro

- Preparing the dataset...
- Extracted 3 factors and 38 numerics...
- Aggregating numeric features...
- Aggregating factor features...
- Final dimensions: (305811, 158)


In [84]:
# count missings
nas = count_missings(agg_buro)
nas

Unnamed: 0,Total,Percent
buro_AMT_SUM_DEBT_RATIO_2_std,302007,98.756094
buro_AMT_MAX_OVERDUE_RATIO_1_std,288362,94.294188
buro_AMT_MAX_OVERDUE_RATIO_1_mean,253792,82.989821
buro_AMT_MAX_OVERDUE_RATIO_1_min,253792,82.989821
buro_AMT_MAX_OVERDUE_RATIO_1_max,253792,82.989821
buro_AMT_SUM_OVERDUE_RATIO_1_std,250948,82.059834
buro_AMT_SUM_OVERDUE_RATIO_1_min,215535,70.479806
buro_AMT_SUM_OVERDUE_RATIO_1_mean,215535,70.479806
buro_AMT_SUM_OVERDUE_RATIO_1_max,215535,70.479806
buro_AMT_ANNUITY_std,213412,69.785587


In [85]:
# check data
agg_buro.head()

Unnamed: 0_level_0,buro_AMT_CREDIT_SUM_mean,buro_AMT_CREDIT_SUM_std,buro_AMT_CREDIT_SUM_min,buro_AMT_CREDIT_SUM_max,buro_STATUS_0_mean,buro_STATUS_0_std,buro_STATUS_0_min,buro_STATUS_0_max,buro_STATUS_3_mean,buro_STATUS_3_std,buro_STATUS_3_min,buro_STATUS_3_max,buro_DAYS_ENDDATE_FACT_mean,buro_DAYS_ENDDATE_FACT_std,buro_DAYS_ENDDATE_FACT_min,buro_DAYS_ENDDATE_FACT_max,buro_isnull_AMT_CREDIT_MAX_OVERDUE_mean,buro_isnull_AMT_CREDIT_MAX_OVERDUE_std,buro_isnull_AMT_CREDIT_MAX_OVERDUE_min,buro_isnull_AMT_CREDIT_MAX_OVERDUE_max,buro_DAYS_CREDIT_mean,buro_DAYS_CREDIT_std,buro_DAYS_CREDIT_min,buro_DAYS_CREDIT_max,buro_AMT_SUM_DEBT_RATIO_1_mean,buro_AMT_SUM_DEBT_RATIO_1_std,buro_AMT_SUM_DEBT_RATIO_1_min,buro_AMT_SUM_DEBT_RATIO_1_max,buro_DAYS_CREDIT_ENDDATE_mean,buro_DAYS_CREDIT_ENDDATE_std,buro_DAYS_CREDIT_ENDDATE_min,buro_DAYS_CREDIT_ENDDATE_max,buro_STATUS_1_mean,buro_STATUS_1_std,buro_STATUS_1_min,buro_STATUS_1_max,buro_AMT_CREDIT_SUM_OVERDUE_mean,buro_AMT_CREDIT_SUM_OVERDUE_std,buro_AMT_CREDIT_SUM_OVERDUE_min,buro_AMT_CREDIT_SUM_OVERDUE_max,buro_CREDIT_DAY_OVERDUE_mean,buro_CREDIT_DAY_OVERDUE_std,buro_CREDIT_DAY_OVERDUE_min,buro_CREDIT_DAY_OVERDUE_max,buro_isnull_STATUS_mean,buro_isnull_STATUS_std,buro_isnull_STATUS_min,buro_isnull_STATUS_max,buro_AMT_SUM_OVERDUE_RATIO_2_mean,buro_AMT_SUM_OVERDUE_RATIO_2_std,buro_AMT_SUM_OVERDUE_RATIO_2_min,buro_AMT_SUM_OVERDUE_RATIO_2_max,buro_isnull_AMT_ANNUITY_mean,buro_isnull_AMT_ANNUITY_std,buro_isnull_AMT_ANNUITY_min,buro_isnull_AMT_ANNUITY_max,buro_AMT_CREDIT_SUM_LIMIT_mean,buro_AMT_CREDIT_SUM_LIMIT_std,buro_AMT_CREDIT_SUM_LIMIT_min,buro_AMT_CREDIT_SUM_LIMIT_max,buro_AMT_MAX_OVERDUE_RATIO_1_mean,buro_AMT_MAX_OVERDUE_RATIO_1_std,buro_AMT_MAX_OVERDUE_RATIO_1_min,buro_AMT_MAX_OVERDUE_RATIO_1_max,buro_CNT_BURO_CLOSED_mean,buro_CNT_BURO_CLOSED_std,buro_CNT_BURO_CLOSED_min,buro_CNT_BURO_CLOSED_max,buro_AMT_SUM_OVERDUE_RATIO_1_mean,buro_AMT_SUM_OVERDUE_RATIO_1_std,buro_AMT_SUM_OVERDUE_RATIO_1_min,buro_AMT_SUM_OVERDUE_RATIO_1_max,buro_MONTH_COUNT_mean,buro_MONTH_COUNT_std,buro_MONTH_COUNT_min,buro_MONTH_COUNT_max,buro_CNT_BURO_ACTIVE_mean,buro_CNT_BURO_ACTIVE_std,buro_CNT_BURO_ACTIVE_min,buro_CNT_BURO_ACTIVE_max,buro_STATUS_5_mean,buro_STATUS_5_std,buro_STATUS_5_min,buro_STATUS_5_max,buro_STATUS_C_mean,buro_STATUS_C_std,buro_STATUS_C_min,buro_STATUS_C_max,buro_AMT_CREDIT_SUM_DEBT_mean,buro_AMT_CREDIT_SUM_DEBT_std,buro_AMT_CREDIT_SUM_DEBT_min,buro_AMT_CREDIT_SUM_DEBT_max,buro_STATUS_X_mean,buro_STATUS_X_std,buro_STATUS_X_min,buro_STATUS_X_max,buro_STATUS_4_mean,buro_STATUS_4_std,buro_STATUS_4_min,buro_STATUS_4_max,buro_DAYS_END_DIFF_2_mean,buro_DAYS_END_DIFF_2_std,buro_DAYS_END_DIFF_2_min,buro_DAYS_END_DIFF_2_max,buro_CNT_CREDIT_PROLONG_mean,buro_CNT_CREDIT_PROLONG_std,buro_CNT_CREDIT_PROLONG_min,buro_CNT_CREDIT_PROLONG_max,buro_DAYS_END_DIFF_1_mean,buro_DAYS_END_DIFF_1_std,buro_DAYS_END_DIFF_1_min,buro_DAYS_END_DIFF_1_max,buro_AMT_CREDIT_MAX_OVERDUE_mean,buro_AMT_CREDIT_MAX_OVERDUE_std,buro_AMT_CREDIT_MAX_OVERDUE_min,buro_AMT_CREDIT_MAX_OVERDUE_max,buro_DAYS_DURATION_1_mean,buro_DAYS_DURATION_1_std,buro_DAYS_DURATION_1_min,buro_DAYS_DURATION_1_max,buro_DAYS_DURATION_2_mean,buro_DAYS_DURATION_2_std,buro_DAYS_DURATION_2_min,buro_DAYS_DURATION_2_max,buro_AMT_SUM_DEBT_RATIO_2_mean,buro_AMT_SUM_DEBT_RATIO_2_std,buro_AMT_SUM_DEBT_RATIO_2_min,buro_AMT_SUM_DEBT_RATIO_2_max,buro_AMT_ANNUITY_mean,buro_AMT_ANNUITY_std,buro_AMT_ANNUITY_min,buro_AMT_ANNUITY_max,buro_DAYS_CREDIT_UPDATE_mean,buro_DAYS_CREDIT_UPDATE_std,buro_DAYS_CREDIT_UPDATE_min,buro_DAYS_CREDIT_UPDATE_max,buro_CNT_BURO_LOANS_mean,buro_CNT_BURO_LOANS_std,buro_CNT_BURO_LOANS_min,buro_CNT_BURO_LOANS_max,buro_CNT_BURO_BAD_mean,buro_CNT_BURO_BAD_std,buro_CNT_BURO_BAD_min,buro_CNT_BURO_BAD_max,buro_STATUS_2_mean,buro_STATUS_2_std,buro_STATUS_2_min,buro_STATUS_2_max,buro_AMT_MAX_OVERDUE_RATIO_2_mean,buro_AMT_MAX_OVERDUE_RATIO_2_std,buro_AMT_MAX_OVERDUE_RATIO_2_min,buro_AMT_MAX_OVERDUE_RATIO_2_max,buro_CREDIT_ACTIVE_mode,buro_CREDIT_ACTIVE_unique,buro_CREDIT_CURRENCY_mode,buro_CREDIT_CURRENCY_unique,buro_CREDIT_TYPE_mode,buro_CREDIT_TYPE_unique,buro_BURO_COUNT
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1
100001,12.080036,0.628917,11.356283,12.842652,0.336651,0.381334,0.019231,1.0,0.0,0.0,0.0,0.0,856.142857,263.761526,544.0,1328.0,1.0,0.0,1,1,735.0,489.942514,49.0,1572.0,0.282518,0.399523,0.0,0.987405,748.285714,378.244504,179.0,1329.0,0.007519,0.019893,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,,,,,4.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,24.571429,16.050515,2.0,52.0,3.0,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.44124,0.428578,0.0,0.966667,5.153914,6.440628,0.0,12.829977,0.21459,0.182611,0.0,0.5,0.0,0.0,0.0,0.0,-473.5,491.424121,-1174.0,-24.0,0.0,0.0,0,0,197.0,334.957709,-1.0,698.0,,,,,-425.75,210.829117,-730.0,-243.0,-228.75,136.497558,-335.0,-32.0,inf,,inf,inf,3.839271,4.795585,0.0,9.289475,93.142857,77.20412,6.0,155.0,7,0.0,7,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,Closed,2,currency 1,1,Consumer credit,1,7
100002,9.811994,4.084715,0.0,13.017005,0.40696,0.196494,0.1875,0.818182,0.0,0.0,0.0,0.0,747.375,445.764492,36.0,1185.0,0.375,0.517549,0,1,874.0,431.45104,103.0,1437.0,0.136545,0.27309,0.0,0.54618,945.125,68.43023,908.0,1072.0,0.255682,0.204094,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.125,0.353553,0,1,1.296646,3.667468,0.0,10.373165,inf,,inf,inf,6.0,0.0,6.0,6.0,,,,,13.75,6.363961,4.0,22.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.175426,0.263147,0.0,0.8125,1.551525,4.388375,0.0,12.4122,0.161932,0.16165,0.0,0.5,0.0,0.0,0.0,0.0,36.0,66.730802,-5.0,113.0,0.0,0.0,0,0,37.666667,65.24058,0.0,113.0,1681.029,2363.2469,0.0,5043.645,-220.666667,139.306616,-365.0,-87.0,-277.0,207.190733,-609.0,-76.0,inf,,0.0,inf,0.0,0.0,0.0,0.0,499.875,518.522472,7.0,1185.0,8,0.0,8,8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043436,0.075229,0.0,0.174139,Closed,2,currency 1,1,Consumer credit,2,8
100003,11.609754,1.495879,10.010052,13.604791,0.333333,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,1047.25,738.485105,540.0,2131.0,0.0,0.0,0,0,1400.75,909.826128,606.0,2586.0,0.0,0.0,0.0,0.0,1075.5,929.160732,420.0,2434.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,3.401198,6.802395,0.0,13.604791,,,,,3.0,0.0,3.0,3.0,,,,,25.0,0.0,25.0,25.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.240741,0.0,0.240741,0.240741,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,-57.666667,222.185358,-303.0,130.0,0.0,0.0,0,0,-34.0,253.714406,-303.0,201.0,0.0,0.0,0.0,0.0,-534.333333,496.89469,-1096.0,-152.0,-568.333333,481.117796,-1096.0,-154.0,0.0,,0.0,0.0,,,,,816.0,908.053963,43.0,2131.0,4,0.0,4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Closed,2,currency 1,1,Consumer credit,2,4
100004,11.456566,0.000283,11.456366,11.456766,0.333333,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,532.5,212.839141,382.0,683.0,0.5,0.707107,0,1,867.0,649.124025,408.0,1326.0,0.0,0.0,0.0,0.0,488.5,150.613744,382.0,595.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,,,,,2.0,0.0,2.0,2.0,,,,,25.0,0.0,25.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.240741,0.0,0.240741,0.240741,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,43.5,61.51829,0.0,87.0,0.0,0.0,0,0,44.0,62.225397,0.0,88.0,0.0,,0.0,0.0,-378.5,498.510281,-731.0,-26.0,-334.5,436.284884,-643.0,-26.0,,,,,,,,,532.0,212.132034,382.0,682.0,2,0.0,2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,Closed,1,currency 1,1,Consumer credit,1,2
100005,11.510418,1.544801,10.303169,13.251286,0.735043,0.238245,0.538462,1.0,0.0,0.0,0.0,0.0,639.0,446.869108,123.0,897.0,0.666667,0.57735,0,1,190.666667,162.297053,62.0,373.0,0.601256,0.523384,0.0,0.954794,648.0,450.33321,128.0,908.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,1.0,0.0,1.0,1.0,0.0,,0.0,0.0,7.0,5.291503,3.0,13.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.128205,0.222058,0.0,0.384615,7.781492,6.911089,0.0,13.205027,0.136752,0.174535,0.0,0.333333,0.0,0.0,0.0,0.0,-7.0,,-7.0,-7.0,0.0,0.0,0,0,-5.0,,-5.0,-5.0,0.0,,0.0,0.0,-245.0,,-245.0,-245.0,-250.0,,-250.0,-250.0,inf,,inf,inf,2.78587,4.825269,0.0,8.357611,54.333333,58.594653,11.0,121.0,3,0.0,3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,Active,2,currency 1,1,Consumer credit,2,3


In [86]:
# clear memory
del buro

## 4.3. PREVIOUS LOAN DATA

### 4.3.1. CHECKS

In [87]:
# check prev data
prev.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,Y,1,0.0,0.182832,0.867336,XAP,Approved,-73,Cash through the bank,XAP,,Repeater,Mobile,POS,XNA,Country-wide,35,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,Y,1,,,,XNA,Approved,-164,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,,112500.0,TUESDAY,11,Y,1,,,,XNA,Approved,-301,Cash through the bank,XAP,"Spouse, partner",Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,,450000.0,MONDAY,7,Y,1,,,,XNA,Approved,-512,Cash through the bank,XAP,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,middle,Cash X-Sell: middle,365243.0,-482.0,-152.0,-182.0,-177.0,1.0
4,1784265,202054,Cash loans,31924.395,337500.0,404055.0,,337500.0,THURSDAY,9,Y,1,,,,Repairs,Refused,-781,Cash through the bank,HC,,Repeater,XNA,Cash,walk-in,Credit and cash offices,-1,XNA,24.0,high,Cash Street: high,,,,,,


In [88]:
# check inst data
inst.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NUM_INSTALMENT_VERSION,NUM_INSTALMENT_NUMBER,DAYS_INSTALMENT,DAYS_ENTRY_PAYMENT,AMT_INSTALMENT,AMT_PAYMENT
0,1054186,161674,1.0,6,-1180.0,-1187.0,6948.36,6948.36
1,1330831,151639,0.0,34,-2156.0,-2156.0,1716.525,1716.525
2,2085231,193053,2.0,1,-63.0,-63.0,25425.0,25425.0
3,2452527,199697,1.0,3,-2418.0,-2426.0,24350.13,24350.13
4,2714724,167756,1.0,2,-1383.0,-1366.0,2165.04,2160.585


In [89]:
# check poca data
poca.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,CNT_INSTALMENT,CNT_INSTALMENT_FUTURE,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,1803195,182943,-31,48.0,45.0,Active,0,0
1,1715348,367990,-33,36.0,35.0,Active,0,0
2,1784872,397406,-32,12.0,9.0,Active,0,0
3,1903291,269225,-35,48.0,42.0,Active,0,0
4,2341044,334279,-35,36.0,35.0,Active,0,0


In [90]:
# check card data
card.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,1800.0,1800.0,0.0,0.0,0.0,0.0,1,0.0,1.0,35.0,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.555,64875.555,1.0,1,0.0,0.0,69.0,Active,0,0
2,1740877,371185,-7,31815.225,450000,0.0,0.0,0.0,0.0,2250.0,2250.0,2250.0,26926.425,31460.085,31460.085,0.0,0,0.0,0.0,30.0,Active,0,0
3,1389973,337855,-4,236572.11,225000,2250.0,2250.0,0.0,0.0,11795.76,11925.0,11925.0,224949.285,233048.97,233048.97,1.0,1,0.0,0.0,10.0,Active,0,0
4,1891521,126868,-1,453919.455,450000,0.0,11547.0,0.0,11547.0,22924.89,27000.0,27000.0,443044.395,453919.455,453919.455,0.0,1,0.0,1.0,101.0,Active,0,0


### 4.3.2. INST DATA

In [91]:
### FEATURE ENGINEERING

# day difference
inst["DAYS_INST_DIF"] = inst["DAYS_INSTALMENT"] - inst["DAYS_ENTRY_PAYMENT"]

# percentage paid
inst["AMT_PERCENT_PAID"] = inst["AMT_PAYMENT"] / inst["AMT_INSTALMENT"]

# logarithms
inst["AMT_INSTALMENT"] = np.log(1 + inst["AMT_INSTALMENT"])
inst["AMT_PAYMENT"]    = np.log(1 + inst["AMT_PAYMENT"])

In [92]:
# count missings
nas = count_missings(inst)
nas

Unnamed: 0,Total,Percent
AMT_PERCENT_PAID,2907,0.021367
DAYS_INST_DIF,2905,0.021352
AMT_PAYMENT,2905,0.021352
DAYS_ENTRY_PAYMENT,2905,0.021352


In [93]:
##### IMPUTE MISSINGS

### PAYMENT INFO

# find variables
payment_vars = ["DAYS_INST_DIF", "AMT_PAYMENT", "DAYS_ENTRY_PAYMENT"]

# dummy indicator
inst["isnull_PAYMENTS"] = (inst[payment_vars].isnull().sum(axis = 1) == 0).astype(int)

# impute stats_vars
for var in payment_vars:
    inst[var].fillna((inst[var].median()), inplace = True)
         
             
### OTHER FEATURES
             
# find variables
other_vars = ["AMT_PERCENT_PAID"]
for var in other_vars:
    inst[var].fillna((inst[var].median()), inplace = True)

In [94]:
### AGGREGATIONS

# count instalments
cnt_inst = inst[["SK_ID_PREV", "NUM_INSTALMENT_NUMBER"]].groupby("SK_ID_PREV").count()
del inst["NUM_INSTALMENT_NUMBER"]

# delete ID_CURR
inst_id = inst[["SK_ID_CURR", "SK_ID_PREV"]]
del inst["SK_ID_CURR"]

# aggregate data
agg_inst = aggregate_data(inst, id_var = "SK_ID_PREV")

# add instalment count
agg_inst["inst_INST_COUNT"] = cnt_inst

# put back ID_CURR
inst_id = inst_id.drop_duplicates()
agg_inst = inst_id.merge(right = agg_inst.reset_index(), how = "right", on = "SK_ID_PREV")
del agg_inst["SK_ID_PREV"]

# aggregate data (round 2)
agg_inst = aggregate_data(agg_inst, id_var = "SK_ID_CURR", label = "inst")

- Preparing the dataset...
- Extracted 0 factors and 8 numerics...
- Aggregating numeric features...
- Final dimensions: (997752, 32)
- Preparing the dataset...
- Extracted 0 factors and 33 numerics...
- Aggregating numeric features...
- Final dimensions: (339587, 132)


In [95]:
# count missings
nas = count_missings(agg_inst)
nas

Unnamed: 0,Total,Percent
inst_AMT_PERCENT_PAID_std_std,99692,29.356836
inst_DAYS_INST_DIF_std_std,99686,29.35507
inst_DAYS_INSTALMENT_std_std,99686,29.35507
inst_isnull_PAYMENTS_std_std,99686,29.35507
inst_AMT_PAYMENT_std_std,99686,29.35507
inst_AMT_INSTALMENT_std_std,99686,29.35507
inst_DAYS_ENTRY_PAYMENT_std_std,99686,29.35507
inst_NUM_INSTALMENT_VERSION_std_std,99686,29.35507
inst_AMT_PERCENT_PAID_max_std,94657,27.874153
inst_AMT_PERCENT_PAID_mean_std,94657,27.874153


In [96]:
# check data
agg_inst.head()

Unnamed: 0_level_0,inst_isnull_PAYMENTS_mean_mean,inst_isnull_PAYMENTS_mean_std,inst_isnull_PAYMENTS_mean_min,inst_isnull_PAYMENTS_mean_max,inst_isnull_PAYMENTS_min_mean,inst_isnull_PAYMENTS_min_std,inst_isnull_PAYMENTS_min_min,inst_isnull_PAYMENTS_min_max,inst_NUM_INSTALMENT_VERSION_max_mean,inst_NUM_INSTALMENT_VERSION_max_std,inst_NUM_INSTALMENT_VERSION_max_min,inst_NUM_INSTALMENT_VERSION_max_max,inst_DAYS_ENTRY_PAYMENT_max_mean,inst_DAYS_ENTRY_PAYMENT_max_std,inst_DAYS_ENTRY_PAYMENT_max_min,inst_DAYS_ENTRY_PAYMENT_max_max,inst_NUM_INSTALMENT_VERSION_std_mean,inst_NUM_INSTALMENT_VERSION_std_std,inst_NUM_INSTALMENT_VERSION_std_min,inst_NUM_INSTALMENT_VERSION_std_max,inst_DAYS_ENTRY_PAYMENT_min_mean,inst_DAYS_ENTRY_PAYMENT_min_std,inst_DAYS_ENTRY_PAYMENT_min_min,inst_DAYS_ENTRY_PAYMENT_min_max,inst_DAYS_ENTRY_PAYMENT_std_mean,inst_DAYS_ENTRY_PAYMENT_std_std,inst_DAYS_ENTRY_PAYMENT_std_min,inst_DAYS_ENTRY_PAYMENT_std_max,inst_AMT_PAYMENT_min_mean,inst_AMT_PAYMENT_min_std,inst_AMT_PAYMENT_min_min,inst_AMT_PAYMENT_min_max,inst_inst_INST_COUNT_mean,inst_inst_INST_COUNT_std,inst_inst_INST_COUNT_min,inst_inst_INST_COUNT_max,inst_DAYS_INST_DIF_max_mean,inst_DAYS_INST_DIF_max_std,inst_DAYS_INST_DIF_max_min,inst_DAYS_INST_DIF_max_max,inst_DAYS_INST_DIF_std_mean,inst_DAYS_INST_DIF_std_std,inst_DAYS_INST_DIF_std_min,inst_DAYS_INST_DIF_std_max,inst_DAYS_INST_DIF_mean_mean,inst_DAYS_INST_DIF_mean_std,inst_DAYS_INST_DIF_mean_min,inst_DAYS_INST_DIF_mean_max,inst_AMT_PERCENT_PAID_mean_mean,inst_AMT_PERCENT_PAID_mean_std,inst_AMT_PERCENT_PAID_mean_min,inst_AMT_PERCENT_PAID_mean_max,inst_AMT_PAYMENT_max_mean,inst_AMT_PAYMENT_max_std,inst_AMT_PAYMENT_max_min,inst_AMT_PAYMENT_max_max,inst_NUM_INSTALMENT_VERSION_mean_mean,inst_NUM_INSTALMENT_VERSION_mean_std,inst_NUM_INSTALMENT_VERSION_mean_min,inst_NUM_INSTALMENT_VERSION_mean_max,inst_DAYS_INSTALMENT_std_mean,inst_DAYS_INSTALMENT_std_std,inst_DAYS_INSTALMENT_std_min,inst_DAYS_INSTALMENT_std_max,inst_AMT_PERCENT_PAID_std_mean,inst_AMT_PERCENT_PAID_std_std,inst_AMT_PERCENT_PAID_std_min,inst_AMT_PERCENT_PAID_std_max,inst_DAYS_INSTALMENT_min_mean,inst_DAYS_INSTALMENT_min_std,inst_DAYS_INSTALMENT_min_min,inst_DAYS_INSTALMENT_min_max,inst_isnull_PAYMENTS_max_mean,inst_isnull_PAYMENTS_max_std,inst_isnull_PAYMENTS_max_min,inst_isnull_PAYMENTS_max_max,inst_isnull_PAYMENTS_std_mean,inst_isnull_PAYMENTS_std_std,inst_isnull_PAYMENTS_std_min,inst_isnull_PAYMENTS_std_max,inst_AMT_PAYMENT_std_mean,inst_AMT_PAYMENT_std_std,inst_AMT_PAYMENT_std_min,inst_AMT_PAYMENT_std_max,inst_DAYS_INSTALMENT_max_mean,inst_DAYS_INSTALMENT_max_std,inst_DAYS_INSTALMENT_max_min,inst_DAYS_INSTALMENT_max_max,inst_AMT_INSTALMENT_min_mean,inst_AMT_INSTALMENT_min_std,inst_AMT_INSTALMENT_min_min,inst_AMT_INSTALMENT_min_max,inst_DAYS_INSTALMENT_mean_mean,inst_DAYS_INSTALMENT_mean_std,inst_DAYS_INSTALMENT_mean_min,inst_DAYS_INSTALMENT_mean_max,inst_NUM_INSTALMENT_VERSION_min_mean,inst_NUM_INSTALMENT_VERSION_min_std,inst_NUM_INSTALMENT_VERSION_min_min,inst_NUM_INSTALMENT_VERSION_min_max,inst_DAYS_INST_DIF_min_mean,inst_DAYS_INST_DIF_min_std,inst_DAYS_INST_DIF_min_min,inst_DAYS_INST_DIF_min_max,inst_AMT_INSTALMENT_max_mean,inst_AMT_INSTALMENT_max_std,inst_AMT_INSTALMENT_max_min,inst_AMT_INSTALMENT_max_max,inst_AMT_PERCENT_PAID_min_mean,inst_AMT_PERCENT_PAID_min_std,inst_AMT_PERCENT_PAID_min_min,inst_AMT_PERCENT_PAID_min_max,inst_AMT_PAYMENT_mean_mean,inst_AMT_PAYMENT_mean_std,inst_AMT_PAYMENT_mean_min,inst_AMT_PAYMENT_mean_max,inst_AMT_INSTALMENT_mean_mean,inst_AMT_INSTALMENT_mean_std,inst_AMT_INSTALMENT_mean_min,inst_AMT_INSTALMENT_mean_max,inst_AMT_PERCENT_PAID_max_mean,inst_AMT_PERCENT_PAID_max_std,inst_AMT_PERCENT_PAID_max_min,inst_AMT_PERCENT_PAID_max_max,inst_AMT_INSTALMENT_std_mean,inst_AMT_INSTALMENT_std_std,inst_AMT_INSTALMENT_std_min,inst_AMT_INSTALMENT_std_max,inst_DAYS_ENTRY_PAYMENT_mean_mean,inst_DAYS_ENTRY_PAYMENT_mean_std,inst_DAYS_ENTRY_PAYMENT_mean_min,inst_DAYS_ENTRY_PAYMENT_mean_max
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1
100001,1.0,0.0,1.0,1.0,1.0,0.0,1,1,1.5,0.707107,1.0,2.0,-2242.0,868.327127,-2856.0,-1628.0,0.25,0.353553,0.0,0.5,-2315.5,849.235244,-2916.0,-1715.0,36.844052,8.738705,30.664855,43.02325,8.285749,0.005334,8.281977,8.289521,3.5,0.707107,3,4,18.0,25.455844,0.0,36.0,10.085564,5.281679,6.350853,13.820275,5.916667,13.55288,-3.666667,15.5,1.0,0.0,1.0,1.0,9.026983,1.042529,8.289803,9.764162,1.125,0.176777,1.0,1.25,34.364917,6.172924,30.0,38.729833,0.0,0.0,0.0,0.0,-2312.5,853.477885,-2916.0,-1709.0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.370628,0.523916,0.000163,0.741093,-2237.5,874.691088,-2856.0,-1619.0,8.285749,0.005334,8.281977,8.289521,-2275.0,864.084487,-2886.0,-1664.0,1.0,0.0,1.0,1.0,-2.5,12.020815,-11.0,6.0,9.026983,1.042529,8.289803,9.764162,1.0,0.0,1.0,1.0,8.471116,0.256549,8.289709,8.652523,8.471116,0.256549,8.289709,8.652523,1.0,0.0,1.0,1.0,0.370628,0.523916,0.000163,0.741093,-2280.916667,850.531607,-2882.333333,-1679.5
100002,1.0,,1.0,1.0,1.0,,1,1,2.0,,2.0,2.0,-49.0,,-49.0,-49.0,0.229416,,0.229416,0.229416,-587.0,,-587.0,-587.0,172.058877,,172.058877,172.058877,9.132679,,9.132679,9.132679,19.0,,19,19,31.0,,31.0,31.0,4.925171,,4.925171,4.925171,20.421053,,20.421053,20.421053,1.0,,1.0,1.0,10.879833,,10.879833,10.879833,1.052632,,1.052632,1.052632,168.81943,,168.81943,168.81943,0.0,,0.0,0.0,-565.0,,-565.0,-565.0,1.0,,1,1,0.0,,0.0,0.0,0.400825,,0.400825,0.400825,-25.0,,-25.0,-25.0,9.132679,,9.132679,9.132679,-295.0,,-295.0,-295.0,1.0,,1.0,1.0,12.0,,12.0,12.0,10.879833,,10.879833,10.879833,1.0,,1.0,1.0,9.224634,,9.224634,9.224634,9.224634,,9.224634,9.224634,1.0,,1.0,1.0,0.400825,,0.400825,0.400825,-315.421053,,-315.421053,-315.421053
100003,1.0,0.0,1.0,1.0,1.0,0.0,1,1,1.333333,0.57735,1.0,2.0,-1063.333333,800.327641,-1985.0,-544.0,0.125988,0.218218,0.0,0.377964,-1283.0,902.581298,-2324.0,-719.0,76.380137,30.572883,54.153178,111.24594,10.457264,1.447079,8.804471,11.496369,8.333333,3.21455,6,12,12.0,3.464102,8.0,14.0,2.48377,0.985781,1.718249,3.596084,7.448413,3.422911,4.428571,11.166667,1.0,0.0,1.0,1.0,11.042744,2.210992,8.815564,13.237184,1.047619,0.082479,1.0,1.142857,76.366269,27.879912,56.124861,108.166538,0.0,0.0,0.0,0.0,-1274.333333,897.827563,-2310.0,-716.0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.221007,0.378419,0.001851,0.657966,-1054.333333,803.569744,-1980.0,-536.0,10.457264,1.447079,8.804471,11.496369,-1164.333333,850.637604,-2145.0,-626.0,1.0,0.0,1.0,1.0,4.333333,4.163332,1.0,9.0,11.042744,2.210992,8.815564,13.237184,1.0,0.0,1.0,1.0,10.544809,1.535398,8.81464,11.745057,10.544809,1.535398,8.81464,11.745057,1.0,0.0,1.0,1.0,0.221007,0.378419,0.001851,0.657966,-1171.781746,850.230624,-2151.75,-630.428571
100004,1.0,,1.0,1.0,1.0,,1,1,2.0,,2.0,2.0,-727.0,,-727.0,-727.0,0.57735,,0.57735,0.57735,-795.0,,-795.0,-795.0,34.019602,,34.019602,34.019602,8.586393,,8.586393,8.586393,3.0,,3,3,11.0,,11.0,11.0,4.163332,,4.163332,4.163332,7.666667,,7.666667,7.666667,1.0,,1.0,1.0,9.266245,,9.266245,9.266245,1.333333,,1.333333,1.333333,30.0,,30.0,30.0,0.0,,0.0,0.0,-784.0,,-784.0,-784.0,1.0,,1,1,0.0,,0.0,0.0,0.392513,,0.392513,0.392513,-724.0,,-724.0,-724.0,8.586393,,8.586393,8.586393,-754.0,,-754.0,-754.0,1.0,,1.0,1.0,3.0,,3.0,3.0,9.266245,,9.266245,9.266245,1.0,,1.0,1.0,8.81301,,8.81301,8.81301,8.81301,,8.81301,8.81301,1.0,,1.0,1.0,0.392513,,0.392513,0.392513,-761.666667,,-761.666667,-761.666667
100005,1.0,,1.0,1.0,1.0,,1,1,2.0,,2.0,2.0,-470.0,,-470.0,-470.0,0.333333,,0.333333,0.333333,-736.0,,-736.0,-736.0,90.554005,,90.554005,90.554005,8.479325,,8.479325,8.479325,9.0,,9,9,37.0,,37.0,37.0,13.510284,,13.510284,13.510284,23.555556,,23.555556,23.555556,1.0,,1.0,1.0,9.778901,,9.778901,9.778901,1.111111,,1.111111,1.111111,82.158384,,82.158384,82.158384,0.0,,0.0,0.0,-706.0,,-706.0,-706.0,1.0,,1,1,0.0,,0.0,0.0,0.433192,,0.433192,0.433192,-466.0,,-466.0,-466.0,8.479325,,8.479325,8.479325,-586.0,,-586.0,-586.0,1.0,,1.0,1.0,-1.0,,-1.0,-1.0,9.778901,,9.778901,9.778901,1.0,,1.0,1.0,8.623723,,8.623723,8.623723,8.623723,,8.623723,8.623723,1.0,,1.0,1.0,0.433192,,0.433192,0.433192,-609.555556,,-609.555556,-609.555556


In [97]:
# clear memory
del inst

### 4.3.3. POCA DATA

In [98]:
### FEATURE ENGINEERING

# installments percentage
poca["INSTALLMENTS_PERCENT"] = poca["CNT_INSTALMENT_FUTURE"] / poca["CNT_INSTALMENT"]

In [99]:
# count missings
nas = count_missings(poca)
nas

Unnamed: 0,Total,Percent
INSTALLMENTS_PERCENT,26184,0.261804
CNT_INSTALMENT_FUTURE,26087,0.260835
CNT_INSTALMENT,26071,0.260675


In [100]:
##### IMPUTE MISSINGS
             
### OTHER FEATURES
             
# find variables
other_vars = ["INSTALLMENTS_PERCENT", "CNT_INSTALMENT_FUTURE", "CNT_INSTALMENT"]
for var in other_vars:
    poca[var].fillna((poca[var].median()), inplace = True)

In [101]:
### AGGREGATIONS

# count months
cnt_mon = poca[["SK_ID_PREV", "MONTHS_BALANCE"]].groupby("SK_ID_PREV").count()
del poca["MONTHS_BALANCE"]

# delete ID_CURR
poca_id = poca[["SK_ID_CURR", "SK_ID_PREV"]]
del poca["SK_ID_CURR"]

# aggregate data
agg_poca = aggregate_data(poca, id_var = "SK_ID_PREV")

# add month count
agg_poca["poca_MON_COUNT"] = cnt_mon

# put back ID_CURR
poca_id = poca_id.drop_duplicates()
agg_poca = poca_id.merge(right = agg_poca.reset_index(), how = "right", on = "SK_ID_PREV")
del agg_poca["SK_ID_PREV"]

# aggregate data (round 2)
agg_poca = aggregate_data(agg_poca, id_var = "SK_ID_CURR", label = "poca")

- Preparing the dataset...
- Extracted 1 factors and 5 numerics...
- Aggregating numeric features...
- Aggregating factor features...
- Final dimensions: (936325, 22)
- Preparing the dataset...
- Extracted 1 factors and 22 numerics...
- Aggregating numeric features...
- Aggregating factor features...
- Final dimensions: (337252, 90)


In [102]:
# count missings
nas = count_missings(agg_poca)
nas

Unnamed: 0,Total,Percent
poca_SK_DPD_DEF_std_std,106037,31.441474
poca_CNT_INSTALMENT_FUTURE_std_std,106037,31.441474
poca_CNT_INSTALMENT_std_std,106037,31.441474
poca_INSTALLMENTS_PERCENT_std_std,106037,31.441474
poca_SK_DPD_std_std,106037,31.441474
poca_SK_DPD_max_std,103736,30.759195
poca_SK_DPD_min_std,103736,30.759195
poca_SK_DPD_DEF_min_std,103736,30.759195
poca_poca_MON_COUNT_std,103736,30.759195
poca_SK_DPD_mean_std,103736,30.759195


In [103]:
# check data
agg_poca.head()

Unnamed: 0_level_0,poca_CNT_INSTALMENT_FUTURE_mean_mean,poca_CNT_INSTALMENT_FUTURE_mean_std,poca_CNT_INSTALMENT_FUTURE_mean_min,poca_CNT_INSTALMENT_FUTURE_mean_max,poca_SK_DPD_DEF_max_mean,poca_SK_DPD_DEF_max_std,poca_SK_DPD_DEF_max_min,poca_SK_DPD_DEF_max_max,poca_NAME_CONTRACT_STATUS_unique_mean,poca_NAME_CONTRACT_STATUS_unique_std,poca_NAME_CONTRACT_STATUS_unique_min,poca_NAME_CONTRACT_STATUS_unique_max,poca_SK_DPD_std_mean,poca_SK_DPD_std_std,poca_SK_DPD_std_min,poca_SK_DPD_std_max,poca_SK_DPD_DEF_std_mean,poca_SK_DPD_DEF_std_std,poca_SK_DPD_DEF_std_min,poca_SK_DPD_DEF_std_max,poca_CNT_INSTALMENT_std_mean,poca_CNT_INSTALMENT_std_std,poca_CNT_INSTALMENT_std_min,poca_CNT_INSTALMENT_std_max,poca_SK_DPD_DEF_mean_mean,poca_SK_DPD_DEF_mean_std,poca_SK_DPD_DEF_mean_min,poca_SK_DPD_DEF_mean_max,poca_poca_MON_COUNT_mean,poca_poca_MON_COUNT_std,poca_poca_MON_COUNT_min,poca_poca_MON_COUNT_max,poca_CNT_INSTALMENT_mean_mean,poca_CNT_INSTALMENT_mean_std,poca_CNT_INSTALMENT_mean_min,poca_CNT_INSTALMENT_mean_max,poca_SK_DPD_min_mean,poca_SK_DPD_min_std,poca_SK_DPD_min_min,poca_SK_DPD_min_max,poca_CNT_INSTALMENT_FUTURE_std_mean,poca_CNT_INSTALMENT_FUTURE_std_std,poca_CNT_INSTALMENT_FUTURE_std_min,poca_CNT_INSTALMENT_FUTURE_std_max,poca_SK_DPD_DEF_min_mean,poca_SK_DPD_DEF_min_std,poca_SK_DPD_DEF_min_min,poca_SK_DPD_DEF_min_max,poca_SK_DPD_mean_mean,poca_SK_DPD_mean_std,poca_SK_DPD_mean_min,poca_SK_DPD_mean_max,poca_INSTALLMENTS_PERCENT_max_mean,poca_INSTALLMENTS_PERCENT_max_std,poca_INSTALLMENTS_PERCENT_max_min,poca_INSTALLMENTS_PERCENT_max_max,poca_INSTALLMENTS_PERCENT_mean_mean,poca_INSTALLMENTS_PERCENT_mean_std,poca_INSTALLMENTS_PERCENT_mean_min,poca_INSTALLMENTS_PERCENT_mean_max,poca_CNT_INSTALMENT_FUTURE_min_mean,poca_CNT_INSTALMENT_FUTURE_min_std,poca_CNT_INSTALMENT_FUTURE_min_min,poca_CNT_INSTALMENT_FUTURE_min_max,poca_INSTALLMENTS_PERCENT_std_mean,poca_INSTALLMENTS_PERCENT_std_std,poca_INSTALLMENTS_PERCENT_std_min,poca_INSTALLMENTS_PERCENT_std_max,poca_CNT_INSTALMENT_min_mean,poca_CNT_INSTALMENT_min_std,poca_CNT_INSTALMENT_min_min,poca_CNT_INSTALMENT_min_max,poca_SK_DPD_max_mean,poca_SK_DPD_max_std,poca_SK_DPD_max_min,poca_SK_DPD_max_max,poca_CNT_INSTALMENT_FUTURE_max_mean,poca_CNT_INSTALMENT_FUTURE_max_std,poca_CNT_INSTALMENT_FUTURE_max_min,poca_CNT_INSTALMENT_FUTURE_max_max,poca_INSTALLMENTS_PERCENT_min_mean,poca_INSTALLMENTS_PERCENT_min_std,poca_INSTALLMENTS_PERCENT_min_min,poca_INSTALLMENTS_PERCENT_min_max,poca_CNT_INSTALMENT_max_mean,poca_CNT_INSTALMENT_max_std,poca_CNT_INSTALMENT_max_min,poca_CNT_INSTALMENT_max_max,poca_NAME_CONTRACT_STATUS_mode_mode,poca_NAME_CONTRACT_STATUS_mode_unique
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1
100001,1.375,0.883883,0.75,2.0,3.5,4.949747,0,7,2.0,0.0,2,2,1.75,2.474874,0.0,3.5,1.75,2.474874,0.0,3.5,0.0,0.0,0.0,0.0,0.875,1.237437,0.0,1.75,4.5,0.707107,4,5,4.0,0.0,4.0,4.0,0.0,0.0,0,0,1.269283,0.441031,0.957427,1.581139,0.0,0.0,0,0,0.875,1.237437,0.0,1.75,0.75,0.353553,0.5,1.0,0.34375,0.220971,0.1875,0.5,0.0,0.0,0.0,0.0,0.317321,0.110258,0.239357,0.395285,4.0,0.0,4.0,4.0,3.5,4.949747,0,7,3.0,1.414214,2.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,4.0,Active,1
100002,15.0,,15.0,15.0,0.0,,0,0,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,19.0,,19,19,24.0,,24.0,24.0,0.0,,0,0,5.627314,,5.627314,5.627314,0.0,,0,0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.625,,0.625,0.625,6.0,,6.0,6.0,0.234471,,0.234471,0.234471,24.0,,24.0,24.0,0.0,,0,0,24.0,,24.0,24.0,0.25,,0.25,0.25,24.0,,24.0,24.0,Active,1
100003,5.666667,2.722399,2.625,7.875,0.0,0.0,0,0,1.666667,0.57735,1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.589256,1.020621,0.0,1.767767,0.0,0.0,0.0,0.0,9.333333,2.309401,8,12,9.791667,3.298516,6.0,12.0,0.0,0.0,0,0,3.209241,0.822291,2.263846,3.758324,0.0,0.0,0,0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.545139,0.109416,0.4375,0.65625,0.333333,0.57735,0.0,1.0,0.330321,0.041186,0.300463,0.377308,8.333333,3.21455,6.0,12.0,0.0,0.0,0,0,10.0,3.464102,6.0,12.0,0.027778,0.048113,0.0,0.083333,10.0,3.464102,6.0,12.0,Active,1
100004,2.25,,2.25,2.25,0.0,,0,0,2.0,,2,2,0.0,,0.0,0.0,0.0,,0.0,0.0,0.5,,0.5,0.5,0.0,,0.0,0.0,4.0,,4,4,3.75,,3.75,3.75,0.0,,0,0,1.707825,,1.707825,1.707825,0.0,,0,0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.5625,,0.5625,0.5625,0.0,,0.0,0.0,0.426956,,0.426956,0.426956,3.0,,3.0,3.0,0.0,,0,0,4.0,,4.0,4.0,0.0,,0.0,0.0,4.0,,4.0,4.0,Active,1
100005,7.181818,,7.181818,7.181818,0.0,,0,0,3.0,,3,3,0.0,,0.0,0.0,0.0,,0.0,0.0,0.904534,,0.904534,0.904534,0.0,,0.0,0.0,11.0,,11,11,11.727273,,11.727273,11.727273,0.0,,0,0,3.429816,,3.429816,3.429816,0.0,,0,0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.598485,,0.598485,0.598485,0.0,,0.0,0.0,0.285818,,0.285818,0.285818,9.0,,9.0,9.0,0.0,,0,0,12.0,,12.0,12.0,0.0,,0.0,0.0,12.0,,12.0,12.0,Active,1


In [104]:
# clear memory
del poca

### 4.3.4. CARD DATA

In [105]:
### FEATURE ENGINEERING

# logarithms
card["AMT_BALANCE"]                = np.log(1 + card["AMT_BALANCE"])
card["AMT_CREDIT_LIMIT_ACTUAL"]    = np.log(1 + card["AMT_CREDIT_LIMIT_ACTUAL"])
card["AMT_DRAWINGS_ATM_CURRENT"]   = np.log(1 + card["AMT_DRAWINGS_ATM_CURRENT"])
card["AMT_DRAWINGS_CURRENT"]       = np.log(1 + card["AMT_DRAWINGS_CURRENT"])
card["AMT_DRAWINGS_OTHER_CURRENT"] = np.log(1 + card["AMT_DRAWINGS_OTHER_CURRENT"])
card["AMT_DRAWINGS_POS_CURRENT"]   = np.log(1 + card["AMT_DRAWINGS_POS_CURRENT"])
card["AMT_INST_MIN_REGULARITY"]    = np.log(1 + card["AMT_INST_MIN_REGULARITY"])
card["AMT_PAYMENT_CURRENT"]        = np.log(1 + card["AMT_PAYMENT_CURRENT"])
card["AMT_PAYMENT_TOTAL_CURRENT"]  = np.log(1 + card["AMT_PAYMENT_TOTAL_CURRENT"])
card["AMT_RECEIVABLE_PRINCIPAL"]   = np.log(1 + card["AMT_RECEIVABLE_PRINCIPAL"])
card["AMT_RECIVABLE"]              = np.log(1 + card["AMT_RECIVABLE"])
card["AMT_TOTAL_RECEIVABLE"]       = np.log(1 + card["AMT_TOTAL_RECEIVABLE"])

In [106]:
# count missings
nas = count_missings(card)
nas

Unnamed: 0,Total,Percent
AMT_PAYMENT_CURRENT,767988,19.998063
AMT_DRAWINGS_ATM_CURRENT,749817,19.524898
AMT_DRAWINGS_OTHER_CURRENT,749816,19.524872
CNT_DRAWINGS_POS_CURRENT,749816,19.524872
CNT_DRAWINGS_OTHER_CURRENT,749816,19.524872
AMT_DRAWINGS_POS_CURRENT,749816,19.524872
CNT_DRAWINGS_ATM_CURRENT,749816,19.524872
CNT_INSTALMENT_MATURE_CUM,305236,7.948208
AMT_INST_MIN_REGULARITY,305236,7.948208
AMT_RECIVABLE,108738,2.831489


In [107]:
##### IMPUTE MISSINGS
             
### AMOUNTS
amnts = ["AMT_PAYMENT_CURRENT", "AMT_DRAWINGS_ATM_CURRENT", "AMT_DRAWINGS_OTHER_CURRENT", "CNT_INSTALMENT_MATURE_CUM"] 
for var in amnts:
    card["isnull_" + var] = card[var].isnull() + 0


### OTHER FEATURES    
    
# find variables
other_vars = ["AMT_PAYMENT_CURRENT", "AMT_DRAWINGS_ATM_CURRENT",
              "AMT_DRAWINGS_OTHER_CURRENT", "CNT_DRAWINGS_POS_CURRENT",
              "CNT_DRAWINGS_OTHER_CURRENT", "AMT_DRAWINGS_POS_CURRENT",
              "CNT_DRAWINGS_ATM_CURRENT", "CNT_INSTALMENT_MATURE_CUM",
              "AMT_INST_MIN_REGULARITY", "AMT_RECIVABLE", "AMT_TOTAL_RECEIVABLE",
              "AMT_RECEIVABLE_PRINCIPAL", "AMT_BALANCE", "AMT_DRAWINGS_CURRENT"]
for var in other_vars:
    card[var].fillna((card[var].median()), inplace = True)

In [108]:
### AGGREGATIONS

# count months
cnt_mon = card[["SK_ID_PREV", "MONTHS_BALANCE"]].groupby("SK_ID_PREV").count()
del card["MONTHS_BALANCE"]

# delete ID_CURR
card_id = card[["SK_ID_CURR", "SK_ID_PREV"]]
del card["SK_ID_CURR"]

# aggregate data
agg_card = aggregate_data(card, id_var = "SK_ID_PREV")

# add month count
agg_card["card_MON_COUNT"] = cnt_mon

# put back ID_CURR
card_id = card_id.drop_duplicates()
agg_card = card_id.merge(right = agg_card.reset_index(), how = "right", on = "SK_ID_PREV")
del agg_card["SK_ID_PREV"]

# aggregate data (round 2)
agg_card = aggregate_data(agg_card, id_var = "SK_ID_CURR", label = "card")

- Preparing the dataset...
- Extracted 1 factors and 23 numerics...
- Aggregating numeric features...
- Aggregating factor features...
- Final dimensions: (104307, 94)
- Preparing the dataset...
- Extracted 1 factors and 94 numerics...
- Aggregating numeric features...
- Aggregating factor features...
- Final dimensions: (103558, 378)


In [109]:
# count missings
nas = count_missings(agg_card)
nas

Unnamed: 0,Total,Percent
card_AMT_TOTAL_RECEIVABLE_std_std,102846,99.312463
card_isnull_AMT_DRAWINGS_OTHER_CURRENT_std_std,102846,99.312463
card_CNT_INSTALMENT_MATURE_CUM_std_std,102846,99.312463
card_AMT_BALANCE_std_std,102846,99.312463
card_isnull_CNT_INSTALMENT_MATURE_CUM_std_std,102846,99.312463
card_AMT_RECIVABLE_std_std,102846,99.312463
card_SK_DPD_std_std,102846,99.312463
card_isnull_AMT_PAYMENT_CURRENT_std_std,102846,99.312463
card_AMT_CREDIT_LIMIT_ACTUAL_std_std,102846,99.312463
card_AMT_RECEIVABLE_PRINCIPAL_std_std,102846,99.312463


In [110]:
# check data
agg_card.head()

Unnamed: 0_level_0,card_isnull_AMT_PAYMENT_CURRENT_max_mean,card_isnull_AMT_PAYMENT_CURRENT_max_std,card_isnull_AMT_PAYMENT_CURRENT_max_min,card_isnull_AMT_PAYMENT_CURRENT_max_max,card_AMT_DRAWINGS_CURRENT_max_mean,card_AMT_DRAWINGS_CURRENT_max_std,card_AMT_DRAWINGS_CURRENT_max_min,card_AMT_DRAWINGS_CURRENT_max_max,card_CNT_DRAWINGS_ATM_CURRENT_max_mean,card_CNT_DRAWINGS_ATM_CURRENT_max_std,card_CNT_DRAWINGS_ATM_CURRENT_max_min,card_CNT_DRAWINGS_ATM_CURRENT_max_max,card_SK_DPD_std_mean,card_SK_DPD_std_std,card_SK_DPD_std_min,card_SK_DPD_std_max,card_CNT_DRAWINGS_CURRENT_mean_mean,card_CNT_DRAWINGS_CURRENT_mean_std,card_CNT_DRAWINGS_CURRENT_mean_min,card_CNT_DRAWINGS_CURRENT_mean_max,card_isnull_AMT_DRAWINGS_ATM_CURRENT_mean_mean,card_isnull_AMT_DRAWINGS_ATM_CURRENT_mean_std,card_isnull_AMT_DRAWINGS_ATM_CURRENT_mean_min,card_isnull_AMT_DRAWINGS_ATM_CURRENT_mean_max,card_AMT_RECEIVABLE_PRINCIPAL_std_mean,card_AMT_RECEIVABLE_PRINCIPAL_std_std,card_AMT_RECEIVABLE_PRINCIPAL_std_min,card_AMT_RECEIVABLE_PRINCIPAL_std_max,card_AMT_CREDIT_LIMIT_ACTUAL_std_mean,card_AMT_CREDIT_LIMIT_ACTUAL_std_std,card_AMT_CREDIT_LIMIT_ACTUAL_std_min,card_AMT_CREDIT_LIMIT_ACTUAL_std_max,card_AMT_DRAWINGS_OTHER_CURRENT_std_mean,card_AMT_DRAWINGS_OTHER_CURRENT_std_std,card_AMT_DRAWINGS_OTHER_CURRENT_std_min,card_AMT_DRAWINGS_OTHER_CURRENT_std_max,card_isnull_CNT_INSTALMENT_MATURE_CUM_mean_mean,card_isnull_CNT_INSTALMENT_MATURE_CUM_mean_std,card_isnull_CNT_INSTALMENT_MATURE_CUM_mean_min,card_isnull_CNT_INSTALMENT_MATURE_CUM_mean_max,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_std_mean,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_std_std,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_std_min,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_std_max,card_CNT_DRAWINGS_OTHER_CURRENT_mean_mean,card_CNT_DRAWINGS_OTHER_CURRENT_mean_std,card_CNT_DRAWINGS_OTHER_CURRENT_mean_min,card_CNT_DRAWINGS_OTHER_CURRENT_mean_max,card_AMT_RECIVABLE_max_mean,card_AMT_RECIVABLE_max_std,card_AMT_RECIVABLE_max_min,card_AMT_RECIVABLE_max_max,card_SK_DPD_mean_mean,card_SK_DPD_mean_std,card_SK_DPD_mean_min,card_SK_DPD_mean_max,card_AMT_DRAWINGS_POS_CURRENT_max_mean,card_AMT_DRAWINGS_POS_CURRENT_max_std,card_AMT_DRAWINGS_POS_CURRENT_max_min,card_AMT_DRAWINGS_POS_CURRENT_max_max,card_AMT_PAYMENT_CURRENT_std_mean,card_AMT_PAYMENT_CURRENT_std_std,card_AMT_PAYMENT_CURRENT_std_min,card_AMT_PAYMENT_CURRENT_std_max,card_AMT_TOTAL_RECEIVABLE_min_mean,card_AMT_TOTAL_RECEIVABLE_min_std,card_AMT_TOTAL_RECEIVABLE_min_min,card_AMT_TOTAL_RECEIVABLE_min_max,card_CNT_DRAWINGS_OTHER_CURRENT_min_mean,card_CNT_DRAWINGS_OTHER_CURRENT_min_std,card_CNT_DRAWINGS_OTHER_CURRENT_min_min,card_CNT_DRAWINGS_OTHER_CURRENT_min_max,card_SK_DPD_max_mean,card_SK_DPD_max_std,card_SK_DPD_max_min,card_SK_DPD_max_max,card_isnull_CNT_INSTALMENT_MATURE_CUM_std_mean,card_isnull_CNT_INSTALMENT_MATURE_CUM_std_std,card_isnull_CNT_INSTALMENT_MATURE_CUM_std_min,card_isnull_CNT_INSTALMENT_MATURE_CUM_std_max,card_CNT_DRAWINGS_ATM_CURRENT_std_mean,card_CNT_DRAWINGS_ATM_CURRENT_std_std,card_CNT_DRAWINGS_ATM_CURRENT_std_min,card_CNT_DRAWINGS_ATM_CURRENT_std_max,card_AMT_RECIVABLE_std_mean,card_AMT_RECIVABLE_std_std,card_AMT_RECIVABLE_std_min,card_AMT_RECIVABLE_std_max,card_AMT_DRAWINGS_POS_CURRENT_min_mean,card_AMT_DRAWINGS_POS_CURRENT_min_std,card_AMT_DRAWINGS_POS_CURRENT_min_min,card_AMT_DRAWINGS_POS_CURRENT_min_max,card_AMT_RECEIVABLE_PRINCIPAL_mean_mean,card_AMT_RECEIVABLE_PRINCIPAL_mean_std,card_AMT_RECEIVABLE_PRINCIPAL_mean_min,card_AMT_RECEIVABLE_PRINCIPAL_mean_max,card_CNT_DRAWINGS_ATM_CURRENT_min_mean,card_CNT_DRAWINGS_ATM_CURRENT_min_std,card_CNT_DRAWINGS_ATM_CURRENT_min_min,card_CNT_DRAWINGS_ATM_CURRENT_min_max,card_AMT_PAYMENT_TOTAL_CURRENT_max_mean,card_AMT_PAYMENT_TOTAL_CURRENT_max_std,card_AMT_PAYMENT_TOTAL_CURRENT_max_min,card_AMT_PAYMENT_TOTAL_CURRENT_max_max,card_AMT_TOTAL_RECEIVABLE_mean_mean,card_AMT_TOTAL_RECEIVABLE_mean_std,card_AMT_TOTAL_RECEIVABLE_mean_min,card_AMT_TOTAL_RECEIVABLE_mean_max,card_SK_DPD_DEF_max_mean,card_SK_DPD_DEF_max_std,card_SK_DPD_DEF_max_min,card_SK_DPD_DEF_max_max,card_AMT_RECEIVABLE_PRINCIPAL_min_mean,card_AMT_RECEIVABLE_PRINCIPAL_min_std,card_AMT_RECEIVABLE_PRINCIPAL_min_min,card_AMT_RECEIVABLE_PRINCIPAL_min_max,card_AMT_DRAWINGS_POS_CURRENT_std_mean,card_AMT_DRAWINGS_POS_CURRENT_std_std,card_AMT_DRAWINGS_POS_CURRENT_std_min,card_AMT_DRAWINGS_POS_CURRENT_std_max,card_CNT_DRAWINGS_POS_CURRENT_mean_mean,card_CNT_DRAWINGS_POS_CURRENT_mean_std,card_CNT_DRAWINGS_POS_CURRENT_mean_min,card_CNT_DRAWINGS_POS_CURRENT_mean_max,card_isnull_AMT_PAYMENT_CURRENT_mean_mean,card_isnull_AMT_PAYMENT_CURRENT_mean_std,card_isnull_AMT_PAYMENT_CURRENT_mean_min,card_isnull_AMT_PAYMENT_CURRENT_mean_max,card_AMT_TOTAL_RECEIVABLE_std_mean,card_AMT_TOTAL_RECEIVABLE_std_std,card_AMT_TOTAL_RECEIVABLE_std_min,card_AMT_TOTAL_RECEIVABLE_std_max,card_AMT_PAYMENT_TOTAL_CURRENT_min_mean,card_AMT_PAYMENT_TOTAL_CURRENT_min_std,card_AMT_PAYMENT_TOTAL_CURRENT_min_min,card_AMT_PAYMENT_TOTAL_CURRENT_min_max,card_isnull_AMT_DRAWINGS_ATM_CURRENT_max_mean,card_isnull_AMT_DRAWINGS_ATM_CURRENT_max_std,card_isnull_AMT_DRAWINGS_ATM_CURRENT_max_min,card_isnull_AMT_DRAWINGS_ATM_CURRENT_max_max,card_card_MON_COUNT_mean,card_card_MON_COUNT_std,card_card_MON_COUNT_min,card_card_MON_COUNT_max,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_min_mean,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_min_std,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_min_min,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_min_max,card_SK_DPD_DEF_min_mean,card_SK_DPD_DEF_min_std,card_SK_DPD_DEF_min_min,card_SK_DPD_DEF_min_max,card_AMT_INST_MIN_REGULARITY_max_mean,card_AMT_INST_MIN_REGULARITY_max_std,card_AMT_INST_MIN_REGULARITY_max_min,card_AMT_INST_MIN_REGULARITY_max_max,card_isnull_AMT_PAYMENT_CURRENT_min_mean,card_isnull_AMT_PAYMENT_CURRENT_min_std,card_isnull_AMT_PAYMENT_CURRENT_min_min,card_isnull_AMT_PAYMENT_CURRENT_min_max,card_AMT_CREDIT_LIMIT_ACTUAL_max_mean,card_AMT_CREDIT_LIMIT_ACTUAL_max_std,card_AMT_CREDIT_LIMIT_ACTUAL_max_min,card_AMT_CREDIT_LIMIT_ACTUAL_max_max,card_AMT_DRAWINGS_OTHER_CURRENT_mean_mean,card_AMT_DRAWINGS_OTHER_CURRENT_mean_std,card_AMT_DRAWINGS_OTHER_CURRENT_mean_min,card_AMT_DRAWINGS_OTHER_CURRENT_mean_max,card_AMT_BALANCE_min_mean,card_AMT_BALANCE_min_std,card_AMT_BALANCE_min_min,card_AMT_BALANCE_min_max,card_AMT_DRAWINGS_POS_CURRENT_mean_mean,card_AMT_DRAWINGS_POS_CURRENT_mean_std,card_AMT_DRAWINGS_POS_CURRENT_mean_min,card_AMT_DRAWINGS_POS_CURRENT_mean_max,card_CNT_DRAWINGS_POS_CURRENT_min_mean,card_CNT_DRAWINGS_POS_CURRENT_min_std,card_CNT_DRAWINGS_POS_CURRENT_min_min,card_CNT_DRAWINGS_POS_CURRENT_min_max,card_isnull_AMT_PAYMENT_CURRENT_std_mean,card_isnull_AMT_PAYMENT_CURRENT_std_std,card_isnull_AMT_PAYMENT_CURRENT_std_min,card_isnull_AMT_PAYMENT_CURRENT_std_max,card_AMT_DRAWINGS_ATM_CURRENT_min_mean,card_AMT_DRAWINGS_ATM_CURRENT_min_std,card_AMT_DRAWINGS_ATM_CURRENT_min_min,card_AMT_DRAWINGS_ATM_CURRENT_min_max,card_AMT_RECIVABLE_min_mean,card_AMT_RECIVABLE_min_std,card_AMT_RECIVABLE_min_min,card_AMT_RECIVABLE_min_max,card_NAME_CONTRACT_STATUS_unique_mean,card_NAME_CONTRACT_STATUS_unique_std,card_NAME_CONTRACT_STATUS_unique_min,card_NAME_CONTRACT_STATUS_unique_max,card_CNT_DRAWINGS_POS_CURRENT_std_mean,card_CNT_DRAWINGS_POS_CURRENT_std_std,card_CNT_DRAWINGS_POS_CURRENT_std_min,card_CNT_DRAWINGS_POS_CURRENT_std_max,card_SK_DPD_DEF_std_mean,card_SK_DPD_DEF_std_std,card_SK_DPD_DEF_std_min,card_SK_DPD_DEF_std_max,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_mean_mean,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_mean_std,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_mean_min,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_mean_max,card_AMT_PAYMENT_CURRENT_min_mean,card_AMT_PAYMENT_CURRENT_min_std,card_AMT_PAYMENT_CURRENT_min_min,card_AMT_PAYMENT_CURRENT_min_max,card_CNT_DRAWINGS_OTHER_CURRENT_std_mean,card_CNT_DRAWINGS_OTHER_CURRENT_std_std,card_CNT_DRAWINGS_OTHER_CURRENT_std_min,card_CNT_DRAWINGS_OTHER_CURRENT_std_max,card_AMT_INST_MIN_REGULARITY_mean_mean,card_AMT_INST_MIN_REGULARITY_mean_std,card_AMT_INST_MIN_REGULARITY_mean_min,card_AMT_INST_MIN_REGULARITY_mean_max,card_AMT_INST_MIN_REGULARITY_std_mean,card_AMT_INST_MIN_REGULARITY_std_std,card_AMT_INST_MIN_REGULARITY_std_min,card_AMT_INST_MIN_REGULARITY_std_max,card_AMT_DRAWINGS_ATM_CURRENT_max_mean,card_AMT_DRAWINGS_ATM_CURRENT_max_std,card_AMT_DRAWINGS_ATM_CURRENT_max_min,card_AMT_DRAWINGS_ATM_CURRENT_max_max,card_CNT_DRAWINGS_POS_CURRENT_max_mean,card_CNT_DRAWINGS_POS_CURRENT_max_std,card_CNT_DRAWINGS_POS_CURRENT_max_min,card_CNT_DRAWINGS_POS_CURRENT_max_max,card_AMT_DRAWINGS_OTHER_CURRENT_max_mean,card_AMT_DRAWINGS_OTHER_CURRENT_max_std,card_AMT_DRAWINGS_OTHER_CURRENT_max_min,card_AMT_DRAWINGS_OTHER_CURRENT_max_max,card_AMT_PAYMENT_TOTAL_CURRENT_mean_mean,card_AMT_PAYMENT_TOTAL_CURRENT_mean_std,card_AMT_PAYMENT_TOTAL_CURRENT_mean_min,card_AMT_PAYMENT_TOTAL_CURRENT_mean_max,card_CNT_INSTALMENT_MATURE_CUM_std_mean,card_CNT_INSTALMENT_MATURE_CUM_std_std,card_CNT_INSTALMENT_MATURE_CUM_std_min,card_CNT_INSTALMENT_MATURE_CUM_std_max,card_AMT_DRAWINGS_CURRENT_min_mean,card_AMT_DRAWINGS_CURRENT_min_std,card_AMT_DRAWINGS_CURRENT_min_min,card_AMT_DRAWINGS_CURRENT_min_max,card_AMT_PAYMENT_CURRENT_mean_mean,card_AMT_PAYMENT_CURRENT_mean_std,card_AMT_PAYMENT_CURRENT_mean_min,card_AMT_PAYMENT_CURRENT_mean_max,card_CNT_DRAWINGS_ATM_CURRENT_mean_mean,card_CNT_DRAWINGS_ATM_CURRENT_mean_std,card_CNT_DRAWINGS_ATM_CURRENT_mean_min,card_CNT_DRAWINGS_ATM_CURRENT_mean_max,card_AMT_DRAWINGS_CURRENT_std_mean,card_AMT_DRAWINGS_CURRENT_std_std,card_AMT_DRAWINGS_CURRENT_std_min,card_AMT_DRAWINGS_CURRENT_std_max,card_AMT_TOTAL_RECEIVABLE_max_mean,card_AMT_TOTAL_RECEIVABLE_max_std,card_AMT_TOTAL_RECEIVABLE_max_min,card_AMT_TOTAL_RECEIVABLE_max_max,card_AMT_DRAWINGS_ATM_CURRENT_std_mean,card_AMT_DRAWINGS_ATM_CURRENT_std_std,card_AMT_DRAWINGS_ATM_CURRENT_std_min,card_AMT_DRAWINGS_ATM_CURRENT_std_max,card_AMT_INST_MIN_REGULARITY_min_mean,card_AMT_INST_MIN_REGULARITY_min_std,card_AMT_INST_MIN_REGULARITY_min_min,card_AMT_INST_MIN_REGULARITY_min_max,card_AMT_DRAWINGS_CURRENT_mean_mean,card_AMT_DRAWINGS_CURRENT_mean_std,card_AMT_DRAWINGS_CURRENT_mean_min,card_AMT_DRAWINGS_CURRENT_mean_max,card_CNT_INSTALMENT_MATURE_CUM_mean_mean,card_CNT_INSTALMENT_MATURE_CUM_mean_std,card_CNT_INSTALMENT_MATURE_CUM_mean_min,card_CNT_INSTALMENT_MATURE_CUM_mean_max,card_CNT_INSTALMENT_MATURE_CUM_min_mean,card_CNT_INSTALMENT_MATURE_CUM_min_std,card_CNT_INSTALMENT_MATURE_CUM_min_min,card_CNT_INSTALMENT_MATURE_CUM_min_max,card_CNT_DRAWINGS_CURRENT_min_mean,card_CNT_DRAWINGS_CURRENT_min_std,card_CNT_DRAWINGS_CURRENT_min_min,card_CNT_DRAWINGS_CURRENT_min_max,card_AMT_PAYMENT_CURRENT_max_mean,card_AMT_PAYMENT_CURRENT_max_std,card_AMT_PAYMENT_CURRENT_max_min,card_AMT_PAYMENT_CURRENT_max_max,card_AMT_DRAWINGS_OTHER_CURRENT_min_mean,card_AMT_DRAWINGS_OTHER_CURRENT_min_std,card_AMT_DRAWINGS_OTHER_CURRENT_min_min,card_AMT_DRAWINGS_OTHER_CURRENT_min_max,card_AMT_BALANCE_max_mean,card_AMT_BALANCE_max_std,card_AMT_BALANCE_max_min,card_AMT_BALANCE_max_max,card_AMT_RECEIVABLE_PRINCIPAL_max_mean,card_AMT_RECEIVABLE_PRINCIPAL_max_std,card_AMT_RECEIVABLE_PRINCIPAL_max_min,card_AMT_RECEIVABLE_PRINCIPAL_max_max,card_AMT_DRAWINGS_ATM_CURRENT_mean_mean,card_AMT_DRAWINGS_ATM_CURRENT_mean_std,card_AMT_DRAWINGS_ATM_CURRENT_mean_min,card_AMT_DRAWINGS_ATM_CURRENT_mean_max,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_max_mean,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_max_std,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_max_min,card_isnull_AMT_DRAWINGS_OTHER_CURRENT_max_max,card_SK_DPD_DEF_mean_mean,card_SK_DPD_DEF_mean_std,card_SK_DPD_DEF_mean_min,card_SK_DPD_DEF_mean_max,card_isnull_AMT_DRAWINGS_ATM_CURRENT_min_mean,card_isnull_AMT_DRAWINGS_ATM_CURRENT_min_std,card_isnull_AMT_DRAWINGS_ATM_CURRENT_min_min,card_isnull_AMT_DRAWINGS_ATM_CURRENT_min_max,card_CNT_DRAWINGS_CURRENT_max_mean,card_CNT_DRAWINGS_CURRENT_max_std,card_CNT_DRAWINGS_CURRENT_max_min,card_CNT_DRAWINGS_CURRENT_max_max,card_SK_DPD_min_mean,card_SK_DPD_min_std,card_SK_DPD_min_min,card_SK_DPD_min_max,card_CNT_DRAWINGS_CURRENT_std_mean,card_CNT_DRAWINGS_CURRENT_std_std,card_CNT_DRAWINGS_CURRENT_std_min,card_CNT_DRAWINGS_CURRENT_std_max,card_isnull_CNT_INSTALMENT_MATURE_CUM_min_mean,card_isnull_CNT_INSTALMENT_MATURE_CUM_min_std,card_isnull_CNT_INSTALMENT_MATURE_CUM_min_min,card_isnull_CNT_INSTALMENT_MATURE_CUM_min_max,card_CNT_INSTALMENT_MATURE_CUM_max_mean,card_CNT_INSTALMENT_MATURE_CUM_max_std,card_CNT_INSTALMENT_MATURE_CUM_max_min,card_CNT_INSTALMENT_MATURE_CUM_max_max,card_CNT_DRAWINGS_OTHER_CURRENT_max_mean,card_CNT_DRAWINGS_OTHER_CURRENT_max_std,card_CNT_DRAWINGS_OTHER_CURRENT_max_min,card_CNT_DRAWINGS_OTHER_CURRENT_max_max,card_AMT_CREDIT_LIMIT_ACTUAL_min_mean,card_AMT_CREDIT_LIMIT_ACTUAL_min_std,card_AMT_CREDIT_LIMIT_ACTUAL_min_min,card_AMT_CREDIT_LIMIT_ACTUAL_min_max,card_AMT_RECIVABLE_mean_mean,card_AMT_RECIVABLE_mean_std,card_AMT_RECIVABLE_mean_min,card_AMT_RECIVABLE_mean_max,card_AMT_BALANCE_std_mean,card_AMT_BALANCE_std_std,card_AMT_BALANCE_std_min,card_AMT_BALANCE_std_max,card_isnull_CNT_INSTALMENT_MATURE_CUM_max_mean,card_isnull_CNT_INSTALMENT_MATURE_CUM_max_std,card_isnull_CNT_INSTALMENT_MATURE_CUM_max_min,card_isnull_CNT_INSTALMENT_MATURE_CUM_max_max,card_AMT_CREDIT_LIMIT_ACTUAL_mean_mean,card_AMT_CREDIT_LIMIT_ACTUAL_mean_std,card_AMT_CREDIT_LIMIT_ACTUAL_mean_min,card_AMT_CREDIT_LIMIT_ACTUAL_mean_max,card_AMT_BALANCE_mean_mean,card_AMT_BALANCE_mean_std,card_AMT_BALANCE_mean_min,card_AMT_BALANCE_mean_max,card_isnull_AMT_DRAWINGS_ATM_CURRENT_std_mean,card_isnull_AMT_DRAWINGS_ATM_CURRENT_std_std,card_isnull_AMT_DRAWINGS_ATM_CURRENT_std_min,card_isnull_AMT_DRAWINGS_ATM_CURRENT_std_max,card_AMT_PAYMENT_TOTAL_CURRENT_std_mean,card_AMT_PAYMENT_TOTAL_CURRENT_std_std,card_AMT_PAYMENT_TOTAL_CURRENT_std_min,card_AMT_PAYMENT_TOTAL_CURRENT_std_max,card_NAME_CONTRACT_STATUS_mode_mode,card_NAME_CONTRACT_STATUS_mode_unique
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1
100006,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,6.0,,6,6,1.0,,1,1,0,,0,0,0.0,,0.0,0.0,1.0,,1,1,12.506181,,12.506181,12.506181,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,1.0,,1,1,0.0,,0,0,0,,0,0,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,12.506181,,12.506181,12.506181,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,12.506181,,12.506181,12.506181,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,Active,1
100011,0.0,,0,0,12.100718,,12.100718,12.100718,4.0,,4.0,4.0,0.0,,0.0,0.0,0.054054,,0.054054,0.054054,0.0,,0.0,0.0,5.813196,,5.813196,5.813196,0.265571,,0.265571,0.265571,0.0,,0.0,0.0,0.013514,,0.013514,0.013514,0.0,,0.0,0.0,0.0,,0.0,0.0,12.149508,,12.149508,12.149508,0.0,,0.0,0.0,0.0,,0.0,0.0,1.663975,,1.663975,1.663975,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.116248,,0.116248,0.116248,0.464991,,0.464991,0.464991,5.833708,,5.833708,5.833708,0.0,,0.0,0.0,5.035465,,5.035465,5.035465,0.0,,0.0,0.0,10.923886,,10.923886,10.923886,5.053491,,5.053491,5.053491,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,5.833708,,5.833708,5.833708,0.0,,0.0,0.0,0.0,,0,0,74.0,,74,74,0.0,,0,0,0,,0,0,9.105091,,9.105091,9.105091,0.0,,0,0,12.100718,,12.100718,12.100718,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,4.027757,,4.027757,4.027757,4.528702,,4.528702,4.528702,12.100718,,12.100718,12.100718,0.0,,0.0,0.0,0.0,,0.0,0.0,3.961915,,3.961915,3.961915,10.293904,,10.293904,10.293904,0.0,,0.0,0.0,7.484197,,7.484197,7.484197,0.054054,,0.054054,0.054054,1.40668,,1.40668,1.40668,12.149508,,12.149508,12.149508,1.40668,,1.40668,1.40668,0.0,,0.0,0.0,0.163523,,0.163523,0.163523,25.621622,,25.621622,25.621622,1.0,,1.0,1.0,0.0,,0,0,10.923886,,10.923886,10.923886,0.0,,0.0,0.0,12.149508,,12.149508,12.149508,12.100718,,12.100718,12.100718,0.163523,,0.163523,0.163523,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,4.0,,4,4,0,,0,0,0.464991,,0.464991,0.464991,0,,0,0,33.0,,33.0,33.0,0.0,,0.0,0.0,11.407576,,11.407576,11.407576,5.053491,,5.053491,5.053491,5.806097,,5.806097,5.806097,1.0,,1,1,11.97895,,11.97895,11.97895,5.143911,,5.143911,5.143911,0.0,,0.0,0.0,4.574727,,4.574727,4.574727,Active,1
100013,0.0,,0,0,11.967187,,11.967187,11.967187,7.0,,7.0,7.0,0.102062,,0.102062,0.102062,0.239583,,0.239583,0.239583,0.0625,,0.0625,0.0625,4.30392,,4.30392,4.30392,0.529289,,0.529289,0.529289,0.0,,0.0,0.0,0.072917,,0.072917,0.072917,0.243332,,0.243332,0.243332,0.0,,0.0,0.0,11.991773,,11.991773,11.991773,0.010417,,0.010417,0.010417,0.0,,0.0,0.0,2.538063,,2.538063,2.538063,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.261365,,0.261365,0.261365,1.149323,,1.149323,1.149323,4.323253,,4.323253,4.323253,0.0,,0.0,0.0,1.913403,,1.913403,1.913403,0.0,,0.0,0.0,11.942602,,11.942602,11.942602,1.922071,,1.922071,1.922071,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,4.323253,,4.323253,4.323253,0.0,,0.0,0.0,1.0,,1,1,96.0,,96,96,0.0,,0,0,0,,0,0,8.971575,,8.971575,8.971575,0.0,,0,0,11.967187,,11.967187,11.967187,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,0.102062,,0.102062,0.102062,0.0625,,0.0625,0.0625,0.0,,0.0,0.0,0.0,,0.0,0.0,1.860215,,1.860215,1.860215,3.507408,,3.507408,3.507408,11.967187,,11.967187,11.967187,0.0,,0.0,0.0,0.0,,0.0,0.0,1.622986,,1.622986,1.622986,5.71585,,5.71585,5.71585,0.0,,0.0,0.0,6.068084,,6.068084,6.068084,0.239583,,0.239583,0.239583,2.565483,,2.565483,2.565483,11.991773,,11.991773,11.991773,2.565483,,2.565483,2.565483,0.0,,0.0,0.0,0.596945,,0.596945,0.596945,18.447917,,18.447917,18.447917,1.0,,1.0,1.0,0.0,,0,0,11.942602,,11.942602,11.942602,0.0,,0.0,0.0,11.991773,,11.991773,11.991773,11.967187,,11.967187,11.967187,0.596945,,0.596945,0.596945,1.0,,1,1,0.010417,,0.010417,0.010417,0.0,,0,0,7.0,,7,7,0,,0,0,1.149323,,1.149323,1.149323,0,,0,0,22.0,,22.0,22.0,0.0,,0.0,0.0,10.71444,,10.71444,10.71444,1.922071,,1.922071,1.922071,4.402373,,4.402373,4.402373,1.0,,1,1,11.680099,,11.680099,11.680099,2.287024,,2.287024,2.287024,0.243332,,0.243332,0.243332,3.803238,,3.803238,3.803238,Active,1
100021,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,17.0,,17,17,1.0,,1,1,0,,0,0,0.0,,0.0,0.0,1.0,,1,1,13.422469,,13.422469,13.422469,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,2.0,,2,2,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,1.0,,1,1,0.0,,0,0,0,,0,0,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,13.422469,,13.422469,13.422469,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,13.422469,,13.422469,13.422469,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,Completed,1
100023,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.860271,,0.860271,0.860271,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,8.0,,8,8,1.0,,1,1,0,,0,0,0.0,,0.0,0.0,1.0,,1,1,12.32386,,12.32386,12.32386,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,7.902376,,7.902376,7.902376,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,1.0,,1,1,0.0,,0,0,0,,0,0,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,10.71444,,10.71444,10.71444,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,11.51915,,11.51915,11.51915,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,Active,1


In [111]:
# clear memory
del card

### 4.3.5. PREV DATA

In [112]:
### FEATURE ENGINEERING

# amount ratios
prev["AMT_GIVEN_RATIO_1"]  = prev["AMT_CREDIT"] / prev["AMT_APPLICATION"]
prev["AMT_GIVEN_RATIO_2"]  = prev["AMT_GOODS_PRICE"] / prev["AMT_APPLICATION"]
prev["DOWN_PAYMENT_RATIO"] = prev["AMT_DOWN_PAYMENT"] / prev["AMT_APPLICATION"]

# logarithms
log_vars = ["AMT_CREDIT", "AMT_ANNUITY", "AMT_APPLICATION", "AMT_DOWN_PAYMENT", "AMT_GOODS_PRICE"]
prev = create_logs(prev, log_vars, replace = True)

# convert days
day_vars = ["DAYS_FIRST_DRAWING", "DAYS_FIRST_DUE", "DAYS_LAST_DUE_1ST_VERSION", 
            "DAYS_LAST_DUE", "DAYS_TERMINATION", "DAYS_DECISION"]
prev = convert_days(prev, day_vars, t = 1, rounding = False, replace = True)

# number of applications 
cnt_prev = prev[["SK_ID_CURR", "SK_ID_PREV"]].groupby(["SK_ID_CURR"], as_index = False).count()
cnt_prev.columns = ["SK_ID_CURR", "CNT_PREV_APPLICATIONS"]
prev = prev.merge(cnt_prev, how = "left", on = "SK_ID_CURR")

# number of contracts
cnt_prev = prev[["SK_ID_CURR", "FLAG_LAST_APPL_PER_CONTRACT"]]
cnt_prev.columns = ["SK_ID_CURR", "CNT_PREV_CONTRACTS"]
cnt_prev = cnt_prev[cnt_prev["CNT_PREV_CONTRACTS"] == "Y"]
cnt_prev = cnt_prev[["SK_ID_CURR", "CNT_PREV_CONTRACTS"]].groupby(["SK_ID_CURR"], as_index = False).count()
prev = prev.merge(cnt_prev, how = "left", on = "SK_ID_CURR")

# number ratio
prev["APPL_PER_CONTRACT_RATIO"] = prev["CNT_PREV_APPLICATIONS"] / prev["CNT_PREV_CONTRACTS"]

# loan decision ratios
prev = compute_accept_reject_ratio(prev, lags = [1, 3, 5])

# day differences
prev["DAYS_DUE_DIFF_1"] = prev["DAYS_LAST_DUE_1ST_VERSION"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_DUE_DIFF_2"] = prev["DAYS_LAST_DUE"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_TERMINATION_DIFF_1"] = prev["DAYS_TERMINATION"] - prev["DAYS_FIRST_DRAWING"]
prev["DAYS_TERMINATION_DIFF_2"] = prev["DAYS_TERMINATION"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_TERMINATION_DIFF_3"] = prev["DAYS_TERMINATION"] - prev["DAYS_LAST_DUE"]

# application dates
prev["DAY_APPR_PROCESS_START"] = "Working day"
prev["DAY_APPR_PROCESS_START"][(prev["WEEKDAY_APPR_PROCESS_START"] == "SATURDAY") |
                               (prev["WEEKDAY_APPR_PROCESS_START"] == "SUNDAY")] = "Weekend"


##### FEATURE REMOVAL
drops = ["NAME_CLIENT_TYPE"]
prev = prev.drop(columns = drops)

In [113]:
# count missings
nas = count_missings(prev)
nas

Unnamed: 0,Total,Percent
RATE_INTEREST_PRIMARY,1664263,99.643698
RATE_INTEREST_PRIVILEGED,1664263,99.643698
DAYS_TERMINATION_DIFF_1,1661862,99.499944
DAYS_FIRST_DRAWING,1607509,96.245691
DAYS_LAST_DUE_1ST_VERSION,991321,59.352933
DAYS_DUE_DIFF_1,991321,59.352933
DAYS_TERMINATION_DIFF_3,904090,54.130189
DAYS_TERMINATION_DIFF_2,904031,54.126657
DAYS_TERMINATION,898978,53.824121
DOWN_PAYMENT_RATIO,895869,53.637977


In [114]:
##### IMPUTE MISSINGS

### FACTORS
prev_facs = [f for f in prev.columns if prev[f].dtype == "object"]
for var in prev_facs:
    prev[var].fillna("Unknown level", inplace = True)       
    

### MERGED FEATURES

# impute -99 for CARD
cards = prev.filter(like = "card_").columns
for var in cards:
    prev[var].fillna(-99, inplace = True)
    
# impute -90 for INST
insts = prev.filter(like = "inst_").columns
for var in insts:
    prev[var].fillna(-99, inplace = True)
    
# impute -90 for POCA
pocas = prev.filter(like = "poca_").columns
for var in pocas:
    prev[var].fillna(-99, inplace = True)
    
    
### INTERST RATES
rate_vars = ["RATE_INTEREST_PRIVILEGED", "RATE_INTEREST_PRIMARY"]
for var in rate_vars:
    prev[var].fillna(-99, inplace = True)
    
    
### OTHER FEATURES
             
# find variables
other_vars = ["AMT_DOWN_PAYMENT", "RATE_DOWN_PAYMENT", "DAYS_FIRST_DUE",
              "DAYS_LAST_DUE", "DAYS_FIRST_DRAWING", "DAYS_LAST_DUE_1ST_VERSION",
              "DAYS_TERMINATION", "NFLAG_INSURED_ON_APPROVAL", "AMT_GOODS_PRICE",
              "AMT_ANNUITY", "CNT_PAYMENT", "AMT_CREDIT"]
for var in other_vars:
    prev[var].fillna(-99, inplace = True)

In [115]:
### AGGREGATIONS

# count previous loans
cnt_loan = prev[["SK_ID_CURR", "SK_ID_PREV"]].groupby("SK_ID_CURR").count()
del prev["SK_ID_PREV"]

# aggregate data
agg_prev = aggregate_data(prev, id_var = "SK_ID_CURR", label = "prev")

# add previous loan count
agg_prev["prev_LOAN_COUNT"] = cnt_loan

- Preparing the dataset...
- Extracted 16 factors and 36 numerics...
- Aggregating numeric features...
- Aggregating factor features...
- Final dimensions: (338857, 176)


In [116]:
# count missings
nas = count_missings(agg_prev)
nas

Unnamed: 0,Total,Percent
prev_DAYS_TERMINATION_DIFF_1_std,338827,99.991147
prev_DAYS_TERMINATION_DIFF_1_min,330535,97.544097
prev_DAYS_TERMINATION_DIFF_1_mean,330535,97.544097
prev_DAYS_TERMINATION_DIFF_1_max,330535,97.544097
prev_DAYS_DUE_DIFF_1_std,166567,49.155543
prev_DOWN_PAYMENT_RATIO_std,146204,43.146224
prev_DAYS_TERMINATION_DIFF_3_std,145452,42.924301
prev_DAYS_TERMINATION_DIFF_2_std,145442,42.92135
prev_DAYS_DUE_DIFF_2_std,141044,41.623458
prev_AMT_GIVEN_RATIO_1_std,119224,35.184163


In [117]:
# check data
agg_prev.head()

Unnamed: 0_level_0,prev_RATE_INTEREST_PRIVILEGED_mean,prev_RATE_INTEREST_PRIVILEGED_std,prev_RATE_INTEREST_PRIVILEGED_min,prev_RATE_INTEREST_PRIVILEGED_max,prev_REJECT_RATIO_5_mean,prev_REJECT_RATIO_5_std,prev_REJECT_RATIO_5_min,prev_REJECT_RATIO_5_max,prev_SELLERPLACE_AREA_mean,prev_SELLERPLACE_AREA_std,prev_SELLERPLACE_AREA_min,prev_SELLERPLACE_AREA_max,prev_APPROVE_RATIO_1_mean,prev_APPROVE_RATIO_1_std,prev_APPROVE_RATIO_1_min,prev_APPROVE_RATIO_1_max,prev_DAYS_LAST_DUE_1ST_VERSION_mean,prev_DAYS_LAST_DUE_1ST_VERSION_std,prev_DAYS_LAST_DUE_1ST_VERSION_min,prev_DAYS_LAST_DUE_1ST_VERSION_max,prev_AMT_DOWN_PAYMENT_mean,prev_AMT_DOWN_PAYMENT_std,prev_AMT_DOWN_PAYMENT_min,prev_AMT_DOWN_PAYMENT_max,prev_DAYS_DUE_DIFF_2_mean,prev_DAYS_DUE_DIFF_2_std,prev_DAYS_DUE_DIFF_2_min,prev_DAYS_DUE_DIFF_2_max,prev_REJECT_RATIO_3_mean,prev_REJECT_RATIO_3_std,prev_REJECT_RATIO_3_min,prev_REJECT_RATIO_3_max,prev_RATE_DOWN_PAYMENT_mean,prev_RATE_DOWN_PAYMENT_std,prev_RATE_DOWN_PAYMENT_min,prev_RATE_DOWN_PAYMENT_max,prev_AMT_GIVEN_RATIO_1_mean,prev_AMT_GIVEN_RATIO_1_std,prev_AMT_GIVEN_RATIO_1_min,prev_AMT_GIVEN_RATIO_1_max,prev_DAYS_TERMINATION_mean,prev_DAYS_TERMINATION_std,prev_DAYS_TERMINATION_min,prev_DAYS_TERMINATION_max,prev_DAYS_TERMINATION_DIFF_3_mean,prev_DAYS_TERMINATION_DIFF_3_std,prev_DAYS_TERMINATION_DIFF_3_min,prev_DAYS_TERMINATION_DIFF_3_max,prev_DOWN_PAYMENT_RATIO_mean,prev_DOWN_PAYMENT_RATIO_std,prev_DOWN_PAYMENT_RATIO_min,prev_DOWN_PAYMENT_RATIO_max,prev_DAYS_FIRST_DRAWING_mean,prev_DAYS_FIRST_DRAWING_std,prev_DAYS_FIRST_DRAWING_min,prev_DAYS_FIRST_DRAWING_max,prev_APPROVE_RATIO_3_mean,prev_APPROVE_RATIO_3_std,prev_APPROVE_RATIO_3_min,prev_APPROVE_RATIO_3_max,prev_DAYS_DUE_DIFF_1_mean,prev_DAYS_DUE_DIFF_1_std,prev_DAYS_DUE_DIFF_1_min,prev_DAYS_DUE_DIFF_1_max,prev_DAYS_FIRST_DUE_mean,prev_DAYS_FIRST_DUE_std,prev_DAYS_FIRST_DUE_min,prev_DAYS_FIRST_DUE_max,prev_AMT_GOODS_PRICE_mean,prev_AMT_GOODS_PRICE_std,prev_AMT_GOODS_PRICE_min,prev_AMT_GOODS_PRICE_max,prev_APPL_PER_CONTRACT_RATIO_mean,prev_APPL_PER_CONTRACT_RATIO_std,prev_APPL_PER_CONTRACT_RATIO_min,prev_APPL_PER_CONTRACT_RATIO_max,prev_APPROVE_RATIO_5_mean,prev_APPROVE_RATIO_5_std,prev_APPROVE_RATIO_5_min,prev_APPROVE_RATIO_5_max,prev_HOUR_APPR_PROCESS_START_mean,prev_HOUR_APPR_PROCESS_START_std,prev_HOUR_APPR_PROCESS_START_min,prev_HOUR_APPR_PROCESS_START_max,prev_NFLAG_INSURED_ON_APPROVAL_mean,prev_NFLAG_INSURED_ON_APPROVAL_std,prev_NFLAG_INSURED_ON_APPROVAL_min,prev_NFLAG_INSURED_ON_APPROVAL_max,prev_CNT_PAYMENT_mean,prev_CNT_PAYMENT_std,prev_CNT_PAYMENT_min,prev_CNT_PAYMENT_max,prev_DAYS_TERMINATION_DIFF_1_mean,prev_DAYS_TERMINATION_DIFF_1_std,prev_DAYS_TERMINATION_DIFF_1_min,prev_DAYS_TERMINATION_DIFF_1_max,prev_CNT_PREV_APPLICATIONS_mean,prev_CNT_PREV_APPLICATIONS_std,prev_CNT_PREV_APPLICATIONS_min,prev_CNT_PREV_APPLICATIONS_max,prev_DAYS_LAST_DUE_mean,prev_DAYS_LAST_DUE_std,prev_DAYS_LAST_DUE_min,prev_DAYS_LAST_DUE_max,prev_AMT_GIVEN_RATIO_2_mean,prev_AMT_GIVEN_RATIO_2_std,prev_AMT_GIVEN_RATIO_2_min,prev_AMT_GIVEN_RATIO_2_max,prev_NFLAG_LAST_APPL_IN_DAY_mean,prev_NFLAG_LAST_APPL_IN_DAY_std,prev_NFLAG_LAST_APPL_IN_DAY_min,prev_NFLAG_LAST_APPL_IN_DAY_max,prev_RATE_INTEREST_PRIMARY_mean,prev_RATE_INTEREST_PRIMARY_std,prev_RATE_INTEREST_PRIMARY_min,prev_RATE_INTEREST_PRIMARY_max,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_std,prev_AMT_CREDIT_min,prev_AMT_CREDIT_max,prev_AMT_ANNUITY_mean,prev_AMT_ANNUITY_std,prev_AMT_ANNUITY_min,prev_AMT_ANNUITY_max,prev_REJECT_RATIO_1_mean,prev_REJECT_RATIO_1_std,prev_REJECT_RATIO_1_min,prev_REJECT_RATIO_1_max,prev_AMT_APPLICATION_mean,prev_AMT_APPLICATION_std,prev_AMT_APPLICATION_min,prev_AMT_APPLICATION_max,prev_DAYS_DECISION_mean,prev_DAYS_DECISION_std,prev_DAYS_DECISION_min,prev_DAYS_DECISION_max,prev_CNT_PREV_CONTRACTS_mean,prev_CNT_PREV_CONTRACTS_std,prev_CNT_PREV_CONTRACTS_min,prev_CNT_PREV_CONTRACTS_max,prev_DAYS_TERMINATION_DIFF_2_mean,prev_DAYS_TERMINATION_DIFF_2_std,prev_DAYS_TERMINATION_DIFF_2_min,prev_DAYS_TERMINATION_DIFF_2_max,prev_NAME_CONTRACT_TYPE_mode,prev_NAME_CONTRACT_TYPE_unique,prev_WEEKDAY_APPR_PROCESS_START_mode,prev_WEEKDAY_APPR_PROCESS_START_unique,prev_FLAG_LAST_APPL_PER_CONTRACT_mode,prev_FLAG_LAST_APPL_PER_CONTRACT_unique,prev_NAME_CASH_LOAN_PURPOSE_mode,prev_NAME_CASH_LOAN_PURPOSE_unique,prev_NAME_CONTRACT_STATUS_mode,prev_NAME_CONTRACT_STATUS_unique,prev_NAME_PAYMENT_TYPE_mode,prev_NAME_PAYMENT_TYPE_unique,prev_CODE_REJECT_REASON_mode,prev_CODE_REJECT_REASON_unique,prev_NAME_TYPE_SUITE_mode,prev_NAME_TYPE_SUITE_unique,prev_NAME_GOODS_CATEGORY_mode,prev_NAME_GOODS_CATEGORY_unique,prev_NAME_PORTFOLIO_mode,prev_NAME_PORTFOLIO_unique,prev_NAME_PRODUCT_TYPE_mode,prev_NAME_PRODUCT_TYPE_unique,prev_CHANNEL_TYPE_mode,prev_CHANNEL_TYPE_unique,prev_NAME_SELLER_INDUSTRY_mode,prev_NAME_SELLER_INDUSTRY_unique,prev_NAME_YIELD_GROUP_mode,prev_NAME_YIELD_GROUP_unique,prev_PRODUCT_COMBINATION_mode,prev_PRODUCT_COMBINATION_unique,prev_DAY_APPR_PROCESS_START_mode,prev_DAY_APPR_PROCESS_START_unique,prev_LOAN_COUNT
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1
100001,-99.0,,-99.0,-99.0,0,,0,0,23.0,,23,23,1,,1,1,1499.0,,1499.0,1499.0,7.832411,,7.832411,7.832411,-90.0,,-90.0,-90.0,0,,0,0,0.104326,,0.104326,0.104326,0.957782,,0.957782,0.957782,1612.0,,1612.0,1612.0,-7.0,,-7.0,-7.0,0.101468,,0.101468,0.101468,-99.0,,-99.0,-99.0,1,,1,1,-210.0,,-210.0,-210.0,1709.0,,1709.0,1709.0,10.12007,,10.12007,10.12007,1.0,,1.0,1.0,1,,1,1,13.0,,13,13,0.0,,0.0,0.0,8.0,,8.0,8.0,,,,,1,,1,1,1619.0,,1619.0,1619.0,1.0,,1.0,1.0,1.0,,1,1,-99.0,,-99.0,-99.0,10.076937,,10.076937,10.076937,8.281977,,8.281977,8.281977,0,,0,0,10.12007,,10.12007,10.12007,1740.0,,1740.0,1740.0,1,,1,1,-97.0,,-97.0,-97.0,Consumer loans,1,FRIDAY,1,Y,1,XAP,1,Approved,1,Cash through the bank,1,XAP,1,Family,1,Mobile,1,POS,1,XNA,1,Country-wide,1,Connectivity,1,high,1,POS mobile with interest,1,Working day,1,1
100002,-99.0,,-99.0,-99.0,0,,0,0,500.0,,500,500,1,,1,1,-99.0,,-99.0,-99.0,0.0,,0.0,0.0,-540.0,,-540.0,-540.0,0,,0,0,0.0,,0.0,0.0,1.0,,1.0,1.0,17.0,,17.0,17.0,-8.0,,-8.0,-8.0,0.0,,0.0,0.0,-99.0,,-99.0,-99.0,1,,1,1,,,,,565.0,,565.0,565.0,12.095454,,12.095454,12.095454,1.0,,1.0,1.0,1,,1,1,9.0,,9,9,0.0,,0.0,0.0,24.0,,24.0,24.0,,,,,1,,1,1,25.0,,25.0,25.0,1.0,,1.0,1.0,1.0,,1,1,-99.0,,-99.0,-99.0,12.095454,,12.095454,12.095454,9.132679,,9.132679,9.132679,0,,0,0,12.095454,,12.095454,12.095454,606.0,,606.0,606.0,1,,1,1,-548.0,,-548.0,-548.0,Consumer loans,1,SATURDAY,1,Y,1,XAP,1,Approved,1,XNA,1,XAP,1,Unknown level,1,Vehicles,1,POS,1,XNA,1,Stone,1,Auto technology,1,low_normal,1,POS other with interest,1,Weekend,1,1
100003,-99.0,0.0,-99.0,-99.0,0,0.0,0,0,533.0,757.540098,-1,1400,1,0.0,1,1,1004.333333,854.97037,386.0,1980.0,-30.054251,59.872042,-99.0,8.837246,-220.0,96.436508,-330.0,-150.0,0,0.0,0,0,-32.966646,57.186584,-99.0,0.100061,1.057664,0.083753,0.989013,1.15098,1047.333333,806.196213,527.0,1976.0,-7.0,2.645751,-9.0,-4.0,0.050029,0.070752,0.0,0.100059,-99.0,0.0,-99.0,-99.0,1,0.0,1,1,-270.0,103.923048,-330.0,-150.0,1274.333333,897.827563,716.0,2310.0,12.526196,1.2975,11.139112,13.710151,1.0,0.0,1.0,1.0,1,0.0,1,1,14.666667,2.516611,12,17,0.666667,0.57735,0.0,1.0,10.0,3.464102,6.0,12.0,,,,,3,0.0,3,3,1054.333333,803.569744,536.0,1980.0,1.0,0.0,1.0,1.0,1.0,0.0,1,1,-99.0,0.0,-99.0,-99.0,12.580207,1.370403,11.128064,13.850765,10.462473,1.441706,8.815564,11.496369,0,0.0,0,0,12.526196,1.2975,11.139112,13.710151,1305.0,898.138631,746.0,2341.0,3,0.0,3,3,-227.0,93.952115,-334.0,-158.0,Consumer loans,2,FRIDAY,3,Y,1,XAP,2,Approved,1,Cash through the bank,2,XAP,1,Family,2,Consumer Electronics,3,POS,2,XNA,2,Country-wide,3,Consumer electronics,3,middle,2,Cash X-Sell: low,3,Weekend,2,3
100004,-99.0,,-99.0,-99.0,0,,0,0,30.0,,30,30,1,,1,1,694.0,,694.0,694.0,8.488999,,8.488999,8.488999,-60.0,,-60.0,-60.0,0,,0,0,0.212008,,0.212008,0.212008,0.828021,,0.828021,0.828021,714.0,,714.0,714.0,-10.0,,-10.0,-10.0,0.200148,,0.200148,0.200148,-99.0,,-99.0,-99.0,1,,1,1,-90.0,,-90.0,-90.0,784.0,,784.0,784.0,10.097532,,10.097532,10.097532,1.0,,1.0,1.0,1,,1,1,5.0,,5,5,0.0,,0.0,0.0,4.0,,4.0,4.0,,,,,1,,1,1,724.0,,724.0,724.0,1.0,,1.0,1.0,1.0,,1,1,-99.0,,-99.0,-99.0,9.908823,,9.908823,9.908823,8.586393,,8.586393,8.586393,0,,0,0,10.097532,,10.097532,10.097532,815.0,,815.0,815.0,1,,1,1,-70.0,,-70.0,-70.0,Consumer loans,1,FRIDAY,1,Y,1,XAP,1,Approved,1,Cash through the bank,1,XAP,1,Unaccompanied,1,Mobile,1,POS,1,XNA,1,Regional / Local,1,Connectivity,1,middle,1,POS mobile without interest,1,Working day,1,1
100005,-99.0,0.0,-99.0,-99.0,0,0.0,0,0,18.0,26.870058,-1,37,1,0.0,1,1,138.5,335.875721,-99.0,376.0,-45.297988,75.946114,-99.0,8.404024,-240.0,,-240.0,-240.0,0,0.0,0,0,-49.445518,70.080621,-99.0,0.108964,0.89995,,0.89995,0.89995,180.5,395.272691,-99.0,460.0,-6.0,,-6.0,-6.0,0.10005,,0.10005,0.10005,-99.0,0.0,-99.0,-99.0,1,0.0,1,1,-330.0,,-330.0,-330.0,303.5,569.220959,-99.0,706.0,-44.147048,77.573789,-99.0,10.705904,1.0,0.0,1.0,1.0,1,0.0,1,1,10.5,0.707107,10,11,-49.5,70.003571,-99.0,0.0,-43.5,78.488853,-99.0,12.0,,,,,2,0.0,2,2,183.5,399.515331,-99.0,466.0,1.0,,1.0,1.0,1.0,0.0,1,1,-99.0,0.0,-99.0,-99.0,5.300245,7.495678,0.0,10.60049,-45.260337,75.99936,-99.0,8.479325,0,0.0,0,0,5.352952,7.570217,0.0,10.705904,536.0,312.541197,315.0,757.0,2,0.0,2,2,-246.0,,-246.0,-246.0,Cash loans,2,FRIDAY,2,Y,1,XAP,2,Approved,2,Cash through the bank,2,XAP,1,Unknown level,1,Mobile,2,POS,2,XNA,1,Country-wide,2,Connectivity,2,XNA,2,Cash,2,Working day,1,2


In [118]:
# clear memory
del prev

## 4.4. BUILD DATASET

In [119]:
# merge data
print(appl.shape)
appl = appl.merge(right = agg_buro.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
appl = appl.merge(right = agg_prev.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
appl = appl.merge(right = agg_inst.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
appl = appl.merge(right = agg_poca.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
#appl = appl.merge(right = agg_card.reset_index(), how = "left", on = "SK_ID_CURR")
#print(appl.shape)

(356255, 132)
(356255, 291)
(356255, 468)
(356255, 600)
(356255, 690)


In [120]:
##### CROSS-TABLE FEATURE ENGINEERING

# credit ratios
appl["mix_AMT_PREV_ANNUITY_RATIO"]     = appl["app_AMT_ANNUITY"] / appl["prev_AMT_ANNUITY_mean"]
appl["mix_AMT_PREV_CREDIT_RATIO"]      = appl["app_AMT_CREDIT"] / appl["prev_AMT_CREDIT_mean"]
appl["mix_AMT_PREV_GOODS_PRICE_RATIO"] = appl["app_AMT_GOODS_PRICE"] / appl["prev_AMT_GOODS_PRICE_mean"]
appl["mix_AMT_BURO_ANNUITY_RATIO"]     = appl["app_AMT_ANNUITY"] / appl["buro_AMT_ANNUITY_mean"]
appl["mix_AMT_BURO_CREDIT_RATIO"]      = appl["app_AMT_CREDIT"] / appl["buro_AMT_CREDIT_SUM_mean"]

In [121]:
# count missings
nas = count_missings(appl)
nas

Unnamed: 0,Total,Percent
prev_DAYS_TERMINATION_DIFF_1_std,356225,99.991579
buro_AMT_SUM_DEBT_RATIO_2_std,352451,98.932226
prev_DAYS_TERMINATION_DIFF_1_max,347933,97.664033
prev_DAYS_TERMINATION_DIFF_1_mean,347933,97.664033
prev_DAYS_TERMINATION_DIFF_1_min,347933,97.664033
buro_AMT_MAX_OVERDUE_RATIO_1_std,338806,95.102104
buro_AMT_MAX_OVERDUE_RATIO_1_max,304236,85.398380
buro_AMT_MAX_OVERDUE_RATIO_1_min,304236,85.398380
buro_AMT_MAX_OVERDUE_RATIO_1_mean,304236,85.398380
buro_AMT_SUM_OVERDUE_RATIO_1_std,301392,84.600076


In [122]:
##### IMPUTE MISSINGS

### FACTORS
appl_factors = [f for f in appl.columns if appl[f].dtype == "object"]
for var in appl_factors:
    appl[var].fillna("Unknown level", inplace = True)       

# impute -99 for PREV
appls = appl.filter(like = "prev_").columns
for var in appls:
    appl[var].fillna(-99, inplace = True)
    
# impute -99 for BURO
buros = appl.filter(like = "buro_").columns
for var in buros:
    appl[var].fillna(-99, inplace = True)

In [123]:
# label encoder for factors
data_factors = [f for f in appl.columns if appl[f].dtype == "object"]
le = LabelEncoder()
for var in data_factors:
    appl[var] = le.fit_transform(appl[var])

In [124]:
# count missings
nas = count_missings(appl)
nas

Unnamed: 0,Total,Percent
mix_AMT_BURO_ANNUITY_RATIO,238031,66.814782
poca_SK_DPD_std_std,125040,35.098455
poca_CNT_INSTALMENT_FUTURE_std_std,125040,35.098455
poca_INSTALLMENTS_PERCENT_std_std,125040,35.098455
poca_CNT_INSTALMENT_std_std,125040,35.098455
poca_SK_DPD_DEF_std_std,125040,35.098455
poca_CNT_INSTALMENT_FUTURE_mean_std,122739,34.452569
poca_SK_DPD_min_std,122739,34.452569
poca_CNT_INSTALMENT_FUTURE_min_std,122739,34.452569
poca_INSTALLMENTS_PERCENT_mean_std,122739,34.452569


In [125]:
# check data
appl.head()

Unnamed: 0,SK_ID_CURR,app_NAME_CONTRACT_TYPE,app_CODE_GENDER,app_FLAG_OWN_CAR,app_FLAG_OWN_REALTY,app_CNT_CHILDREN,app_AMT_INCOME_TOTAL,app_AMT_CREDIT,app_AMT_ANNUITY,app_AMT_GOODS_PRICE,app_NAME_TYPE_SUITE,app_NAME_INCOME_TYPE,app_NAME_EDUCATION_TYPE,app_NAME_FAMILY_STATUS,app_NAME_HOUSING_TYPE,app_REGION_POPULATION_RELATIVE,app_DAYS_BIRTH,app_DAYS_EMPLOYED,app_DAYS_REGISTRATION,app_DAYS_ID_PUBLISH,app_OWN_CAR_AGE,app_FLAG_MOBIL,app_FLAG_EMP_PHONE,app_FLAG_WORK_PHONE,app_FLAG_CONT_MOBILE,app_FLAG_PHONE,app_FLAG_EMAIL,app_OCCUPATION_TYPE,app_CNT_FAM_MEMBERS,app_REGION_RATING_CLIENT,app_REGION_RATING_CLIENT_W_CITY,app_WEEKDAY_APPR_PROCESS_START,app_HOUR_APPR_PROCESS_START,app_REG_REGION_NOT_LIVE_REGION,app_REG_REGION_NOT_WORK_REGION,app_LIVE_REGION_NOT_WORK_REGION,app_REG_CITY_NOT_LIVE_CITY,app_REG_CITY_NOT_WORK_CITY,app_LIVE_CITY_NOT_WORK_CITY,app_ORGANIZATION_TYPE,app_EXT_SOURCE_1,app_EXT_SOURCE_2,app_EXT_SOURCE_3,app_APARTMENTS_AVG,app_BASEMENTAREA_AVG,app_YEARS_BEGINEXPLUATATION_AVG,app_YEARS_BUILD_AVG,app_COMMONAREA_AVG,app_ELEVATORS_AVG,app_ENTRANCES_AVG,app_FLOORSMAX_AVG,app_FLOORSMIN_AVG,app_LANDAREA_AVG,app_LIVINGAPARTMENTS_AVG,app_LIVINGAREA_AVG,app_NONLIVINGAPARTMENTS_AVG,app_NONLIVINGAREA_AVG,app_APARTMENTS_MODE,app_BASEMENTAREA_MODE,app_YEARS_BEGINEXPLUATATION_MODE,app_YEARS_BUILD_MODE,app_COMMONAREA_MODE,app_ELEVATORS_MODE,app_ENTRANCES_MODE,app_FLOORSMAX_MODE,app_FLOORSMIN_MODE,app_LANDAREA_MODE,app_LIVINGAPARTMENTS_MODE,app_LIVINGAREA_MODE,app_NONLIVINGAPARTMENTS_MODE,app_NONLIVINGAREA_MODE,app_APARTMENTS_MEDI,app_BASEMENTAREA_MEDI,app_YEARS_BEGINEXPLUATATION_MEDI,app_YEARS_BUILD_MEDI,app_COMMONAREA_MEDI,app_ELEVATORS_MEDI,app_ENTRANCES_MEDI,app_FLOORSMAX_MEDI,app_FLOORSMIN_MEDI,app_LANDAREA_MEDI,app_LIVINGAPARTMENTS_MEDI,app_LIVINGAREA_MEDI,app_NONLIVINGAPARTMENTS_MEDI,app_NONLIVINGAREA_MEDI,app_FONDKAPREMONT_MODE,app_HOUSETYPE_MODE,app_TOTALAREA_MODE,app_WALLSMATERIAL_MODE,app_EMERGENCYSTATE_MODE,app_OBS_30_CNT_SOCIAL_CIRCLE,app_DEF_30_CNT_SOCIAL_CIRCLE,app_OBS_60_CNT_SOCIAL_CIRCLE,app_DEF_60_CNT_SOCIAL_CIRCLE,app_DAYS_LAST_PHONE_CHANGE,app_FLAG_DOCUMENT_2,app_FLAG_DOCUMENT_3,app_FLAG_DOCUMENT_4,app_FLAG_DOCUMENT_5,app_FLAG_DOCUMENT_6,app_FLAG_DOCUMENT_7,app_FLAG_DOCUMENT_8,app_FLAG_DOCUMENT_9,app_FLAG_DOCUMENT_10,app_FLAG_DOCUMENT_11,app_FLAG_DOCUMENT_12,app_FLAG_DOCUMENT_13,app_FLAG_DOCUMENT_14,app_FLAG_DOCUMENT_15,app_FLAG_DOCUMENT_16,app_FLAG_DOCUMENT_17,app_FLAG_DOCUMENT_18,app_FLAG_DOCUMENT_19,app_FLAG_DOCUMENT_20,app_FLAG_DOCUMENT_21,app_AMT_REQ_CREDIT_BUREAU_HOUR,app_AMT_REQ_CREDIT_BUREAU_DAY,app_AMT_REQ_CREDIT_BUREAU_WEEK,app_AMT_REQ_CREDIT_BUREAU_MON,app_AMT_REQ_CREDIT_BUREAU_QRT,app_AMT_REQ_CREDIT_BUREAU_YEAR,app_CREDIT_BY_INCOME,app_ANNUITY_BY_INCOME,app_GOODS_PRICE_BY_INCOME,app_PERCENT_WORKED,app_CNT_ADULTS,app_NUM_EXT_SOURCES,app_NUM_DOCUMENTS,app_DAY_APPR_PROCESS_START,app_isnull_HOUSE,app_isnull_BURO_ENQUIRIES,app_isnull_SOCIAL_CIRCLE,buro_AMT_CREDIT_SUM_mean,buro_AMT_CREDIT_SUM_std,buro_AMT_CREDIT_SUM_min,buro_AMT_CREDIT_SUM_max,buro_STATUS_0_mean,buro_STATUS_0_std,buro_STATUS_0_min,buro_STATUS_0_max,buro_STATUS_3_mean,buro_STATUS_3_std,buro_STATUS_3_min,buro_STATUS_3_max,buro_DAYS_ENDDATE_FACT_mean,buro_DAYS_ENDDATE_FACT_std,buro_DAYS_ENDDATE_FACT_min,buro_DAYS_ENDDATE_FACT_max,buro_isnull_AMT_CREDIT_MAX_OVERDUE_mean,buro_isnull_AMT_CREDIT_MAX_OVERDUE_std,buro_isnull_AMT_CREDIT_MAX_OVERDUE_min,buro_isnull_AMT_CREDIT_MAX_OVERDUE_max,buro_DAYS_CREDIT_mean,buro_DAYS_CREDIT_std,buro_DAYS_CREDIT_min,buro_DAYS_CREDIT_max,buro_AMT_SUM_DEBT_RATIO_1_mean,buro_AMT_SUM_DEBT_RATIO_1_std,buro_AMT_SUM_DEBT_RATIO_1_min,buro_AMT_SUM_DEBT_RATIO_1_max,buro_DAYS_CREDIT_ENDDATE_mean,buro_DAYS_CREDIT_ENDDATE_std,buro_DAYS_CREDIT_ENDDATE_min,buro_DAYS_CREDIT_ENDDATE_max,buro_STATUS_1_mean,buro_STATUS_1_std,buro_STATUS_1_min,buro_STATUS_1_max,buro_AMT_CREDIT_SUM_OVERDUE_mean,buro_AMT_CREDIT_SUM_OVERDUE_std,buro_AMT_CREDIT_SUM_OVERDUE_min,buro_AMT_CREDIT_SUM_OVERDUE_max,buro_CREDIT_DAY_OVERDUE_mean,buro_CREDIT_DAY_OVERDUE_std,buro_CREDIT_DAY_OVERDUE_min,buro_CREDIT_DAY_OVERDUE_max,buro_isnull_STATUS_mean,buro_isnull_STATUS_std,buro_isnull_STATUS_min,buro_isnull_STATUS_max,buro_AMT_SUM_OVERDUE_RATIO_2_mean,buro_AMT_SUM_OVERDUE_RATIO_2_std,buro_AMT_SUM_OVERDUE_RATIO_2_min,buro_AMT_SUM_OVERDUE_RATIO_2_max,buro_isnull_AMT_ANNUITY_mean,buro_isnull_AMT_ANNUITY_std,buro_isnull_AMT_ANNUITY_min,buro_isnull_AMT_ANNUITY_max,buro_AMT_CREDIT_SUM_LIMIT_mean,buro_AMT_CREDIT_SUM_LIMIT_std,buro_AMT_CREDIT_SUM_LIMIT_min,buro_AMT_CREDIT_SUM_LIMIT_max,buro_AMT_MAX_OVERDUE_RATIO_1_mean,buro_AMT_MAX_OVERDUE_RATIO_1_std,buro_AMT_MAX_OVERDUE_RATIO_1_min,buro_AMT_MAX_OVERDUE_RATIO_1_max,buro_CNT_BURO_CLOSED_mean,buro_CNT_BURO_CLOSED_std,buro_CNT_BURO_CLOSED_min,buro_CNT_BURO_CLOSED_max,buro_AMT_SUM_OVERDUE_RATIO_1_mean,buro_AMT_SUM_OVERDUE_RATIO_1_std,buro_AMT_SUM_OVERDUE_RATIO_1_min,buro_AMT_SUM_OVERDUE_RATIO_1_max,buro_MONTH_COUNT_mean,buro_MONTH_COUNT_std,buro_MONTH_COUNT_min,buro_MONTH_COUNT_max,buro_CNT_BURO_ACTIVE_mean,buro_CNT_BURO_ACTIVE_std,buro_CNT_BURO_ACTIVE_min,buro_CNT_BURO_ACTIVE_max,buro_STATUS_5_mean,buro_STATUS_5_std,buro_STATUS_5_min,buro_STATUS_5_max,buro_STATUS_C_mean,buro_STATUS_C_std,buro_STATUS_C_min,buro_STATUS_C_max,buro_AMT_CREDIT_SUM_DEBT_mean,buro_AMT_CREDIT_SUM_DEBT_std,buro_AMT_CREDIT_SUM_DEBT_min,buro_AMT_CREDIT_SUM_DEBT_max,buro_STATUS_X_mean,buro_STATUS_X_std,buro_STATUS_X_min,buro_STATUS_X_max,buro_STATUS_4_mean,buro_STATUS_4_std,buro_STATUS_4_min,buro_STATUS_4_max,buro_DAYS_END_DIFF_2_mean,buro_DAYS_END_DIFF_2_std,buro_DAYS_END_DIFF_2_min,buro_DAYS_END_DIFF_2_max,buro_CNT_CREDIT_PROLONG_mean,buro_CNT_CREDIT_PROLONG_std,buro_CNT_CREDIT_PROLONG_min,buro_CNT_CREDIT_PROLONG_max,buro_DAYS_END_DIFF_1_mean,buro_DAYS_END_DIFF_1_std,buro_DAYS_END_DIFF_1_min,buro_DAYS_END_DIFF_1_max,buro_AMT_CREDIT_MAX_OVERDUE_mean,buro_AMT_CREDIT_MAX_OVERDUE_std,buro_AMT_CREDIT_MAX_OVERDUE_min,buro_AMT_CREDIT_MAX_OVERDUE_max,buro_DAYS_DURATION_1_mean,buro_DAYS_DURATION_1_std,buro_DAYS_DURATION_1_min,buro_DAYS_DURATION_1_max,buro_DAYS_DURATION_2_mean,buro_DAYS_DURATION_2_std,buro_DAYS_DURATION_2_min,buro_DAYS_DURATION_2_max,buro_AMT_SUM_DEBT_RATIO_2_mean,buro_AMT_SUM_DEBT_RATIO_2_std,buro_AMT_SUM_DEBT_RATIO_2_min,buro_AMT_SUM_DEBT_RATIO_2_max,buro_AMT_ANNUITY_mean,buro_AMT_ANNUITY_std,buro_AMT_ANNUITY_min,buro_AMT_ANNUITY_max,buro_DAYS_CREDIT_UPDATE_mean,buro_DAYS_CREDIT_UPDATE_std,buro_DAYS_CREDIT_UPDATE_min,buro_DAYS_CREDIT_UPDATE_max,buro_CNT_BURO_LOANS_mean,buro_CNT_BURO_LOANS_std,buro_CNT_BURO_LOANS_min,buro_CNT_BURO_LOANS_max,buro_CNT_BURO_BAD_mean,buro_CNT_BURO_BAD_std,buro_CNT_BURO_BAD_min,buro_CNT_BURO_BAD_max,buro_STATUS_2_mean,buro_STATUS_2_std,buro_STATUS_2_min,buro_STATUS_2_max,buro_AMT_MAX_OVERDUE_RATIO_2_mean,buro_AMT_MAX_OVERDUE_RATIO_2_std,buro_AMT_MAX_OVERDUE_RATIO_2_min,buro_AMT_MAX_OVERDUE_RATIO_2_max,buro_CREDIT_ACTIVE_mode,buro_CREDIT_ACTIVE_unique,buro_CREDIT_CURRENCY_mode,buro_CREDIT_CURRENCY_unique,buro_CREDIT_TYPE_mode,buro_CREDIT_TYPE_unique,buro_BURO_COUNT,prev_RATE_INTEREST_PRIVILEGED_mean,prev_RATE_INTEREST_PRIVILEGED_std,prev_RATE_INTEREST_PRIVILEGED_min,prev_RATE_INTEREST_PRIVILEGED_max,prev_REJECT_RATIO_5_mean,prev_REJECT_RATIO_5_std,prev_REJECT_RATIO_5_min,prev_REJECT_RATIO_5_max,prev_SELLERPLACE_AREA_mean,prev_SELLERPLACE_AREA_std,prev_SELLERPLACE_AREA_min,prev_SELLERPLACE_AREA_max,prev_APPROVE_RATIO_1_mean,prev_APPROVE_RATIO_1_std,prev_APPROVE_RATIO_1_min,prev_APPROVE_RATIO_1_max,prev_DAYS_LAST_DUE_1ST_VERSION_mean,prev_DAYS_LAST_DUE_1ST_VERSION_std,prev_DAYS_LAST_DUE_1ST_VERSION_min,prev_DAYS_LAST_DUE_1ST_VERSION_max,prev_AMT_DOWN_PAYMENT_mean,prev_AMT_DOWN_PAYMENT_std,prev_AMT_DOWN_PAYMENT_min,prev_AMT_DOWN_PAYMENT_max,prev_DAYS_DUE_DIFF_2_mean,prev_DAYS_DUE_DIFF_2_std,prev_DAYS_DUE_DIFF_2_min,prev_DAYS_DUE_DIFF_2_max,prev_REJECT_RATIO_3_mean,prev_REJECT_RATIO_3_std,prev_REJECT_RATIO_3_min,prev_REJECT_RATIO_3_max,prev_RATE_DOWN_PAYMENT_mean,prev_RATE_DOWN_PAYMENT_std,prev_RATE_DOWN_PAYMENT_min,prev_RATE_DOWN_PAYMENT_max,prev_AMT_GIVEN_RATIO_1_mean,prev_AMT_GIVEN_RATIO_1_std,prev_AMT_GIVEN_RATIO_1_min,prev_AMT_GIVEN_RATIO_1_max,prev_DAYS_TERMINATION_mean,prev_DAYS_TERMINATION_std,prev_DAYS_TERMINATION_min,prev_DAYS_TERMINATION_max,prev_DAYS_TERMINATION_DIFF_3_mean,prev_DAYS_TERMINATION_DIFF_3_std,prev_DAYS_TERMINATION_DIFF_3_min,prev_DAYS_TERMINATION_DIFF_3_max,prev_DOWN_PAYMENT_RATIO_mean,prev_DOWN_PAYMENT_RATIO_std,prev_DOWN_PAYMENT_RATIO_min,prev_DOWN_PAYMENT_RATIO_max,prev_DAYS_FIRST_DRAWING_mean,prev_DAYS_FIRST_DRAWING_std,prev_DAYS_FIRST_DRAWING_min,prev_DAYS_FIRST_DRAWING_max,prev_APPROVE_RATIO_3_mean,prev_APPROVE_RATIO_3_std,prev_APPROVE_RATIO_3_min,prev_APPROVE_RATIO_3_max,prev_DAYS_DUE_DIFF_1_mean,prev_DAYS_DUE_DIFF_1_std,prev_DAYS_DUE_DIFF_1_min,prev_DAYS_DUE_DIFF_1_max,prev_DAYS_FIRST_DUE_mean,prev_DAYS_FIRST_DUE_std,prev_DAYS_FIRST_DUE_min,prev_DAYS_FIRST_DUE_max,prev_AMT_GOODS_PRICE_mean,prev_AMT_GOODS_PRICE_std,prev_AMT_GOODS_PRICE_min,prev_AMT_GOODS_PRICE_max,prev_APPL_PER_CONTRACT_RATIO_mean,prev_APPL_PER_CONTRACT_RATIO_std,prev_APPL_PER_CONTRACT_RATIO_min,prev_APPL_PER_CONTRACT_RATIO_max,prev_APPROVE_RATIO_5_mean,prev_APPROVE_RATIO_5_std,prev_APPROVE_RATIO_5_min,prev_APPROVE_RATIO_5_max,prev_HOUR_APPR_PROCESS_START_mean,prev_HOUR_APPR_PROCESS_START_std,prev_HOUR_APPR_PROCESS_START_min,prev_HOUR_APPR_PROCESS_START_max,prev_NFLAG_INSURED_ON_APPROVAL_mean,prev_NFLAG_INSURED_ON_APPROVAL_std,prev_NFLAG_INSURED_ON_APPROVAL_min,prev_NFLAG_INSURED_ON_APPROVAL_max,prev_CNT_PAYMENT_mean,prev_CNT_PAYMENT_std,prev_CNT_PAYMENT_min,prev_CNT_PAYMENT_max,prev_DAYS_TERMINATION_DIFF_1_mean,prev_DAYS_TERMINATION_DIFF_1_std,prev_DAYS_TERMINATION_DIFF_1_min,prev_DAYS_TERMINATION_DIFF_1_max,prev_CNT_PREV_APPLICATIONS_mean,prev_CNT_PREV_APPLICATIONS_std,prev_CNT_PREV_APPLICATIONS_min,prev_CNT_PREV_APPLICATIONS_max,prev_DAYS_LAST_DUE_mean,prev_DAYS_LAST_DUE_std,prev_DAYS_LAST_DUE_min,prev_DAYS_LAST_DUE_max,prev_AMT_GIVEN_RATIO_2_mean,prev_AMT_GIVEN_RATIO_2_std,prev_AMT_GIVEN_RATIO_2_min,prev_AMT_GIVEN_RATIO_2_max,prev_NFLAG_LAST_APPL_IN_DAY_mean,prev_NFLAG_LAST_APPL_IN_DAY_std,prev_NFLAG_LAST_APPL_IN_DAY_min,prev_NFLAG_LAST_APPL_IN_DAY_max,prev_RATE_INTEREST_PRIMARY_mean,prev_RATE_INTEREST_PRIMARY_std,prev_RATE_INTEREST_PRIMARY_min,prev_RATE_INTEREST_PRIMARY_max,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_std,prev_AMT_CREDIT_min,prev_AMT_CREDIT_max,prev_AMT_ANNUITY_mean,prev_AMT_ANNUITY_std,prev_AMT_ANNUITY_min,prev_AMT_ANNUITY_max,prev_REJECT_RATIO_1_mean,prev_REJECT_RATIO_1_std,prev_REJECT_RATIO_1_min,prev_REJECT_RATIO_1_max,prev_AMT_APPLICATION_mean,prev_AMT_APPLICATION_std,prev_AMT_APPLICATION_min,prev_AMT_APPLICATION_max,prev_DAYS_DECISION_mean,prev_DAYS_DECISION_std,prev_DAYS_DECISION_min,prev_DAYS_DECISION_max,prev_CNT_PREV_CONTRACTS_mean,prev_CNT_PREV_CONTRACTS_std,prev_CNT_PREV_CONTRACTS_min,prev_CNT_PREV_CONTRACTS_max,prev_DAYS_TERMINATION_DIFF_2_mean,prev_DAYS_TERMINATION_DIFF_2_std,prev_DAYS_TERMINATION_DIFF_2_min,prev_DAYS_TERMINATION_DIFF_2_max,prev_NAME_CONTRACT_TYPE_mode,prev_NAME_CONTRACT_TYPE_unique,prev_WEEKDAY_APPR_PROCESS_START_mode,prev_WEEKDAY_APPR_PROCESS_START_unique,prev_FLAG_LAST_APPL_PER_CONTRACT_mode,prev_FLAG_LAST_APPL_PER_CONTRACT_unique,prev_NAME_CASH_LOAN_PURPOSE_mode,prev_NAME_CASH_LOAN_PURPOSE_unique,prev_NAME_CONTRACT_STATUS_mode,prev_NAME_CONTRACT_STATUS_unique,prev_NAME_PAYMENT_TYPE_mode,prev_NAME_PAYMENT_TYPE_unique,prev_CODE_REJECT_REASON_mode,prev_CODE_REJECT_REASON_unique,prev_NAME_TYPE_SUITE_mode,prev_NAME_TYPE_SUITE_unique,prev_NAME_GOODS_CATEGORY_mode,prev_NAME_GOODS_CATEGORY_unique,prev_NAME_PORTFOLIO_mode,prev_NAME_PORTFOLIO_unique,prev_NAME_PRODUCT_TYPE_mode,prev_NAME_PRODUCT_TYPE_unique,prev_CHANNEL_TYPE_mode,prev_CHANNEL_TYPE_unique,prev_NAME_SELLER_INDUSTRY_mode,prev_NAME_SELLER_INDUSTRY_unique,prev_NAME_YIELD_GROUP_mode,prev_NAME_YIELD_GROUP_unique,prev_PRODUCT_COMBINATION_mode,prev_PRODUCT_COMBINATION_unique,prev_DAY_APPR_PROCESS_START_mode,prev_DAY_APPR_PROCESS_START_unique,prev_LOAN_COUNT,inst_isnull_PAYMENTS_mean_mean,inst_isnull_PAYMENTS_mean_std,inst_isnull_PAYMENTS_mean_min,inst_isnull_PAYMENTS_mean_max,inst_isnull_PAYMENTS_min_mean,inst_isnull_PAYMENTS_min_std,inst_isnull_PAYMENTS_min_min,inst_isnull_PAYMENTS_min_max,inst_NUM_INSTALMENT_VERSION_max_mean,inst_NUM_INSTALMENT_VERSION_max_std,inst_NUM_INSTALMENT_VERSION_max_min,inst_NUM_INSTALMENT_VERSION_max_max,inst_DAYS_ENTRY_PAYMENT_max_mean,inst_DAYS_ENTRY_PAYMENT_max_std,inst_DAYS_ENTRY_PAYMENT_max_min,inst_DAYS_ENTRY_PAYMENT_max_max,inst_NUM_INSTALMENT_VERSION_std_mean,inst_NUM_INSTALMENT_VERSION_std_std,inst_NUM_INSTALMENT_VERSION_std_min,inst_NUM_INSTALMENT_VERSION_std_max,inst_DAYS_ENTRY_PAYMENT_min_mean,inst_DAYS_ENTRY_PAYMENT_min_std,inst_DAYS_ENTRY_PAYMENT_min_min,inst_DAYS_ENTRY_PAYMENT_min_max,inst_DAYS_ENTRY_PAYMENT_std_mean,inst_DAYS_ENTRY_PAYMENT_std_std,inst_DAYS_ENTRY_PAYMENT_std_min,inst_DAYS_ENTRY_PAYMENT_std_max,inst_AMT_PAYMENT_min_mean,inst_AMT_PAYMENT_min_std,inst_AMT_PAYMENT_min_min,inst_AMT_PAYMENT_min_max,inst_inst_INST_COUNT_mean,inst_inst_INST_COUNT_std,inst_inst_INST_COUNT_min,inst_inst_INST_COUNT_max,inst_DAYS_INST_DIF_max_mean,inst_DAYS_INST_DIF_max_std,inst_DAYS_INST_DIF_max_min,inst_DAYS_INST_DIF_max_max,inst_DAYS_INST_DIF_std_mean,inst_DAYS_INST_DIF_std_std,inst_DAYS_INST_DIF_std_min,inst_DAYS_INST_DIF_std_max,inst_DAYS_INST_DIF_mean_mean,inst_DAYS_INST_DIF_mean_std,inst_DAYS_INST_DIF_mean_min,inst_DAYS_INST_DIF_mean_max,inst_AMT_PERCENT_PAID_mean_mean,inst_AMT_PERCENT_PAID_mean_std,inst_AMT_PERCENT_PAID_mean_min,inst_AMT_PERCENT_PAID_mean_max,inst_AMT_PAYMENT_max_mean,inst_AMT_PAYMENT_max_std,inst_AMT_PAYMENT_max_min,inst_AMT_PAYMENT_max_max,inst_NUM_INSTALMENT_VERSION_mean_mean,inst_NUM_INSTALMENT_VERSION_mean_std,inst_NUM_INSTALMENT_VERSION_mean_min,inst_NUM_INSTALMENT_VERSION_mean_max,inst_DAYS_INSTALMENT_std_mean,inst_DAYS_INSTALMENT_std_std,inst_DAYS_INSTALMENT_std_min,inst_DAYS_INSTALMENT_std_max,inst_AMT_PERCENT_PAID_std_mean,inst_AMT_PERCENT_PAID_std_std,inst_AMT_PERCENT_PAID_std_min,inst_AMT_PERCENT_PAID_std_max,inst_DAYS_INSTALMENT_min_mean,inst_DAYS_INSTALMENT_min_std,inst_DAYS_INSTALMENT_min_min,inst_DAYS_INSTALMENT_min_max,inst_isnull_PAYMENTS_max_mean,inst_isnull_PAYMENTS_max_std,inst_isnull_PAYMENTS_max_min,inst_isnull_PAYMENTS_max_max,inst_isnull_PAYMENTS_std_mean,inst_isnull_PAYMENTS_std_std,inst_isnull_PAYMENTS_std_min,inst_isnull_PAYMENTS_std_max,inst_AMT_PAYMENT_std_mean,inst_AMT_PAYMENT_std_std,inst_AMT_PAYMENT_std_min,inst_AMT_PAYMENT_std_max,inst_DAYS_INSTALMENT_max_mean,inst_DAYS_INSTALMENT_max_std,inst_DAYS_INSTALMENT_max_min,inst_DAYS_INSTALMENT_max_max,inst_AMT_INSTALMENT_min_mean,inst_AMT_INSTALMENT_min_std,inst_AMT_INSTALMENT_min_min,inst_AMT_INSTALMENT_min_max,inst_DAYS_INSTALMENT_mean_mean,inst_DAYS_INSTALMENT_mean_std,inst_DAYS_INSTALMENT_mean_min,inst_DAYS_INSTALMENT_mean_max,inst_NUM_INSTALMENT_VERSION_min_mean,inst_NUM_INSTALMENT_VERSION_min_std,inst_NUM_INSTALMENT_VERSION_min_min,inst_NUM_INSTALMENT_VERSION_min_max,inst_DAYS_INST_DIF_min_mean,inst_DAYS_INST_DIF_min_std,inst_DAYS_INST_DIF_min_min,inst_DAYS_INST_DIF_min_max,inst_AMT_INSTALMENT_max_mean,inst_AMT_INSTALMENT_max_std,inst_AMT_INSTALMENT_max_min,inst_AMT_INSTALMENT_max_max,inst_AMT_PERCENT_PAID_min_mean,inst_AMT_PERCENT_PAID_min_std,inst_AMT_PERCENT_PAID_min_min,inst_AMT_PERCENT_PAID_min_max,inst_AMT_PAYMENT_mean_mean,inst_AMT_PAYMENT_mean_std,inst_AMT_PAYMENT_mean_min,inst_AMT_PAYMENT_mean_max,inst_AMT_INSTALMENT_mean_mean,inst_AMT_INSTALMENT_mean_std,inst_AMT_INSTALMENT_mean_min,inst_AMT_INSTALMENT_mean_max,inst_AMT_PERCENT_PAID_max_mean,inst_AMT_PERCENT_PAID_max_std,inst_AMT_PERCENT_PAID_max_min,inst_AMT_PERCENT_PAID_max_max,inst_AMT_INSTALMENT_std_mean,inst_AMT_INSTALMENT_std_std,inst_AMT_INSTALMENT_std_min,inst_AMT_INSTALMENT_std_max,inst_DAYS_ENTRY_PAYMENT_mean_mean,inst_DAYS_ENTRY_PAYMENT_mean_std,inst_DAYS_ENTRY_PAYMENT_mean_min,inst_DAYS_ENTRY_PAYMENT_mean_max,poca_CNT_INSTALMENT_FUTURE_mean_mean,poca_CNT_INSTALMENT_FUTURE_mean_std,poca_CNT_INSTALMENT_FUTURE_mean_min,poca_CNT_INSTALMENT_FUTURE_mean_max,poca_SK_DPD_DEF_max_mean,poca_SK_DPD_DEF_max_std,poca_SK_DPD_DEF_max_min,poca_SK_DPD_DEF_max_max,poca_NAME_CONTRACT_STATUS_unique_mean,poca_NAME_CONTRACT_STATUS_unique_std,poca_NAME_CONTRACT_STATUS_unique_min,poca_NAME_CONTRACT_STATUS_unique_max,poca_SK_DPD_std_mean,poca_SK_DPD_std_std,poca_SK_DPD_std_min,poca_SK_DPD_std_max,poca_SK_DPD_DEF_std_mean,poca_SK_DPD_DEF_std_std,poca_SK_DPD_DEF_std_min,poca_SK_DPD_DEF_std_max,poca_CNT_INSTALMENT_std_mean,poca_CNT_INSTALMENT_std_std,poca_CNT_INSTALMENT_std_min,poca_CNT_INSTALMENT_std_max,poca_SK_DPD_DEF_mean_mean,poca_SK_DPD_DEF_mean_std,poca_SK_DPD_DEF_mean_min,poca_SK_DPD_DEF_mean_max,poca_poca_MON_COUNT_mean,poca_poca_MON_COUNT_std,poca_poca_MON_COUNT_min,poca_poca_MON_COUNT_max,poca_CNT_INSTALMENT_mean_mean,poca_CNT_INSTALMENT_mean_std,poca_CNT_INSTALMENT_mean_min,poca_CNT_INSTALMENT_mean_max,poca_SK_DPD_min_mean,poca_SK_DPD_min_std,poca_SK_DPD_min_min,poca_SK_DPD_min_max,poca_CNT_INSTALMENT_FUTURE_std_mean,poca_CNT_INSTALMENT_FUTURE_std_std,poca_CNT_INSTALMENT_FUTURE_std_min,poca_CNT_INSTALMENT_FUTURE_std_max,poca_SK_DPD_DEF_min_mean,poca_SK_DPD_DEF_min_std,poca_SK_DPD_DEF_min_min,poca_SK_DPD_DEF_min_max,poca_SK_DPD_mean_mean,poca_SK_DPD_mean_std,poca_SK_DPD_mean_min,poca_SK_DPD_mean_max,poca_INSTALLMENTS_PERCENT_max_mean,poca_INSTALLMENTS_PERCENT_max_std,poca_INSTALLMENTS_PERCENT_max_min,poca_INSTALLMENTS_PERCENT_max_max,poca_INSTALLMENTS_PERCENT_mean_mean,poca_INSTALLMENTS_PERCENT_mean_std,poca_INSTALLMENTS_PERCENT_mean_min,poca_INSTALLMENTS_PERCENT_mean_max,poca_CNT_INSTALMENT_FUTURE_min_mean,poca_CNT_INSTALMENT_FUTURE_min_std,poca_CNT_INSTALMENT_FUTURE_min_min,poca_CNT_INSTALMENT_FUTURE_min_max,poca_INSTALLMENTS_PERCENT_std_mean,poca_INSTALLMENTS_PERCENT_std_std,poca_INSTALLMENTS_PERCENT_std_min,poca_INSTALLMENTS_PERCENT_std_max,poca_CNT_INSTALMENT_min_mean,poca_CNT_INSTALMENT_min_std,poca_CNT_INSTALMENT_min_min,poca_CNT_INSTALMENT_min_max,poca_SK_DPD_max_mean,poca_SK_DPD_max_std,poca_SK_DPD_max_min,poca_SK_DPD_max_max,poca_CNT_INSTALMENT_FUTURE_max_mean,poca_CNT_INSTALMENT_FUTURE_max_std,poca_CNT_INSTALMENT_FUTURE_max_min,poca_CNT_INSTALMENT_FUTURE_max_max,poca_INSTALLMENTS_PERCENT_min_mean,poca_INSTALLMENTS_PERCENT_min_std,poca_INSTALLMENTS_PERCENT_min_min,poca_INSTALLMENTS_PERCENT_min_max,poca_CNT_INSTALMENT_max_mean,poca_CNT_INSTALMENT_max_std,poca_CNT_INSTALMENT_max_min,poca_CNT_INSTALMENT_max_max,poca_NAME_CONTRACT_STATUS_mode_mode,poca_NAME_CONTRACT_STATUS_mode_unique,mix_AMT_PREV_ANNUITY_RATIO,mix_AMT_PREV_CREDIT_RATIO,mix_AMT_PREV_GOODS_PRICE_RATIO,mix_AMT_BURO_ANNUITY_RATIO,mix_AMT_BURO_CREDIT_RATIO
0,100002,0,1,0,1,0,12.2185,12.915581,10.114619,12.768544,6,7,4,3,1,0.018801,315.0,21.0,122.0,71.0,-9.0,1,1,0,1,1,0,8,1.0,2,2,6,10,0,0,0,0,0,0,5,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.0252,0.0383,0.9722,0.6341,0.0144,0.0,0.069,0.0833,0.125,0.0377,0.022,0.0198,0.0,0.0,0.025,0.0369,0.9722,0.6243,0.0144,0.0,0.069,0.0833,0.125,0.0375,0.0205,0.0193,0.0,0.0,3,1,0.0149,5,0,2.0,2.0,2.0,2.0,38.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,2.007889,0.121978,1.733333,0.067329,1.0,3,1,1,1,1,1,9.811994,4.084715,0.0,13.017005,0.40696,0.196494,0.1875,0.818182,0.0,0.0,0.0,0.0,747.375,445.764492,36.0,1185.0,0.375,0.517549,0.0,1.0,874.0,431.45104,103.0,1437.0,0.136545,0.27309,0.0,0.54618,945.125,68.43023,908.0,1072.0,0.255682,0.204094,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.125,0.353553,0.0,1.0,1.296646,3.667468,0.0,10.373165,inf,-99.0,inf,inf,6.0,0.0,6.0,6.0,-99.0,-99.0,-99.0,-99.0,13.75,6.363961,4.0,22.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.175426,0.263147,0.0,0.8125,1.551525,4.388375,0.0,12.4122,0.161932,0.16165,0.0,0.5,0.0,0.0,0.0,0.0,36.0,66.730802,-5.0,113.0,0.0,0.0,0.0,0.0,37.666667,65.24058,0.0,113.0,1681.029,2363.2469,0.0,5043.645,-220.666667,139.306616,-365.0,-87.0,-277.0,207.190733,-609.0,-76.0,inf,-99.0,0.0,inf,0.0,0.0,0.0,0.0,499.875,518.522472,7.0,1185.0,8.0,0.0,8.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.043436,0.075229,0.0,0.174139,2,2.0,1,1.0,3,2.0,8.0,-99.0,-99.0,-99.0,-99.0,0.0,-99.0,0.0,0.0,500.0,-99.0,500.0,500.0,1.0,-99.0,1.0,1.0,-99.0,-99.0,-99.0,-99.0,0.0,-99.0,0.0,0.0,-540.0,-99.0,-540.0,-540.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,1.0,-99.0,1.0,1.0,17.0,-99.0,17.0,17.0,-8.0,-99.0,-8.0,-8.0,0.0,-99.0,0.0,0.0,-99.0,-99.0,-99.0,-99.0,1.0,-99.0,1.0,1.0,-99.0,-99.0,-99.0,-99.0,565.0,-99.0,565.0,565.0,12.095454,-99.0,12.095454,12.095454,1.0,-99.0,1.0,1.0,1.0,-99.0,1.0,1.0,9.0,-99.0,9.0,9.0,0.0,-99.0,0.0,0.0,24.0,-99.0,24.0,24.0,-99.0,-99.0,-99.0,-99.0,1.0,-99.0,1.0,1.0,25.0,-99.0,25.0,25.0,1.0,-99.0,1.0,1.0,1.0,-99.0,1.0,1.0,-99.0,-99.0,-99.0,-99.0,12.095454,-99.0,12.095454,12.095454,9.132679,-99.0,9.132679,9.132679,0.0,-99.0,0.0,0.0,12.095454,-99.0,12.095454,12.095454,606.0,-99.0,606.0,606.0,1.0,-99.0,1.0,1.0,-548.0,-99.0,-548.0,-548.0,1,1.0,2,1.0,2,1.0,24,1.0,0,1.0,4,1.0,8,1.0,7,1.0,24,1.0,3,1.0,1,1.0,7,1.0,0,1.0,4,1.0,15,1.0,1,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0,2.0,,2.0,2.0,-49.0,,-49.0,-49.0,0.229416,,0.229416,0.229416,-587.0,,-587.0,-587.0,172.058877,,172.058877,172.058877,9.132679,,9.132679,9.132679,19.0,,19.0,19.0,31.0,,31.0,31.0,4.925171,,4.925171,4.925171,20.421053,,20.421053,20.421053,1.0,,1.0,1.0,10.879833,,10.879833,10.879833,1.052632,,1.052632,1.052632,168.81943,,168.81943,168.81943,0.0,,0.0,0.0,-565.0,,-565.0,-565.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.400825,,0.400825,0.400825,-25.0,,-25.0,-25.0,9.132679,,9.132679,9.132679,-295.0,,-295.0,-295.0,1.0,,1.0,1.0,12.0,,12.0,12.0,10.879833,,10.879833,10.879833,1.0,,1.0,1.0,9.224634,,9.224634,9.224634,9.224634,,9.224634,9.224634,1.0,,1.0,1.0,0.400825,,0.400825,0.400825,-315.421053,,-315.421053,-315.421053,15.0,,15.0,15.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,19.0,,19.0,19.0,24.0,,24.0,24.0,0.0,,0.0,0.0,5.627314,,5.627314,5.627314,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.625,,0.625,0.625,6.0,,6.0,6.0,0.234471,,0.234471,0.234471,24.0,,24.0,24.0,0.0,,0.0,0.0,24.0,,24.0,24.0,0.25,,0.25,0.25,24.0,,24.0,24.0,0,1.0,1.107519,1.067805,1.055648,inf,1.316306
1,100003,0,0,0,0,0,12.506181,14.072865,10.482892,13.937287,1,4,1,1,1,0.003541,559.0,40.0,40.0,10.0,-9.0,1,1,0,1,1,0,3,2.0,1,1,1,11,0,0,0,0,0,0,39,0.311267,0.622246,-9.0,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.804,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.079,0.0554,0.0,0.0,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.01,3,1,0.0714,0,0,1.0,0.0,1.0,0.0,28.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.79075,0.132217,4.183333,0.070862,2.0,2,1,1,1,1,1,11.609754,1.495879,10.010052,13.604791,0.333333,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,1047.25,738.485105,540.0,2131.0,0.0,0.0,0.0,0.0,1400.75,909.826128,606.0,2586.0,0.0,0.0,0.0,0.0,1075.5,929.160732,420.0,2434.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,3.401198,6.802395,0.0,13.604791,-99.0,-99.0,-99.0,-99.0,3.0,0.0,3.0,3.0,-99.0,-99.0,-99.0,-99.0,25.0,0.0,25.0,25.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.240741,0.0,0.240741,0.240741,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,-57.666667,222.185358,-303.0,130.0,0.0,0.0,0.0,0.0,-34.0,253.714406,-303.0,201.0,0.0,0.0,0.0,0.0,-534.333333,496.89469,-1096.0,-152.0,-568.333333,481.117796,-1096.0,-154.0,0.0,-99.0,0.0,0.0,-99.0,-99.0,-99.0,-99.0,816.0,908.053963,43.0,2131.0,4.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2.0,1,1.0,3,2.0,4.0,-99.0,0.0,-99.0,-99.0,0.0,0.0,0.0,0.0,533.0,757.540098,-1.0,1400.0,1.0,0.0,1.0,1.0,1004.333333,854.97037,386.0,1980.0,-30.054251,59.872042,-99.0,8.837246,-220.0,96.436508,-330.0,-150.0,0.0,0.0,0.0,0.0,-32.966646,57.186584,-99.0,0.100061,1.057664,0.083753,0.989013,1.15098,1047.333333,806.196213,527.0,1976.0,-7.0,2.645751,-9.0,-4.0,0.050029,0.070752,0.0,0.100059,-99.0,0.0,-99.0,-99.0,1.0,0.0,1.0,1.0,-270.0,103.923048,-330.0,-150.0,1274.333333,897.827563,716.0,2310.0,12.526196,1.2975,11.139112,13.710151,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,14.666667,2.516611,12.0,17.0,0.666667,0.57735,0.0,1.0,10.0,3.464102,6.0,12.0,-99.0,-99.0,-99.0,-99.0,3.0,0.0,3.0,3.0,1054.333333,803.569744,536.0,1980.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-99.0,0.0,-99.0,-99.0,12.580207,1.370403,11.128064,13.850765,10.462473,1.441706,8.815564,11.496369,0.0,0.0,0.0,0.0,12.526196,1.2975,11.139112,13.710151,1305.0,898.138631,746.0,2341.0,3.0,0.0,3.0,3.0,-227.0,93.952115,-334.0,-158.0,1,2.0,0,3.0,2,1.0,24,2.0,0,1.0,0,2.0,8,1.0,1,2.0,6,3.0,3,2.0,1,2.0,4,3.0,4,3.0,5,2.0,7,3.0,1,2.0,3.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.333333,0.57735,1.0,2.0,-1063.333333,800.327641,-1985.0,-544.0,0.125988,0.218218,0.0,0.377964,-1283.0,902.581298,-2324.0,-719.0,76.380137,30.572883,54.153178,111.24594,10.457264,1.447079,8.804471,11.496369,8.333333,3.21455,6.0,12.0,12.0,3.464102,8.0,14.0,2.48377,0.985781,1.718249,3.596084,7.448413,3.422911,4.428571,11.166667,1.0,0.0,1.0,1.0,11.042744,2.210992,8.815564,13.237184,1.047619,0.082479,1.0,1.142857,76.366269,27.879912,56.124861,108.166538,0.0,0.0,0.0,0.0,-1274.333333,897.827563,-2310.0,-716.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.221007,0.378419,0.001851,0.657966,-1054.333333,803.569744,-1980.0,-536.0,10.457264,1.447079,8.804471,11.496369,-1164.333333,850.637604,-2145.0,-626.0,1.0,0.0,1.0,1.0,4.333333,4.163332,1.0,9.0,11.042744,2.210992,8.815564,13.237184,1.0,0.0,1.0,1.0,10.544809,1.535398,8.81464,11.745057,10.544809,1.535398,8.81464,11.745057,1.0,0.0,1.0,1.0,0.221007,0.378419,0.001851,0.657966,-1171.781746,850.230624,-2151.75,-630.428571,5.666667,2.722399,2.625,7.875,0.0,0.0,0.0,0.0,1.666667,0.57735,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.589256,1.020621,0.0,1.767767,0.0,0.0,0.0,0.0,9.333333,2.309401,8.0,12.0,9.791667,3.298516,6.0,12.0,0.0,0.0,0.0,0.0,3.209241,0.822291,2.263846,3.758324,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.545139,0.109416,0.4375,0.65625,0.333333,0.57735,0.0,1.0,0.330321,0.041186,0.300463,0.377308,8.333333,3.21455,6.0,12.0,0.0,0.0,0.0,0.0,10.0,3.464102,6.0,12.0,0.027778,0.048113,0.0,0.083333,10.0,3.464102,6.0,12.0,0,1.0,1.001952,1.118651,1.112651,,1.212159
2,100004,1,1,1,1,0,11.119898,11.813037,8.817446,11.813037,6,7,4,3,1,0.010032,635.0,8.0,142.0,84.0,26.0,1,1,1,1,1,0,8,1.0,2,2,1,9,0,0,0,0,0,0,11,-9.0,0.555912,0.729567,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,0,0,-99.0,6,1,0.0,0.0,0.0,0.0,27.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.1,2.0,0.011814,1.0,2,0,1,0,1,1,11.456566,0.000283,11.456366,11.456766,0.333333,0.0,0.333333,0.333333,0.0,0.0,0.0,0.0,532.5,212.839141,382.0,683.0,0.5,0.707107,0.0,1.0,867.0,649.124025,408.0,1326.0,0.0,0.0,0.0,0.0,488.5,150.613744,382.0,595.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,-99.0,-99.0,-99.0,-99.0,2.0,0.0,2.0,2.0,-99.0,-99.0,-99.0,-99.0,25.0,0.0,25.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.240741,0.0,0.240741,0.240741,0.0,0.0,0.0,0.0,0.025641,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,43.5,61.51829,0.0,87.0,0.0,0.0,0.0,0.0,44.0,62.225397,0.0,88.0,0.0,-99.0,0.0,0.0,-378.5,498.510281,-731.0,-26.0,-334.5,436.284884,-643.0,-26.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,532.0,212.132034,382.0,682.0,2.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-99.0,0.0,0.0,2,1.0,1,1.0,3,1.0,2.0,-99.0,-99.0,-99.0,-99.0,0.0,-99.0,0.0,0.0,30.0,-99.0,30.0,30.0,1.0,-99.0,1.0,1.0,694.0,-99.0,694.0,694.0,8.488999,-99.0,8.488999,8.488999,-60.0,-99.0,-60.0,-60.0,0.0,-99.0,0.0,0.0,0.212008,-99.0,0.212008,0.212008,0.828021,-99.0,0.828021,0.828021,714.0,-99.0,714.0,714.0,-10.0,-99.0,-10.0,-10.0,0.200148,-99.0,0.200148,0.200148,-99.0,-99.0,-99.0,-99.0,1.0,-99.0,1.0,1.0,-90.0,-99.0,-90.0,-90.0,784.0,-99.0,784.0,784.0,10.097532,-99.0,10.097532,10.097532,1.0,-99.0,1.0,1.0,1.0,-99.0,1.0,1.0,5.0,-99.0,5.0,5.0,0.0,-99.0,0.0,0.0,4.0,-99.0,4.0,4.0,-99.0,-99.0,-99.0,-99.0,1.0,-99.0,1.0,1.0,724.0,-99.0,724.0,724.0,1.0,-99.0,1.0,1.0,1.0,-99.0,1.0,1.0,-99.0,-99.0,-99.0,-99.0,9.908823,-99.0,9.908823,9.908823,8.586393,-99.0,8.586393,8.586393,0.0,-99.0,0.0,0.0,10.097532,-99.0,10.097532,10.097532,815.0,-99.0,815.0,815.0,1.0,-99.0,1.0,1.0,-70.0,-99.0,-70.0,-70.0,1,1.0,0,1.0,2,1.0,24,1.0,0,1.0,0,1.0,8,1.0,6,1.0,17,1.0,3,1.0,1,1.0,6,1.0,2,1.0,5,1.0,14,1.0,2,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0,2.0,,2.0,2.0,-727.0,,-727.0,-727.0,0.57735,,0.57735,0.57735,-795.0,,-795.0,-795.0,34.019602,,34.019602,34.019602,8.586393,,8.586393,8.586393,3.0,,3.0,3.0,11.0,,11.0,11.0,4.163332,,4.163332,4.163332,7.666667,,7.666667,7.666667,1.0,,1.0,1.0,9.266245,,9.266245,9.266245,1.333333,,1.333333,1.333333,30.0,,30.0,30.0,0.0,,0.0,0.0,-784.0,,-784.0,-784.0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.392513,,0.392513,0.392513,-724.0,,-724.0,-724.0,8.586393,,8.586393,8.586393,-754.0,,-754.0,-754.0,1.0,,1.0,1.0,3.0,,3.0,3.0,9.266245,,9.266245,9.266245,1.0,,1.0,1.0,8.81301,,8.81301,8.81301,8.81301,,8.81301,8.81301,1.0,,1.0,1.0,0.392513,,0.392513,0.392513,-761.666667,,-761.666667,-761.666667,2.25,,2.25,2.25,0.0,,0.0,0.0,2.0,,2.0,2.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.5,,0.5,0.5,0.0,,0.0,0.0,4.0,,4.0,4.0,3.75,,3.75,3.75,0.0,,0.0,0.0,1.707825,,1.707825,1.707825,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1.0,1.0,0.5625,,0.5625,0.5625,0.0,,0.0,0.0,0.426956,,0.426956,0.426956,3.0,,3.0,3.0,0.0,,0.0,0.0,4.0,,4.0,4.0,0.0,,0.0,0.0,4.0,,4.0,4.0,0,1.0,1.026909,1.192174,1.169894,,1.031115
3,100006,0,0,0,1,0,11.813037,12.652947,10.298481,12.601491,6,7,4,0,1,0.008019,634.0,101.0,328.0,81.0,-9.0,1,1,0,1,0,0,8,2.0,2,2,6,17,0,0,0,0,0,0,5,-9.0,0.650442,-9.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,0,0,-99.0,6,1,2.0,0.0,2.0,0.0,21.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,2.316167,0.2199,2.2,0.159905,2.0,1,1,1,0,0,1,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,4,-99.0,0,-99.0,11,-99.0,-99.0,-99.0,0.0,-99.0,-99.0,0.0,0.0,0.0,0.0,894.222222,2674.06132,-1.0,8025.0,1.0,0.0,1.0,1.0,-64.111111,104.666667,-99.0,215.0,-74.887647,47.853211,-99.0,11.112269,-60.0,84.852814,-120.0,0.0,0.0,0.0,0.0,0.0,-76.963686,43.726963,-99.0,0.21783,1.012684,0.174896,0.799989,1.316797,-14.888889,180.318083,-99.0,416.0,-8.5,0.707107,-9.0,-8.0,0.150045,0.070663,0.100078,0.200011,-99.0,0.0,-99.0,-99.0,1.0,0.0,1.0,1.0,-330.0,-99.0,-330.0,-330.0,42.555556,234.865446,-99.0,545.0,-24.631123,55.784879,-99.0,13.442272,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,14.666667,1.0,12.0,15.0,-55.0,52.177581,-99.0,0.0,-17.666667,63.061478,-99.0,48.0,-99.0,-99.0,-99.0,-99.0,9.0,0.0,9.0,9.0,-13.0,183.885834,-99.0,425.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-99.0,0.0,-99.0,-99.0,8.369355,6.360517,0.0,13.717474,-26.478805,54.397001,-99.0,10.595522,0.0,0.0,0.0,0.0,8.368877,6.349295,0.0,13.442272,272.444444,157.236375,181.0,617.0,9.0,0.0,9.0,9.0,-68.5,85.559921,-129.0,-8.0,0,3.0,4,4.0,2,1.0,25,2.0,0,3.0,4,2.0,8,2.0,7,3.0,26,3.0,2,4.0,1,2.0,5,3.0,11,3.0,1,4.0,2,7.0,2,2.0,9.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.666667,0.57735,1.0,2.0,-208.0,215.02558,-438.0,-12.0,0.223607,0.316228,0.0,0.447214,-344.666667,207.051523,-575.0,-174.0,74.169251,25.900022,55.85517,92.483332,10.513554,2.822227,7.817593,13.447035,5.333333,4.50925,1.0,10.0,37.666667,34.42867,13.0,77.0,15.776745,17.180085,3.62859,27.924899,25.3,22.040191,4.5,48.4,1.0,0.0,1.0,1.0,11.116509,2.04426,9.626458,13.447035,1.4,0.52915,1.0,2.0,69.131838,30.685143,47.434165,90.829511,0.0,0.0,0.0,0.0,-325.666667,200.761882,-545.0,-151.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.404475,0.572013,0.0,0.808949,-195.666667,210.583317,-425.0,-11.0,10.513554,2.822227,7.817593,13.447035,-260.666667,194.29445,-485.0,-146.0,1.333333,0.57735,1.0,2.0,12.333333,11.015141,1.0,23.0,11.116509,2.04426,9.626458,13.447035,1.0,0.0,1.0,1.0,10.634145,2.65203,8.179366,13.447035,10.634145,2.65203,8.179366,13.447035,1.0,0.0,1.0,1.0,0.404475,0.572013,0.0,0.808949,-285.966667,214.60546,-533.4,-150.5,10.208333,5.019234,7.125,16.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.125278,14.820081,0.0,27.135463,0.0,0.0,0.0,0.0,7.0,3.605551,3.0,10.0,12.972222,3.316974,10.25,16.666667,0.0,0.0,0.0,0.0,11.804572,13.801489,3.02765,27.712813,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.517361,0.160137,0.333333,0.625,1.0,1.732051,0.0,3.0,0.406364,0.163183,0.252304,0.57735,6.0,5.567764,1.0,12.0,0.0,0.0,0.0,0.0,24.0,20.78461,12.0,48.0,0.083333,0.144338,0.0,0.25,24.0,20.78461,12.0,48.0,0,1.0,-0.388933,1.511819,-0.511608,,
4,100007,0,1,0,1,0,11.707678,13.148033,9.992711,13.148033,6,7,4,3,1,0.028663,664.0,101.0,144.0,115.0,-9.0,1,1,0,1,0,0,3,1.0,2,2,4,11,0,0,0,0,1,1,37,-9.0,0.322738,-9.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,0,0,-99.0,6,1,0.0,0.0,0.0,0.0,37.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.222222,0.179963,4.222222,0.152418,1.0,1,1,1,0,1,1,11.89308,-99.0,11.89308,11.89308,0.333333,-99.0,0.333333,0.333333,0.0,-99.0,0.0,0.0,783.0,-99.0,783.0,783.0,0.0,-99.0,0.0,0.0,1149.0,-99.0,1149.0,1149.0,0.0,-99.0,0.0,0.0,783.0,-99.0,783.0,783.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,1.0,-99.0,1.0,1.0,0.0,-99.0,0.0,0.0,-99.0,-99.0,-99.0,-99.0,1.0,-99.0,1.0,1.0,-99.0,-99.0,-99.0,-99.0,25.0,-99.0,25.0,25.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.240741,-99.0,0.240741,0.240741,0.0,-99.0,0.0,0.0,0.025641,-99.0,0.025641,0.025641,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,-366.0,-99.0,-366.0,-366.0,-366.0,-99.0,-366.0,-366.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,783.0,-99.0,783.0,783.0,1.0,-99.0,1.0,1.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,0.0,-99.0,0.0,0.0,2,1.0,1,1.0,3,1.0,1.0,-99.0,0.0,-99.0,-99.0,0.0,0.0,0.0,0.0,409.166667,612.914812,-1.0,1200.0,1.0,0.0,1.0,1.0,722.333333,866.541786,-99.0,2056.0,-63.291487,55.319417,-99.0,8.209988,-352.5,89.582364,-480.0,-270.0,0.0,0.0,0.0,0.0,-65.946828,51.205768,-99.0,0.21889,1.046356,0.137998,0.85093,1.264,721.0,858.414352,-99.0,2041.0,-9.5,3.785939,-15.0,-7.0,0.157094,0.080538,0.100145,0.214042,-99.0,0.0,-99.0,-99.0,1.0,0.0,1.0,1.0,-360.0,103.923048,-510.0,-270.0,1036.166667,890.347442,-99.0,2326.0,11.525923,1.16581,9.751356,12.41917,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,12.333333,2.503331,8.0,15.0,-16.0,40.664481,-99.0,1.0,20.666667,14.348054,10.0,48.0,-99.0,-99.0,-99.0,-99.0,6.0,0.0,6.0,6.0,727.333333,863.866116,-99.0,2056.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-99.0,0.0,-99.0,-99.0,11.563999,1.274969,9.589941,12.55814,9.100286,1.005831,7.514958,10.029229,0.0,0.0,0.0,0.0,11.525923,1.16581,9.751356,12.41917,1222.833333,717.268546,374.0,2357.0,6.0,0.0,6.0,6.0,-362.0,87.001916,-487.0,-285.0,0,2.0,3,5.0,2,1.0,25,2.0,0,1.0,0,2.0,8,1.0,1,3.0,26,2.0,2,2.0,3,3.0,4,4.0,4,3.0,2,2.0,8,4.0,1,2.0,6.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.2,0.447214,1.0,2.0,-917.4,812.989422,-2044.0,-14.0,0.098518,0.220294,0.0,0.492592,-1270.4,772.12227,-2318.0,-348.0,115.159539,21.157827,91.408485,149.323376,6.044626,4.223975,0.165514,9.683283,13.2,2.588436,10.0,17.0,16.8,14.720734,-1.0,31.0,5.780185,3.985962,1.422226,11.311037,2.88722,4.986433,-4.8,8.705882,0.956044,0.064546,0.857143,1.0,9.025619,1.105805,7.514958,10.029229,1.129412,0.289374,1.0,1.647059,115.050659,22.235773,90.829511,151.492574,0.127996,0.177877,0.0,0.362935,-1263.2,777.378415,-2326.0,-344.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.920105,1.286806,0.0,2.693002,-915.2,821.286308,-2056.0,-14.0,9.015482,1.103123,7.508118,10.020679,-1087.881319,795.134247,-2191.0,-179.0,1.0,0.0,1.0,1.0,-3.8,5.674504,-12.0,2.0,9.025619,1.105805,7.514958,10.029229,0.600214,0.54743,5e-05,1.0,8.708587,1.273506,7.133929,9.710083,9.02482,1.105814,7.514274,10.028572,1.0,0.0,1.0,1.0,0.002783,0.00252,0.0,0.006906,-1090.768539,792.114868,-2186.2,-181.25,8.966667,5.489131,5.5,18.5,0.0,0.0,0.0,0.0,1.8,0.83666,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.097014,0.21693,0.0,0.485071,0.0,0.0,0.0,0.0,13.2,2.949576,10.0,18.0,15.066667,5.688194,10.0,24.0,0.0,0.0,0.0,0.0,4.032817,1.018887,3.02765,5.742002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.564711,0.117175,0.5,0.770833,2.8,5.718391,0.0,13.0,0.283749,0.075166,0.150231,0.324537,15.0,5.656854,10.0,24.0,0.0,0.0,0.0,0.0,15.2,5.761944,10.0,24.0,0.128333,0.235083,0.0,0.541667,15.2,5.761944,10.0,24.0,0,1.0,1.098066,1.13698,1.140736,,1.10552


In [126]:
# partitioning
train = appl[appl["SK_ID_CURR"].isin(y["SK_ID_CURR"]) == True]
test  = appl[appl["SK_ID_CURR"].isin(y["SK_ID_CURR"]) == False]
del appl

In [127]:
# check dimensions
print(train.shape)
print(test.shape)

(307511, 695)
(48744, 695)


# 5. DATA EXPORT

In [128]:
# export CSV
train.to_csv("../data/prepared/train_red.csv", index = False, float_format = "%.8f")
test.to_csv("../data/prepared/test_red.csv",   index = False, float_format = "%.8f")
y.to_csv("../data/prepared/y_red.csv",         index = False, float_format = "%.8f")