# 1. SETTINGS

In [1]:
# libraries
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt

In [2]:
# pandas options
pd.set_option("display.max_columns", None)

In [3]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [4]:
# garbage collection
import gc
gc.enable()

# 2. FUNCTIONS

In [5]:
##### FUNCTION FOR COUNTING MISSINGS
def count_missings(data):
    total = data.isnull().sum().sort_values(ascending = False)
    percent = (data.isnull().sum() / data.isnull().count() * 100).sort_values(ascending = False)
    table = pd.concat([total, percent], axis = 1, keys = ["Total", "Percent"])
    table = table[table["Total"] > 0]
    return table

In [6]:
##### FUNCTION FOR CREATING LOGARITHMS
def convert_days(data, features, t = 12, rounding = True, replace = False):
    for var in features:
        if replace == True:
            if rounding == True:
                data[var] = round(-data[var]/t)
            else:
                data[var] = -data[var]/t
            data[var][data[var] < 0] = None
        else:
            if rounding == True:
                data["CONVETRTED_" + str(var)] = round(-data[var]/t)
            else:
                data["CONVETRTED_" + str(var)] = -data[var]/t
            data["CONVETRTED_" + str(var)][data["CONVETRTED_" + str(var)] < 0] = None
    return data

In [7]:
##### FUNCTION FOR CREATING LOGARITHMS
def create_logs(data, features, replace = False):
    for var in features:
        if replace == True:
            data[var] = np.log(data[var].abs() + 1)
        else:
            data["LOG_" + str(var)] = np.log(data[var].abs() + 1)      
    return data

In [8]:
##### FUNCTION FOR CREATING FLAGS FOR MISSINGS
def create_null_flags(data, features = None):
    if features == None:
        features = data.columns
    for var in features:
        num_null = data[var].isnull() + 0
        if num_null.sum() > 0:
            data["ISNULL_" + str(var)] = num_null
    return data

In [9]:
##### FUNCTION FOR TREATING FACTORS
def treat_factors(data, method = "label"):
    
    # label encoding
    if method == "label":
        factors = [f for f in data.columns if data[f].dtype == "object"]
        for var in factors:
            data[var], _ = pd.factorize(data[var])
        
    # dummy encoding
    if method == "dummy":
        data = pd.get_dummies(data, drop_first = True)
    
    # dataset
    return data

In [10]:
##### FUNCTION FOR COMPUTING ACCEPT/REJECT RATIOS
def compute_accept_reject_ratio(data, lags = [1, 3, 5]):
    
    # preparations
    dec_prev = data[["SK_ID_CURR", "SK_ID_PREV", "DAYS_DECISION", "NAME_CONTRACT_STATUS"]]
    dec_prev["DAYS_DECISION"] = -dec_prev["DAYS_DECISION"]
    dec_prev = dec_prev.sort_values(by = ["SK_ID_CURR", "DAYS_DECISION"])
    dec_prev = pd.get_dummies(dec_prev)
     
    # compuatation
    for t in lags:
        
        # acceptance ratios
        tmp = dec_prev[["SK_ID_CURR", "NAME_CONTRACT_STATUS_Approved"]].groupby(["SK_ID_CURR"]).head(1)
        tmp = tmp.groupby(["SK_ID_CURR"], as_index = False).mean()
        tmp.columns = ["SK_ID_CURR", "APPROVE_RATIO_" + str(t)]
        data = data.merge(tmp, how = "left", on = "SK_ID_CURR")
        
        # rejection ratios
        tmp = dec_prev[["SK_ID_CURR", "NAME_CONTRACT_STATUS_Refused"]].groupby(["SK_ID_CURR"]).head(1)
        tmp = tmp.groupby(["SK_ID_CURR"], as_index = False).mean()
        tmp.columns = ["SK_ID_CURR", "REJECT_RATIO_" + str(t)]
        data = data.merge(tmp, how = "left", on = "SK_ID_CURR")
        
    # dataset
    return data

In [11]:
##### FUNCTION FOR AGGREGATING FEATURES
def aggregate_data(data, id_var, label = None, stats = ["mean", "std", "min", "max"]):
    
    
    ### SEPARATE NUMERIC AND FACTORS
  
    # display info
    print("- Preparing the dataset...")
      
    # find factors
    data_factors = [f for f in data.columns if data[f].dtype == "object"]
    
    # partition subsets
    num_data = data[list(set(data.columns) - set(data_factors))]
    fac_data = data[[id_var] + data_factors]
    
    # display info
    num_facs = fac_data.shape[1] - 1
    num_nums = num_data.shape[1] - 1
    print("- Extracted %.0f factors and %.0f numerics..." % (num_facs, num_nums))


    ##### AGGREGATION
 
    # aggregate numerics
    if (num_nums > 0):
        print("- Aggregating numeric features...")
        num_data = num_data.groupby(id_var).agg(stats)
        num_data.columns = ["_".join(col).strip() for col in num_data.columns.values]
        num_data = num_data.sort_index()

    # aggregate factors
    if (num_facs > 0):
        print("- Aggregating factor features...")
        fac_data = fac_data.groupby(id_var).agg([("mode",    lambda x: scipy.stats.mode(x)[0][0]),
                                                 ("nunique", lambda x: x.nunique())])
        fac_data.columns = ["_".join(col).strip() for col in fac_data.columns.values]
        fac_data = fac_data.sort_index()


    ##### MERGER

    # merge numerics and factors
    if ((num_facs > 0) & (num_nums > 0)):
        agg_data = pd.concat([num_data, fac_data], axis = 1)
    
    # use factors only
    if ((num_facs > 0) & (num_nums == 0)):
        agg_data = fac_data
        
    # use numerics only
    if ((num_facs == 0) & (num_nums > 0)):
        agg_data = num_data
        

    ##### LAST STEPS

    # update labels
    if label != None:
        agg_data.columns = [label + "_" + str(col) for col in agg_data.columns]

    # display info
    print("- Final dimensions:", agg_data.shape)
    
    # return dataset
    return agg_data

# 3. DATA CHECKS

In [12]:
# import data
train = pd.read_csv("../data/raw/application_train.csv")
test  = pd.read_csv("../data/raw/application_test.csv")
buro  = pd.read_csv("../data/raw/bureau.csv")
bbal  = pd.read_csv("../data/raw/bureau_balance.csv")
prev  = pd.read_csv("../data/raw/previous_application.csv")
card  = pd.read_csv("../data/raw/credit_card_balance.csv")
poca  = pd.read_csv("../data/raw/POS_CASH_balance.csv")
inst  = pd.read_csv("../data/raw/installments_payments.csv")

In [13]:
# check dimensions
print("Application:", train.shape, test.shape)
print("Buro:", buro.shape)
print("Bbal:", bbal.shape)
print("Prev:", prev.shape)
print("Card:", card.shape)
print("Poca:", poca.shape)
print("Inst:", inst.shape)

Application: (307511, 122) (48744, 121)
Buro: (1716428, 17)
Bbal: (27299925, 3)
Prev: (1670214, 37)
Card: (3840312, 23)
Poca: (10001358, 8)
Inst: (13605401, 8)


In [14]:
### CHECK LOAN ID DISTRIBUTION

# check unique IDs
print("IDs in TRAIN:", train.SK_ID_CURR.nunique())
print("IDs in TEST:",  test.SK_ID_CURR.nunique())
print("IDs in BURO:",  buro.SK_ID_CURR.nunique())
print("IDs in PREV:",  prev.SK_ID_CURR.nunique())
print("IDs in CARD:",  card.SK_ID_CURR.nunique())
print("IDs in POCA:",  poca.SK_ID_CURR.nunique())
print("IDs in INST:",  inst.SK_ID_CURR.nunique())
print("")

# check current loan differences
print("IDs in TRAIN but not in BURO:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(buro.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in PREV:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(prev.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in CARD:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(card.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in POCA:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("IDs in TRAIN but not in INST:", 
      len(list(set(train.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("")

# check current loan differences
print("IDs in TEST but not in BURO:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(buro.SK_ID_CURR.unique()))))
print("IDs in TEST but not in PREV:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(prev.SK_ID_CURR.unique()))))
print("IDs in TEST but not in CARD:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(card.SK_ID_CURR.unique()))))
print("IDs in TEST but not in POCA:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("IDs in TEST but not in INST:", 
      len(list(set(test.SK_ID_CURR.unique()) - set(poca.SK_ID_CURR.unique()))))
print("")

print("IDs in TRAIN and BURO:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()))))
print("IDs in TRAIN and PREV:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()))))
print("IDs in TRAIN and CARD:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(card.SK_ID_CURR.unique())))) 
print("IDs in TRAIN and POCA:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(poca.SK_ID_CURR.unique())))) 
print("IDs in TRAIN and INST:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique())))) 
print("IDs in TRAIN and BURO and PREV:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()), 
                            set(prev.SK_ID_CURR.unique()))))
print("IDs in TRAIN and PREV and POCA and INST:", 
       len(set.intersection(set(train.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()), 
                            set(poca.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique()))))
print("")

print("IDs in TEST and BURO:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()))))
print("IDs in TEST and PREV:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()))))
print("IDs in TEST and CARD:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(card.SK_ID_CURR.unique())))) 
print("IDs in TEST and POCA:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(poca.SK_ID_CURR.unique())))) 
print("IDs in TEST and INST:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique())))) 
print("IDs in TEST and BURO and PREV:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(buro.SK_ID_CURR.unique()), 
                            set(prev.SK_ID_CURR.unique()))))
print("IDs in TEST and PREV and POCA and INST:", 
       len(set.intersection(set(test.SK_ID_CURR.unique()), set(prev.SK_ID_CURR.unique()), 
                            set(poca.SK_ID_CURR.unique()), set(inst.SK_ID_CURR.unique()))))

IDs in TRAIN: 307511
IDs in TEST: 48744
IDs in BURO: 305811
IDs in PREV: 338857
IDs in CARD: 103558
IDs in POCA: 337252
IDs in INST: 339587

IDs in TRAIN but not in BURO: 44020
IDs in TRAIN but not in PREV: 16454
IDs in TRAIN but not in CARD: 220606
IDs in TRAIN but not in POCA: 18067
IDs in TRAIN but not in INST: 18067

IDs in TEST but not in BURO: 6424
IDs in TEST but not in PREV: 944
IDs in TEST but not in CARD: 32091
IDs in TEST but not in POCA: 936
IDs in TEST but not in INST: 936

IDs in TRAIN and BURO: 263491
IDs in TRAIN and PREV: 291057
IDs in TRAIN and CARD: 86905
IDs in TRAIN and POCA: 289444
IDs in TRAIN and INST: 291643
IDs in TRAIN and BURO and PREV: 249507
IDs in TRAIN and PREV and POCA and INST: 288028

IDs in TEST and BURO: 42320
IDs in TEST and PREV: 47800
IDs in TEST and CARD: 16653
IDs in TEST and POCA: 47808
IDs in TEST and INST: 47944
IDs in TEST and BURO and PREV: 41584
IDs in TEST and PREV and POCA and INST: 47537


# 4. PREPROCESSING

## 4.1. APPLICATION DATA

In [12]:
# import data
train = pd.read_csv("../data/raw/application_train.csv")
test  = pd.read_csv("../data/raw/application_test.csv")
print("Dimensions:", train.shape, test.shape)

Dimensions: (307511, 122) (48744, 121)


In [13]:
# extract target
y = train[["SK_ID_CURR", "TARGET"]]
del train["TARGET"]

In [14]:
# concatenate application data
appl = pd.concat([train, test])
del train, test

In [15]:
# check data
appl.head()

Unnamed: 0,SK_ID_CURR,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,-9461,-637,-3648.0,-2120,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.0252,0.0383,0.9722,0.6341,0.0144,0.0,0.069,0.0833,0.125,0.0377,0.022,0.0198,0.0,0.0,0.025,0.0369,0.9722,0.6243,0.0144,0.0,0.069,0.0833,0.125,0.0375,0.0205,0.0193,0.0,0.0,reg oper account,block of flats,0.0149,"Stone, brick",No,2.0,2.0,2.0,2.0,-1134.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.003541,-16765,-1188,-1186.0,-291,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.804,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.079,0.0554,0.0,0.0,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.01,reg oper account,block of flats,0.0714,Block,No,1.0,0.0,1.0,0.0,-828.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,-19046,-225,-4260.0,-2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,0.555912,0.729567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,-19005,-3039,-9833.0,-2437,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,0.650442,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,-617.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,
4,100007,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,-19932,-3038,-4311.0,-3458,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,0.322738,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# count missings
nas = count_missings(appl)
nas

Unnamed: 0,Total,Percent
COMMONAREA_MEDI,248360,69.714109
COMMONAREA_AVG,248360,69.714109
COMMONAREA_MODE,248360,69.714109
NONLIVINGAPARTMENTS_MODE,246861,69.293343
NONLIVINGAPARTMENTS_MEDI,246861,69.293343
NONLIVINGAPARTMENTS_AVG,246861,69.293343
FONDKAPREMONT_MODE,243092,68.235393
LIVINGAPARTMENTS_MEDI,242979,68.203674
LIVINGAPARTMENTS_MODE,242979,68.203674
LIVINGAPARTMENTS_AVG,242979,68.203674


In [17]:
# print factor levels
facs =  [f for f in appl.columns if appl[f].dtype == "object"]
for var in facs:
    print("----------")
    print(appl[var].value_counts())
    print(appl[var].isnull().sum(), " null values")
    print("")

----------
Cash loans         326537
Revolving loans     29718
Name: NAME_CONTRACT_TYPE, dtype: int64
0  null values

----------
F      235126
M      121125
XNA         4
Name: CODE_GENDER, dtype: int64
0  null values

----------
N    235235
Y    121020
Name: FLAG_OWN_CAR, dtype: int64
0  null values

----------
Y    246970
N    109285
Name: FLAG_OWN_REALTY, dtype: int64
0  null values

----------
Unaccompanied      288253
Family              46030
Spouse, partner     12818
Children             3675
Other_B              1981
Other_A               975
Group of people       320
Name: NAME_TYPE_SUITE, dtype: int64
2203  null values

----------
Working                 183307
Commercial associate     83019
Pensioner                64635
State servant            25235
Unemployed                  23
Student                     20
Businessman                 11
Maternity leave              5
Name: NAME_INCOME_TYPE, dtype: int64
0  null values

----------
Secondary / secondary special    252379

In [18]:
##### FEATURE ENGINEERING

# income ratios
appl["CREDIT_BY_INCOME"]      = appl["AMT_CREDIT"]      / appl["AMT_INCOME_TOTAL"]
appl["ANNUITY_BY_INCOME"]     = appl["AMT_ANNUITY"]     / appl["AMT_INCOME_TOTAL"]
appl["GOODS_PRICE_BY_INCOME"] = appl["AMT_GOODS_PRICE"] / appl["AMT_INCOME_TOTAL"]

# career ratio
appl["PERCENT_WORKED"] = appl["DAYS_EMPLOYED"] / appl["DAYS_BIRTH"]
appl["PERCENT_WORKED"][appl["PERCENT_WORKED"] < 0] = None

# number of adults
appl["CNT_ADULTS"] = appl["CNT_FAM_MEMBERS"] - appl["CNT_CHILDREN"]

# external sources
#appl["EXT_SOURCE_MEAN"] = appl[["EXT_SOURCE_1", "EXT_SOURCE_1", "EXT_SOURCE_3"]].mean(axis = 1)
#appl["EXT_SOURCE_SD"]   = appl[["EXT_SOURCE_1", "EXT_SOURCE_1", "EXT_SOURCE_3"]].std(axis = 1)
appl["NUM_EXT_SOURCES"] = 3 - (appl["EXT_SOURCE_1"].isnull().astype(int) +
                               appl["EXT_SOURCE_2"].isnull().astype(int) +
                               appl["EXT_SOURCE_3"].isnull().astype(int))

# number of documents
doc_vars = ["FLAG_DOCUMENT_2",  "FLAG_DOCUMENT_3",  "FLAG_DOCUMENT_4",  "FLAG_DOCUMENT_5",  "FLAG_DOCUMENT_6",
            "FLAG_DOCUMENT_7",  "FLAG_DOCUMENT_8",  "FLAG_DOCUMENT_9",  "FLAG_DOCUMENT_10", "FLAG_DOCUMENT_11",
            "FLAG_DOCUMENT_12", "FLAG_DOCUMENT_13", "FLAG_DOCUMENT_14", "FLAG_DOCUMENT_15", "FLAG_DOCUMENT_16",
            "FLAG_DOCUMENT_17", "FLAG_DOCUMENT_18", "FLAG_DOCUMENT_19", "FLAG_DOCUMENT_20", "FLAG_DOCUMENT_21"]
appl["NUM_DOCUMENTS"] = appl[doc_vars].sum(axis = 1)

# application date
appl["DAY_APPR_PROCESS_START"] = "Working day"
appl["DAY_APPR_PROCESS_START"][(appl["WEEKDAY_APPR_PROCESS_START"] == "SATURDAY") |
                               (appl["WEEKDAY_APPR_PROCESS_START"] == "SUNDAY")] = "Weekend"

# logarithms
log_vars = ["AMT_CREDIT", "AMT_INCOME_TOTAL", "AMT_GOODS_PRICE", "AMT_ANNUITY"]
appl = create_logs(appl, log_vars, replace = True)

# convert days
day_vars = ["DAYS_BIRTH", "DAYS_REGISTRATION", "DAYS_ID_PUBLISH", "DAYS_EMPLOYED", "DAYS_LAST_PHONE_CHANGE"]
appl = convert_days(appl, day_vars, t = 30, rounding = True, replace = True)



##### FEATURE REMOVAL
drops = ['APARTMENTS_MEDI', 'BASEMENTAREA_MEDI', 'COMMONAREA_MEDI', 'ELEVATORS_MEDI', 'ENTRANCES_MEDI', 
         'FLOORSMAX_MEDI', 'FLOORSMIN_MEDI', 'LANDAREA_MEDI', 'LIVINGAPARTMENTS_MEDI', 'LIVINGAREA_MEDI',
         'NONLIVINGAPARTMENTS_MEDI', 'NONLIVINGAREA_MEDI','YEARS_BEGINEXPLUATATION_MEDI', 'YEARS_BUILD_MEDI',
         'APARTMENTS_MODE', 'BASEMENTAREA_MODE', 'COMMONAREA_MODE','ELEVATORS_MODE', 'ENTRANCES_MODE', 
         'FLOORSMAX_MODE', 'FLOORSMIN_MODE', 'LANDAREA_MODE', 'LIVINGAPARTMENTS_MODE', 'LIVINGAREA_MODE', 
         'NONLIVINGAPARTMENTS_MODE', 'NONLIVINGAREA_MODE', 'TOTALAREA_MODE',  'YEARS_BEGINEXPLUATATION_MODE']
appl = appl.drop(columns = drops)



##### ADDING NULL FLAGS

# all null flags
#appl = create_null_flags(appl)

# buro enquiry flag
buro_vars = ["AMT_REQ_CREDIT_BUREAU_YEAR", "AMT_REQ_CREDIT_BUREAU_QRT", 
             "AMT_REQ_CREDIT_BUREAU_MON",  "AMT_REQ_CREDIT_BUREAU_WEEK", 
             "AMT_REQ_CREDIT_BUREAU_DAY",  "AMT_REQ_CREDIT_BUREAU_HOUR"]
appl["ISNULL_BURO_ENQUIRIES"] = (appl[buro_vars].isnull().sum(axis = 1) == 0).astype(int)

# social circle flag
social_vars = ["OBS_30_CNT_SOCIAL_CIRCLE", "DEF_30_CNT_SOCIAL_CIRCLE", "OBS_60_CNT_SOCIAL_CIRCLE", 
               "DEF_60_CNT_SOCIAL_CIRCLE", "AMT_GOODS_PRICE"]
appl["ISNULL_SOCIAL_CIRCLE"] = (appl[social_vars].isnull().sum(axis = 1) == 0).astype(int)



##### TREAT FACTORS

# merge some levels
#appl["CODE_GENDER"][appl["CODE_GENDER"] == "XNA"] = "F"
#appl["NAME_TYPE_SUITE"][(appl["NAME_TYPE_SUITE"] == "Other_B") | 
#                        (appl["NAME_TYPE_SUITE"] == "Other_A") |
#                        (appl["NAME_TYPE_SUITE"] == None)] = "Other"
#appl["NAME_INCOME_TYPE"][appl["NAME_INCOME_TYPE"] == "Businessman"] = "Working"
#appl["NAME_INCOME_TYPE"][(appl["NAME_INCOME_TYPE"] == "Student") |
#                         (appl["NAME_INCOME_TYPE"] == "Maternity leave")] = "Unemployed"
#appl["NAME_FAMILY_STATUS"][appl["NAME_FAMILY_STATUS"] == None] = "Unknown"
#appl["OCCUPATION_TYPE"][appl["OCCUPATION_TYPE"] == "IT staff"] = "High skill tech staff"
#appl["OCCUPATION_TYPE"][appl["OCCUPATION_TYPE"] == "HR staff"] = "Secretaries"
#appl["OCCUPATION_TYPE"][appl["NAME_INCOME_TYPE"] == "Unemployed"] = "No occupation"
#appl["ORGANIZATION_TYPE"][(appl["ORGANIZATION_TYPE"] == "XNA")] = "Other"
#appl["ORGANIZATION_TYPE"][(appl["ORGANIZATION_TYPE"] == "Business Entity Type 1") |
#                          (appl["ORGANIZATION_TYPE"] == "Business Entity Type 2") |
#                          (appl["ORGANIZATION_TYPE"] == "Business Entity Type 3")] = "Business Entity"
#appl["ORGANIZATION_TYPE"][(appl["ORGANIZATION_TYPE"] == "Transport: type 1") |
#                          (appl["ORGANIZATION_TYPE"] == "Transport: type 2") |
#                          (appl["ORGANIZATION_TYPE"] == "Transport: type 3") |
#                          (appl["ORGANIZATION_TYPE"] == "Transport: type 4")] = "Transport"
#appl["ORGANIZATION_TYPE"][(appl["ORGANIZATION_TYPE"] == "Industry: type 1") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 2") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 3") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 4") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 5") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 6") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 7") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 8") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 9") |
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 10")|
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 11")|
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 12")|
#                          (appl["ORGANIZATION_TYPE"] == "Industry: type 13")] = "Industry"
#appl["ORGANIZATION_TYPE"][(appl["ORGANIZATION_TYPE"] == "Trade: type 1") |
#                          (appl["ORGANIZATION_TYPE"] == "Trade: type 2") |
#                          (appl["ORGANIZATION_TYPE"] == "Trade: type 3") |
#                          (appl["ORGANIZATION_TYPE"] == "Trade: type 4") |
#                          (appl["ORGANIZATION_TYPE"] == "Trade: type 5") |
#                          (appl["ORGANIZATION_TYPE"] == "Trade: type 6") |
#                          (appl["ORGANIZATION_TYPE"] == "Trade: type 7")] = "Trade"
#appl["FONDKAPREMONT_MODE"][(appl["FONDKAPREMONT_MODE"] == None)] = "not specified"

# encoding
appl = treat_factors(appl, method = "dummy")

In [19]:
# rename features
appl = appl.reindex_axis(["SK_ID_CURR"] + list([col for col in appl.columns if col not in "SK_ID_CURR"]), axis = 1)
appl.columns = ["SK_ID_CURR"] + ["app_" + str(col) for col in appl.columns if col not in "SK_ID_CURR"]

In [20]:
# check data
appl.head()

Unnamed: 0,SK_ID_CURR,app_CNT_CHILDREN,app_AMT_INCOME_TOTAL,app_AMT_CREDIT,app_AMT_ANNUITY,app_AMT_GOODS_PRICE,app_REGION_POPULATION_RELATIVE,app_DAYS_BIRTH,app_DAYS_EMPLOYED,app_DAYS_REGISTRATION,app_DAYS_ID_PUBLISH,app_OWN_CAR_AGE,app_FLAG_MOBIL,app_FLAG_EMP_PHONE,app_FLAG_WORK_PHONE,app_FLAG_CONT_MOBILE,app_FLAG_PHONE,app_FLAG_EMAIL,app_CNT_FAM_MEMBERS,app_REGION_RATING_CLIENT,app_REGION_RATING_CLIENT_W_CITY,app_HOUR_APPR_PROCESS_START,app_REG_REGION_NOT_LIVE_REGION,app_REG_REGION_NOT_WORK_REGION,app_LIVE_REGION_NOT_WORK_REGION,app_REG_CITY_NOT_LIVE_CITY,app_REG_CITY_NOT_WORK_CITY,app_LIVE_CITY_NOT_WORK_CITY,app_EXT_SOURCE_1,app_EXT_SOURCE_2,app_EXT_SOURCE_3,app_APARTMENTS_AVG,app_BASEMENTAREA_AVG,app_YEARS_BEGINEXPLUATATION_AVG,app_YEARS_BUILD_AVG,app_COMMONAREA_AVG,app_ELEVATORS_AVG,app_ENTRANCES_AVG,app_FLOORSMAX_AVG,app_FLOORSMIN_AVG,app_LANDAREA_AVG,app_LIVINGAPARTMENTS_AVG,app_LIVINGAREA_AVG,app_NONLIVINGAPARTMENTS_AVG,app_NONLIVINGAREA_AVG,app_YEARS_BUILD_MODE,app_OBS_30_CNT_SOCIAL_CIRCLE,app_DEF_30_CNT_SOCIAL_CIRCLE,app_OBS_60_CNT_SOCIAL_CIRCLE,app_DEF_60_CNT_SOCIAL_CIRCLE,app_DAYS_LAST_PHONE_CHANGE,app_FLAG_DOCUMENT_2,app_FLAG_DOCUMENT_3,app_FLAG_DOCUMENT_4,app_FLAG_DOCUMENT_5,app_FLAG_DOCUMENT_6,app_FLAG_DOCUMENT_7,app_FLAG_DOCUMENT_8,app_FLAG_DOCUMENT_9,app_FLAG_DOCUMENT_10,app_FLAG_DOCUMENT_11,app_FLAG_DOCUMENT_12,app_FLAG_DOCUMENT_13,app_FLAG_DOCUMENT_14,app_FLAG_DOCUMENT_15,app_FLAG_DOCUMENT_16,app_FLAG_DOCUMENT_17,app_FLAG_DOCUMENT_18,app_FLAG_DOCUMENT_19,app_FLAG_DOCUMENT_20,app_FLAG_DOCUMENT_21,app_AMT_REQ_CREDIT_BUREAU_HOUR,app_AMT_REQ_CREDIT_BUREAU_DAY,app_AMT_REQ_CREDIT_BUREAU_WEEK,app_AMT_REQ_CREDIT_BUREAU_MON,app_AMT_REQ_CREDIT_BUREAU_QRT,app_AMT_REQ_CREDIT_BUREAU_YEAR,app_CREDIT_BY_INCOME,app_ANNUITY_BY_INCOME,app_GOODS_PRICE_BY_INCOME,app_PERCENT_WORKED,app_CNT_ADULTS,app_NUM_EXT_SOURCES,app_NUM_DOCUMENTS,app_ISNULL_BURO_ENQUIRIES,app_ISNULL_SOCIAL_CIRCLE,app_NAME_CONTRACT_TYPE_Revolving loans,app_CODE_GENDER_M,app_CODE_GENDER_XNA,app_FLAG_OWN_CAR_Y,app_FLAG_OWN_REALTY_Y,app_NAME_TYPE_SUITE_Family,app_NAME_TYPE_SUITE_Group of people,app_NAME_TYPE_SUITE_Other_A,app_NAME_TYPE_SUITE_Other_B,"app_NAME_TYPE_SUITE_Spouse, partner",app_NAME_TYPE_SUITE_Unaccompanied,app_NAME_INCOME_TYPE_Commercial associate,app_NAME_INCOME_TYPE_Maternity leave,app_NAME_INCOME_TYPE_Pensioner,app_NAME_INCOME_TYPE_State servant,app_NAME_INCOME_TYPE_Student,app_NAME_INCOME_TYPE_Unemployed,app_NAME_INCOME_TYPE_Working,app_NAME_EDUCATION_TYPE_Higher education,app_NAME_EDUCATION_TYPE_Incomplete higher,app_NAME_EDUCATION_TYPE_Lower secondary,app_NAME_EDUCATION_TYPE_Secondary / secondary special,app_NAME_FAMILY_STATUS_Married,app_NAME_FAMILY_STATUS_Separated,app_NAME_FAMILY_STATUS_Single / not married,app_NAME_FAMILY_STATUS_Unknown,app_NAME_FAMILY_STATUS_Widow,app_NAME_HOUSING_TYPE_House / apartment,app_NAME_HOUSING_TYPE_Municipal apartment,app_NAME_HOUSING_TYPE_Office apartment,app_NAME_HOUSING_TYPE_Rented apartment,app_NAME_HOUSING_TYPE_With parents,app_OCCUPATION_TYPE_Cleaning staff,app_OCCUPATION_TYPE_Cooking staff,app_OCCUPATION_TYPE_Core staff,app_OCCUPATION_TYPE_Drivers,app_OCCUPATION_TYPE_HR staff,app_OCCUPATION_TYPE_High skill tech staff,app_OCCUPATION_TYPE_IT staff,app_OCCUPATION_TYPE_Laborers,app_OCCUPATION_TYPE_Low-skill Laborers,app_OCCUPATION_TYPE_Managers,app_OCCUPATION_TYPE_Medicine staff,app_OCCUPATION_TYPE_Private service staff,app_OCCUPATION_TYPE_Realty agents,app_OCCUPATION_TYPE_Sales staff,app_OCCUPATION_TYPE_Secretaries,app_OCCUPATION_TYPE_Security staff,app_OCCUPATION_TYPE_Waiters/barmen staff,app_WEEKDAY_APPR_PROCESS_START_MONDAY,app_WEEKDAY_APPR_PROCESS_START_SATURDAY,app_WEEKDAY_APPR_PROCESS_START_SUNDAY,app_WEEKDAY_APPR_PROCESS_START_THURSDAY,app_WEEKDAY_APPR_PROCESS_START_TUESDAY,app_WEEKDAY_APPR_PROCESS_START_WEDNESDAY,app_ORGANIZATION_TYPE_Agriculture,app_ORGANIZATION_TYPE_Bank,app_ORGANIZATION_TYPE_Business Entity Type 1,app_ORGANIZATION_TYPE_Business Entity Type 2,app_ORGANIZATION_TYPE_Business Entity Type 3,app_ORGANIZATION_TYPE_Cleaning,app_ORGANIZATION_TYPE_Construction,app_ORGANIZATION_TYPE_Culture,app_ORGANIZATION_TYPE_Electricity,app_ORGANIZATION_TYPE_Emergency,app_ORGANIZATION_TYPE_Government,app_ORGANIZATION_TYPE_Hotel,app_ORGANIZATION_TYPE_Housing,app_ORGANIZATION_TYPE_Industry: type 1,app_ORGANIZATION_TYPE_Industry: type 10,app_ORGANIZATION_TYPE_Industry: type 11,app_ORGANIZATION_TYPE_Industry: type 12,app_ORGANIZATION_TYPE_Industry: type 13,app_ORGANIZATION_TYPE_Industry: type 2,app_ORGANIZATION_TYPE_Industry: type 3,app_ORGANIZATION_TYPE_Industry: type 4,app_ORGANIZATION_TYPE_Industry: type 5,app_ORGANIZATION_TYPE_Industry: type 6,app_ORGANIZATION_TYPE_Industry: type 7,app_ORGANIZATION_TYPE_Industry: type 8,app_ORGANIZATION_TYPE_Industry: type 9,app_ORGANIZATION_TYPE_Insurance,app_ORGANIZATION_TYPE_Kindergarten,app_ORGANIZATION_TYPE_Legal Services,app_ORGANIZATION_TYPE_Medicine,app_ORGANIZATION_TYPE_Military,app_ORGANIZATION_TYPE_Mobile,app_ORGANIZATION_TYPE_Other,app_ORGANIZATION_TYPE_Police,app_ORGANIZATION_TYPE_Postal,app_ORGANIZATION_TYPE_Realtor,app_ORGANIZATION_TYPE_Religion,app_ORGANIZATION_TYPE_Restaurant,app_ORGANIZATION_TYPE_School,app_ORGANIZATION_TYPE_Security,app_ORGANIZATION_TYPE_Security Ministries,app_ORGANIZATION_TYPE_Self-employed,app_ORGANIZATION_TYPE_Services,app_ORGANIZATION_TYPE_Telecom,app_ORGANIZATION_TYPE_Trade: type 1,app_ORGANIZATION_TYPE_Trade: type 2,app_ORGANIZATION_TYPE_Trade: type 3,app_ORGANIZATION_TYPE_Trade: type 4,app_ORGANIZATION_TYPE_Trade: type 5,app_ORGANIZATION_TYPE_Trade: type 6,app_ORGANIZATION_TYPE_Trade: type 7,app_ORGANIZATION_TYPE_Transport: type 1,app_ORGANIZATION_TYPE_Transport: type 2,app_ORGANIZATION_TYPE_Transport: type 3,app_ORGANIZATION_TYPE_Transport: type 4,app_ORGANIZATION_TYPE_University,app_ORGANIZATION_TYPE_XNA,app_FONDKAPREMONT_MODE_org spec account,app_FONDKAPREMONT_MODE_reg oper account,app_FONDKAPREMONT_MODE_reg oper spec account,app_HOUSETYPE_MODE_specific housing,app_HOUSETYPE_MODE_terraced house,app_WALLSMATERIAL_MODE_Mixed,app_WALLSMATERIAL_MODE_Monolithic,app_WALLSMATERIAL_MODE_Others,app_WALLSMATERIAL_MODE_Panel,"app_WALLSMATERIAL_MODE_Stone, brick",app_WALLSMATERIAL_MODE_Wooden,app_EMERGENCYSTATE_MODE_Yes,app_DAY_APPR_PROCESS_START_Working day
0,100002,0,12.2185,12.915581,10.114619,12.768544,0.018801,315.0,21.0,122.0,71.0,,1,1,0,1,1,0,1.0,2,2,10,0,0,0,0,0,0,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.6341,2.0,2.0,2.0,2.0,38.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,2.007889,0.121978,1.733333,0.067329,1.0,3,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1
1,100003,0,12.506181,14.072865,10.482892,13.937287,0.003541,559.0,40.0,40.0,10.0,,1,1,0,1,1,0,2.0,1,1,11,0,0,0,0,0,0,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.804,1.0,0.0,1.0,0.0,28.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.79075,0.132217,4.183333,0.070862,2.0,2,1,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1
2,100004,0,11.119898,11.813037,8.817446,11.813037,0.010032,635.0,8.0,142.0,84.0,26.0,1,1,1,1,1,0,1.0,2,2,9,0,0,0,0,0,0,,0.555912,0.729567,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,27.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.1,2.0,0.011814,1.0,2,0,1,1,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,100006,0,11.813037,12.652947,10.298481,12.601491,0.008019,634.0,101.0,328.0,81.0,,1,1,0,1,0,0,2.0,2,2,17,0,0,0,0,0,0,,0.650442,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,21.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,2.316167,0.2199,2.2,0.159905,2.0,1,1,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4,100007,0,11.707678,13.148033,9.992711,13.148033,0.028663,664.0,101.0,144.0,115.0,,1,1,0,1,0,0,1.0,2,2,11,0,0,0,0,1,1,,0.322738,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,37.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.222222,0.179963,4.222222,0.152418,1.0,1,1,1,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1


In [21]:
# check data
appl.shape

(356255, 211)

## 4.2. PREVIOUS APPLICATION DATA

In [25]:
# import data
prev = pd.read_csv("../data/raw/previous_application.csv")
print("Dimensions:", prev.shape)

Dimensions: (1670214, 37)


In [26]:
# check data
prev.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,Y,1,0.0,0.182832,0.867336,XAP,Approved,-73,Cash through the bank,XAP,,Repeater,Mobile,POS,XNA,Country-wide,35,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,Y,1,,,,XNA,Approved,-164,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,,112500.0,TUESDAY,11,Y,1,,,,XNA,Approved,-301,Cash through the bank,XAP,"Spouse, partner",Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,,450000.0,MONDAY,7,Y,1,,,,XNA,Approved,-512,Cash through the bank,XAP,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,middle,Cash X-Sell: middle,365243.0,-482.0,-152.0,-182.0,-177.0,1.0
4,1784265,202054,Cash loans,31924.395,337500.0,404055.0,,337500.0,THURSDAY,9,Y,1,,,,Repairs,Refused,-781,Cash through the bank,HC,,Repeater,XNA,Cash,walk-in,Credit and cash offices,-1,XNA,24.0,high,Cash Street: high,,,,,,


In [27]:
# count missings
nas = count_missings(prev)
nas

Unnamed: 0,Total,Percent
RATE_INTEREST_PRIVILEGED,1664263,99.643698
RATE_INTEREST_PRIMARY,1664263,99.643698
RATE_DOWN_PAYMENT,895844,53.63648
AMT_DOWN_PAYMENT,895844,53.63648
NAME_TYPE_SUITE,820405,49.119754
DAYS_TERMINATION,673065,40.298129
NFLAG_INSURED_ON_APPROVAL,673065,40.298129
DAYS_FIRST_DRAWING,673065,40.298129
DAYS_FIRST_DUE,673065,40.298129
DAYS_LAST_DUE_1ST_VERSION,673065,40.298129


In [28]:
# print factor levels
facs =  [f for f in prev.columns if prev[f].dtype == "object"]
for var in facs:
    print("----------")
    print(prev[var].value_counts())
    print(prev[var].isnull().sum(), " null values")
    print("")

----------
Cash loans         747553
Consumer loans     729151
Revolving loans    193164
XNA                   346
Name: NAME_CONTRACT_TYPE, dtype: int64
0  null values

----------
TUESDAY      255118
WEDNESDAY    255010
MONDAY       253557
FRIDAY       252048
THURSDAY     249099
SATURDAY     240631
SUNDAY       164751
Name: WEEKDAY_APPR_PROCESS_START, dtype: int64
0  null values

----------
Y    1661739
N       8475
Name: FLAG_LAST_APPL_PER_CONTRACT, dtype: int64
0  null values

----------
XAP                                 922661
XNA                                 677918
Repairs                              23765
Other                                15608
Urgent needs                          8412
Buying a used car                     2888
Building a house or an annex          2693
Everyday expenses                     2416
Medicine                              2174
Payments on other loans               1931
Education                             1573
Journey                        

In [29]:
##### FEATURE ENGINEERING

# amount ratios
prev["AMT_GIVEN_RATIO_1"]  = prev["AMT_CREDIT"] / prev["AMT_APPLICATION"]
prev["AMT_GIVEN_RATIO_2"]  = prev["AMT_GOODS_PRICE"] / prev["AMT_APPLICATION"]
prev["DOWN_PAYMENT_RATIO"] = prev["AMT_DOWN_PAYMENT"] / prev["AMT_APPLICATION"]

# logarithms
log_vars = ["AMT_CREDIT", "AMT_ANNUITY", "AMT_APPLICATION", "AMT_DOWN_PAYMENT", "AMT_GOODS_PRICE"]
prev = create_logs(prev, log_vars, replace = True)

# convert days
day_vars = ["DAYS_FIRST_DRAWING", "DAYS_FIRST_DUE", "DAYS_LAST_DUE_1ST_VERSION", 
            "DAYS_LAST_DUE", "DAYS_TERMINATION", "DAYS_DECISION"]
prev = convert_days(prev, day_vars, t = 1, rounding = False, replace = True)

# number of applications 
cnt_prev = prev[["SK_ID_CURR", "SK_ID_PREV"]].groupby(["SK_ID_CURR"], as_index = False).count()
cnt_prev.columns = ["SK_ID_CURR", "CNT_PREV_APPLICATIONS"]
prev = prev.merge(cnt_prev, how = "left", on = "SK_ID_CURR")

# number of contracts
cnt_prev = prev[["SK_ID_CURR", "FLAG_LAST_APPL_PER_CONTRACT"]]
cnt_prev.columns = ["SK_ID_CURR", "CNT_PREV_CONTRACTS"]
cnt_prev = cnt_prev[cnt_prev["CNT_PREV_CONTRACTS"] == "Y"]
cnt_prev = cnt_prev[["SK_ID_CURR", "CNT_PREV_CONTRACTS"]].groupby(["SK_ID_CURR"], as_index = False).count()
prev = prev.merge(cnt_prev, how = "left", on = "SK_ID_CURR")

# number ratio
prev["APPL_PER_CONTRACT_RATIO"] = prev["CNT_PREV_APPLICATIONS"] / prev["CNT_PREV_CONTRACTS"]

# loan decision ratios
prev = compute_accept_reject_ratio(prev, lags = [1, 3, 5])

# day differences
prev["DAYS_DUE_DIFF_1"] = prev["DAYS_LAST_DUE_1ST_VERSION"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_DUE_DIFF_2"] = prev["DAYS_LAST_DUE"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_TERMINATION_DIFF_1"] = prev["DAYS_TERMINATION"] - prev["DAYS_FIRST_DRAWING"]
prev["DAYS_TERMINATION_DIFF_2"] = prev["DAYS_TERMINATION"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_TERMINATION_DIFF_3"] = prev["DAYS_TERMINATION"] - prev["DAYS_LAST_DUE"]

# application dates
prev["DAY_APPR_PROCESS_START"] = "Working day"
prev["DAY_APPR_PROCESS_START"][(prev["WEEKDAY_APPR_PROCESS_START"] == "SATURDAY") |
                               (prev["WEEKDAY_APPR_PROCESS_START"] == "SUNDAY")] = "Weekend"



##### FEATURE REMOVAL
drops = ["NAME_CLIENT_TYPE", "SK_ID_PREV"]
prev = prev.drop(columns = drops)



##### ADDING NULL FLAGS

# all null flags
#prev = create_null_flags(prev)



##### TREAT FACTORS

# merge some levels
#prev["NAME_TYPE_SUITE"][(prev["NAME_TYPE_SUITE"] == "Other_B") | 
#                        (prev["NAME_TYPE_SUITE"] == "Other_A")] = None
#prev["NAME_CASH_LOAN_PURPOSE"][(prev["NAME_CASH_LOAN_PURPOSE"] == "XAP")] = "XNA"
#prev["CODE_REJECT_REASON"][(prev["CODE_REJECT_REASON"] == "XAP")] = "XNA"
#prev["NAME_GOODS_CATEGORY"][(prev["NAME_GOODS_CATEGORY"] == "Other")] = "XNA"
#prev["PRODUCT_COMBINATION"][(prev["PRODUCT_COMBINATION"] == "Cash X-Sell: low") |
#                            (prev["PRODUCT_COMBINATION"] == "Cash X-Sell: middle") |
#                            (prev["PRODUCT_COMBINATION"] == "Cash X-Sell: high")] = "Cash X-Sell"
#prev["PRODUCT_COMBINATION"][(prev["PRODUCT_COMBINATION"] == "Card Street: low") |
#                            (prev["PRODUCT_COMBINATION"] == "Card Street: middle") |
#                            (prev["PRODUCT_COMBINATION"] == "Card Street: high")] = "Card Street"

# encoding
prev = treat_factors(prev, method = "dummy")

In [30]:
# aggregate features
prev = aggregate_data(prev, id_var = "SK_ID_CURR", label = "prev")

# clean up
omits = ["APPROVE_RATIO_1", "APPROVE_RATIO_3", "APPROVE_RATIO_5",  
         "REJECT_RATIO_1", "REJECT_RATIO_3",  "REJECT_RATIO_5", 
         "FLAG_LAST_APPL_PER_CONTRACT_Y", "CNT_PREV_CONTRACTS", "CNT_PREV_APPLICATIONS", 
         "APPL_PER_CONTRACT_RATIO"]
for var in omits:
    del prev["prev_" + str(var) + "_std"]
    del prev["prev_" + str(var) + "_min"]
    del prev["prev_" + str(var) + "_max"]

- Preparing the dataset...
- Extracted 0 factors and 161 numerics...
- Aggregating numeric features...
- Final dimensions: (338857, 644)


In [31]:
# check data
prev.head()

Unnamed: 0_level_0,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_max,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_mean,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_std,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_min,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_max,prev_CODE_REJECT_REASON_XAP_mean,prev_CODE_REJECT_REASON_XAP_std,prev_CODE_REJECT_REASON_XAP_min,prev_CODE_REJECT_REASON_XAP_max,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_mean,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_std,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_min,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_max,prev_NAME_CASH_LOAN_PURPOSE_Education_mean,prev_NAME_CASH_LOAN_PURPOSE_Education_std,prev_NAME_CASH_LOAN_PURPOSE_Education_min,prev_NAME_CASH_LOAN_PURPOSE_Education_max,prev_NAME_GOODS_CATEGORY_Computers_mean,prev_NAME_GOODS_CATEGORY_Computers_std,prev_NAME_GOODS_CATEGORY_Computers_min,prev_NAME_GOODS_CATEGORY_Computers_max,prev_NAME_GOODS_CATEGORY_Audio/Video_mean,prev_NAME_GOODS_CATEGORY_Audio/Video_std,prev_NAME_GOODS_CATEGORY_Audio/Video_min,prev_NAME_GOODS_CATEGORY_Audio/Video_max,prev_NAME_GOODS_CATEGORY_Consumer Electronics_mean,prev_NAME_GOODS_CATEGORY_Consumer Electronics_std,prev_NAME_GOODS_CATEGORY_Consumer Electronics_min,prev_NAME_GOODS_CATEGORY_Consumer Electronics_max,prev_APPL_PER_CONTRACT_RATIO_mean,prev_NAME_GOODS_CATEGORY_Weapon_mean,prev_NAME_GOODS_CATEGORY_Weapon_std,prev_NAME_GOODS_CATEGORY_Weapon_min,prev_NAME_GOODS_CATEGORY_Weapon_max,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_mean,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_std,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_min,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_max,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_max,prev_NAME_GOODS_CATEGORY_Medical Supplies_mean,prev_NAME_GOODS_CATEGORY_Medical Supplies_std,prev_NAME_GOODS_CATEGORY_Medical Supplies_min,prev_NAME_GOODS_CATEGORY_Medical Supplies_max,prev_NAME_GOODS_CATEGORY_Jewelry_mean,prev_NAME_GOODS_CATEGORY_Jewelry_std,prev_NAME_GOODS_CATEGORY_Jewelry_min,prev_NAME_GOODS_CATEGORY_Jewelry_max,prev_RATE_INTEREST_PRIMARY_mean,prev_RATE_INTEREST_PRIMARY_std,prev_RATE_INTEREST_PRIMARY_min,prev_RATE_INTEREST_PRIMARY_max,prev_PRODUCT_COMBINATION_Cash X-Sell: low_mean,prev_PRODUCT_COMBINATION_Cash X-Sell: low_std,prev_PRODUCT_COMBINATION_Cash X-Sell: low_min,prev_PRODUCT_COMBINATION_Cash X-Sell: low_max,prev_DAYS_FIRST_DUE_mean,prev_DAYS_FIRST_DUE_std,prev_DAYS_FIRST_DUE_min,prev_DAYS_FIRST_DUE_max,prev_AMT_APPLICATION_mean,prev_AMT_APPLICATION_std,prev_AMT_APPLICATION_min,prev_AMT_APPLICATION_max,prev_NAME_YIELD_GROUP_low_normal_mean,prev_NAME_YIELD_GROUP_low_normal_std,prev_NAME_YIELD_GROUP_low_normal_min,prev_NAME_YIELD_GROUP_low_normal_max,prev_NAME_CONTRACT_TYPE_Revolving loans_mean,prev_NAME_CONTRACT_TYPE_Revolving loans_std,prev_NAME_CONTRACT_TYPE_Revolving loans_min,prev_NAME_CONTRACT_TYPE_Revolving loans_max,prev_NAME_SELLER_INDUSTRY_Industry_mean,prev_NAME_SELLER_INDUSTRY_Industry_std,prev_NAME_SELLER_INDUSTRY_Industry_min,prev_NAME_SELLER_INDUSTRY_Industry_max,prev_PRODUCT_COMBINATION_POS mobile without interest_mean,prev_PRODUCT_COMBINATION_POS mobile without interest_std,prev_PRODUCT_COMBINATION_POS mobile without interest_min,prev_PRODUCT_COMBINATION_POS mobile without interest_max,prev_PRODUCT_COMBINATION_POS other with interest_mean,prev_PRODUCT_COMBINATION_POS other with interest_std,prev_PRODUCT_COMBINATION_POS other with interest_min,prev_PRODUCT_COMBINATION_POS other with interest_max,prev_NAME_PORTFOLIO_POS_mean,prev_NAME_PORTFOLIO_POS_std,prev_NAME_PORTFOLIO_POS_min,prev_NAME_PORTFOLIO_POS_max,prev_NAME_PAYMENT_TYPE_XNA_mean,prev_NAME_PAYMENT_TYPE_XNA_std,prev_NAME_PAYMENT_TYPE_XNA_min,prev_NAME_PAYMENT_TYPE_XNA_max,prev_AMT_DOWN_PAYMENT_mean,prev_AMT_DOWN_PAYMENT_std,prev_AMT_DOWN_PAYMENT_min,prev_AMT_DOWN_PAYMENT_max,prev_DAYS_DECISION_mean,prev_DAYS_DECISION_std,prev_DAYS_DECISION_min,prev_DAYS_DECISION_max,prev_RATE_INTEREST_PRIVILEGED_mean,prev_RATE_INTEREST_PRIVILEGED_std,prev_RATE_INTEREST_PRIVILEGED_min,prev_RATE_INTEREST_PRIVILEGED_max,prev_PRODUCT_COMBINATION_POS household without interest_mean,prev_PRODUCT_COMBINATION_POS household without interest_std,prev_PRODUCT_COMBINATION_POS household without interest_min,prev_PRODUCT_COMBINATION_POS household without interest_max,prev_NAME_GOODS_CATEGORY_Tourism_mean,prev_NAME_GOODS_CATEGORY_Tourism_std,prev_NAME_GOODS_CATEGORY_Tourism_min,prev_NAME_GOODS_CATEGORY_Tourism_max,prev_DAYS_TERMINATION_DIFF_3_mean,prev_DAYS_TERMINATION_DIFF_3_std,prev_DAYS_TERMINATION_DIFF_3_min,prev_DAYS_TERMINATION_DIFF_3_max,prev_CHANNEL_TYPE_Stone_mean,prev_CHANNEL_TYPE_Stone_std,prev_CHANNEL_TYPE_Stone_min,prev_CHANNEL_TYPE_Stone_max,prev_NAME_SELLER_INDUSTRY_Connectivity_mean,prev_NAME_SELLER_INDUSTRY_Connectivity_std,prev_NAME_SELLER_INDUSTRY_Connectivity_min,prev_NAME_SELLER_INDUSTRY_Connectivity_max,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_std,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_min,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_max,prev_NAME_YIELD_GROUP_middle_mean,prev_NAME_YIELD_GROUP_middle_std,prev_NAME_YIELD_GROUP_middle_min,prev_NAME_YIELD_GROUP_middle_max,prev_NAME_TYPE_SUITE_Unaccompanied_mean,prev_NAME_TYPE_SUITE_Unaccompanied_std,prev_NAME_TYPE_SUITE_Unaccompanied_min,prev_NAME_TYPE_SUITE_Unaccompanied_max,prev_NAME_CONTRACT_STATUS_Canceled_mean,prev_NAME_CONTRACT_STATUS_Canceled_std,prev_NAME_CONTRACT_STATUS_Canceled_min,prev_NAME_CONTRACT_STATUS_Canceled_max,prev_PRODUCT_COMBINATION_POS others without interest_mean,prev_PRODUCT_COMBINATION_POS others without interest_std,prev_PRODUCT_COMBINATION_POS others without interest_min,prev_PRODUCT_COMBINATION_POS others without interest_max,prev_AMT_GOODS_PRICE_mean,prev_AMT_GOODS_PRICE_std,prev_AMT_GOODS_PRICE_min,prev_AMT_GOODS_PRICE_max,prev_CODE_REJECT_REASON_LIMIT_mean,prev_CODE_REJECT_REASON_LIMIT_std,prev_CODE_REJECT_REASON_LIMIT_min,prev_CODE_REJECT_REASON_LIMIT_max,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_std,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_min,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_max,prev_CHANNEL_TYPE_Contact center_mean,prev_CHANNEL_TYPE_Contact center_std,prev_CHANNEL_TYPE_Contact center_min,prev_CHANNEL_TYPE_Contact center_max,prev_NAME_SELLER_INDUSTRY_Jewelry_mean,prev_NAME_SELLER_INDUSTRY_Jewelry_std,prev_NAME_SELLER_INDUSTRY_Jewelry_min,prev_NAME_SELLER_INDUSTRY_Jewelry_max,prev_NAME_CONTRACT_STATUS_Unused offer_mean,prev_NAME_CONTRACT_STATUS_Unused offer_std,prev_NAME_CONTRACT_STATUS_Unused offer_min,prev_NAME_CONTRACT_STATUS_Unused offer_max,prev_NAME_GOODS_CATEGORY_Other_mean,prev_NAME_GOODS_CATEGORY_Other_std,prev_NAME_GOODS_CATEGORY_Other_min,prev_NAME_GOODS_CATEGORY_Other_max,prev_NAME_SELLER_INDUSTRY_Construction_mean,prev_NAME_SELLER_INDUSTRY_Construction_std,prev_NAME_SELLER_INDUSTRY_Construction_min,prev_NAME_SELLER_INDUSTRY_Construction_max,prev_NAME_CASH_LOAN_PURPOSE_Journey_mean,prev_NAME_CASH_LOAN_PURPOSE_Journey_std,prev_NAME_CASH_LOAN_PURPOSE_Journey_min,prev_NAME_CASH_LOAN_PURPOSE_Journey_max,prev_RATE_DOWN_PAYMENT_mean,prev_RATE_DOWN_PAYMENT_std,prev_RATE_DOWN_PAYMENT_min,prev_RATE_DOWN_PAYMENT_max,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_std,prev_AMT_CREDIT_min,prev_AMT_CREDIT_max,prev_NAME_GOODS_CATEGORY_Mobile_mean,prev_NAME_GOODS_CATEGORY_Mobile_std,prev_NAME_GOODS_CATEGORY_Mobile_min,prev_NAME_GOODS_CATEGORY_Mobile_max,prev_NAME_SELLER_INDUSTRY_MLM partners_mean,prev_NAME_SELLER_INDUSTRY_MLM partners_std,prev_NAME_SELLER_INDUSTRY_MLM partners_min,prev_NAME_SELLER_INDUSTRY_MLM partners_max,prev_CODE_REJECT_REASON_HC_mean,prev_CODE_REJECT_REASON_HC_std,prev_CODE_REJECT_REASON_HC_min,prev_CODE_REJECT_REASON_HC_max,prev_NAME_TYPE_SUITE_Other_A_mean,prev_NAME_TYPE_SUITE_Other_A_std,prev_NAME_TYPE_SUITE_Other_A_min,prev_NAME_TYPE_SUITE_Other_A_max,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_max,prev_CHANNEL_TYPE_Country-wide_mean,prev_CHANNEL_TYPE_Country-wide_std,prev_CHANNEL_TYPE_Country-wide_min,prev_CHANNEL_TYPE_Country-wide_max,prev_CNT_PAYMENT_mean,prev_CNT_PAYMENT_std,prev_CNT_PAYMENT_min,prev_CNT_PAYMENT_max,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_mean,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_std,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_min,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_max,prev_PRODUCT_COMBINATION_Cash X-Sell: high_mean,prev_PRODUCT_COMBINATION_Cash X-Sell: high_std,prev_PRODUCT_COMBINATION_Cash X-Sell: high_min,prev_PRODUCT_COMBINATION_Cash X-Sell: high_max,prev_CHANNEL_TYPE_Regional / Local_mean,prev_CHANNEL_TYPE_Regional / Local_std,prev_CHANNEL_TYPE_Regional / Local_min,prev_CHANNEL_TYPE_Regional / Local_max,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_mean,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_std,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_min,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_max,prev_SELLERPLACE_AREA_mean,prev_SELLERPLACE_AREA_std,prev_SELLERPLACE_AREA_min,prev_SELLERPLACE_AREA_max,prev_DAYS_TERMINATION_DIFF_1_mean,prev_DAYS_TERMINATION_DIFF_1_std,prev_DAYS_TERMINATION_DIFF_1_min,prev_DAYS_TERMINATION_DIFF_1_max,prev_NAME_CASH_LOAN_PURPOSE_Business development_mean,prev_NAME_CASH_LOAN_PURPOSE_Business development_std,prev_NAME_CASH_LOAN_PURPOSE_Business development_min,prev_NAME_CASH_LOAN_PURPOSE_Business development_max,prev_NAME_CASH_LOAN_PURPOSE_Repairs_mean,prev_NAME_CASH_LOAN_PURPOSE_Repairs_std,prev_NAME_CASH_LOAN_PURPOSE_Repairs_min,prev_NAME_CASH_LOAN_PURPOSE_Repairs_max,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_mean,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_std,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_min,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_max,prev_NAME_PAYMENT_TYPE_Non-cash from your account_mean,prev_NAME_PAYMENT_TYPE_Non-cash from your account_std,prev_NAME_PAYMENT_TYPE_Non-cash from your account_min,prev_NAME_PAYMENT_TYPE_Non-cash from your account_max,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_std,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_min,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_max,prev_REJECT_RATIO_3_mean,prev_PRODUCT_COMBINATION_Cash Street: middle_mean,prev_PRODUCT_COMBINATION_Cash Street: middle_std,prev_PRODUCT_COMBINATION_Cash Street: middle_min,prev_PRODUCT_COMBINATION_Cash Street: middle_max,prev_NAME_CONTRACT_TYPE_XNA_mean,prev_NAME_CONTRACT_TYPE_XNA_std,prev_NAME_CONTRACT_TYPE_XNA_min,prev_NAME_CONTRACT_TYPE_XNA_max,prev_CODE_REJECT_REASON_SCO_mean,prev_CODE_REJECT_REASON_SCO_std,prev_CODE_REJECT_REASON_SCO_min,prev_CODE_REJECT_REASON_SCO_max,prev_PRODUCT_COMBINATION_Cash_mean,prev_PRODUCT_COMBINATION_Cash_std,prev_PRODUCT_COMBINATION_Cash_min,prev_PRODUCT_COMBINATION_Cash_max,prev_REJECT_RATIO_5_mean,prev_PRODUCT_COMBINATION_POS industry with interest_mean,prev_PRODUCT_COMBINATION_POS industry with interest_std,prev_PRODUCT_COMBINATION_POS industry with interest_min,prev_PRODUCT_COMBINATION_POS industry with interest_max,prev_NAME_YIELD_GROUP_low_action_mean,prev_NAME_YIELD_GROUP_low_action_std,prev_NAME_YIELD_GROUP_low_action_min,prev_NAME_YIELD_GROUP_low_action_max,prev_CHANNEL_TYPE_Channel of corporate sales_mean,prev_CHANNEL_TYPE_Channel of corporate sales_std,prev_CHANNEL_TYPE_Channel of corporate sales_min,prev_CHANNEL_TYPE_Channel of corporate sales_max,prev_NAME_PORTFOLIO_XNA_mean,prev_NAME_PORTFOLIO_XNA_std,prev_NAME_PORTFOLIO_XNA_min,prev_NAME_PORTFOLIO_XNA_max,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_mean,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_std,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_min,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_max,"prev_NAME_TYPE_SUITE_Spouse, partner_mean","prev_NAME_TYPE_SUITE_Spouse, partner_std","prev_NAME_TYPE_SUITE_Spouse, partner_min","prev_NAME_TYPE_SUITE_Spouse, partner_max",prev_NAME_GOODS_CATEGORY_Office Appliances_mean,prev_NAME_GOODS_CATEGORY_Office Appliances_std,prev_NAME_GOODS_CATEGORY_Office Appliances_min,prev_NAME_GOODS_CATEGORY_Office Appliances_max,prev_AMT_GIVEN_RATIO_1_mean,prev_AMT_GIVEN_RATIO_1_std,prev_AMT_GIVEN_RATIO_1_min,prev_AMT_GIVEN_RATIO_1_max,prev_NFLAG_INSURED_ON_APPROVAL_mean,prev_NFLAG_INSURED_ON_APPROVAL_std,prev_NFLAG_INSURED_ON_APPROVAL_min,prev_NFLAG_INSURED_ON_APPROVAL_max,prev_NAME_CASH_LOAN_PURPOSE_Hobby_mean,prev_NAME_CASH_LOAN_PURPOSE_Hobby_std,prev_NAME_CASH_LOAN_PURPOSE_Hobby_min,prev_NAME_CASH_LOAN_PURPOSE_Hobby_max,prev_NAME_GOODS_CATEGORY_Sport and Leisure_mean,prev_NAME_GOODS_CATEGORY_Sport and Leisure_std,prev_NAME_GOODS_CATEGORY_Sport and Leisure_min,prev_NAME_GOODS_CATEGORY_Sport and Leisure_max,prev_APPROVE_RATIO_3_mean,prev_NAME_SELLER_INDUSTRY_XNA_mean,prev_NAME_SELLER_INDUSTRY_XNA_std,prev_NAME_SELLER_INDUSTRY_XNA_min,prev_NAME_SELLER_INDUSTRY_XNA_max,prev_NAME_PORTFOLIO_Cash_mean,prev_NAME_PORTFOLIO_Cash_std,prev_NAME_PORTFOLIO_Cash_min,prev_NAME_PORTFOLIO_Cash_max,prev_CNT_PREV_APPLICATIONS_mean,prev_NAME_CASH_LOAN_PURPOSE_XAP_mean,prev_NAME_CASH_LOAN_PURPOSE_XAP_std,prev_NAME_CASH_LOAN_PURPOSE_XAP_min,prev_NAME_CASH_LOAN_PURPOSE_XAP_max,prev_NAME_GOODS_CATEGORY_Gardening_mean,prev_NAME_GOODS_CATEGORY_Gardening_std,prev_NAME_GOODS_CATEGORY_Gardening_min,prev_NAME_GOODS_CATEGORY_Gardening_max,prev_NAME_PORTFOLIO_Cars_mean,prev_NAME_PORTFOLIO_Cars_std,prev_NAME_PORTFOLIO_Cars_min,prev_NAME_PORTFOLIO_Cars_max,prev_APPROVE_RATIO_1_mean,prev_NAME_GOODS_CATEGORY_House Construction_mean,prev_NAME_GOODS_CATEGORY_House Construction_std,prev_NAME_GOODS_CATEGORY_House Construction_min,prev_NAME_GOODS_CATEGORY_House Construction_max,prev_CNT_PREV_CONTRACTS_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_max,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_std,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_min,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_max,prev_DAYS_DUE_DIFF_2_mean,prev_DAYS_DUE_DIFF_2_std,prev_DAYS_DUE_DIFF_2_min,prev_DAYS_DUE_DIFF_2_max,prev_NAME_YIELD_GROUP_high_mean,prev_NAME_YIELD_GROUP_high_std,prev_NAME_YIELD_GROUP_high_min,prev_NAME_YIELD_GROUP_high_max,prev_NAME_CASH_LOAN_PURPOSE_Medicine_mean,prev_NAME_CASH_LOAN_PURPOSE_Medicine_std,prev_NAME_CASH_LOAN_PURPOSE_Medicine_min,prev_NAME_CASH_LOAN_PURPOSE_Medicine_max,prev_PRODUCT_COMBINATION_Cash Street: high_mean,prev_PRODUCT_COMBINATION_Cash Street: high_std,prev_PRODUCT_COMBINATION_Cash Street: high_min,prev_PRODUCT_COMBINATION_Cash Street: high_max,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_std,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_min,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_max,prev_NAME_TYPE_SUITE_Group of people_mean,prev_NAME_TYPE_SUITE_Group of people_std,prev_NAME_TYPE_SUITE_Group of people_min,prev_NAME_TYPE_SUITE_Group of people_max,prev_AMT_GIVEN_RATIO_2_mean,prev_AMT_GIVEN_RATIO_2_std,prev_AMT_GIVEN_RATIO_2_min,prev_AMT_GIVEN_RATIO_2_max,prev_HOUR_APPR_PROCESS_START_mean,prev_HOUR_APPR_PROCESS_START_std,prev_HOUR_APPR_PROCESS_START_min,prev_HOUR_APPR_PROCESS_START_max,prev_NAME_GOODS_CATEGORY_Direct Sales_mean,prev_NAME_GOODS_CATEGORY_Direct Sales_std,prev_NAME_GOODS_CATEGORY_Direct Sales_min,prev_NAME_GOODS_CATEGORY_Direct Sales_max,prev_NFLAG_LAST_APPL_IN_DAY_mean,prev_NFLAG_LAST_APPL_IN_DAY_std,prev_NFLAG_LAST_APPL_IN_DAY_min,prev_NFLAG_LAST_APPL_IN_DAY_max,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_max,prev_DAYS_FIRST_DRAWING_mean,prev_DAYS_FIRST_DRAWING_std,prev_DAYS_FIRST_DRAWING_min,prev_DAYS_FIRST_DRAWING_max,prev_CHANNEL_TYPE_Car dealer_mean,prev_CHANNEL_TYPE_Car dealer_std,prev_CHANNEL_TYPE_Car dealer_min,prev_CHANNEL_TYPE_Car dealer_max,prev_PRODUCT_COMBINATION_Card X-Sell_mean,prev_PRODUCT_COMBINATION_Card X-Sell_std,prev_PRODUCT_COMBINATION_Card X-Sell_min,prev_PRODUCT_COMBINATION_Card X-Sell_max,prev_DAYS_LAST_DUE_mean,prev_DAYS_LAST_DUE_std,prev_DAYS_LAST_DUE_min,prev_DAYS_LAST_DUE_max,prev_NAME_GOODS_CATEGORY_Animals_mean,prev_NAME_GOODS_CATEGORY_Animals_std,prev_NAME_GOODS_CATEGORY_Animals_min,prev_NAME_GOODS_CATEGORY_Animals_max,prev_APPROVE_RATIO_5_mean,prev_PRODUCT_COMBINATION_Cash Street: low_mean,prev_PRODUCT_COMBINATION_Cash Street: low_std,prev_PRODUCT_COMBINATION_Cash Street: low_min,prev_PRODUCT_COMBINATION_Cash Street: low_max,prev_NAME_GOODS_CATEGORY_Fitness_mean,prev_NAME_GOODS_CATEGORY_Fitness_std,prev_NAME_GOODS_CATEGORY_Fitness_min,prev_NAME_GOODS_CATEGORY_Fitness_max,prev_NAME_GOODS_CATEGORY_XNA_mean,prev_NAME_GOODS_CATEGORY_XNA_std,prev_NAME_GOODS_CATEGORY_XNA_min,prev_NAME_GOODS_CATEGORY_XNA_max,prev_DAY_APPR_PROCESS_START_Working day_mean,prev_DAY_APPR_PROCESS_START_Working day_std,prev_DAY_APPR_PROCESS_START_Working day_min,prev_DAY_APPR_PROCESS_START_Working day_max,prev_NAME_CASH_LOAN_PURPOSE_XNA_mean,prev_NAME_CASH_LOAN_PURPOSE_XNA_std,prev_NAME_CASH_LOAN_PURPOSE_XNA_min,prev_NAME_CASH_LOAN_PURPOSE_XNA_max,prev_DAYS_TERMINATION_DIFF_2_mean,prev_DAYS_TERMINATION_DIFF_2_std,prev_DAYS_TERMINATION_DIFF_2_min,prev_DAYS_TERMINATION_DIFF_2_max,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_std,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_min,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_max,prev_CHANNEL_TYPE_Credit and cash offices_mean,prev_CHANNEL_TYPE_Credit and cash offices_std,prev_CHANNEL_TYPE_Credit and cash offices_min,prev_CHANNEL_TYPE_Credit and cash offices_max,prev_FLAG_LAST_APPL_PER_CONTRACT_Y_mean,prev_CODE_REJECT_REASON_XNA_mean,prev_CODE_REJECT_REASON_XNA_std,prev_CODE_REJECT_REASON_XNA_min,prev_CODE_REJECT_REASON_XNA_max,prev_CODE_REJECT_REASON_VERIF_mean,prev_CODE_REJECT_REASON_VERIF_std,prev_CODE_REJECT_REASON_VERIF_min,prev_CODE_REJECT_REASON_VERIF_max,prev_DAYS_LAST_DUE_1ST_VERSION_mean,prev_DAYS_LAST_DUE_1ST_VERSION_std,prev_DAYS_LAST_DUE_1ST_VERSION_min,prev_DAYS_LAST_DUE_1ST_VERSION_max,prev_PRODUCT_COMBINATION_POS household with interest_mean,prev_PRODUCT_COMBINATION_POS household with interest_std,prev_PRODUCT_COMBINATION_POS household with interest_min,prev_PRODUCT_COMBINATION_POS household with interest_max,prev_CODE_REJECT_REASON_SYSTEM_mean,prev_CODE_REJECT_REASON_SYSTEM_std,prev_CODE_REJECT_REASON_SYSTEM_min,prev_CODE_REJECT_REASON_SYSTEM_max,prev_NAME_GOODS_CATEGORY_Furniture_mean,prev_NAME_GOODS_CATEGORY_Furniture_std,prev_NAME_GOODS_CATEGORY_Furniture_min,prev_NAME_GOODS_CATEGORY_Furniture_max,prev_NAME_GOODS_CATEGORY_Medicine_mean,prev_NAME_GOODS_CATEGORY_Medicine_std,prev_NAME_GOODS_CATEGORY_Medicine_min,prev_NAME_GOODS_CATEGORY_Medicine_max,prev_NAME_SELLER_INDUSTRY_Consumer electronics_mean,prev_NAME_SELLER_INDUSTRY_Consumer electronics_std,prev_NAME_SELLER_INDUSTRY_Consumer electronics_min,prev_NAME_SELLER_INDUSTRY_Consumer electronics_max,prev_NAME_GOODS_CATEGORY_Auto Accessories_mean,prev_NAME_GOODS_CATEGORY_Auto Accessories_std,prev_NAME_GOODS_CATEGORY_Auto Accessories_min,prev_NAME_GOODS_CATEGORY_Auto Accessories_max,prev_NAME_SELLER_INDUSTRY_Clothing_mean,prev_NAME_SELLER_INDUSTRY_Clothing_std,prev_NAME_SELLER_INDUSTRY_Clothing_min,prev_NAME_SELLER_INDUSTRY_Clothing_max,prev_REJECT_RATIO_1_mean,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_mean,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_std,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_min,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_max,prev_NAME_TYPE_SUITE_Family_mean,prev_NAME_TYPE_SUITE_Family_std,prev_NAME_TYPE_SUITE_Family_min,prev_NAME_TYPE_SUITE_Family_max,prev_NAME_GOODS_CATEGORY_Vehicles_mean,prev_NAME_GOODS_CATEGORY_Vehicles_std,prev_NAME_GOODS_CATEGORY_Vehicles_min,prev_NAME_GOODS_CATEGORY_Vehicles_max,prev_NAME_CASH_LOAN_PURPOSE_Other_mean,prev_NAME_CASH_LOAN_PURPOSE_Other_std,prev_NAME_CASH_LOAN_PURPOSE_Other_min,prev_NAME_CASH_LOAN_PURPOSE_Other_max,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_mean,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_std,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_min,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_max,prev_NAME_PRODUCT_TYPE_walk-in_mean,prev_NAME_PRODUCT_TYPE_walk-in_std,prev_NAME_PRODUCT_TYPE_walk-in_min,prev_NAME_PRODUCT_TYPE_walk-in_max,prev_NAME_CONTRACT_TYPE_Consumer loans_mean,prev_NAME_CONTRACT_TYPE_Consumer loans_std,prev_NAME_CONTRACT_TYPE_Consumer loans_min,prev_NAME_CONTRACT_TYPE_Consumer loans_max,prev_NAME_GOODS_CATEGORY_Education_mean,prev_NAME_GOODS_CATEGORY_Education_std,prev_NAME_GOODS_CATEGORY_Education_min,prev_NAME_GOODS_CATEGORY_Education_max,prev_NAME_GOODS_CATEGORY_Insurance_mean,prev_NAME_GOODS_CATEGORY_Insurance_std,prev_NAME_GOODS_CATEGORY_Insurance_min,prev_NAME_GOODS_CATEGORY_Insurance_max,prev_NAME_GOODS_CATEGORY_Construction Materials_mean,prev_NAME_GOODS_CATEGORY_Construction Materials_std,prev_NAME_GOODS_CATEGORY_Construction Materials_min,prev_NAME_GOODS_CATEGORY_Construction Materials_max,prev_NAME_GOODS_CATEGORY_Homewares_mean,prev_NAME_GOODS_CATEGORY_Homewares_std,prev_NAME_GOODS_CATEGORY_Homewares_min,prev_NAME_GOODS_CATEGORY_Homewares_max,prev_CODE_REJECT_REASON_SCOFR_mean,prev_CODE_REJECT_REASON_SCOFR_std,prev_CODE_REJECT_REASON_SCOFR_min,prev_CODE_REJECT_REASON_SCOFR_max,prev_NAME_CASH_LOAN_PURPOSE_Furniture_mean,prev_NAME_CASH_LOAN_PURPOSE_Furniture_std,prev_NAME_CASH_LOAN_PURPOSE_Furniture_min,prev_NAME_CASH_LOAN_PURPOSE_Furniture_max,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_mean,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_std,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_min,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_max,prev_DOWN_PAYMENT_RATIO_mean,prev_DOWN_PAYMENT_RATIO_std,prev_DOWN_PAYMENT_RATIO_min,prev_DOWN_PAYMENT_RATIO_max,prev_DAYS_TERMINATION_mean,prev_DAYS_TERMINATION_std,prev_DAYS_TERMINATION_min,prev_DAYS_TERMINATION_max,prev_PRODUCT_COMBINATION_POS mobile with interest_mean,prev_PRODUCT_COMBINATION_POS mobile with interest_std,prev_PRODUCT_COMBINATION_POS mobile with interest_min,prev_PRODUCT_COMBINATION_POS mobile with interest_max,prev_NAME_SELLER_INDUSTRY_Furniture_mean,prev_NAME_SELLER_INDUSTRY_Furniture_std,prev_NAME_SELLER_INDUSTRY_Furniture_min,prev_NAME_SELLER_INDUSTRY_Furniture_max,prev_PRODUCT_COMBINATION_POS industry without interest_mean,prev_PRODUCT_COMBINATION_POS industry without interest_std,prev_PRODUCT_COMBINATION_POS industry without interest_min,prev_PRODUCT_COMBINATION_POS industry without interest_max,prev_AMT_ANNUITY_mean,prev_AMT_ANNUITY_std,prev_AMT_ANNUITY_min,prev_AMT_ANNUITY_max,prev_NAME_SELLER_INDUSTRY_Tourism_mean,prev_NAME_SELLER_INDUSTRY_Tourism_std,prev_NAME_SELLER_INDUSTRY_Tourism_min,prev_NAME_SELLER_INDUSTRY_Tourism_max,prev_DAYS_DUE_DIFF_1_mean,prev_DAYS_DUE_DIFF_1_std,prev_DAYS_DUE_DIFF_1_min,prev_DAYS_DUE_DIFF_1_max,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_mean,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_std,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_min,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_max,prev_NAME_TYPE_SUITE_Other_B_mean,prev_NAME_TYPE_SUITE_Other_B_std,prev_NAME_TYPE_SUITE_Other_B_min,prev_NAME_TYPE_SUITE_Other_B_max,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_mean,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_std,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_min,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_max,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_mean,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_std,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_min,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_max,prev_NAME_CONTRACT_STATUS_Refused_mean,prev_NAME_CONTRACT_STATUS_Refused_std,prev_NAME_CONTRACT_STATUS_Refused_min,prev_NAME_CONTRACT_STATUS_Refused_max,prev_NAME_PRODUCT_TYPE_x-sell_mean,prev_NAME_PRODUCT_TYPE_x-sell_std,prev_NAME_PRODUCT_TYPE_x-sell_min,prev_NAME_PRODUCT_TYPE_x-sell_max
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1,Unnamed: 502_level_1,Unnamed: 503_level_1,Unnamed: 504_level_1,Unnamed: 505_level_1,Unnamed: 506_level_1,Unnamed: 507_level_1,Unnamed: 508_level_1,Unnamed: 509_level_1,Unnamed: 510_level_1,Unnamed: 511_level_1,Unnamed: 512_level_1,Unnamed: 513_level_1,Unnamed: 514_level_1,Unnamed: 515_level_1,Unnamed: 516_level_1,Unnamed: 517_level_1,Unnamed: 518_level_1,Unnamed: 519_level_1,Unnamed: 520_level_1,Unnamed: 521_level_1,Unnamed: 522_level_1,Unnamed: 523_level_1,Unnamed: 524_level_1,Unnamed: 525_level_1,Unnamed: 526_level_1,Unnamed: 527_level_1,Unnamed: 528_level_1,Unnamed: 529_level_1,Unnamed: 530_level_1,Unnamed: 531_level_1,Unnamed: 532_level_1,Unnamed: 533_level_1,Unnamed: 534_level_1,Unnamed: 535_level_1,Unnamed: 536_level_1,Unnamed: 537_level_1,Unnamed: 538_level_1,Unnamed: 539_level_1,Unnamed: 540_level_1,Unnamed: 541_level_1,Unnamed: 542_level_1,Unnamed: 543_level_1,Unnamed: 544_level_1,Unnamed: 545_level_1,Unnamed: 546_level_1,Unnamed: 547_level_1,Unnamed: 548_level_1,Unnamed: 549_level_1,Unnamed: 550_level_1,Unnamed: 551_level_1,Unnamed: 552_level_1,Unnamed: 553_level_1,Unnamed: 554_level_1,Unnamed: 555_level_1,Unnamed: 556_level_1,Unnamed: 557_level_1,Unnamed: 558_level_1,Unnamed: 559_level_1,Unnamed: 560_level_1,Unnamed: 561_level_1,Unnamed: 562_level_1,Unnamed: 563_level_1,Unnamed: 564_level_1,Unnamed: 565_level_1,Unnamed: 566_level_1,Unnamed: 567_level_1,Unnamed: 568_level_1,Unnamed: 569_level_1,Unnamed: 570_level_1,Unnamed: 571_level_1,Unnamed: 572_level_1,Unnamed: 573_level_1,Unnamed: 574_level_1,Unnamed: 575_level_1,Unnamed: 576_level_1,Unnamed: 577_level_1,Unnamed: 578_level_1,Unnamed: 579_level_1,Unnamed: 580_level_1,Unnamed: 581_level_1,Unnamed: 582_level_1,Unnamed: 583_level_1,Unnamed: 584_level_1,Unnamed: 585_level_1,Unnamed: 586_level_1,Unnamed: 587_level_1,Unnamed: 588_level_1,Unnamed: 589_level_1,Unnamed: 590_level_1,Unnamed: 591_level_1,Unnamed: 592_level_1,Unnamed: 593_level_1,Unnamed: 594_level_1,Unnamed: 595_level_1,Unnamed: 596_level_1,Unnamed: 597_level_1,Unnamed: 598_level_1,Unnamed: 599_level_1,Unnamed: 600_level_1,Unnamed: 601_level_1,Unnamed: 602_level_1,Unnamed: 603_level_1,Unnamed: 604_level_1,Unnamed: 605_level_1,Unnamed: 606_level_1,Unnamed: 607_level_1,Unnamed: 608_level_1,Unnamed: 609_level_1,Unnamed: 610_level_1,Unnamed: 611_level_1,Unnamed: 612_level_1,Unnamed: 613_level_1,Unnamed: 614_level_1
100001,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,1709.0,,1709.0,1709.0,10.12007,,10.12007,10.12007,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,7.832411,,7.832411,7.832411,1740.0,,1740.0,1740.0,,,,,0.0,,0,0,0.0,,0,0,-7.0,,-7.0,-7.0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,10.12007,,10.12007,10.12007,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.104326,,0.104326,0.104326,10.076937,,10.076937,10.076937,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,8.0,,8.0,8.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,23.0,,23,23,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.957782,,0.957782,0.957782,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,1,1.0,,1,1,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,-90.0,,-90.0,-90.0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1.0,1.0,13.0,,13,13,0.0,,0,0,1.0,,1,1,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,1619.0,,1619.0,1619.0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,-97.0,,-97.0,-97.0,0.0,,0,0,0.0,,0,0,1.0,0.0,,0,0,0.0,,0,0,1499.0,,1499.0,1499.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.101468,,0.101468,0.101468,1612.0,,1612.0,1612.0,1.0,,1,1,0.0,,0,0,0.0,,0,0,8.281977,,8.281977,8.281977,0.0,,0,0,-210.0,,-210.0,-210.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0
100002,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,565.0,,565.0,565.0,12.095454,,12.095454,12.095454,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,1.0,,1,1,1.0,,1,1,0.0,,0.0,0.0,606.0,,606.0,606.0,,,,,0.0,,0,0,0.0,,0,0,-8.0,,-8.0,-8.0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,12.095454,,12.095454,12.095454,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,12.095454,,12.095454,12.095454,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,24.0,,24.0,24.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,500.0,,500,500,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,1,1.0,,1,1,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,-540.0,,-540.0,-540.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1.0,1.0,9.0,,9,9,0.0,,0,0,1.0,,1,1,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,25.0,,25.0,25.0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,-548.0,,-548.0,-548.0,0.0,,0,0,0.0,,0,0,1.0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,17.0,,17.0,17.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,9.132679,,9.132679,9.132679,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0
100003,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.333333,0.57735,0,1,1274.333333,897.827563,716.0,2310.0,12.526196,1.2975,11.139112,13.710151,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.666667,0.57735,0,1,0.333333,0.57735,0,1,4.418623,6.248876,0.0,8.837246,1305.0,898.138631,746.0,2341.0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,-7.0,2.645751,-9.0,-4.0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.333333,0.57735,0,1,0.666667,0.57735,0,1,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,12.526196,1.2975,11.139112,13.710151,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.05003,0.070754,0.0,0.100061,12.580207,1.370403,11.128064,13.850765,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,10.0,3.464102,6.0,12.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,533.0,757.540098,-1,1400,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.057664,0.083753,0.989013,1.15098,0.666667,0.57735,0.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,1,0.333333,0.57735,0,1,0.333333,0.57735,0,1,3,0.666667,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,1,0.0,0.0,0,0,3,0.0,0.0,0,0,0.0,0.0,0,0,-220.0,96.436508,-330.0,-150.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1.0,1.0,14.666667,2.516611,12,17,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,1054.333333,803.569744,536.0,1980.0,0.0,0.0,0,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.333333,0.57735,0,1,0.333333,0.57735,0,1,-227.0,93.952115,-334.0,-158.0,0.0,0.0,0,0,0.333333,0.57735,0,1,1.0,0.0,0.0,0,0,0.0,0.0,0,0,1004.333333,854.97037,386.0,1980.0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0.666667,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.666667,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.050029,0.070752,0.0,0.100059,1047.333333,806.196213,527.0,1976.0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,10.462473,1.441706,8.815564,11.496369,0.0,0.0,0,0,-270.0,103.923048,-330.0,-150.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1
100004,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,784.0,,784.0,784.0,10.097532,,10.097532,10.097532,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1,1,0.0,,0,0,8.488999,,8.488999,8.488999,815.0,,815.0,815.0,,,,,0.0,,0,0,0.0,,0,0,-10.0,,-10.0,-10.0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1,1,1.0,,1,1,0.0,,0,0,0.0,,0,0,10.097532,,10.097532,10.097532,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.212008,,0.212008,0.212008,9.908823,,9.908823,9.908823,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,4.0,,4.0,4.0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,30.0,,30,30,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.828021,,0.828021,0.828021,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,1,1.0,,1,1,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,-60.0,,-60.0,-60.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1.0,1.0,5.0,,5,5,0.0,,0,0,1.0,,1,1,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,724.0,,724.0,724.0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,-70.0,,-70.0,-70.0,0.0,,0,0,0.0,,0,0,1.0,0.0,,0,0,0.0,,0,0,694.0,,694.0,694.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.200148,,0.200148,0.200148,714.0,,714.0,714.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,8.586393,,8.586393,8.586393,0.0,,0,0,-90.0,,-90.0,-90.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0
100005,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,706.0,,706.0,706.0,5.352952,7.570217,0.0,10.705904,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.5,0.707107,0,1,8.404024,,8.404024,8.404024,536.0,312.541197,315.0,757.0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,-6.0,,-6.0,-6.0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,10.705904,,10.705904,10.705904,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.108964,,0.108964,0.108964,5.300245,7.495678,0.0,10.60049,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,12.0,,12.0,12.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,18.0,26.870058,-1,37,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.89995,,0.89995,0.89995,0.0,,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,1,0.5,0.707107,0,1,0.0,0.0,0,0,2,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,1,0.0,0.0,0,0,2,0.0,0.0,0,0,0.0,0.0,0,0,-240.0,,-240.0,-240.0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,1.0,,1.0,1.0,10.5,0.707107,10,11,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,466.0,,466.0,466.0,0.0,0.0,0,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,1.0,0.0,1,1,0.5,0.707107,0,1,-246.0,,-246.0,-246.0,0.0,0.0,0,0,0.5,0.707107,0,1,1.0,0.0,0.0,0,0,0.0,0.0,0,0,376.0,,376.0,376.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.10005,,0.10005,0.10005,460.0,,460.0,460.0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,8.479325,,8.479325,8.479325,0.0,0.0,0,0,-330.0,,-330.0,-330.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0


In [32]:
# check data
prev.shape

(338857, 614)

## 4.3. BBAL DATA

In [33]:
# import data
bbal = pd.read_csv("../data/raw/bureau_balance.csv")
print("Dimensions:", bbal.shape)

Dimensions: (27299925, 3)


In [34]:
# check data
bbal.head()

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C
2,5715448,-2,C
3,5715448,-3,C
4,5715448,-4,C


In [35]:
# count missings
nas = count_missings(bbal)
nas

Unnamed: 0,Total,Percent


In [36]:
# print factor levels
facs =  [f for f in bbal.columns if bbal[f].dtype == "object"]
for var in facs:
    print("----------")
    print(bbal[var].value_counts())
    print(bbal[var].isnull().sum(), " null values")
    print("")

----------
C    13646993
0     7499507
X     5810482
1      242347
5       62406
2       23419
3        8924
4        5847
Name: STATUS, dtype: int64
0  null values



In [37]:
##### FEATURE ENGINEERING

# number of months
cnt_bbal = bbal[["SK_ID_BUREAU", "MONTHS_BALANCE"]].groupby(["SK_ID_BUREAU"], as_index = False).count()
cnt_bbal.columns = ["SK_ID_BUREAU", "CNT_BBAL_MONTHS"]
bbal = bbal.merge(cnt_bbal, how = "left", on = "SK_ID_BUREAU")



##### FEATURE REMOVAL
drops = ["MONTHS_BALANCE"]
bbal = bbal.drop(columns = drops)



##### ADDING NULL FLAGS

# all null flags
#bbal = create_null_flags(bbal)



##### TREAT FACTORS

# merge some levels

# encoding
#bfac = bbal[["STATUS"]]
#bbal = treat_factors(bbal, method = "dummy")
#bbal = pd.concat([bbal, bfac], axis = 1)

In [38]:
# aggregate features
bbal = aggregate_data(bbal, id_var = "SK_ID_BUREAU", label = "bbal", stats = ["mean", "sum"])

# clean up mode
#del bbal["bbal_STATUS_mode"]

# clean up
omits = ["CNT_BBAL_MONTHS"]
for var in omits:
    del bbal["bbal_" + str(var) + "_sum"]

- Preparing the dataset...
- Extracted 1 factors and 1 numerics...
- Aggregating numeric features...
- Aggregating factor features...
- Final dimensions: (817395, 4)


In [39]:
# check data
bbal.head()

Unnamed: 0_level_0,bbal_CNT_BBAL_MONTHS_mean,bbal_STATUS_mode,bbal_STATUS_nunique
SK_ID_BUREAU,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5001709,97,C,2
5001710,83,C,3
5001711,4,0,2
5001712,19,0,2
5001713,22,X,1


In [40]:
# check data
bbal.shape

(817395, 3)

## 4.4. BUREAU DATA

In [41]:
# import data
buro = pd.read_csv("../data/raw/bureau.csv")
print("Dimensions:", buro.shape)

Dimensions: (1716428, 17)


In [42]:
# check data
buro.head()

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,5714464,Active,currency 1,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,5714465,Active,currency 1,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,5714466,Active,currency 1,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,


In [43]:
# count missings
nas = count_missings(buro)
nas

Unnamed: 0,Total,Percent
AMT_ANNUITY,1226791,71.47349
AMT_CREDIT_MAX_OVERDUE,1124488,65.513264
DAYS_ENDDATE_FACT,633653,36.916958
AMT_CREDIT_SUM_LIMIT,591780,34.477415
AMT_CREDIT_SUM_DEBT,257669,15.011932
DAYS_CREDIT_ENDDATE,105553,6.149573
AMT_CREDIT_SUM,13,0.000757


In [44]:
# print factor levels
facs =  [f for f in buro.columns if buro[f].dtype == "object"]
for var in facs:
    print("----------")
    print(buro[var].value_counts())
    print(buro[var].isnull().sum(), " null values")
    print("")

----------
Closed      1079273
Active       630607
Sold           6527
Bad debt         21
Name: CREDIT_ACTIVE, dtype: int64
0  null values

----------
currency 1    1715020
currency 2       1224
currency 3        174
currency 4         10
Name: CREDIT_CURRENCY, dtype: int64
0  null values

----------
Consumer credit                                 1251615
Credit card                                      402195
Car loan                                          27690
Mortgage                                          18391
Microloan                                         12413
Loan for business development                      1975
Another type of loan                               1017
Unknown type of loan                                555
Loan for working capital replenishment              469
Cash loan (non-earmarked)                            56
Real estate loan                                     27
Loan for the purchase of equipment                   19
Loan for purchase of shar

In [45]:
##### FEATURE ENGINEERING

# number of buro loans 
cnt_buro = buro[["SK_ID_CURR", "SK_ID_BUREAU"]].groupby(["SK_ID_CURR"], as_index = False).count()
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_LOANS"]
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")

# amount ratios
buro["AMT_SUM_OVERDUE_RATIO_1"] = buro["AMT_CREDIT_SUM_OVERDUE"] / buro["AMT_ANNUITY"]
buro["AMT_SUM_OVERDUE_RATIO_2"] = buro["AMT_CREDIT_SUM_OVERDUE"] / buro["AMT_CREDIT_SUM"]
buro["AMT_MAX_OVERDUE_RATIO_1"] = buro["AMT_CREDIT_MAX_OVERDUE"] / buro["AMT_ANNUITY"]
buro["AMT_MAX_OVERDUE_RATIO_2"] = buro["AMT_CREDIT_MAX_OVERDUE"] / buro["AMT_CREDIT_SUM"]
buro["AMT_SUM_DEBT_RATIO_1"]    = buro["AMT_CREDIT_SUM_DEBT"] / buro["AMT_CREDIT_SUM"]
buro["AMT_SUM_DEBT_RATIO_2"]    = buro["AMT_CREDIT_SUM_DEBT"] / buro["AMT_CREDIT_SUM_LIMIT"]

# logarithms
log_vars = ["AMT_CREDIT_SUM", "AMT_CREDIT_SUM_DEBT", "AMT_CREDIT_SUM_LIMIT", "AMT_CREDIT_SUM_OVERDUE", "AMT_ANNUITY"]
buro = create_logs(buro, log_vars, replace = True)

# convert days
day_vars = ["DAYS_CREDIT", "CREDIT_DAY_OVERDUE", "DAYS_CREDIT_ENDDATE", "DAYS_ENDDATE_FACT", "DAYS_CREDIT_UPDATE"]
buro = convert_days(buro, day_vars, t = 1, rounding = False, replace = True)

# day differences
buro["DAYS_END_DIFF_1"] = buro["DAYS_ENDDATE_FACT"]   - buro["DAYS_CREDIT_ENDDATE"]
buro["DAYS_END_DIFF_2"] = buro["DAYS_CREDIT_UPDATE"]  - buro["DAYS_CREDIT_ENDDATE"]
buro["DAYS_DURATION_1"] = buro["DAYS_CREDIT_ENDDATE"] - buro["DAYS_CREDIT"]
buro["DAYS_DURATION_2"] = buro["DAYS_ENDDATE_FACT"]   - buro["DAYS_CREDIT"]

# number of active buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_ACTIVE"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_ACTIVE"] == "Active"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_ACTIVE"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_ACTIVE"].fillna(0, inplace = True)

# number of closed buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_CLOSED"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_CLOSED"] == "Closed"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_CLOSED"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_CLOSED"].fillna(0, inplace = True)

# number of defaulted buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_BAD"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_BAD"] == "Bad debt"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_BAD"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_BAD"].fillna(0, inplace = True)



##### MERGE ADDITIONAL DATA

# merge bbal
#buro = buro.merge(bbal, how = "left", on = "SK_ID_BUREAU")



##### FEATURE REMOVAL
drops = ["SK_ID_BUREAU"]
buro = buro.drop(columns = drops)



##### ADDING NULL FLAGS

# all null flags
#buro = create_null_flags(buro)



##### TREAT FACTORS

# merge some levels

# encoding
buro = treat_factors(buro, method = "dummy")

In [46]:
# aggregate features
buro = aggregate_data(buro, id_var = "SK_ID_CURR", label = "buro")

# clean up
omits = ["CNT_BURO_LOANS", "CNT_BURO_ACTIVE", "CNT_BURO_CLOSED", "CNT_BURO_BAD"]
for var in omits:
    del buro["buro_" + str(var) + "_std"]
    del buro["buro_" + str(var) + "_min"]
    del buro["buro_" + str(var) + "_max"]

- Preparing the dataset...
- Extracted 0 factors and 65 numerics...
- Aggregating numeric features...
- Final dimensions: (305811, 260)


In [47]:
# check data
buro.head()

Unnamed: 0_level_0,buro_DAYS_END_DIFF_2_mean,buro_DAYS_END_DIFF_2_std,buro_DAYS_END_DIFF_2_min,buro_DAYS_END_DIFF_2_max,buro_CNT_BURO_BAD_mean,buro_CNT_CREDIT_PROLONG_mean,buro_CNT_CREDIT_PROLONG_std,buro_CNT_CREDIT_PROLONG_min,buro_CNT_CREDIT_PROLONG_max,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_2_mean,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_2_std,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_2_min,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_2_max,buro_CREDIT_TYPE_Mobile operator loan_mean,buro_CREDIT_TYPE_Mobile operator loan_std,buro_CREDIT_TYPE_Mobile operator loan_min,buro_CREDIT_TYPE_Mobile operator loan_max,buro_ISNULL_DAYS_CREDIT_ENDDATE_mean,buro_ISNULL_DAYS_CREDIT_ENDDATE_std,buro_ISNULL_DAYS_CREDIT_ENDDATE_min,buro_ISNULL_DAYS_CREDIT_ENDDATE_max,buro_CREDIT_ACTIVE_Closed_mean,buro_CREDIT_ACTIVE_Closed_std,buro_CREDIT_ACTIVE_Closed_min,buro_CREDIT_ACTIVE_Closed_max,buro_CREDIT_TYPE_Car loan_mean,buro_CREDIT_TYPE_Car loan_std,buro_CREDIT_TYPE_Car loan_min,buro_CREDIT_TYPE_Car loan_max,buro_CREDIT_TYPE_Interbank credit_mean,buro_CREDIT_TYPE_Interbank credit_std,buro_CREDIT_TYPE_Interbank credit_min,buro_CREDIT_TYPE_Interbank credit_max,buro_ISNULL_DAYS_END_DIFF_2_mean,buro_ISNULL_DAYS_END_DIFF_2_std,buro_ISNULL_DAYS_END_DIFF_2_min,buro_ISNULL_DAYS_END_DIFF_2_max,buro_AMT_CREDIT_SUM_mean,buro_AMT_CREDIT_SUM_std,buro_AMT_CREDIT_SUM_min,buro_AMT_CREDIT_SUM_max,buro_CREDIT_TYPE_Microloan_mean,buro_CREDIT_TYPE_Microloan_std,buro_CREDIT_TYPE_Microloan_min,buro_CREDIT_TYPE_Microloan_max,buro_ISNULL_AMT_ANNUITY_mean,buro_ISNULL_AMT_ANNUITY_std,buro_ISNULL_AMT_ANNUITY_min,buro_ISNULL_AMT_ANNUITY_max,buro_AMT_CREDIT_SUM_DEBT_mean,buro_AMT_CREDIT_SUM_DEBT_std,buro_AMT_CREDIT_SUM_DEBT_min,buro_AMT_CREDIT_SUM_DEBT_max,buro_CREDIT_TYPE_Real estate loan_mean,buro_CREDIT_TYPE_Real estate loan_std,buro_CREDIT_TYPE_Real estate loan_min,buro_CREDIT_TYPE_Real estate loan_max,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_2_mean,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_2_std,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_2_min,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_2_max,buro_ISNULL_DAYS_DURATION_1_mean,buro_ISNULL_DAYS_DURATION_1_std,buro_ISNULL_DAYS_DURATION_1_min,buro_ISNULL_DAYS_DURATION_1_max,buro_AMT_SUM_OVERDUE_RATIO_1_mean,buro_AMT_SUM_OVERDUE_RATIO_1_std,buro_AMT_SUM_OVERDUE_RATIO_1_min,buro_AMT_SUM_OVERDUE_RATIO_1_max,buro_CREDIT_TYPE_Consumer credit_mean,buro_CREDIT_TYPE_Consumer credit_std,buro_CREDIT_TYPE_Consumer credit_min,buro_CREDIT_TYPE_Consumer credit_max,buro_CREDIT_TYPE_Credit card_mean,buro_CREDIT_TYPE_Credit card_std,buro_CREDIT_TYPE_Credit card_min,buro_CREDIT_TYPE_Credit card_max,buro_AMT_SUM_DEBT_RATIO_1_mean,buro_AMT_SUM_DEBT_RATIO_1_std,buro_AMT_SUM_DEBT_RATIO_1_min,buro_AMT_SUM_DEBT_RATIO_1_max,buro_ISNULL_AMT_CREDIT_MAX_OVERDUE_mean,buro_ISNULL_AMT_CREDIT_MAX_OVERDUE_std,buro_ISNULL_AMT_CREDIT_MAX_OVERDUE_min,buro_ISNULL_AMT_CREDIT_MAX_OVERDUE_max,buro_CREDIT_ACTIVE_Sold_mean,buro_CREDIT_ACTIVE_Sold_std,buro_CREDIT_ACTIVE_Sold_min,buro_CREDIT_ACTIVE_Sold_max,buro_AMT_SUM_OVERDUE_RATIO_2_mean,buro_AMT_SUM_OVERDUE_RATIO_2_std,buro_AMT_SUM_OVERDUE_RATIO_2_min,buro_AMT_SUM_OVERDUE_RATIO_2_max,buro_CREDIT_TYPE_Mortgage_mean,buro_CREDIT_TYPE_Mortgage_std,buro_CREDIT_TYPE_Mortgage_min,buro_CREDIT_TYPE_Mortgage_max,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_mean,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_std,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_min,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_max,buro_ISNULL_DAYS_CREDIT_UPDATE_mean,buro_ISNULL_DAYS_CREDIT_UPDATE_std,buro_ISNULL_DAYS_CREDIT_UPDATE_min,buro_ISNULL_DAYS_CREDIT_UPDATE_max,buro_CREDIT_TYPE_Loan for working capital replenishment_mean,buro_CREDIT_TYPE_Loan for working capital replenishment_std,buro_CREDIT_TYPE_Loan for working capital replenishment_min,buro_CREDIT_TYPE_Loan for working capital replenishment_max,buro_DAYS_CREDIT_ENDDATE_mean,buro_DAYS_CREDIT_ENDDATE_std,buro_DAYS_CREDIT_ENDDATE_min,buro_DAYS_CREDIT_ENDDATE_max,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_1_mean,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_1_std,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_1_min,buro_ISNULL_AMT_SUM_OVERDUE_RATIO_1_max,buro_AMT_MAX_OVERDUE_RATIO_2_mean,buro_AMT_MAX_OVERDUE_RATIO_2_std,buro_AMT_MAX_OVERDUE_RATIO_2_min,buro_AMT_MAX_OVERDUE_RATIO_2_max,buro_ISNULL_DAYS_ENDDATE_FACT_mean,buro_ISNULL_DAYS_ENDDATE_FACT_std,buro_ISNULL_DAYS_ENDDATE_FACT_min,buro_ISNULL_DAYS_ENDDATE_FACT_max,buro_CNT_BURO_LOANS_mean,buro_CREDIT_CURRENCY_currency 3_mean,buro_CREDIT_CURRENCY_currency 3_std,buro_CREDIT_CURRENCY_currency 3_min,buro_CREDIT_CURRENCY_currency 3_max,buro_ISNULL_DAYS_END_DIFF_1_mean,buro_ISNULL_DAYS_END_DIFF_1_std,buro_ISNULL_DAYS_END_DIFF_1_min,buro_ISNULL_DAYS_END_DIFF_1_max,buro_CREDIT_CURRENCY_currency 2_mean,buro_CREDIT_CURRENCY_currency 2_std,buro_CREDIT_CURRENCY_currency 2_min,buro_CREDIT_CURRENCY_currency 2_max,buro_ISNULL_AMT_CREDIT_SUM_DEBT_mean,buro_ISNULL_AMT_CREDIT_SUM_DEBT_std,buro_ISNULL_AMT_CREDIT_SUM_DEBT_min,buro_ISNULL_AMT_CREDIT_SUM_DEBT_max,buro_ISNULL_AMT_CREDIT_SUM_LIMIT_mean,buro_ISNULL_AMT_CREDIT_SUM_LIMIT_std,buro_ISNULL_AMT_CREDIT_SUM_LIMIT_min,buro_ISNULL_AMT_CREDIT_SUM_LIMIT_max,buro_CNT_BURO_CLOSED_mean,buro_CREDIT_TYPE_Cash loan (non-earmarked)_mean,buro_CREDIT_TYPE_Cash loan (non-earmarked)_std,buro_CREDIT_TYPE_Cash loan (non-earmarked)_min,buro_CREDIT_TYPE_Cash loan (non-earmarked)_max,buro_ISNULL_DAYS_DURATION_2_mean,buro_ISNULL_DAYS_DURATION_2_std,buro_ISNULL_DAYS_DURATION_2_min,buro_ISNULL_DAYS_DURATION_2_max,buro_CREDIT_CURRENCY_currency 4_mean,buro_CREDIT_CURRENCY_currency 4_std,buro_CREDIT_CURRENCY_currency 4_min,buro_CREDIT_CURRENCY_currency 4_max,buro_ISNULL_AMT_SUM_DEBT_RATIO_1_mean,buro_ISNULL_AMT_SUM_DEBT_RATIO_1_std,buro_ISNULL_AMT_SUM_DEBT_RATIO_1_min,buro_ISNULL_AMT_SUM_DEBT_RATIO_1_max,buro_DAYS_DURATION_2_mean,buro_DAYS_DURATION_2_std,buro_DAYS_DURATION_2_min,buro_DAYS_DURATION_2_max,buro_ISNULL_AMT_SUM_DEBT_RATIO_2_mean,buro_ISNULL_AMT_SUM_DEBT_RATIO_2_std,buro_ISNULL_AMT_SUM_DEBT_RATIO_2_min,buro_ISNULL_AMT_SUM_DEBT_RATIO_2_max,buro_ISNULL_CREDIT_DAY_OVERDUE_mean,buro_ISNULL_CREDIT_DAY_OVERDUE_std,buro_ISNULL_CREDIT_DAY_OVERDUE_min,buro_ISNULL_CREDIT_DAY_OVERDUE_max,buro_CREDIT_DAY_OVERDUE_mean,buro_CREDIT_DAY_OVERDUE_std,buro_CREDIT_DAY_OVERDUE_min,buro_CREDIT_DAY_OVERDUE_max,buro_DAYS_CREDIT_UPDATE_mean,buro_DAYS_CREDIT_UPDATE_std,buro_DAYS_CREDIT_UPDATE_min,buro_DAYS_CREDIT_UPDATE_max,buro_AMT_CREDIT_SUM_LIMIT_mean,buro_AMT_CREDIT_SUM_LIMIT_std,buro_AMT_CREDIT_SUM_LIMIT_min,buro_AMT_CREDIT_SUM_LIMIT_max,buro_AMT_MAX_OVERDUE_RATIO_1_mean,buro_AMT_MAX_OVERDUE_RATIO_1_std,buro_AMT_MAX_OVERDUE_RATIO_1_min,buro_AMT_MAX_OVERDUE_RATIO_1_max,buro_AMT_ANNUITY_mean,buro_AMT_ANNUITY_std,buro_AMT_ANNUITY_min,buro_AMT_ANNUITY_max,buro_CREDIT_TYPE_Loan for business development_mean,buro_CREDIT_TYPE_Loan for business development_std,buro_CREDIT_TYPE_Loan for business development_min,buro_CREDIT_TYPE_Loan for business development_max,buro_CREDIT_TYPE_Loan for the purchase of equipment_mean,buro_CREDIT_TYPE_Loan for the purchase of equipment_std,buro_CREDIT_TYPE_Loan for the purchase of equipment_min,buro_CREDIT_TYPE_Loan for the purchase of equipment_max,buro_CREDIT_TYPE_Unknown type of loan_mean,buro_CREDIT_TYPE_Unknown type of loan_std,buro_CREDIT_TYPE_Unknown type of loan_min,buro_CREDIT_TYPE_Unknown type of loan_max,buro_AMT_CREDIT_MAX_OVERDUE_mean,buro_AMT_CREDIT_MAX_OVERDUE_std,buro_AMT_CREDIT_MAX_OVERDUE_min,buro_AMT_CREDIT_MAX_OVERDUE_max,buro_CREDIT_ACTIVE_Bad debt_mean,buro_CREDIT_ACTIVE_Bad debt_std,buro_CREDIT_ACTIVE_Bad debt_min,buro_CREDIT_ACTIVE_Bad debt_max,buro_DAYS_END_DIFF_1_mean,buro_DAYS_END_DIFF_1_std,buro_DAYS_END_DIFF_1_min,buro_DAYS_END_DIFF_1_max,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_1_mean,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_1_std,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_1_min,buro_ISNULL_AMT_MAX_OVERDUE_RATIO_1_max,buro_ISNULL_AMT_CREDIT_SUM_mean,buro_ISNULL_AMT_CREDIT_SUM_std,buro_ISNULL_AMT_CREDIT_SUM_min,buro_ISNULL_AMT_CREDIT_SUM_max,buro_DAYS_DURATION_1_mean,buro_DAYS_DURATION_1_std,buro_DAYS_DURATION_1_min,buro_DAYS_DURATION_1_max,buro_AMT_SUM_DEBT_RATIO_2_mean,buro_AMT_SUM_DEBT_RATIO_2_std,buro_AMT_SUM_DEBT_RATIO_2_min,buro_AMT_SUM_DEBT_RATIO_2_max,buro_AMT_CREDIT_SUM_OVERDUE_mean,buro_AMT_CREDIT_SUM_OVERDUE_std,buro_AMT_CREDIT_SUM_OVERDUE_min,buro_AMT_CREDIT_SUM_OVERDUE_max,buro_DAYS_ENDDATE_FACT_mean,buro_DAYS_ENDDATE_FACT_std,buro_DAYS_ENDDATE_FACT_min,buro_DAYS_ENDDATE_FACT_max,buro_DAYS_CREDIT_mean,buro_DAYS_CREDIT_std,buro_DAYS_CREDIT_min,buro_DAYS_CREDIT_max,buro_CNT_BURO_ACTIVE_mean
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1
100001,-473.5,491.424121,-1174.0,-24.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.428571,0.534522,0,1,0.571429,0.534522,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.428571,0.534522,0,1,12.080036,0.628917,11.356283,12.842652,0.0,0.0,0,0,0.0,0.0,0,0,5.153914,6.440628,0.0,12.829977,0.0,0.0,0,0,1.0,0.0,1,1,0.428571,0.534522,0,1,0.0,0.0,0.0,0.0,1.0,0.0,1,1,0.0,0.0,0,0,0.282518,0.399523,0.0,0.987405,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,628.5,491.424121,179.0,1329.0,0.571429,0.534522,0,1,,,,,0.428571,0.534522,0,1,7,0.0,0.0,0,0,0.428571,0.534522,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.142857,0.377964,0,1,4.0,0.0,0.0,0,0,0.428571,0.534522,0,1,0.0,0.0,0,0,0.0,0.0,0,0,-228.75,136.497558,-335.0,-32.0,0.714286,0.48795,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,93.142857,77.20412,6.0,155.0,0.0,0.0,0.0,0.0,,,,,3.839271,4.795585,0.0,9.289475,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,197.0,334.957709,-1.0,698.0,1.0,0.0,1,1,0.0,0.0,0,0,-425.75,210.829117,-730.0,-243.0,inf,,inf,inf,0.0,0.0,0.0,0.0,825.5,369.078582,544.0,1328.0,735.0,489.942514,49.0,1572.0,3.0
100002,36.0,66.730802,-5.0,113.0,0.0,0.0,0.0,0,0,0.125,0.353553,0,1,0.0,0.0,0,0,0.625,0.517549,0,1,0.75,0.46291,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.625,0.517549,0,1,9.811994,4.084715,0.0,13.017005,0.0,0.0,0,0,0.125,0.353553,0,1,2.48244,5.550905,0.0,12.4122,0.0,0.0,0,0,0.375,0.517549,0,1,0.625,0.517549,0,1,,,,,0.5,0.534522,0,1,0.5,0.534522,0,1,0.136545,0.27309,0.0,0.54618,0.375,0.517549,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1007.0,84.858706,911.0,1072.0,1.0,0.0,1,1,0.043436,0.075229,0.0,0.174139,0.25,0.46291,0,1,8,0.0,0.0,0,0,0.625,0.517549,0,1,0.0,0.0,0,0,0.375,0.517549,0,1,0.5,0.534522,0,1,6.0,0.0,0.0,0,0,0.25,0.46291,0,1,0.0,0.0,0,0,0.5,0.534522,0,1,-277.0,207.190733,-609.0,-76.0,0.75,0.46291,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,499.875,518.522472,7.0,1185.0,2.593291,5.186583,0.0,10.373165,inf,,inf,inf,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1681.029,2363.2469,0.0,5043.645,0.0,0.0,0,0,37.666667,65.24058,0.0,113.0,0.625,0.517549,0,1,0.0,0.0,0,0,-220.666667,139.306616,-365.0,-87.0,inf,,0.0,inf,0.0,0.0,0.0,0.0,697.5,515.992539,36.0,1185.0,874.0,431.45104,103.0,1437.0,2.0
100003,-57.666667,222.185358,-303.0,130.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.25,0.5,0,1,0.75,0.5,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.25,0.5,0,1,11.609754,1.495879,10.010052,13.604791,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.25,0.5,0,1,,,,,0.5,0.57735,0,1,0.5,0.57735,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1131.333333,1129.736843,420.0,2434.0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.25,0.5,0,1,4,0.0,0.0,0,0,0.25,0.5,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,3.0,0.0,0.0,0,0,0.25,0.5,0,1,0.0,0.0,0,0,0.0,0.0,0,0,-568.333333,481.117796,-1096.0,-154.0,0.75,0.5,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,816.0,908.053963,43.0,2131.0,3.401198,6.802395,0.0,13.604791,,,,,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,-34.0,253.714406,-303.0,201.0,1.0,0.0,1,1,0.0,0.0,0,0,-534.333333,496.89469,-1096.0,-152.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,1097.333333,896.097279,540.0,2131.0,1400.75,909.826128,606.0,2586.0,1.0
100004,43.5,61.51829,0.0,87.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,11.456566,0.000283,11.456366,11.456766,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,,,,,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,488.5,150.613744,382.0,595.0,1.0,0.0,1,1,0.0,,0.0,0.0,0.0,0.0,0,0,2,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,2.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,-334.5,436.284884,-643.0,-26.0,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,532.0,212.132034,382.0,682.0,0.0,0.0,0.0,0.0,,,,,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,,0.0,0.0,0.0,0.0,0,0,44.0,62.225397,0.0,88.0,1.0,0.0,1,1,0.0,0.0,0,0,-378.5,498.510281,-731.0,-26.0,,,,,0.0,0.0,0.0,0.0,532.5,212.839141,382.0,683.0,867.0,649.124025,408.0,1326.0,0.0
100005,-7.0,,-7.0,-7.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.666667,0.57735,0,1,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.666667,0.57735,0,1,11.510418,1.544801,10.303169,13.251286,0.0,0.0,0,0,0.0,0.0,0,0,7.781492,6.911089,0.0,13.205027,0.0,0.0,0,0,0.666667,0.57735,0,1,0.666667,0.57735,0,1,0.0,,0.0,0.0,0.666667,0.57735,0,1,0.333333,0.57735,0,1,0.601256,0.523384,0.0,0.954794,0.666667,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,128.0,,128.0,128.0,0.666667,0.57735,0,1,0.0,,0.0,0.0,0.666667,0.57735,0,1,3,0.0,0.0,0,0,0.666667,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,0.0,0,0,0.666667,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,-250.0,,-250.0,-250.0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,54.333333,58.594653,11.0,121.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,2.78587,4.825269,0.0,8.357611,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,,0.0,0.0,0.0,0.0,0,0,-5.0,,-5.0,-5.0,0.666667,0.57735,0,1,0.0,0.0,0,0,-245.0,,-245.0,-245.0,inf,,inf,inf,0.0,0.0,0.0,0.0,123.0,,123.0,123.0,190.666667,162.297053,62.0,373.0,2.0


In [48]:
# check data
buro.shape

(305811, 248)

# 5. EXPORT DATA

In [None]:
# merging
appl = appl.merge(prev.reset_index(), how = "left", on = "SK_ID_CURR")
appl = appl.merge(buro.reset_index(), how = "left", on = "SK_ID_CURR")
del prev, buro

In [50]:
##### CROSS-TABLE FEATURE ENGINEERING

# credit ratios
appl["mix_AMT_PREV_ANNUITY_RATIO"]     = appl["app_AMT_ANNUITY"] / appl["prev_AMT_ANNUITY_mean"]
appl["mix_AMT_PREV_CREDIT_RATIO"]      = appl["app_AMT_CREDIT"] / appl["prev_AMT_CREDIT_mean"]
appl["mix_AMT_PREV_GOODS_PRICE_RATIO"] = appl["app_AMT_GOODS_PRICE"] / appl["prev_AMT_GOODS_PRICE_mean"]
appl["mix_AMT_BURO_ANNUITY_RATIO"]     = appl["app_AMT_ANNUITY"] / appl["buro_AMT_ANNUITY_mean"]
appl["mix_AMT_BURO_CREDIT_RATIO"]      = appl["app_AMT_CREDIT"] / appl["buro_AMT_CREDIT_SUM_mean"]

In [23]:
# partitioning
train = appl[appl["SK_ID_CURR"].isin(y["SK_ID_CURR"]) == True]
test  = appl[appl["SK_ID_CURR"].isin(y["SK_ID_CURR"]) == False]
del appl

In [24]:
# check dimensions
print(train.shape)
print(test.shape)

(307511, 211)
(48744, 211)


In [25]:
# export CSV
train.to_csv("../data/prepared/train_app.csv", index = False, float_format = "%.8f")
test.to_csv("../data/prepared/test_app.csv",   index = False, float_format = "%.8f")
y.to_csv("../data/prepared/y_app.csv",         index = False, float_format = "%.8f")