# 1. SETTINGS

In [1]:
# libraries
import pandas as pd
import numpy as np
import scipy.stats
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

In [2]:
# pandas options
pd.set_option("display.max_columns", None)

In [3]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [4]:
# garbage collection
import gc
gc.enable()

# 2. FUNCTIONS

In [5]:
##### FUNCTION FOR COUNTING MISSINGS
def count_missings(data):
    total = data.isnull().sum().sort_values(ascending = False)
    percent = (data.isnull().sum() / data.isnull().count() * 100).sort_values(ascending = False)
    table = pd.concat([total, percent], axis = 1, keys = ["Total", "Percent"])
    table = table[table["Total"] > 0]
    return table

In [6]:
##### FUNCTION FOR CREATING LOGARITHMS
def convert_days(data, features, t = 12, rounding = True, replace = False):
    for var in features:
        if replace == True:
            if rounding == True:
                data[var] = round(-data[var]/t)
            else:
                data[var] = -data[var]/t
            data[var][data[var] < 0] = None
        else:
            if rounding == True:
                data["CONVERTED_" + str(var)] = round(-data[var]/t)
            else:
                data["CONVERTED_" + str(var)] = -data[var]/t
            data["CONVERTED_" + str(var)][data["CONVERTED_" + str(var)] < 0] = None
    return data

In [7]:
##### FUNCTION FOR CREATING LOGARITHMS
def create_logs(data, features, replace = False):
    for var in features:
        if replace == True:
            data[var] = np.log(data[var].abs() + 1)
        else:
            data["LOG_" + str(var)] = np.log(data[var].abs() + 1)      
    return data

In [8]:
##### FUNCTION FOR CREATING FLAGS FOR MISSINGS
def create_null_flags(data, features = None):
    if features == None:
        features = data.columns
    for var in features:
        num_null = data[var].isnull() + 0
        if num_null.sum() > 0:
            data["ISNULL_" + str(var)] = num_null
    return data

In [9]:
##### FUNCTION FOR TREATING FACTORS
def treat_factors(data, method = "label"):
    
    # label encoding
    if method == "label":
        factors = [f for f in data.columns if data[f].dtype == "object"]
        for var in factors:
            data[var], _ = pd.factorize(data[var])
        
    # dummy encoding
    if method == "dummy":
        data = pd.get_dummies(data, drop_first = True)
    
    # dataset
    return data

In [10]:
##### FUNCTION FOR COMPUTING ACCEPT/REJECT RATIOS
def compute_accept_reject_ratio(data, lags = [1, 3, 5]):
    
    # preparations
    dec_prev = data[["SK_ID_CURR", "SK_ID_PREV", "DAYS_DECISION", "NAME_CONTRACT_STATUS"]]
    dec_prev["DAYS_DECISION"] = -dec_prev["DAYS_DECISION"]
    dec_prev = dec_prev.sort_values(by = ["SK_ID_CURR", "DAYS_DECISION"])
    dec_prev = pd.get_dummies(dec_prev)
     
    # compuatation
    for t in lags:
        
        # acceptance ratios
        tmp = dec_prev[["SK_ID_CURR", "NAME_CONTRACT_STATUS_Approved"]].groupby(["SK_ID_CURR"]).head(1)
        tmp = tmp.groupby(["SK_ID_CURR"], as_index = False).mean()
        tmp.columns = ["SK_ID_CURR", "APPROVE_RATIO_" + str(t)]
        data = data.merge(tmp, how = "left", on = "SK_ID_CURR")
        
        # rejection ratios
        tmp = dec_prev[["SK_ID_CURR", "NAME_CONTRACT_STATUS_Refused"]].groupby(["SK_ID_CURR"]).head(1)
        tmp = tmp.groupby(["SK_ID_CURR"], as_index = False).mean()
        tmp.columns = ["SK_ID_CURR", "REJECT_RATIO_" + str(t)]
        data = data.merge(tmp, how = "left", on = "SK_ID_CURR")
        
    # dataset
    return data

In [11]:
##### FUNCTION FOR AGGREGATING DATA
def aggregate_data(data, id_var, label = None):
    
    
    ### SEPARATE FEATURES
  
    # display info
    print("- Preparing the dataset...")

    # find factors
    data_factors = [f for f in data.columns if data[f].dtype == "object"]
    
    # partition subsets
    num_data = data[list(set(data.columns) - set(data_factors))]
    fac_data = data[[id_var] + data_factors]
    
    # display info
    num_facs = fac_data.shape[1] - 1
    num_nums = num_data.shape[1] - 1
    print("- Extracted %.0f factors and %.0f numerics..." % (num_facs, num_nums))


    ##### AGGREGATION
 
    # aggregate numerics
    if (num_nums > 0):
        print("- Aggregating numeric features...")
        num_data = num_data.groupby(id_var).agg(["mean", "std", "min", "max"])
        num_data.columns = ["_".join(col).strip() for col in num_data.columns.values]
        num_data = num_data.sort_index()

    # aggregate factors
    if (num_facs > 0):
        print("- Aggregating factor features...")
        fac_data = fac_data.groupby(id_var).agg([("mode",   lambda x: scipy.stats.mode(x)[0][0]),
                                                 ("unique", lambda x: x.nunique())])
        fac_data.columns = ["_".join(col).strip() for col in fac_data.columns.values]
        fac_data = fac_data.sort_index()


    ##### MERGER

    # merge numerics and factors
    if ((num_facs > 0) & (num_nums > 0)):
        agg_data = pd.concat([num_data, fac_data], axis = 1)
    
    # use factors only
    if ((num_facs > 0) & (num_nums == 0)):
        agg_data = fac_data
        
    # use numerics only
    if ((num_facs == 0) & (num_nums > 0)):
        agg_data = num_data
        

    ##### LAST STEPS

    # update labels
    if label != None:
        agg_data.columns = [label + "_" + str(col) for col in agg_data.columns]
    
    # impute zeros for SD
    #stdevs = agg_data.filter(like = "_std").columns
    #for var in stdevs:
    #    agg_data[var].fillna(0, inplace = True)

    # display info
    print("- Final dimensions:", agg_data.shape)
    
    # return dataset
    return agg_data

# 3. DATA IMPORT

In [12]:
# import data
train = pd.read_csv("../data/raw/application_train.csv")
test  = pd.read_csv("../data/raw/application_test.csv")
buro  = pd.read_csv("../data/raw/bureau.csv")
bbal  = pd.read_csv("../data/raw/bureau_balance.csv")
prev  = pd.read_csv("../data/raw/previous_application.csv")
card  = pd.read_csv("../data/raw/credit_card_balance.csv")
poca  = pd.read_csv("../data/raw/POS_CASH_balance.csv")
inst  = pd.read_csv("../data/raw/installments_payments.csv")

In [13]:
# check dimensions
print("Application:", train.shape, test.shape)
print("Buro:", buro.shape)
print("Bbal:", bbal.shape)
print("Prev:", prev.shape)
print("Card:", card.shape)
print("Poca:", poca.shape)
print("Inst:", inst.shape)

Application: (307511, 122) (48744, 121)
Buro: (1716428, 17)
Bbal: (27299925, 3)
Prev: (1670214, 37)
Card: (3840312, 23)
Poca: (10001358, 8)
Inst: (13605401, 8)


In [14]:
# extract target
y = train[["SK_ID_CURR", "TARGET"]]
del train["TARGET"]

# 4. PREPROCESSING

## 4.1. APPLICATION DATA

In [15]:
# concatenate application data
appl = pd.concat([train, test])
del train, test

In [16]:
### FEATURE ENGINEERING

# income ratios
appl["CREDIT_BY_INCOME"]      = appl["AMT_CREDIT"]      / appl["AMT_INCOME_TOTAL"]
appl["ANNUITY_BY_INCOME"]     = appl["AMT_ANNUITY"]     / appl["AMT_INCOME_TOTAL"]
appl["GOODS_PRICE_BY_INCOME"] = appl["AMT_GOODS_PRICE"] / appl["AMT_INCOME_TOTAL"]
appl["INCOME_PER_PERSON"]     = appl["AMT_INCOME_TOTAL"] / appl["CNT_FAM_MEMBERS"]

# career ratio
appl["PERCENT_WORKED"] = appl["DAYS_EMPLOYED"] / appl["DAYS_BIRTH"]
appl["PERCENT_WORKED"][appl["PERCENT_WORKED"] < 0] = None

# number of adults
appl["CNT_ADULTS"] = appl["CNT_FAM_MEMBERS"] - appl["CNT_CHILDREN"]
appl['CHILDREN_RATIO'] = appl['CNT_CHILDREN'] / appl['CNT_FAM_MEMBERS']

# number of overall payments
appl['ANNUITY LENGTH'] = appl['AMT_CREDIT'] / appl['AMT_ANNUITY']

# external sources
#appl["EXT_SOURCE_MIN"]  = appl[["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3"]].min(axis = 1)
#appl["EXT_SOURCE_MAX"]  = appl[["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3"]].max(axis = 1)
appl["EXT_SOURCE_MEAN"] = appl[["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3"]].mean(axis = 1)
#appl["EXT_SOURCE_SD"]   = appl[["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3"]].std(axis = 1)
appl["NUM_EXT_SOURCES"] = 3 - (appl["EXT_SOURCE_1"].isnull().astype(int) +
                               appl["EXT_SOURCE_2"].isnull().astype(int) +
                               appl["EXT_SOURCE_3"].isnull().astype(int))

# number of documents
doc_vars = ["FLAG_DOCUMENT_2",  "FLAG_DOCUMENT_3",  "FLAG_DOCUMENT_4",  "FLAG_DOCUMENT_5",  "FLAG_DOCUMENT_6",
            "FLAG_DOCUMENT_7",  "FLAG_DOCUMENT_8",  "FLAG_DOCUMENT_9",  "FLAG_DOCUMENT_10", "FLAG_DOCUMENT_11",
            "FLAG_DOCUMENT_12", "FLAG_DOCUMENT_13", "FLAG_DOCUMENT_14", "FLAG_DOCUMENT_15", "FLAG_DOCUMENT_16",
            "FLAG_DOCUMENT_17", "FLAG_DOCUMENT_18", "FLAG_DOCUMENT_19", "FLAG_DOCUMENT_20", "FLAG_DOCUMENT_21"]
appl["NUM_DOCUMENTS"] = appl[doc_vars].sum(axis = 1)

# application date
appl["DAY_APPR_PROCESS_START"] = "Working day"
appl["DAY_APPR_PROCESS_START"][(appl["WEEKDAY_APPR_PROCESS_START"] == "SATURDAY") |
                               (appl["WEEKDAY_APPR_PROCESS_START"] == "SUNDAY")] = "Weekend"

# logarithms
log_vars = ["AMT_CREDIT", "AMT_INCOME_TOTAL", "AMT_GOODS_PRICE", "AMT_ANNUITY"]
appl = create_logs(appl, log_vars, replace = True)

# convert days
day_vars = ["DAYS_BIRTH", "DAYS_REGISTRATION", "DAYS_ID_PUBLISH", "DAYS_EMPLOYED", "DAYS_LAST_PHONE_CHANGE"]
appl = convert_days(appl, day_vars, t = 30, rounding = True, replace = True)

# age ratios
appl["OWN_CAR_AGE_RATIO"] = appl["OWN_CAR_AGE"] / appl["DAYS_BIRTH"]
appl["DAYS_ID_PUBLISHED_RATIO"] = appl["DAYS_ID_PUBLISH"] / appl["DAYS_BIRTH"]
appl["DAYS_REGISTRATION_RATIO"] = appl["DAYS_REGISTRATION"] / appl["DAYS_BIRTH"]
appl["DAYS_LAST_PHONE_CHANGE_RATIO"] = appl["DAYS_LAST_PHONE_CHANGE"] / appl["DAYS_BIRTH"]


##### FEATURE REMOVAL
drops = ['APARTMENTS_MEDI', 'BASEMENTAREA_MEDI', 'COMMONAREA_MEDI', 'ELEVATORS_MEDI', 'ENTRANCES_MEDI', 
         'FLOORSMAX_MEDI', 'FLOORSMIN_MEDI', 'LANDAREA_MEDI', 'LIVINGAPARTMENTS_MEDI', 'LIVINGAREA_MEDI',
         'NONLIVINGAPARTMENTS_MEDI', 'NONLIVINGAREA_MEDI','YEARS_BEGINEXPLUATATION_MEDI', 'YEARS_BUILD_MEDI',
         'APARTMENTS_MODE', 'BASEMENTAREA_MODE', 'COMMONAREA_MODE','ELEVATORS_MODE', 'ENTRANCES_MODE', 
         'FLOORSMAX_MODE', 'FLOORSMIN_MODE', 'LANDAREA_MODE', 'LIVINGAPARTMENTS_MODE', 'LIVINGAREA_MODE', 
         'NONLIVINGAPARTMENTS_MODE', 'NONLIVINGAREA_MODE', 'TOTALAREA_MODE',  'YEARS_BEGINEXPLUATATION_MODE']
appl = appl.drop(columns = drops)

In [17]:
# rename features
appl.columns = ["SK_ID_CURR"] + ["app_" + str(col) for col in appl.columns if col not in "SK_ID_CURR"]

In [18]:
# check data
appl.head()

Unnamed: 0,SK_ID_CURR,app_NAME_CONTRACT_TYPE,app_CODE_GENDER,app_FLAG_OWN_CAR,app_FLAG_OWN_REALTY,app_CNT_CHILDREN,app_AMT_INCOME_TOTAL,app_AMT_CREDIT,app_AMT_ANNUITY,app_AMT_GOODS_PRICE,app_NAME_TYPE_SUITE,app_NAME_INCOME_TYPE,app_NAME_EDUCATION_TYPE,app_NAME_FAMILY_STATUS,app_NAME_HOUSING_TYPE,app_REGION_POPULATION_RELATIVE,app_DAYS_BIRTH,app_DAYS_EMPLOYED,app_DAYS_REGISTRATION,app_DAYS_ID_PUBLISH,app_OWN_CAR_AGE,app_FLAG_MOBIL,app_FLAG_EMP_PHONE,app_FLAG_WORK_PHONE,app_FLAG_CONT_MOBILE,app_FLAG_PHONE,app_FLAG_EMAIL,app_OCCUPATION_TYPE,app_CNT_FAM_MEMBERS,app_REGION_RATING_CLIENT,app_REGION_RATING_CLIENT_W_CITY,app_WEEKDAY_APPR_PROCESS_START,app_HOUR_APPR_PROCESS_START,app_REG_REGION_NOT_LIVE_REGION,app_REG_REGION_NOT_WORK_REGION,app_LIVE_REGION_NOT_WORK_REGION,app_REG_CITY_NOT_LIVE_CITY,app_REG_CITY_NOT_WORK_CITY,app_LIVE_CITY_NOT_WORK_CITY,app_ORGANIZATION_TYPE,app_EXT_SOURCE_1,app_EXT_SOURCE_2,app_EXT_SOURCE_3,app_APARTMENTS_AVG,app_BASEMENTAREA_AVG,app_YEARS_BEGINEXPLUATATION_AVG,app_YEARS_BUILD_AVG,app_COMMONAREA_AVG,app_ELEVATORS_AVG,app_ENTRANCES_AVG,app_FLOORSMAX_AVG,app_FLOORSMIN_AVG,app_LANDAREA_AVG,app_LIVINGAPARTMENTS_AVG,app_LIVINGAREA_AVG,app_NONLIVINGAPARTMENTS_AVG,app_NONLIVINGAREA_AVG,app_YEARS_BUILD_MODE,app_FONDKAPREMONT_MODE,app_HOUSETYPE_MODE,app_WALLSMATERIAL_MODE,app_EMERGENCYSTATE_MODE,app_OBS_30_CNT_SOCIAL_CIRCLE,app_DEF_30_CNT_SOCIAL_CIRCLE,app_OBS_60_CNT_SOCIAL_CIRCLE,app_DEF_60_CNT_SOCIAL_CIRCLE,app_DAYS_LAST_PHONE_CHANGE,app_FLAG_DOCUMENT_2,app_FLAG_DOCUMENT_3,app_FLAG_DOCUMENT_4,app_FLAG_DOCUMENT_5,app_FLAG_DOCUMENT_6,app_FLAG_DOCUMENT_7,app_FLAG_DOCUMENT_8,app_FLAG_DOCUMENT_9,app_FLAG_DOCUMENT_10,app_FLAG_DOCUMENT_11,app_FLAG_DOCUMENT_12,app_FLAG_DOCUMENT_13,app_FLAG_DOCUMENT_14,app_FLAG_DOCUMENT_15,app_FLAG_DOCUMENT_16,app_FLAG_DOCUMENT_17,app_FLAG_DOCUMENT_18,app_FLAG_DOCUMENT_19,app_FLAG_DOCUMENT_20,app_FLAG_DOCUMENT_21,app_AMT_REQ_CREDIT_BUREAU_HOUR,app_AMT_REQ_CREDIT_BUREAU_DAY,app_AMT_REQ_CREDIT_BUREAU_WEEK,app_AMT_REQ_CREDIT_BUREAU_MON,app_AMT_REQ_CREDIT_BUREAU_QRT,app_AMT_REQ_CREDIT_BUREAU_YEAR,app_CREDIT_BY_INCOME,app_ANNUITY_BY_INCOME,app_GOODS_PRICE_BY_INCOME,app_INCOME_PER_PERSON,app_PERCENT_WORKED,app_CNT_ADULTS,app_CHILDREN_RATIO,app_ANNUITY LENGTH,app_EXT_SOURCE_MEAN,app_NUM_EXT_SOURCES,app_NUM_DOCUMENTS,app_DAY_APPR_PROCESS_START,app_OWN_CAR_AGE_RATIO,app_DAYS_ID_PUBLISHED_RATIO,app_DAYS_REGISTRATION_RATIO,app_DAYS_LAST_PHONE_CHANGE_RATIO
0,100002,Cash loans,M,N,Y,0,12.2185,12.915581,10.114619,12.768544,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,315.0,21.0,122.0,71.0,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.6341,reg oper account,block of flats,"Stone, brick",No,2.0,2.0,2.0,2.0,38.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,2.007889,0.121978,1.733333,202500.0,0.067329,1.0,0.0,16.461104,0.161787,3,1,Working day,,0.225397,0.387302,0.120635
1,100003,Cash loans,F,N,N,0,12.506181,14.072865,10.482892,13.937287,Family,State servant,Higher education,Married,House / apartment,0.003541,559.0,40.0,40.0,10.0,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.804,reg oper account,block of flats,Block,No,1.0,0.0,1.0,0.0,28.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.79075,0.132217,4.183333,135000.0,0.070862,2.0,0.0,36.234085,0.466757,2,1,Working day,,0.017889,0.071556,0.050089
2,100004,Revolving loans,M,Y,Y,0,11.119898,11.813037,8.817446,11.813037,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,635.0,8.0,142.0,84.0,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,0.555912,0.729567,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,27.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.1,2.0,67500.0,0.011814,1.0,0.0,20.0,0.642739,2,0,Working day,0.040945,0.132283,0.223622,0.04252
3,100006,Cash loans,F,N,Y,0,11.813037,12.652947,10.298481,12.601491,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,634.0,101.0,328.0,81.0,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,0.650442,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,21.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,2.316167,0.2199,2.2,67500.0,0.159905,2.0,0.0,10.532818,0.650442,1,1,Working day,,0.12776,0.51735,0.033123
4,100007,Cash loans,M,N,Y,0,11.707678,13.148033,9.992711,13.148033,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,664.0,101.0,144.0,115.0,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,0.322738,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,37.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,4.222222,0.179963,4.222222,121500.0,0.152418,1.0,0.0,23.461618,0.322738,1,1,Working day,,0.173193,0.216867,0.055723


In [19]:
# count missings
nas = count_missings(appl)
nas.head()

Unnamed: 0,Total,Percent
app_COMMONAREA_AVG,248360,69.714109
app_NONLIVINGAPARTMENTS_AVG,246861,69.293343
app_FONDKAPREMONT_MODE,243092,68.235393
app_LIVINGAPARTMENTS_AVG,242979,68.203674
app_FLOORSMIN_AVG,241108,67.678489


## 4.2. CREDIT BUREAU DATA

### 4.2.1. BBAL DATA

In [20]:
# check bbal data
bbal.head()

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C
2,5715448,-2,C
3,5715448,-3,C
4,5715448,-4,C


In [21]:
### FEATURE ENGINEERING

# loan default score
bbal["NUM_STATUS"] = 0
bbal["NUM_STATUS"][bbal["STATUS"] == "X"] = None
bbal["NUM_STATUS"][bbal["STATUS"] == "1"] = 1
bbal["NUM_STATUS"][bbal["STATUS"] == "2"] = 2
bbal["NUM_STATUS"][bbal["STATUS"] == "3"] = 3
bbal["NUM_STATUS"][bbal["STATUS"] == "4"] = 4
bbal["NUM_STATUS"][bbal["STATUS"] == "5"] = 5
bbal["LOAN_SCORE"] = bbal["NUM_STATUS"] / (abs(bbal["MONTHS_BALANCE"]) + 1)
loan_score = bbal.groupby("SK_ID_BUREAU", as_index = False).LOAN_SCORE.sum()
del bbal["NUM_STATUS"]
del bbal["LOAN_SCORE"]

# dummy encoding for STATUS
bbal = pd.get_dummies(bbal, columns = ["STATUS"], prefix = "STATUS")

In [22]:
# count missings
nas = count_missings(bbal)
nas.head()

Unnamed: 0,Total,Percent


In [23]:
### AGGREGATIONS

# total month count
cnt_mon = bbal[["SK_ID_BUREAU", "MONTHS_BALANCE"]].groupby("SK_ID_BUREAU").count()
del bbal["MONTHS_BALANCE"]

# aggregate data
agg_bbal = bbal.groupby("SK_ID_BUREAU").mean()

# add total month count
agg_bbal["MONTH_COUNT"] = cnt_mon

# add loan score
agg_bbal = agg_bbal.merge(loan_score, how = "left", on = "SK_ID_BUREAU")

In [24]:
# count missings
nas = count_missings(agg_bbal)
nas.head()

Unnamed: 0,Total,Percent


In [25]:
# check data
agg_bbal.head()

Unnamed: 0,SK_ID_BUREAU,STATUS_0,STATUS_1,STATUS_2,STATUS_3,STATUS_4,STATUS_5,STATUS_C,STATUS_X,MONTH_COUNT,LOAN_SCORE
0,5001709,0.0,0.0,0.0,0.0,0.0,0.0,0.886598,0.113402,97,0.0
1,5001710,0.060241,0.0,0.0,0.0,0.0,0.0,0.578313,0.361446,83,0.0
2,5001711,0.75,0.0,0.0,0.0,0.0,0.0,0.0,0.25,4,0.0
3,5001712,0.526316,0.0,0.0,0.0,0.0,0.0,0.473684,0.0,19,0.0
4,5001713,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,22,0.0


In [26]:
# clear memory
del bbal

### 4.2.2. BURO DATA

In [27]:
# check buro data
buro.head()

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,5714464,Active,currency 1,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,5714465,Active,currency 1,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,5714466,Active,currency 1,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,


In [28]:
### MERGE
buro = buro.merge(right = agg_bbal.reset_index(), how = "left", on = "SK_ID_BUREAU")

In [29]:
##### FEATURE ENGINEERING

# number of buro loans 
cnt_buro = buro[["SK_ID_CURR", "SK_ID_BUREAU"]].groupby(["SK_ID_CURR"], as_index = False).count()
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_LOANS"]
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")

# amount ratios
buro["AMT_SUM_OVERDUE_RATIO_1"] = buro["AMT_CREDIT_SUM_OVERDUE"] / buro["AMT_ANNUITY"]
buro["AMT_SUM_OVERDUE_RATIO_2"] = buro["AMT_CREDIT_SUM_OVERDUE"] / buro["AMT_CREDIT_SUM"]
buro["AMT_MAX_OVERDUE_RATIO_1"] = buro["AMT_CREDIT_MAX_OVERDUE"] / buro["AMT_ANNUITY"]
buro["AMT_MAX_OVERDUE_RATIO_2"] = buro["AMT_CREDIT_MAX_OVERDUE"] / buro["AMT_CREDIT_SUM"]
buro["AMT_SUM_DEBT_RATIO_1"]    = buro["AMT_CREDIT_SUM_DEBT"] / buro["AMT_CREDIT_SUM"]
buro["AMT_SUM_DEBT_RATIO_2"]    = buro["AMT_CREDIT_SUM_DEBT"] / buro["AMT_CREDIT_SUM_LIMIT"]

# logarithms
log_vars = ["AMT_CREDIT_SUM", "AMT_CREDIT_SUM_DEBT", "AMT_CREDIT_SUM_LIMIT", "AMT_CREDIT_SUM_OVERDUE", "AMT_ANNUITY"]
buro = create_logs(buro, log_vars, replace = True)

# convert days
day_vars = ["DAYS_CREDIT", "CREDIT_DAY_OVERDUE", "DAYS_CREDIT_ENDDATE", "DAYS_ENDDATE_FACT", "DAYS_CREDIT_UPDATE"]
buro = convert_days(buro, day_vars, t = 1, rounding = False, replace = True)

# recency-weighted loan score
buro["WEIGHTED_LOAN_SCORE"] = buro["LOAN_SCORE"] / (buro["DAYS_CREDIT"] / 12)

# day differences
buro["DAYS_END_DIFF_1"] = buro["DAYS_ENDDATE_FACT"]   - buro["DAYS_CREDIT_ENDDATE"]
buro["DAYS_END_DIFF_2"] = buro["DAYS_CREDIT_UPDATE"]  - buro["DAYS_CREDIT_ENDDATE"]
buro["DAYS_DURATION_1"] = buro["DAYS_CREDIT_ENDDATE"] - buro["DAYS_CREDIT"]
buro["DAYS_DURATION_2"] = buro["DAYS_ENDDATE_FACT"]   - buro["DAYS_CREDIT"]

# number of active buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_ACTIVE"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_ACTIVE"] == "Active"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_ACTIVE"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_ACTIVE"].fillna(0, inplace = True)

# number of closed buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_CLOSED"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_CLOSED"] == "Closed"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_CLOSED"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_CLOSED"].fillna(0, inplace = True)

# number of defaulted buro loans
cnt_buro = buro[["SK_ID_CURR", "CREDIT_ACTIVE"]]
cnt_buro.columns = ["SK_ID_CURR", "CNT_BURO_BAD"]
cnt_buro = cnt_buro[cnt_buro["CNT_BURO_BAD"] == "Bad debt"]
cnt_buro = cnt_buro[["SK_ID_CURR", "CNT_BURO_BAD"]].groupby(["SK_ID_CURR"], as_index = False).count()
buro = buro.merge(cnt_buro, how = "left", on = "SK_ID_CURR")
buro["CNT_BURO_BAD"].fillna(0, inplace = True)

In [30]:
# dummy encodnig for factors
buro = pd.get_dummies(buro, drop_first = True)

In [31]:
# count missings
nas = count_missings(buro)
nas.head()

Unnamed: 0,Total,Percent
AMT_MAX_OVERDUE_RATIO_1,1629591,94.940831
AMT_SUM_OVERDUE_RATIO_1,1483326,86.419355
AMT_SUM_DEBT_RATIO_2,1336100,77.84189
AMT_ANNUITY,1226791,71.47349
AMT_MAX_OVERDUE_RATIO_2,1149800,66.987954


In [32]:
### AGGREGATIONS

# count previous buro loans
cnt_buro = buro[["SK_ID_CURR", "SK_ID_BUREAU"]].groupby("SK_ID_CURR").count()
del buro["SK_ID_BUREAU"]

# aggregate data
agg_buro = aggregate_data(buro, id_var = "SK_ID_CURR", label = "buro")

# add buro loan count
agg_buro["buro_BURO_COUNT"] = cnt_buro

# clean up
omits = ["WEIGHTED_LOAN_SCORE"]
for var in omits:
    del agg_buro["buro_" + str(var) + "_std"]
    del agg_buro["buro_" + str(var) + "_min"]
    del agg_buro["buro_" + str(var) + "_max"]

- Preparing the dataset...
- Extracted 0 factors and 58 numerics...
- Aggregating numeric features...
- Final dimensions: (305811, 232)


In [33]:
# count missings
nas = count_missings(agg_buro)
nas.head()

Unnamed: 0,Total,Percent
buro_AMT_SUM_DEBT_RATIO_2_std,302007,98.756094
buro_AMT_MAX_OVERDUE_RATIO_1_std,288362,94.294188
buro_AMT_MAX_OVERDUE_RATIO_1_mean,253792,82.989821
buro_AMT_MAX_OVERDUE_RATIO_1_max,253792,82.989821
buro_AMT_MAX_OVERDUE_RATIO_1_min,253792,82.989821


In [34]:
# check data
agg_buro.head()

Unnamed: 0_level_0,buro_CREDIT_CURRENCY_currency 2_mean,buro_CREDIT_CURRENCY_currency 2_std,buro_CREDIT_CURRENCY_currency 2_min,buro_CREDIT_CURRENCY_currency 2_max,buro_STATUS_0_mean,buro_STATUS_0_std,buro_STATUS_0_min,buro_STATUS_0_max,buro_STATUS_X_mean,buro_STATUS_X_std,buro_STATUS_X_min,buro_STATUS_X_max,buro_index_mean,buro_index_std,buro_index_min,buro_index_max,buro_CREDIT_TYPE_Microloan_mean,buro_CREDIT_TYPE_Microloan_std,buro_CREDIT_TYPE_Microloan_min,buro_CREDIT_TYPE_Microloan_max,buro_AMT_MAX_OVERDUE_RATIO_1_mean,buro_AMT_MAX_OVERDUE_RATIO_1_std,buro_AMT_MAX_OVERDUE_RATIO_1_min,buro_AMT_MAX_OVERDUE_RATIO_1_max,buro_AMT_CREDIT_SUM_LIMIT_mean,buro_AMT_CREDIT_SUM_LIMIT_std,buro_AMT_CREDIT_SUM_LIMIT_min,buro_AMT_CREDIT_SUM_LIMIT_max,buro_CREDIT_TYPE_Car loan_mean,buro_CREDIT_TYPE_Car loan_std,buro_CREDIT_TYPE_Car loan_min,buro_CREDIT_TYPE_Car loan_max,buro_DAYS_DURATION_2_mean,buro_DAYS_DURATION_2_std,buro_DAYS_DURATION_2_min,buro_DAYS_DURATION_2_max,buro_AMT_CREDIT_SUM_DEBT_mean,buro_AMT_CREDIT_SUM_DEBT_std,buro_AMT_CREDIT_SUM_DEBT_min,buro_AMT_CREDIT_SUM_DEBT_max,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_mean,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_std,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_min,buro_CREDIT_TYPE_Loan for purchase of shares (margin lending)_max,buro_STATUS_2_mean,buro_STATUS_2_std,buro_STATUS_2_min,buro_STATUS_2_max,buro_CREDIT_CURRENCY_currency 3_mean,buro_CREDIT_CURRENCY_currency 3_std,buro_CREDIT_CURRENCY_currency 3_min,buro_CREDIT_CURRENCY_currency 3_max,buro_CNT_BURO_ACTIVE_mean,buro_CNT_BURO_ACTIVE_std,buro_CNT_BURO_ACTIVE_min,buro_CNT_BURO_ACTIVE_max,buro_CREDIT_TYPE_Credit card_mean,buro_CREDIT_TYPE_Credit card_std,buro_CREDIT_TYPE_Credit card_min,buro_CREDIT_TYPE_Credit card_max,buro_AMT_MAX_OVERDUE_RATIO_2_mean,buro_AMT_MAX_OVERDUE_RATIO_2_std,buro_AMT_MAX_OVERDUE_RATIO_2_min,buro_AMT_MAX_OVERDUE_RATIO_2_max,buro_DAYS_DURATION_1_mean,buro_DAYS_DURATION_1_std,buro_DAYS_DURATION_1_min,buro_DAYS_DURATION_1_max,buro_CNT_BURO_BAD_mean,buro_CNT_BURO_BAD_std,buro_CNT_BURO_BAD_min,buro_CNT_BURO_BAD_max,buro_AMT_SUM_DEBT_RATIO_2_mean,buro_AMT_SUM_DEBT_RATIO_2_std,buro_AMT_SUM_DEBT_RATIO_2_min,buro_AMT_SUM_DEBT_RATIO_2_max,buro_CREDIT_TYPE_Loan for business development_mean,buro_CREDIT_TYPE_Loan for business development_std,buro_CREDIT_TYPE_Loan for business development_min,buro_CREDIT_TYPE_Loan for business development_max,buro_AMT_CREDIT_MAX_OVERDUE_mean,buro_AMT_CREDIT_MAX_OVERDUE_std,buro_AMT_CREDIT_MAX_OVERDUE_min,buro_AMT_CREDIT_MAX_OVERDUE_max,buro_CNT_BURO_LOANS_mean,buro_CNT_BURO_LOANS_std,buro_CNT_BURO_LOANS_min,buro_CNT_BURO_LOANS_max,buro_STATUS_5_mean,buro_STATUS_5_std,buro_STATUS_5_min,buro_STATUS_5_max,buro_AMT_SUM_DEBT_RATIO_1_mean,buro_AMT_SUM_DEBT_RATIO_1_std,buro_AMT_SUM_DEBT_RATIO_1_min,buro_AMT_SUM_DEBT_RATIO_1_max,buro_CREDIT_TYPE_Loan for the purchase of equipment_mean,buro_CREDIT_TYPE_Loan for the purchase of equipment_std,buro_CREDIT_TYPE_Loan for the purchase of equipment_min,buro_CREDIT_TYPE_Loan for the purchase of equipment_max,buro_DAYS_ENDDATE_FACT_mean,buro_DAYS_ENDDATE_FACT_std,buro_DAYS_ENDDATE_FACT_min,buro_DAYS_ENDDATE_FACT_max,buro_CREDIT_TYPE_Cash loan (non-earmarked)_mean,buro_CREDIT_TYPE_Cash loan (non-earmarked)_std,buro_CREDIT_TYPE_Cash loan (non-earmarked)_min,buro_CREDIT_TYPE_Cash loan (non-earmarked)_max,buro_STATUS_1_mean,buro_STATUS_1_std,buro_STATUS_1_min,buro_STATUS_1_max,buro_WEIGHTED_LOAN_SCORE_mean,buro_CREDIT_DAY_OVERDUE_mean,buro_CREDIT_DAY_OVERDUE_std,buro_CREDIT_DAY_OVERDUE_min,buro_CREDIT_DAY_OVERDUE_max,buro_CREDIT_TYPE_Loan for working capital replenishment_mean,buro_CREDIT_TYPE_Loan for working capital replenishment_std,buro_CREDIT_TYPE_Loan for working capital replenishment_min,buro_CREDIT_TYPE_Loan for working capital replenishment_max,buro_DAYS_CREDIT_mean,buro_DAYS_CREDIT_std,buro_DAYS_CREDIT_min,buro_DAYS_CREDIT_max,buro_CREDIT_ACTIVE_Closed_mean,buro_CREDIT_ACTIVE_Closed_std,buro_CREDIT_ACTIVE_Closed_min,buro_CREDIT_ACTIVE_Closed_max,buro_STATUS_C_mean,buro_STATUS_C_std,buro_STATUS_C_min,buro_STATUS_C_max,buro_CREDIT_TYPE_Consumer credit_mean,buro_CREDIT_TYPE_Consumer credit_std,buro_CREDIT_TYPE_Consumer credit_min,buro_CREDIT_TYPE_Consumer credit_max,buro_STATUS_4_mean,buro_STATUS_4_std,buro_STATUS_4_min,buro_STATUS_4_max,buro_CREDIT_ACTIVE_Sold_mean,buro_CREDIT_ACTIVE_Sold_std,buro_CREDIT_ACTIVE_Sold_min,buro_CREDIT_ACTIVE_Sold_max,buro_DAYS_CREDIT_ENDDATE_mean,buro_DAYS_CREDIT_ENDDATE_std,buro_DAYS_CREDIT_ENDDATE_min,buro_DAYS_CREDIT_ENDDATE_max,buro_AMT_SUM_OVERDUE_RATIO_2_mean,buro_AMT_SUM_OVERDUE_RATIO_2_std,buro_AMT_SUM_OVERDUE_RATIO_2_min,buro_AMT_SUM_OVERDUE_RATIO_2_max,buro_CREDIT_CURRENCY_currency 4_mean,buro_CREDIT_CURRENCY_currency 4_std,buro_CREDIT_CURRENCY_currency 4_min,buro_CREDIT_CURRENCY_currency 4_max,buro_CNT_CREDIT_PROLONG_mean,buro_CNT_CREDIT_PROLONG_std,buro_CNT_CREDIT_PROLONG_min,buro_CNT_CREDIT_PROLONG_max,buro_AMT_ANNUITY_mean,buro_AMT_ANNUITY_std,buro_AMT_ANNUITY_min,buro_AMT_ANNUITY_max,buro_CREDIT_ACTIVE_Bad debt_mean,buro_CREDIT_ACTIVE_Bad debt_std,buro_CREDIT_ACTIVE_Bad debt_min,buro_CREDIT_ACTIVE_Bad debt_max,buro_CREDIT_TYPE_Real estate loan_mean,buro_CREDIT_TYPE_Real estate loan_std,buro_CREDIT_TYPE_Real estate loan_min,buro_CREDIT_TYPE_Real estate loan_max,buro_LOAN_SCORE_mean,buro_LOAN_SCORE_std,buro_LOAN_SCORE_min,buro_LOAN_SCORE_max,buro_CREDIT_TYPE_Unknown type of loan_mean,buro_CREDIT_TYPE_Unknown type of loan_std,buro_CREDIT_TYPE_Unknown type of loan_min,buro_CREDIT_TYPE_Unknown type of loan_max,buro_CREDIT_TYPE_Interbank credit_mean,buro_CREDIT_TYPE_Interbank credit_std,buro_CREDIT_TYPE_Interbank credit_min,buro_CREDIT_TYPE_Interbank credit_max,buro_CREDIT_TYPE_Mobile operator loan_mean,buro_CREDIT_TYPE_Mobile operator loan_std,buro_CREDIT_TYPE_Mobile operator loan_min,buro_CREDIT_TYPE_Mobile operator loan_max,buro_CREDIT_TYPE_Mortgage_mean,buro_CREDIT_TYPE_Mortgage_std,buro_CREDIT_TYPE_Mortgage_min,buro_CREDIT_TYPE_Mortgage_max,buro_DAYS_END_DIFF_2_mean,buro_DAYS_END_DIFF_2_std,buro_DAYS_END_DIFF_2_min,buro_DAYS_END_DIFF_2_max,buro_DAYS_END_DIFF_1_mean,buro_DAYS_END_DIFF_1_std,buro_DAYS_END_DIFF_1_min,buro_DAYS_END_DIFF_1_max,buro_AMT_SUM_OVERDUE_RATIO_1_mean,buro_AMT_SUM_OVERDUE_RATIO_1_std,buro_AMT_SUM_OVERDUE_RATIO_1_min,buro_AMT_SUM_OVERDUE_RATIO_1_max,buro_AMT_CREDIT_SUM_mean,buro_AMT_CREDIT_SUM_std,buro_AMT_CREDIT_SUM_min,buro_AMT_CREDIT_SUM_max,buro_CNT_BURO_CLOSED_mean,buro_CNT_BURO_CLOSED_std,buro_CNT_BURO_CLOSED_min,buro_CNT_BURO_CLOSED_max,buro_DAYS_CREDIT_UPDATE_mean,buro_DAYS_CREDIT_UPDATE_std,buro_DAYS_CREDIT_UPDATE_min,buro_DAYS_CREDIT_UPDATE_max,buro_STATUS_3_mean,buro_STATUS_3_std,buro_STATUS_3_min,buro_STATUS_3_max,buro_MONTH_COUNT_mean,buro_MONTH_COUNT_std,buro_MONTH_COUNT_min,buro_MONTH_COUNT_max,buro_AMT_CREDIT_SUM_OVERDUE_mean,buro_AMT_CREDIT_SUM_OVERDUE_std,buro_AMT_CREDIT_SUM_OVERDUE_min,buro_AMT_CREDIT_SUM_OVERDUE_max,buro_BURO_COUNT
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1
100001,0.0,0.0,0,0,0.336651,0.381334,0.019231,1.0,0.21459,0.182611,0.0,0.5,297266.0,2.160247,297263.0,297269.0,0.0,0.0,0,0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,-228.75,136.497558,-335.0,-32.0,5.153914,6.440628,0.0,12.829977,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,3.0,0.0,3.0,3.0,0.0,0.0,0,0,,,,,-425.75,210.829117,-730.0,-243.0,0.0,0.0,0.0,0.0,inf,,inf,inf,0.0,0.0,0,0,,,,,7,0.0,7,7,0.0,0.0,0.0,0.0,0.282518,0.399523,0.0,0.987405,0.0,0.0,0,0,825.5,369.078582,544.0,1328.0,0.0,0.0,0,0,0.007519,0.019893,0.0,0.052632,0.003067,0.0,0.0,0.0,0.0,0.0,0.0,0,0,735.0,489.942514,49.0,1572.0,0.571429,0.534522,0,1,0.44124,0.428578,0.0,0.966667,1.0,0.0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,628.5,491.424121,179.0,1329.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,3.839271,4.795585,0.0,9.289475,0.0,0.0,0,0,0.0,0.0,0,0,0.142857,0.377964,0.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,-473.5,491.424121,-1174.0,-24.0,197.0,334.957709,-1.0,698.0,0.0,0.0,0.0,0.0,12.080036,0.628917,11.356283,12.842652,4.0,0.0,4.0,4.0,93.142857,77.20412,6.0,155.0,0.0,0.0,0.0,0.0,24.571429,16.050515,2.0,52.0,0.0,0.0,0.0,0.0,7
100002,0.0,0.0,0,0,0.40696,0.196494,0.1875,0.818182,0.161932,0.16165,0.0,0.5,468752.125,9822.774054,444442.0,472228.0,0.0,0.0,0,0,inf,,inf,inf,2.593291,5.186583,0.0,10.373165,0.0,0.0,0,0,-277.0,207.190733,-609.0,-76.0,2.48244,5.550905,0.0,12.4122,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2.0,0.0,2.0,2.0,0.5,0.534522,0,1,0.043436,0.075229,0.0,0.174139,-220.666667,139.306616,-365.0,-87.0,0.0,0.0,0.0,0.0,inf,,0.0,inf,0.0,0.0,0,0,1681.029,2363.2469,0.0,5043.645,8,0.0,8,8,0.0,0.0,0.0,0.0,0.136545,0.27309,0.0,0.54618,0.0,0.0,0,0,697.5,515.992539,36.0,1185.0,0.0,0.0,0,0,0.255682,0.204094,0.0,0.5,0.001432,0.0,0.0,0.0,0.0,0.0,0.0,0,0,874.0,431.45104,103.0,1437.0,0.75,0.46291,0,1,0.175426,0.263147,0.0,0.8125,0.5,0.534522,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1007.0,84.858706,911.0,1072.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.124617,0.102371,0.0,0.237591,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,36.0,66.730802,-5.0,113.0,37.666667,65.24058,0.0,113.0,,,,,9.811994,4.084715,0.0,13.017005,6.0,0.0,6.0,6.0,499.875,518.522472,7.0,1185.0,0.0,0.0,0.0,0.0,13.75,6.363961,4.0,22.0,0.0,0.0,0.0,0.0,8
100003,0.0,0.0,0,0,,,,,,,,,,,,,0.0,0.0,0,0,,,,,3.401198,6.802395,0.0,13.604791,0.0,0.0,0,0,-568.333333,481.117796,-1096.0,-154.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,1.0,0.0,1.0,1.0,0.5,0.57735,0,1,0.0,0.0,0.0,0.0,-534.333333,496.89469,-1096.0,-152.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,4,0.0,4,4,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1097.333333,896.097279,540.0,2131.0,0.0,0.0,0,0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1400.75,909.826128,606.0,2586.0,0.75,0.5,0,1,,,,,0.5,0.57735,0,1,,,,,0.0,0.0,0,0,1131.333333,1129.736843,420.0,2434.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,-57.666667,222.185358,-303.0,130.0,-34.0,253.714406,-303.0,201.0,,,,,11.609754,1.495879,10.010052,13.604791,3.0,0.0,3.0,3.0,816.0,908.053963,43.0,2131.0,,,,,,,,,0.0,0.0,0.0,0.0,4
100004,0.0,0.0,0,0,,,,,,,,,,,,,0.0,0.0,0,0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,-334.5,436.284884,-643.0,-26.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,,0.0,0.0,-378.5,498.510281,-731.0,-26.0,0.0,0.0,0.0,0.0,,,,,0.0,0.0,0,0,0.0,,0.0,0.0,2,0.0,2,2,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,532.5,212.839141,382.0,683.0,0.0,0.0,0,0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0,0,867.0,649.124025,408.0,1326.0,1.0,0.0,1,1,,,,,1.0,0.0,1,1,,,,,0.0,0.0,0,0,488.5,150.613744,382.0,595.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,43.5,61.51829,0.0,87.0,44.0,62.225397,0.0,88.0,,,,,11.456566,0.000283,11.456366,11.456766,2.0,0.0,2.0,2.0,532.0,212.132034,382.0,682.0,,,,,,,,,0.0,0.0,0.0,0.0,2
100005,0.0,0.0,0,0,0.735043,0.238245,0.538462,1.0,0.136752,0.174535,0.0,0.333333,764525.0,1.0,764524.0,764526.0,0.0,0.0,0,0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,-250.0,,-250.0,-250.0,7.781492,6.911089,0.0,13.205027,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,2.0,0.0,2.0,2.0,0.333333,0.57735,0,1,0.0,,0.0,0.0,-245.0,,-245.0,-245.0,0.0,0.0,0.0,0.0,inf,,inf,inf,0.0,0.0,0,0,0.0,,0.0,0.0,3,0.0,3,3,0.0,0.0,0.0,0.0,0.601256,0.523384,0.0,0.954794,0.0,0.0,0,0,123.0,,123.0,123.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,190.666667,162.297053,62.0,373.0,0.333333,0.57735,0,1,0.128205,0.222058,0.0,0.384615,0.666667,0.57735,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0,0,128.0,,128.0,128.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,2.78587,4.825269,0.0,8.357611,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,-7.0,,-7.0,-7.0,-5.0,,-5.0,-5.0,0.0,,0.0,0.0,11.510418,1.544801,10.303169,13.251286,1.0,0.0,1.0,1.0,54.333333,58.594653,11.0,121.0,0.0,0.0,0.0,0.0,7.0,5.291503,3.0,13.0,0.0,0.0,0.0,0.0,3


In [35]:
# clear memory
del buro

## 4.3. PREVIOUS LOAN DATA

### 4.3.1. INST DATA

In [36]:
# check inst data
inst.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NUM_INSTALMENT_VERSION,NUM_INSTALMENT_NUMBER,DAYS_INSTALMENT,DAYS_ENTRY_PAYMENT,AMT_INSTALMENT,AMT_PAYMENT
0,1054186,161674,1.0,6,-1180.0,-1187.0,6948.36,6948.36
1,1330831,151639,0.0,34,-2156.0,-2156.0,1716.525,1716.525
2,2085231,193053,2.0,1,-63.0,-63.0,25425.0,25425.0
3,2452527,199697,1.0,3,-2418.0,-2426.0,24350.13,24350.13
4,2714724,167756,1.0,2,-1383.0,-1366.0,2165.04,2160.585


In [37]:
### FEATURE ENGINEERING

# days past due and days before due (no negative values)
inst['DPD'] = inst['DAYS_ENTRY_PAYMENT'] - inst['DAYS_INSTALMENT']
inst['DBD'] = inst['DAYS_INSTALMENT'] - inst['DAYS_ENTRY_PAYMENT']
inst['DPD'] = inst['DPD'].apply(lambda x: x if x > 0 else 0)
inst['DBD'] = inst['DBD'].apply(lambda x: x if x > 0 else 0)

# percentage and difference paid in each installment 
inst['PAYMENT_PERC'] = inst['AMT_PAYMENT'] / inst['AMT_INSTALMENT']
inst['PAYMENT_DIFF'] = inst['AMT_INSTALMENT'] - inst['AMT_PAYMENT']

# logarithms
log_vars = ["AMT_INSTALMENT", "AMT_PAYMENT"]
inst = create_logs(inst, log_vars, replace = True)

In [38]:
# dummy encodnig for factors
inst = pd.get_dummies(inst, drop_first = True)

In [39]:
# count missings
nas = count_missings(inst)
nas.head()

Unnamed: 0,Total,Percent
PAYMENT_PERC,2907,0.021367
PAYMENT_DIFF,2905,0.021352
AMT_PAYMENT,2905,0.021352
DAYS_ENTRY_PAYMENT,2905,0.021352


In [40]:
### AGGREGATIONS

# count instalments
cnt_inst = inst[["SK_ID_PREV", "NUM_INSTALMENT_NUMBER"]].groupby("SK_ID_PREV").count()
del inst["NUM_INSTALMENT_NUMBER"]

# delete ID_CURR
inst_id = inst[["SK_ID_CURR", "SK_ID_PREV"]]
del inst["SK_ID_CURR"]

# aggregate data
agg_inst = aggregate_data(inst, id_var = "SK_ID_PREV")

# add instalment count
agg_inst["inst_INST_COUNT"] = cnt_inst

# put back ID_CURR
inst_id = inst_id.drop_duplicates()
agg_inst = inst_id.merge(right = agg_inst.reset_index(), how = "right", on = "SK_ID_PREV")
del agg_inst["SK_ID_PREV"]

# aggregate data (round 2)
agg_inst = aggregate_data(agg_inst, id_var = "SK_ID_CURR", label = "inst")

- Preparing the dataset...
- Extracted 0 factors and 9 numerics...
- Aggregating numeric features...
- Final dimensions: (997752, 36)
- Preparing the dataset...
- Extracted 0 factors and 37 numerics...
- Aggregating numeric features...
- Final dimensions: (339587, 148)


In [41]:
# count missings
nas = count_missings(agg_inst)
nas.head()

Unnamed: 0,Total,Percent
inst_PAYMENT_PERC_std_std,99706,29.360959
inst_PAYMENT_DIFF_std_std,99700,29.359192
inst_AMT_PAYMENT_std_std,99700,29.359192
inst_DAYS_ENTRY_PAYMENT_std_std,99700,29.359192
inst_DPD_std_std,99686,29.35507


In [42]:
# check data
agg_inst.head()

Unnamed: 0_level_0,inst_DAYS_INSTALMENT_max_mean,inst_DAYS_INSTALMENT_max_std,inst_DAYS_INSTALMENT_max_min,inst_DAYS_INSTALMENT_max_max,inst_NUM_INSTALMENT_VERSION_mean_mean,inst_NUM_INSTALMENT_VERSION_mean_std,inst_NUM_INSTALMENT_VERSION_mean_min,inst_NUM_INSTALMENT_VERSION_mean_max,inst_DPD_std_mean,inst_DPD_std_std,inst_DPD_std_min,inst_DPD_std_max,inst_NUM_INSTALMENT_VERSION_max_mean,inst_NUM_INSTALMENT_VERSION_max_std,inst_NUM_INSTALMENT_VERSION_max_min,inst_NUM_INSTALMENT_VERSION_max_max,inst_AMT_INSTALMENT_max_mean,inst_AMT_INSTALMENT_max_std,inst_AMT_INSTALMENT_max_min,inst_AMT_INSTALMENT_max_max,inst_PAYMENT_PERC_mean_mean,inst_PAYMENT_PERC_mean_std,inst_PAYMENT_PERC_mean_min,inst_PAYMENT_PERC_mean_max,inst_AMT_PAYMENT_std_mean,inst_AMT_PAYMENT_std_std,inst_AMT_PAYMENT_std_min,inst_AMT_PAYMENT_std_max,inst_DPD_min_mean,inst_DPD_min_std,inst_DPD_min_min,inst_DPD_min_max,inst_AMT_INSTALMENT_mean_mean,inst_AMT_INSTALMENT_mean_std,inst_AMT_INSTALMENT_mean_min,inst_AMT_INSTALMENT_mean_max,inst_PAYMENT_DIFF_mean_mean,inst_PAYMENT_DIFF_mean_std,inst_PAYMENT_DIFF_mean_min,inst_PAYMENT_DIFF_mean_max,inst_DBD_min_mean,inst_DBD_min_std,inst_DBD_min_min,inst_DBD_min_max,inst_AMT_PAYMENT_mean_mean,inst_AMT_PAYMENT_mean_std,inst_AMT_PAYMENT_mean_min,inst_AMT_PAYMENT_mean_max,inst_AMT_PAYMENT_min_mean,inst_AMT_PAYMENT_min_std,inst_AMT_PAYMENT_min_min,inst_AMT_PAYMENT_min_max,inst_AMT_INSTALMENT_std_mean,inst_AMT_INSTALMENT_std_std,inst_AMT_INSTALMENT_std_min,inst_AMT_INSTALMENT_std_max,inst_inst_INST_COUNT_mean,inst_inst_INST_COUNT_std,inst_inst_INST_COUNT_min,inst_inst_INST_COUNT_max,inst_DAYS_ENTRY_PAYMENT_min_mean,inst_DAYS_ENTRY_PAYMENT_min_std,inst_DAYS_ENTRY_PAYMENT_min_min,inst_DAYS_ENTRY_PAYMENT_min_max,inst_DBD_mean_mean,inst_DBD_mean_std,inst_DBD_mean_min,inst_DBD_mean_max,inst_DAYS_INSTALMENT_min_mean,inst_DAYS_INSTALMENT_min_std,inst_DAYS_INSTALMENT_min_min,inst_DAYS_INSTALMENT_min_max,inst_PAYMENT_DIFF_std_mean,inst_PAYMENT_DIFF_std_std,inst_PAYMENT_DIFF_std_min,inst_PAYMENT_DIFF_std_max,inst_PAYMENT_PERC_min_mean,inst_PAYMENT_PERC_min_std,inst_PAYMENT_PERC_min_min,inst_PAYMENT_PERC_min_max,inst_PAYMENT_PERC_max_mean,inst_PAYMENT_PERC_max_std,inst_PAYMENT_PERC_max_min,inst_PAYMENT_PERC_max_max,inst_DAYS_ENTRY_PAYMENT_max_mean,inst_DAYS_ENTRY_PAYMENT_max_std,inst_DAYS_ENTRY_PAYMENT_max_min,inst_DAYS_ENTRY_PAYMENT_max_max,inst_DBD_max_mean,inst_DBD_max_std,inst_DBD_max_min,inst_DBD_max_max,inst_NUM_INSTALMENT_VERSION_min_mean,inst_NUM_INSTALMENT_VERSION_min_std,inst_NUM_INSTALMENT_VERSION_min_min,inst_NUM_INSTALMENT_VERSION_min_max,inst_DPD_max_mean,inst_DPD_max_std,inst_DPD_max_min,inst_DPD_max_max,inst_DAYS_INSTALMENT_std_mean,inst_DAYS_INSTALMENT_std_std,inst_DAYS_INSTALMENT_std_min,inst_DAYS_INSTALMENT_std_max,inst_NUM_INSTALMENT_VERSION_std_mean,inst_NUM_INSTALMENT_VERSION_std_std,inst_NUM_INSTALMENT_VERSION_std_min,inst_NUM_INSTALMENT_VERSION_std_max,inst_DAYS_ENTRY_PAYMENT_std_mean,inst_DAYS_ENTRY_PAYMENT_std_std,inst_DAYS_ENTRY_PAYMENT_std_min,inst_DAYS_ENTRY_PAYMENT_std_max,inst_PAYMENT_PERC_std_mean,inst_PAYMENT_PERC_std_std,inst_PAYMENT_PERC_std_min,inst_PAYMENT_PERC_std_max,inst_DAYS_INSTALMENT_mean_mean,inst_DAYS_INSTALMENT_mean_std,inst_DAYS_INSTALMENT_mean_min,inst_DAYS_INSTALMENT_mean_max,inst_PAYMENT_DIFF_min_mean,inst_PAYMENT_DIFF_min_std,inst_PAYMENT_DIFF_min_min,inst_PAYMENT_DIFF_min_max,inst_DPD_mean_mean,inst_DPD_mean_std,inst_DPD_mean_min,inst_DPD_mean_max,inst_AMT_PAYMENT_max_mean,inst_AMT_PAYMENT_max_std,inst_AMT_PAYMENT_max_min,inst_AMT_PAYMENT_max_max,inst_DAYS_ENTRY_PAYMENT_mean_mean,inst_DAYS_ENTRY_PAYMENT_mean_std,inst_DAYS_ENTRY_PAYMENT_mean_min,inst_DAYS_ENTRY_PAYMENT_mean_max,inst_DBD_std_mean,inst_DBD_std_std,inst_DBD_std_min,inst_DBD_std_max,inst_PAYMENT_DIFF_max_mean,inst_PAYMENT_DIFF_max_std,inst_PAYMENT_DIFF_max_min,inst_PAYMENT_DIFF_max_max,inst_AMT_INSTALMENT_min_mean,inst_AMT_INSTALMENT_min_std,inst_AMT_INSTALMENT_min_min,inst_AMT_INSTALMENT_min_max
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1
100001,-2237.5,874.691088,-2856.0,-1619.0,1.125,0.176777,1.0,1.25,3.175426,4.490731,0.0,6.350853,1.5,0.707107,1.0,2.0,9.026983,1.042529,8.289803,9.764162,1.0,0.0,1.0,1.0,0.370628,0.523916,0.000163,0.741093,0.0,0.0,0.0,0.0,8.471116,0.256549,8.289709,8.652523,0.0,0.0,0.0,0.0,3.0,4.242641,0.0,6.0,8.471116,0.256549,8.289709,8.652523,8.285749,0.005334,8.281977,8.289521,0.370628,0.523916,0.000163,0.741093,3.5,0.707107,3,4,-2315.5,849.235244,-2916.0,-1715.0,7.75,10.960155,0.0,15.5,-2312.5,853.477885,-2916.0,-1709.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-2242.0,868.327127,-2856.0,-1628.0,18.0,25.455844,0.0,36.0,1.0,0.0,1.0,1.0,5.5,7.778175,0.0,11.0,34.364917,6.172924,30.0,38.729833,0.25,0.353553,0.0,0.5,36.844052,8.738705,30.664855,43.02325,0.0,0.0,0.0,0.0,-2275.0,864.084487,-2886.0,-1664.0,0.0,0.0,0.0,0.0,1.833333,2.592725,0.0,3.666667,9.026983,1.042529,8.289803,9.764162,-2280.916667,850.531607,-2882.333333,-1679.5,6.910137,9.77241,0.0,13.820275,0.0,0.0,0.0,0.0,8.285749,0.005334,8.281977,8.289521
100002,-25.0,,-25.0,-25.0,1.052632,,1.052632,1.052632,0.0,,0.0,0.0,2.0,,2.0,2.0,10.879833,,10.879833,10.879833,1.0,,1.0,1.0,0.400825,,0.400825,0.400825,0.0,,0.0,0.0,9.224634,,9.224634,9.224634,0.0,,0.0,0.0,12.0,,12.0,12.0,9.224634,,9.224634,9.224634,9.132679,,9.132679,9.132679,0.400825,,0.400825,0.400825,19.0,,19,19,-587.0,,-587.0,-587.0,20.421053,,20.421053,20.421053,-565.0,,-565.0,-565.0,0.0,,0.0,0.0,1.0,,1.0,1.0,1.0,,1.0,1.0,-49.0,,-49.0,-49.0,31.0,,31.0,31.0,1.0,,1.0,1.0,0.0,,0.0,0.0,168.81943,,168.81943,168.81943,0.229416,,0.229416,0.229416,172.058877,,172.058877,172.058877,0.0,,0.0,0.0,-295.0,,-295.0,-295.0,0.0,,0.0,0.0,0.0,,0.0,0.0,10.879833,,10.879833,10.879833,-315.421053,,-315.421053,-315.421053,4.925171,,4.925171,4.925171,0.0,,0.0,0.0,9.132679,,9.132679,9.132679
100003,-1054.333333,803.569744,-1980.0,-536.0,1.047619,0.082479,1.0,1.142857,0.0,0.0,0.0,0.0,1.333333,0.57735,1.0,2.0,11.042744,2.210992,8.815564,13.237184,1.0,0.0,1.0,1.0,0.221007,0.378419,0.001851,0.657966,0.0,0.0,0.0,0.0,10.544809,1.535398,8.81464,11.745057,0.0,0.0,0.0,0.0,4.333333,4.163332,1.0,9.0,10.544809,1.535398,8.81464,11.745057,10.457264,1.447079,8.804471,11.496369,0.221007,0.378419,0.001851,0.657966,8.333333,3.21455,6,12,-1283.0,902.581298,-2324.0,-719.0,7.448413,3.422911,4.428571,11.166667,-1274.333333,897.827563,-2310.0,-716.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,-1063.333333,800.327641,-1985.0,-544.0,12.0,3.464102,8.0,14.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,76.366269,27.879912,56.124861,108.166538,0.125988,0.218218,0.0,0.377964,76.380137,30.572883,54.153178,111.24594,0.0,0.0,0.0,0.0,-1164.333333,850.637604,-2145.0,-626.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.042744,2.210992,8.815564,13.237184,-1171.781746,850.230624,-2151.75,-630.428571,2.48377,0.985781,1.718249,3.596084,0.0,0.0,0.0,0.0,10.457264,1.447079,8.804471,11.496369
100004,-724.0,,-724.0,-724.0,1.333333,,1.333333,1.333333,0.0,,0.0,0.0,2.0,,2.0,2.0,9.266245,,9.266245,9.266245,1.0,,1.0,1.0,0.392513,,0.392513,0.392513,0.0,,0.0,0.0,8.81301,,8.81301,8.81301,0.0,,0.0,0.0,3.0,,3.0,3.0,8.81301,,8.81301,8.81301,8.586393,,8.586393,8.586393,0.392513,,0.392513,0.392513,3.0,,3,3,-795.0,,-795.0,-795.0,7.666667,,7.666667,7.666667,-784.0,,-784.0,-784.0,0.0,,0.0,0.0,1.0,,1.0,1.0,1.0,,1.0,1.0,-727.0,,-727.0,-727.0,11.0,,11.0,11.0,1.0,,1.0,1.0,0.0,,0.0,0.0,30.0,,30.0,30.0,0.57735,,0.57735,0.57735,34.019602,,34.019602,34.019602,0.0,,0.0,0.0,-754.0,,-754.0,-754.0,0.0,,0.0,0.0,0.0,,0.0,0.0,9.266245,,9.266245,9.266245,-761.666667,,-761.666667,-761.666667,4.163332,,4.163332,4.163332,0.0,,0.0,0.0,8.586393,,8.586393,8.586393
100005,-466.0,,-466.0,-466.0,1.111111,,1.111111,1.111111,0.333333,,0.333333,0.333333,2.0,,2.0,2.0,9.778901,,9.778901,9.778901,1.0,,1.0,1.0,0.433192,,0.433192,0.433192,0.0,,0.0,0.0,8.623723,,8.623723,8.623723,0.0,,0.0,0.0,0.0,,0.0,0.0,8.623723,,8.623723,8.623723,8.479325,,8.479325,8.479325,0.433192,,0.433192,0.433192,9.0,,9,9,-736.0,,-736.0,-736.0,23.666667,,23.666667,23.666667,-706.0,,-706.0,-706.0,0.0,,0.0,0.0,1.0,,1.0,1.0,1.0,,1.0,1.0,-470.0,,-470.0,-470.0,37.0,,37.0,37.0,1.0,,1.0,1.0,1.0,,1.0,1.0,82.158384,,82.158384,82.158384,0.333333,,0.333333,0.333333,90.554005,,90.554005,90.554005,0.0,,0.0,0.0,-586.0,,-586.0,-586.0,0.0,,0.0,0.0,0.111111,,0.111111,0.111111,9.778901,,9.778901,9.778901,-609.555556,,-609.555556,-609.555556,13.28533,,13.28533,13.28533,0.0,,0.0,0.0,8.479325,,8.479325,8.479325


In [43]:
# clear memory
del inst

### 4.3.2. POCA DATA

In [44]:
# check poca data
poca.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,CNT_INSTALMENT,CNT_INSTALMENT_FUTURE,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,1803195,182943,-31,48.0,45.0,Active,0,0
1,1715348,367990,-33,36.0,35.0,Active,0,0
2,1784872,397406,-32,12.0,9.0,Active,0,0
3,1903291,269225,-35,48.0,42.0,Active,0,0
4,2341044,334279,-35,36.0,35.0,Active,0,0


In [45]:
### FEATURE ENGINEERING

# installments percentage
poca["INSTALLMENTS_PERCENT"] = poca["CNT_INSTALMENT_FUTURE"] / poca["CNT_INSTALMENT"]

In [46]:
# dummy encodnig for factors
poca = pd.get_dummies(poca, drop_first = True)

In [47]:
# count missings
nas = count_missings(poca)
nas.head()

Unnamed: 0,Total,Percent
INSTALLMENTS_PERCENT,26184,0.261804
CNT_INSTALMENT_FUTURE,26087,0.260835
CNT_INSTALMENT,26071,0.260675


In [48]:
### AGGREGATIONS

# count months
cnt_mon = poca[["SK_ID_PREV", "MONTHS_BALANCE"]].groupby("SK_ID_PREV").count()
del poca["MONTHS_BALANCE"]

# delete ID_CURR
poca_id = poca[["SK_ID_CURR", "SK_ID_PREV"]]
del poca["SK_ID_CURR"]

# aggregate data
agg_poca = aggregate_data(poca, id_var = "SK_ID_PREV")

# add month count
agg_poca["poca_MON_COUNT"] = cnt_mon

# put back ID_CURR
poca_id = poca_id.drop_duplicates()
agg_poca = poca_id.merge(right = agg_poca.reset_index(), how = "right", on = "SK_ID_PREV")
del agg_poca["SK_ID_PREV"]

# aggregate data (round 2)
agg_poca = aggregate_data(agg_poca, id_var = "SK_ID_CURR", label = "poca")

- Preparing the dataset...
- Extracted 0 factors and 13 numerics...
- Aggregating numeric features...
- Final dimensions: (936325, 52)
- Preparing the dataset...
- Extracted 0 factors and 53 numerics...
- Aggregating numeric features...
- Final dimensions: (337252, 212)


In [49]:
# count missings
nas = count_missings(agg_poca)
nas.head()

Unnamed: 0,Total,Percent
poca_INSTALLMENTS_PERCENT_std_std,106269,31.510265
poca_CNT_INSTALMENT_FUTURE_std_std,106269,31.510265
poca_CNT_INSTALMENT_std_std,106269,31.510265
poca_NAME_CONTRACT_STATUS_Demand_std_std,106037,31.441474
poca_NAME_CONTRACT_STATUS_Approved_std_std,106037,31.441474


In [50]:
# check data
agg_poca.head()

Unnamed: 0_level_0,poca_CNT_INSTALMENT_mean_mean,poca_CNT_INSTALMENT_mean_std,poca_CNT_INSTALMENT_mean_min,poca_CNT_INSTALMENT_mean_max,poca_SK_DPD_mean_mean,poca_SK_DPD_mean_std,poca_SK_DPD_mean_min,poca_SK_DPD_mean_max,poca_NAME_CONTRACT_STATUS_Amortized debt_max_mean,poca_NAME_CONTRACT_STATUS_Amortized debt_max_std,poca_NAME_CONTRACT_STATUS_Amortized debt_max_min,poca_NAME_CONTRACT_STATUS_Amortized debt_max_max,poca_NAME_CONTRACT_STATUS_Demand_min_mean,poca_NAME_CONTRACT_STATUS_Demand_min_std,poca_NAME_CONTRACT_STATUS_Demand_min_min,poca_NAME_CONTRACT_STATUS_Demand_min_max,poca_NAME_CONTRACT_STATUS_Approved_min_mean,poca_NAME_CONTRACT_STATUS_Approved_min_std,poca_NAME_CONTRACT_STATUS_Approved_min_min,poca_NAME_CONTRACT_STATUS_Approved_min_max,poca_INSTALLMENTS_PERCENT_max_mean,poca_INSTALLMENTS_PERCENT_max_std,poca_INSTALLMENTS_PERCENT_max_min,poca_INSTALLMENTS_PERCENT_max_max,poca_NAME_CONTRACT_STATUS_Signed_max_mean,poca_NAME_CONTRACT_STATUS_Signed_max_std,poca_NAME_CONTRACT_STATUS_Signed_max_min,poca_NAME_CONTRACT_STATUS_Signed_max_max,poca_NAME_CONTRACT_STATUS_Amortized debt_min_mean,poca_NAME_CONTRACT_STATUS_Amortized debt_min_std,poca_NAME_CONTRACT_STATUS_Amortized debt_min_min,poca_NAME_CONTRACT_STATUS_Amortized debt_min_max,poca_NAME_CONTRACT_STATUS_Returned to the store_min_mean,poca_NAME_CONTRACT_STATUS_Returned to the store_min_std,poca_NAME_CONTRACT_STATUS_Returned to the store_min_min,poca_NAME_CONTRACT_STATUS_Returned to the store_min_max,poca_NAME_CONTRACT_STATUS_Demand_std_mean,poca_NAME_CONTRACT_STATUS_Demand_std_std,poca_NAME_CONTRACT_STATUS_Demand_std_min,poca_NAME_CONTRACT_STATUS_Demand_std_max,poca_CNT_INSTALMENT_FUTURE_std_mean,poca_CNT_INSTALMENT_FUTURE_std_std,poca_CNT_INSTALMENT_FUTURE_std_min,poca_CNT_INSTALMENT_FUTURE_std_max,poca_NAME_CONTRACT_STATUS_Canceled_std_mean,poca_NAME_CONTRACT_STATUS_Canceled_std_std,poca_NAME_CONTRACT_STATUS_Canceled_std_min,poca_NAME_CONTRACT_STATUS_Canceled_std_max,poca_NAME_CONTRACT_STATUS_XNA_std_mean,poca_NAME_CONTRACT_STATUS_XNA_std_std,poca_NAME_CONTRACT_STATUS_XNA_std_min,poca_NAME_CONTRACT_STATUS_XNA_std_max,poca_NAME_CONTRACT_STATUS_Completed_mean_mean,poca_NAME_CONTRACT_STATUS_Completed_mean_std,poca_NAME_CONTRACT_STATUS_Completed_mean_min,poca_NAME_CONTRACT_STATUS_Completed_mean_max,poca_NAME_CONTRACT_STATUS_Completed_min_mean,poca_NAME_CONTRACT_STATUS_Completed_min_std,poca_NAME_CONTRACT_STATUS_Completed_min_min,poca_NAME_CONTRACT_STATUS_Completed_min_max,poca_NAME_CONTRACT_STATUS_Demand_max_mean,poca_NAME_CONTRACT_STATUS_Demand_max_std,poca_NAME_CONTRACT_STATUS_Demand_max_min,poca_NAME_CONTRACT_STATUS_Demand_max_max,poca_NAME_CONTRACT_STATUS_Returned to the store_mean_mean,poca_NAME_CONTRACT_STATUS_Returned to the store_mean_std,poca_NAME_CONTRACT_STATUS_Returned to the store_mean_min,poca_NAME_CONTRACT_STATUS_Returned to the store_mean_max,poca_NAME_CONTRACT_STATUS_Completed_std_mean,poca_NAME_CONTRACT_STATUS_Completed_std_std,poca_NAME_CONTRACT_STATUS_Completed_std_min,poca_NAME_CONTRACT_STATUS_Completed_std_max,poca_NAME_CONTRACT_STATUS_Amortized debt_std_mean,poca_NAME_CONTRACT_STATUS_Amortized debt_std_std,poca_NAME_CONTRACT_STATUS_Amortized debt_std_min,poca_NAME_CONTRACT_STATUS_Amortized debt_std_max,poca_NAME_CONTRACT_STATUS_Amortized debt_mean_mean,poca_NAME_CONTRACT_STATUS_Amortized debt_mean_std,poca_NAME_CONTRACT_STATUS_Amortized debt_mean_min,poca_NAME_CONTRACT_STATUS_Amortized debt_mean_max,poca_CNT_INSTALMENT_FUTURE_mean_mean,poca_CNT_INSTALMENT_FUTURE_mean_std,poca_CNT_INSTALMENT_FUTURE_mean_min,poca_CNT_INSTALMENT_FUTURE_mean_max,poca_INSTALLMENTS_PERCENT_min_mean,poca_INSTALLMENTS_PERCENT_min_std,poca_INSTALLMENTS_PERCENT_min_min,poca_INSTALLMENTS_PERCENT_min_max,poca_CNT_INSTALMENT_FUTURE_max_mean,poca_CNT_INSTALMENT_FUTURE_max_std,poca_CNT_INSTALMENT_FUTURE_max_min,poca_CNT_INSTALMENT_FUTURE_max_max,poca_INSTALLMENTS_PERCENT_std_mean,poca_INSTALLMENTS_PERCENT_std_std,poca_INSTALLMENTS_PERCENT_std_min,poca_INSTALLMENTS_PERCENT_std_max,poca_NAME_CONTRACT_STATUS_Approved_std_mean,poca_NAME_CONTRACT_STATUS_Approved_std_std,poca_NAME_CONTRACT_STATUS_Approved_std_min,poca_NAME_CONTRACT_STATUS_Approved_std_max,poca_NAME_CONTRACT_STATUS_XNA_max_mean,poca_NAME_CONTRACT_STATUS_XNA_max_std,poca_NAME_CONTRACT_STATUS_XNA_max_min,poca_NAME_CONTRACT_STATUS_XNA_max_max,poca_SK_DPD_DEF_std_mean,poca_SK_DPD_DEF_std_std,poca_SK_DPD_DEF_std_min,poca_SK_DPD_DEF_std_max,poca_SK_DPD_std_mean,poca_SK_DPD_std_std,poca_SK_DPD_std_min,poca_SK_DPD_std_max,poca_NAME_CONTRACT_STATUS_Signed_mean_mean,poca_NAME_CONTRACT_STATUS_Signed_mean_std,poca_NAME_CONTRACT_STATUS_Signed_mean_min,poca_NAME_CONTRACT_STATUS_Signed_mean_max,poca_poca_MON_COUNT_mean,poca_poca_MON_COUNT_std,poca_poca_MON_COUNT_min,poca_poca_MON_COUNT_max,poca_SK_DPD_min_mean,poca_SK_DPD_min_std,poca_SK_DPD_min_min,poca_SK_DPD_min_max,poca_NAME_CONTRACT_STATUS_Returned to the store_max_mean,poca_NAME_CONTRACT_STATUS_Returned to the store_max_std,poca_NAME_CONTRACT_STATUS_Returned to the store_max_min,poca_NAME_CONTRACT_STATUS_Returned to the store_max_max,poca_NAME_CONTRACT_STATUS_XNA_mean_mean,poca_NAME_CONTRACT_STATUS_XNA_mean_std,poca_NAME_CONTRACT_STATUS_XNA_mean_min,poca_NAME_CONTRACT_STATUS_XNA_mean_max,poca_CNT_INSTALMENT_min_mean,poca_CNT_INSTALMENT_min_std,poca_CNT_INSTALMENT_min_min,poca_CNT_INSTALMENT_min_max,poca_NAME_CONTRACT_STATUS_Completed_max_mean,poca_NAME_CONTRACT_STATUS_Completed_max_std,poca_NAME_CONTRACT_STATUS_Completed_max_min,poca_NAME_CONTRACT_STATUS_Completed_max_max,poca_SK_DPD_DEF_min_mean,poca_SK_DPD_DEF_min_std,poca_SK_DPD_DEF_min_min,poca_SK_DPD_DEF_min_max,poca_NAME_CONTRACT_STATUS_Canceled_mean_mean,poca_NAME_CONTRACT_STATUS_Canceled_mean_std,poca_NAME_CONTRACT_STATUS_Canceled_mean_min,poca_NAME_CONTRACT_STATUS_Canceled_mean_max,poca_NAME_CONTRACT_STATUS_Approved_max_mean,poca_NAME_CONTRACT_STATUS_Approved_max_std,poca_NAME_CONTRACT_STATUS_Approved_max_min,poca_NAME_CONTRACT_STATUS_Approved_max_max,poca_NAME_CONTRACT_STATUS_XNA_min_mean,poca_NAME_CONTRACT_STATUS_XNA_min_std,poca_NAME_CONTRACT_STATUS_XNA_min_min,poca_NAME_CONTRACT_STATUS_XNA_min_max,poca_NAME_CONTRACT_STATUS_Signed_std_mean,poca_NAME_CONTRACT_STATUS_Signed_std_std,poca_NAME_CONTRACT_STATUS_Signed_std_min,poca_NAME_CONTRACT_STATUS_Signed_std_max,poca_NAME_CONTRACT_STATUS_Approved_mean_mean,poca_NAME_CONTRACT_STATUS_Approved_mean_std,poca_NAME_CONTRACT_STATUS_Approved_mean_min,poca_NAME_CONTRACT_STATUS_Approved_mean_max,poca_NAME_CONTRACT_STATUS_Demand_mean_mean,poca_NAME_CONTRACT_STATUS_Demand_mean_std,poca_NAME_CONTRACT_STATUS_Demand_mean_min,poca_NAME_CONTRACT_STATUS_Demand_mean_max,poca_SK_DPD_max_mean,poca_SK_DPD_max_std,poca_SK_DPD_max_min,poca_SK_DPD_max_max,poca_CNT_INSTALMENT_std_mean,poca_CNT_INSTALMENT_std_std,poca_CNT_INSTALMENT_std_min,poca_CNT_INSTALMENT_std_max,poca_INSTALLMENTS_PERCENT_mean_mean,poca_INSTALLMENTS_PERCENT_mean_std,poca_INSTALLMENTS_PERCENT_mean_min,poca_INSTALLMENTS_PERCENT_mean_max,poca_CNT_INSTALMENT_FUTURE_min_mean,poca_CNT_INSTALMENT_FUTURE_min_std,poca_CNT_INSTALMENT_FUTURE_min_min,poca_CNT_INSTALMENT_FUTURE_min_max,poca_NAME_CONTRACT_STATUS_Returned to the store_std_mean,poca_NAME_CONTRACT_STATUS_Returned to the store_std_std,poca_NAME_CONTRACT_STATUS_Returned to the store_std_min,poca_NAME_CONTRACT_STATUS_Returned to the store_std_max,poca_SK_DPD_DEF_mean_mean,poca_SK_DPD_DEF_mean_std,poca_SK_DPD_DEF_mean_min,poca_SK_DPD_DEF_mean_max,poca_NAME_CONTRACT_STATUS_Canceled_max_mean,poca_NAME_CONTRACT_STATUS_Canceled_max_std,poca_NAME_CONTRACT_STATUS_Canceled_max_min,poca_NAME_CONTRACT_STATUS_Canceled_max_max,poca_NAME_CONTRACT_STATUS_Signed_min_mean,poca_NAME_CONTRACT_STATUS_Signed_min_std,poca_NAME_CONTRACT_STATUS_Signed_min_min,poca_NAME_CONTRACT_STATUS_Signed_min_max,poca_SK_DPD_DEF_max_mean,poca_SK_DPD_DEF_max_std,poca_SK_DPD_DEF_max_min,poca_SK_DPD_DEF_max_max,poca_CNT_INSTALMENT_max_mean,poca_CNT_INSTALMENT_max_std,poca_CNT_INSTALMENT_max_min,poca_CNT_INSTALMENT_max_max,poca_NAME_CONTRACT_STATUS_Canceled_min_mean,poca_NAME_CONTRACT_STATUS_Canceled_min_std,poca_NAME_CONTRACT_STATUS_Canceled_min_min,poca_NAME_CONTRACT_STATUS_Canceled_min_max
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1
100001,4.0,0.0,4.0,4.0,0.875,1.237437,0.0,1.75,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0,0,0.75,0.353553,0.5,1.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,1.269283,0.441031,0.957427,1.581139,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.225,0.035355,0.2,0.25,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.473607,0.037326,0.447214,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.375,0.883883,0.75,2.0,0.0,0.0,0.0,0.0,3.0,1.414214,2.0,4.0,0.317321,0.110258,0.239357,0.395285,0.0,0.0,0.0,0.0,0.0,0.0,0,0,1.75,2.474874,0.0,3.5,1.75,2.474874,0.0,3.5,0.0,0.0,0.0,0.0,4.5,0.707107,4,5,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,4.0,0.0,4.0,4.0,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.5,4.949747,0,7,0.0,0.0,0.0,0.0,0.34375,0.220971,0.1875,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.875,1.237437,0.0,1.75,0.0,0.0,0,0,0.0,0.0,0,0,3.5,4.949747,0,7,4.0,0.0,4.0,4.0,0.0,0.0,0,0
100002,24.0,,24.0,24.0,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.0,,0,0,1.0,,1.0,1.0,0.0,,0,0,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,5.627314,,5.627314,5.627314,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,15.0,,15.0,15.0,0.25,,0.25,0.25,24.0,,24.0,24.0,0.234471,,0.234471,0.234471,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,19.0,,19,19,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,24.0,,24.0,24.0,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.625,,0.625,0.625,6.0,,6.0,6.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,24.0,,24.0,24.0,0.0,,0,0
100003,9.791667,3.298516,6.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1.0,1.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,3.209241,0.822291,2.263846,3.758324,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.072169,0.0,0.125,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.235702,0.204124,0.0,0.353553,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.666667,2.722399,2.625,7.875,0.027778,0.048113,0.0,0.083333,10.0,3.464102,6.0,12.0,0.330321,0.041186,0.300463,0.377308,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.333333,2.309401,8,12,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,8.333333,3.21455,6.0,12.0,0.666667,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.589256,1.020621,0.0,1.767767,0.545139,0.109416,0.4375,0.65625,0.333333,0.57735,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,10.0,3.464102,6.0,12.0,0.0,0.0,0,0
100004,3.75,,3.75,3.75,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.0,,0,0,1.0,,1.0,1.0,0.0,,0,0,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,1.707825,,1.707825,1.707825,0.0,,0.0,0.0,0.0,,0.0,0.0,0.25,,0.25,0.25,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.5,,0.5,0.5,0.0,,0.0,0.0,0.0,,0.0,0.0,2.25,,2.25,2.25,0.0,,0.0,0.0,4.0,,4.0,4.0,0.426956,,0.426956,0.426956,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,4.0,,4,4,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,3.0,,3.0,3.0,1.0,,1,1,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.5,,0.5,0.5,0.5625,,0.5625,0.5625,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,4.0,,4.0,4.0,0.0,,0,0
100005,11.7,,11.7,11.7,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.0,,0,0,1.0,,1.0,1.0,1.0,,1,1,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,3.614784,,3.614784,3.614784,0.0,,0.0,0.0,0.0,,0.0,0.0,0.090909,,0.090909,0.090909,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.301511,,0.301511,0.301511,0.0,,0.0,0.0,0.0,,0.0,0.0,7.2,,7.2,7.2,0.0,,0.0,0.0,12.0,,12.0,12.0,0.301232,,0.301232,0.301232,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.090909,,0.090909,0.090909,11.0,,11,11,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,9.0,,9.0,9.0,1.0,,1,1,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.301511,,0.301511,0.301511,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.948683,,0.948683,0.948683,0.6,,0.6,0.6,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,12.0,,12.0,12.0,0.0,,0,0


In [51]:
# clear memory
del poca

### 4.3.3. CARD DATA

In [52]:
# check card data
card.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,1800.0,1800.0,0.0,0.0,0.0,0.0,1,0.0,1.0,35.0,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.555,64875.555,1.0,1,0.0,0.0,69.0,Active,0,0
2,1740877,371185,-7,31815.225,450000,0.0,0.0,0.0,0.0,2250.0,2250.0,2250.0,26926.425,31460.085,31460.085,0.0,0,0.0,0.0,30.0,Active,0,0
3,1389973,337855,-4,236572.11,225000,2250.0,2250.0,0.0,0.0,11795.76,11925.0,11925.0,224949.285,233048.97,233048.97,1.0,1,0.0,0.0,10.0,Active,0,0
4,1891521,126868,-1,453919.455,450000,0.0,11547.0,0.0,11547.0,22924.89,27000.0,27000.0,443044.395,453919.455,453919.455,0.0,1,0.0,1.0,101.0,Active,0,0


In [53]:
### FEATURE ENGINEERING

# logarithms
log_vars = ["AMT_BALANCE", "AMT_CREDIT_LIMIT_ACTUAL", "AMT_DRAWINGS_ATM_CURRENT", "AMT_DRAWINGS_CURRENT",
            "AMT_DRAWINGS_OTHER_CURRENT", "AMT_DRAWINGS_POS_CURRENT", "AMT_INST_MIN_REGULARITY",
            "AMT_PAYMENT_CURRENT", "AMT_PAYMENT_TOTAL_CURRENT", "AMT_RECEIVABLE_PRINCIPAL",
            "AMT_RECIVABLE", "AMT_TOTAL_RECEIVABLE"]
card = create_logs(card, log_vars, replace = True)

In [54]:
# dummy encodnig for factors
card = pd.get_dummies(card, drop_first = True)

In [55]:
# count missings
nas = count_missings(card)
nas.head()

Unnamed: 0,Total,Percent
AMT_PAYMENT_CURRENT,767988,19.998063
AMT_DRAWINGS_ATM_CURRENT,749816,19.524872
CNT_DRAWINGS_POS_CURRENT,749816,19.524872
CNT_DRAWINGS_OTHER_CURRENT,749816,19.524872
AMT_DRAWINGS_OTHER_CURRENT,749816,19.524872


In [56]:
### AGGREGATIONS

# count months
cnt_mon = card[["SK_ID_PREV", "MONTHS_BALANCE"]].groupby("SK_ID_PREV").count()
del card["MONTHS_BALANCE"]

# delete ID_CURR
card_id = card[["SK_ID_CURR", "SK_ID_PREV"]]
del card["SK_ID_CURR"]

# aggregate data
agg_card = aggregate_data(card, id_var = "SK_ID_PREV")

# add month count
agg_card["card_MON_COUNT"] = cnt_mon

# put back ID_CURR
card_id = card_id.drop_duplicates()
agg_card = card_id.merge(right = agg_card.reset_index(), how = "right", on = "SK_ID_PREV")
del agg_card["SK_ID_PREV"]

# aggregate data (round 2)
agg_card = aggregate_data(agg_card, id_var = "SK_ID_CURR", label = "card")

- Preparing the dataset...
- Extracted 0 factors and 25 numerics...
- Aggregating numeric features...
- Final dimensions: (104307, 100)
- Preparing the dataset...
- Extracted 0 factors and 101 numerics...
- Aggregating numeric features...
- Final dimensions: (103558, 404)


In [57]:
# count missings
nas = count_missings(agg_card)
nas.head()

Unnamed: 0,Total,Percent
card_CNT_DRAWINGS_OTHER_CURRENT_std_std,103248,99.700651
card_AMT_DRAWINGS_ATM_CURRENT_std_std,103248,99.700651
card_CNT_DRAWINGS_ATM_CURRENT_std_std,103248,99.700651
card_CNT_DRAWINGS_POS_CURRENT_std_std,103248,99.700651
card_AMT_DRAWINGS_OTHER_CURRENT_std_std,103248,99.700651


In [58]:
# check data
agg_card.head()

Unnamed: 0_level_0,card_AMT_PAYMENT_CURRENT_max_mean,card_AMT_PAYMENT_CURRENT_max_std,card_AMT_PAYMENT_CURRENT_max_min,card_AMT_PAYMENT_CURRENT_max_max,card_AMT_DRAWINGS_ATM_CURRENT_max_mean,card_AMT_DRAWINGS_ATM_CURRENT_max_std,card_AMT_DRAWINGS_ATM_CURRENT_max_min,card_AMT_DRAWINGS_ATM_CURRENT_max_max,card_NAME_CONTRACT_STATUS_Demand_min_mean,card_NAME_CONTRACT_STATUS_Demand_min_std,card_NAME_CONTRACT_STATUS_Demand_min_min,card_NAME_CONTRACT_STATUS_Demand_min_max,card_AMT_TOTAL_RECEIVABLE_max_mean,card_AMT_TOTAL_RECEIVABLE_max_std,card_AMT_TOTAL_RECEIVABLE_max_min,card_AMT_TOTAL_RECEIVABLE_max_max,card_NAME_CONTRACT_STATUS_Approved_min_mean,card_NAME_CONTRACT_STATUS_Approved_min_std,card_NAME_CONTRACT_STATUS_Approved_min_min,card_NAME_CONTRACT_STATUS_Approved_min_max,card_CNT_DRAWINGS_POS_CURRENT_mean_mean,card_CNT_DRAWINGS_POS_CURRENT_mean_std,card_CNT_DRAWINGS_POS_CURRENT_mean_min,card_CNT_DRAWINGS_POS_CURRENT_mean_max,card_AMT_DRAWINGS_OTHER_CURRENT_max_mean,card_AMT_DRAWINGS_OTHER_CURRENT_max_std,card_AMT_DRAWINGS_OTHER_CURRENT_max_min,card_AMT_DRAWINGS_OTHER_CURRENT_max_max,card_AMT_DRAWINGS_ATM_CURRENT_std_mean,card_AMT_DRAWINGS_ATM_CURRENT_std_std,card_AMT_DRAWINGS_ATM_CURRENT_std_min,card_AMT_DRAWINGS_ATM_CURRENT_std_max,card_AMT_TOTAL_RECEIVABLE_min_mean,card_AMT_TOTAL_RECEIVABLE_min_std,card_AMT_TOTAL_RECEIVABLE_min_min,card_AMT_TOTAL_RECEIVABLE_min_max,card_NAME_CONTRACT_STATUS_Demand_std_mean,card_NAME_CONTRACT_STATUS_Demand_std_std,card_NAME_CONTRACT_STATUS_Demand_std_min,card_NAME_CONTRACT_STATUS_Demand_std_max,card_NAME_CONTRACT_STATUS_Refused_std_mean,card_NAME_CONTRACT_STATUS_Refused_std_std,card_NAME_CONTRACT_STATUS_Refused_std_min,card_NAME_CONTRACT_STATUS_Refused_std_max,card_NAME_CONTRACT_STATUS_Refused_max_mean,card_NAME_CONTRACT_STATUS_Refused_max_std,card_NAME_CONTRACT_STATUS_Refused_max_min,card_NAME_CONTRACT_STATUS_Refused_max_max,card_AMT_RECEIVABLE_PRINCIPAL_std_mean,card_AMT_RECEIVABLE_PRINCIPAL_std_std,card_AMT_RECEIVABLE_PRINCIPAL_std_min,card_AMT_RECEIVABLE_PRINCIPAL_std_max,card_CNT_INSTALMENT_MATURE_CUM_std_mean,card_CNT_INSTALMENT_MATURE_CUM_std_std,card_CNT_INSTALMENT_MATURE_CUM_std_min,card_CNT_INSTALMENT_MATURE_CUM_std_max,card_CNT_DRAWINGS_OTHER_CURRENT_std_mean,card_CNT_DRAWINGS_OTHER_CURRENT_std_std,card_CNT_DRAWINGS_OTHER_CURRENT_std_min,card_CNT_DRAWINGS_OTHER_CURRENT_std_max,card_NAME_CONTRACT_STATUS_Completed_min_mean,card_NAME_CONTRACT_STATUS_Completed_min_std,card_NAME_CONTRACT_STATUS_Completed_min_min,card_NAME_CONTRACT_STATUS_Completed_min_max,card_AMT_RECIVABLE_mean_mean,card_AMT_RECIVABLE_mean_std,card_AMT_RECIVABLE_mean_min,card_AMT_RECIVABLE_mean_max,card_NAME_CONTRACT_STATUS_Demand_max_mean,card_NAME_CONTRACT_STATUS_Demand_max_std,card_NAME_CONTRACT_STATUS_Demand_max_min,card_NAME_CONTRACT_STATUS_Demand_max_max,card_AMT_DRAWINGS_CURRENT_std_mean,card_AMT_DRAWINGS_CURRENT_std_std,card_AMT_DRAWINGS_CURRENT_std_min,card_AMT_DRAWINGS_CURRENT_std_max,card_AMT_PAYMENT_CURRENT_mean_mean,card_AMT_PAYMENT_CURRENT_mean_std,card_AMT_PAYMENT_CURRENT_mean_min,card_AMT_PAYMENT_CURRENT_mean_max,card_CNT_DRAWINGS_CURRENT_mean_mean,card_CNT_DRAWINGS_CURRENT_mean_std,card_CNT_DRAWINGS_CURRENT_mean_min,card_CNT_DRAWINGS_CURRENT_mean_max,card_AMT_DRAWINGS_CURRENT_min_mean,card_AMT_DRAWINGS_CURRENT_min_std,card_AMT_DRAWINGS_CURRENT_min_min,card_AMT_DRAWINGS_CURRENT_min_max,card_NAME_CONTRACT_STATUS_Approved_std_mean,card_NAME_CONTRACT_STATUS_Approved_std_std,card_NAME_CONTRACT_STATUS_Approved_std_min,card_NAME_CONTRACT_STATUS_Approved_std_max,card_AMT_TOTAL_RECEIVABLE_mean_mean,card_AMT_TOTAL_RECEIVABLE_mean_std,card_AMT_TOTAL_RECEIVABLE_mean_min,card_AMT_TOTAL_RECEIVABLE_mean_max,card_AMT_INST_MIN_REGULARITY_std_mean,card_AMT_INST_MIN_REGULARITY_std_std,card_AMT_INST_MIN_REGULARITY_std_min,card_AMT_INST_MIN_REGULARITY_std_max,card_SK_DPD_std_mean,card_SK_DPD_std_std,card_SK_DPD_std_min,card_SK_DPD_std_max,card_AMT_BALANCE_mean_mean,card_AMT_BALANCE_mean_std,card_AMT_BALANCE_mean_min,card_AMT_BALANCE_mean_max,card_AMT_INST_MIN_REGULARITY_min_mean,card_AMT_INST_MIN_REGULARITY_min_std,card_AMT_INST_MIN_REGULARITY_min_min,card_AMT_INST_MIN_REGULARITY_min_max,card_NAME_CONTRACT_STATUS_Completed_max_mean,card_NAME_CONTRACT_STATUS_Completed_max_std,card_NAME_CONTRACT_STATUS_Completed_max_min,card_NAME_CONTRACT_STATUS_Completed_max_max,card_AMT_RECEIVABLE_PRINCIPAL_min_mean,card_AMT_RECEIVABLE_PRINCIPAL_min_std,card_AMT_RECEIVABLE_PRINCIPAL_min_min,card_AMT_RECEIVABLE_PRINCIPAL_min_max,card_AMT_PAYMENT_TOTAL_CURRENT_std_mean,card_AMT_PAYMENT_TOTAL_CURRENT_std_std,card_AMT_PAYMENT_TOTAL_CURRENT_std_min,card_AMT_PAYMENT_TOTAL_CURRENT_std_max,card_CNT_DRAWINGS_ATM_CURRENT_std_mean,card_CNT_DRAWINGS_ATM_CURRENT_std_std,card_CNT_DRAWINGS_ATM_CURRENT_std_min,card_CNT_DRAWINGS_ATM_CURRENT_std_max,card_AMT_RECEIVABLE_PRINCIPAL_max_mean,card_AMT_RECEIVABLE_PRINCIPAL_max_std,card_AMT_RECEIVABLE_PRINCIPAL_max_min,card_AMT_RECEIVABLE_PRINCIPAL_max_max,card_NAME_CONTRACT_STATUS_Approved_max_mean,card_NAME_CONTRACT_STATUS_Approved_max_std,card_NAME_CONTRACT_STATUS_Approved_max_min,card_NAME_CONTRACT_STATUS_Approved_max_max,card_NAME_CONTRACT_STATUS_Approved_mean_mean,card_NAME_CONTRACT_STATUS_Approved_mean_std,card_NAME_CONTRACT_STATUS_Approved_mean_min,card_NAME_CONTRACT_STATUS_Approved_mean_max,card_NAME_CONTRACT_STATUS_Signed_std_mean,card_NAME_CONTRACT_STATUS_Signed_std_std,card_NAME_CONTRACT_STATUS_Signed_std_min,card_NAME_CONTRACT_STATUS_Signed_std_max,card_CNT_DRAWINGS_OTHER_CURRENT_mean_mean,card_CNT_DRAWINGS_OTHER_CURRENT_mean_std,card_CNT_DRAWINGS_OTHER_CURRENT_mean_min,card_CNT_DRAWINGS_OTHER_CURRENT_mean_max,card_CNT_DRAWINGS_POS_CURRENT_max_mean,card_CNT_DRAWINGS_POS_CURRENT_max_std,card_CNT_DRAWINGS_POS_CURRENT_max_min,card_CNT_DRAWINGS_POS_CURRENT_max_max,card_SK_DPD_max_mean,card_SK_DPD_max_std,card_SK_DPD_max_min,card_SK_DPD_max_max,card_AMT_BALANCE_max_mean,card_AMT_BALANCE_max_std,card_AMT_BALANCE_max_min,card_AMT_BALANCE_max_max,card_AMT_PAYMENT_CURRENT_std_mean,card_AMT_PAYMENT_CURRENT_std_std,card_AMT_PAYMENT_CURRENT_std_min,card_AMT_PAYMENT_CURRENT_std_max,card_SK_DPD_DEF_mean_mean,card_SK_DPD_DEF_mean_std,card_SK_DPD_DEF_mean_min,card_SK_DPD_DEF_mean_max,card_AMT_DRAWINGS_POS_CURRENT_mean_mean,card_AMT_DRAWINGS_POS_CURRENT_mean_std,card_AMT_DRAWINGS_POS_CURRENT_mean_min,card_AMT_DRAWINGS_POS_CURRENT_mean_max,card_card_MON_COUNT_mean,card_card_MON_COUNT_std,card_card_MON_COUNT_min,card_card_MON_COUNT_max,card_CNT_DRAWINGS_ATM_CURRENT_mean_mean,card_CNT_DRAWINGS_ATM_CURRENT_mean_std,card_CNT_DRAWINGS_ATM_CURRENT_mean_min,card_CNT_DRAWINGS_ATM_CURRENT_mean_max,card_NAME_CONTRACT_STATUS_Signed_min_mean,card_NAME_CONTRACT_STATUS_Signed_min_std,card_NAME_CONTRACT_STATUS_Signed_min_min,card_NAME_CONTRACT_STATUS_Signed_min_max,card_CNT_DRAWINGS_ATM_CURRENT_max_mean,card_CNT_DRAWINGS_ATM_CURRENT_max_std,card_CNT_DRAWINGS_ATM_CURRENT_max_min,card_CNT_DRAWINGS_ATM_CURRENT_max_max,card_AMT_DRAWINGS_OTHER_CURRENT_std_mean,card_AMT_DRAWINGS_OTHER_CURRENT_std_std,card_AMT_DRAWINGS_OTHER_CURRENT_std_min,card_AMT_DRAWINGS_OTHER_CURRENT_std_max,card_CNT_DRAWINGS_POS_CURRENT_min_mean,card_CNT_DRAWINGS_POS_CURRENT_min_std,card_CNT_DRAWINGS_POS_CURRENT_min_min,card_CNT_DRAWINGS_POS_CURRENT_min_max,card_AMT_CREDIT_LIMIT_ACTUAL_max_mean,card_AMT_CREDIT_LIMIT_ACTUAL_max_std,card_AMT_CREDIT_LIMIT_ACTUAL_max_min,card_AMT_CREDIT_LIMIT_ACTUAL_max_max,card_AMT_DRAWINGS_ATM_CURRENT_min_mean,card_AMT_DRAWINGS_ATM_CURRENT_min_std,card_AMT_DRAWINGS_ATM_CURRENT_min_min,card_AMT_DRAWINGS_ATM_CURRENT_min_max,card_AMT_DRAWINGS_OTHER_CURRENT_min_mean,card_AMT_DRAWINGS_OTHER_CURRENT_min_std,card_AMT_DRAWINGS_OTHER_CURRENT_min_min,card_AMT_DRAWINGS_OTHER_CURRENT_min_max,card_SK_DPD_mean_mean,card_SK_DPD_mean_std,card_SK_DPD_mean_min,card_SK_DPD_mean_max,card_AMT_INST_MIN_REGULARITY_max_mean,card_AMT_INST_MIN_REGULARITY_max_std,card_AMT_INST_MIN_REGULARITY_max_min,card_AMT_INST_MIN_REGULARITY_max_max,card_AMT_BALANCE_min_mean,card_AMT_BALANCE_min_std,card_AMT_BALANCE_min_min,card_AMT_BALANCE_min_max,card_NAME_CONTRACT_STATUS_Signed_max_mean,card_NAME_CONTRACT_STATUS_Signed_max_std,card_NAME_CONTRACT_STATUS_Signed_max_min,card_NAME_CONTRACT_STATUS_Signed_max_max,card_NAME_CONTRACT_STATUS_Refused_mean_mean,card_NAME_CONTRACT_STATUS_Refused_mean_std,card_NAME_CONTRACT_STATUS_Refused_mean_min,card_NAME_CONTRACT_STATUS_Refused_mean_max,card_AMT_CREDIT_LIMIT_ACTUAL_mean_mean,card_AMT_CREDIT_LIMIT_ACTUAL_mean_std,card_AMT_CREDIT_LIMIT_ACTUAL_mean_min,card_AMT_CREDIT_LIMIT_ACTUAL_mean_max,card_NAME_CONTRACT_STATUS_Refused_min_mean,card_NAME_CONTRACT_STATUS_Refused_min_std,card_NAME_CONTRACT_STATUS_Refused_min_min,card_NAME_CONTRACT_STATUS_Refused_min_max,card_AMT_INST_MIN_REGULARITY_mean_mean,card_AMT_INST_MIN_REGULARITY_mean_std,card_AMT_INST_MIN_REGULARITY_mean_min,card_AMT_INST_MIN_REGULARITY_mean_max,card_AMT_DRAWINGS_POS_CURRENT_max_mean,card_AMT_DRAWINGS_POS_CURRENT_max_std,card_AMT_DRAWINGS_POS_CURRENT_max_min,card_AMT_DRAWINGS_POS_CURRENT_max_max,card_AMT_PAYMENT_CURRENT_min_mean,card_AMT_PAYMENT_CURRENT_min_std,card_AMT_PAYMENT_CURRENT_min_min,card_AMT_PAYMENT_CURRENT_min_max,card_CNT_INSTALMENT_MATURE_CUM_max_mean,card_CNT_INSTALMENT_MATURE_CUM_max_std,card_CNT_INSTALMENT_MATURE_CUM_max_min,card_CNT_INSTALMENT_MATURE_CUM_max_max,card_AMT_RECIVABLE_max_mean,card_AMT_RECIVABLE_max_std,card_AMT_RECIVABLE_max_min,card_AMT_RECIVABLE_max_max,card_CNT_DRAWINGS_CURRENT_std_mean,card_CNT_DRAWINGS_CURRENT_std_std,card_CNT_DRAWINGS_CURRENT_std_min,card_CNT_DRAWINGS_CURRENT_std_max,card_NAME_CONTRACT_STATUS_Completed_mean_mean,card_NAME_CONTRACT_STATUS_Completed_mean_std,card_NAME_CONTRACT_STATUS_Completed_mean_min,card_NAME_CONTRACT_STATUS_Completed_mean_max,card_AMT_PAYMENT_TOTAL_CURRENT_mean_mean,card_AMT_PAYMENT_TOTAL_CURRENT_mean_std,card_AMT_PAYMENT_TOTAL_CURRENT_mean_min,card_AMT_PAYMENT_TOTAL_CURRENT_mean_max,card_AMT_DRAWINGS_OTHER_CURRENT_mean_mean,card_AMT_DRAWINGS_OTHER_CURRENT_mean_std,card_AMT_DRAWINGS_OTHER_CURRENT_mean_min,card_AMT_DRAWINGS_OTHER_CURRENT_mean_max,card_AMT_CREDIT_LIMIT_ACTUAL_min_mean,card_AMT_CREDIT_LIMIT_ACTUAL_min_std,card_AMT_CREDIT_LIMIT_ACTUAL_min_min,card_AMT_CREDIT_LIMIT_ACTUAL_min_max,card_AMT_DRAWINGS_CURRENT_max_mean,card_AMT_DRAWINGS_CURRENT_max_std,card_AMT_DRAWINGS_CURRENT_max_min,card_AMT_DRAWINGS_CURRENT_max_max,card_NAME_CONTRACT_STATUS_Sent proposal_mean_mean,card_NAME_CONTRACT_STATUS_Sent proposal_mean_std,card_NAME_CONTRACT_STATUS_Sent proposal_mean_min,card_NAME_CONTRACT_STATUS_Sent proposal_mean_max,card_AMT_BALANCE_std_mean,card_AMT_BALANCE_std_std,card_AMT_BALANCE_std_min,card_AMT_BALANCE_std_max,card_CNT_DRAWINGS_OTHER_CURRENT_max_mean,card_CNT_DRAWINGS_OTHER_CURRENT_max_std,card_CNT_DRAWINGS_OTHER_CURRENT_max_min,card_CNT_DRAWINGS_OTHER_CURRENT_max_max,card_NAME_CONTRACT_STATUS_Completed_std_mean,card_NAME_CONTRACT_STATUS_Completed_std_std,card_NAME_CONTRACT_STATUS_Completed_std_min,card_NAME_CONTRACT_STATUS_Completed_std_max,card_SK_DPD_DEF_std_mean,card_SK_DPD_DEF_std_std,card_SK_DPD_DEF_std_min,card_SK_DPD_DEF_std_max,card_NAME_CONTRACT_STATUS_Sent proposal_min_mean,card_NAME_CONTRACT_STATUS_Sent proposal_min_std,card_NAME_CONTRACT_STATUS_Sent proposal_min_min,card_NAME_CONTRACT_STATUS_Sent proposal_min_max,card_AMT_DRAWINGS_ATM_CURRENT_mean_mean,card_AMT_DRAWINGS_ATM_CURRENT_mean_std,card_AMT_DRAWINGS_ATM_CURRENT_mean_min,card_AMT_DRAWINGS_ATM_CURRENT_mean_max,card_AMT_PAYMENT_TOTAL_CURRENT_max_mean,card_AMT_PAYMENT_TOTAL_CURRENT_max_std,card_AMT_PAYMENT_TOTAL_CURRENT_max_min,card_AMT_PAYMENT_TOTAL_CURRENT_max_max,card_NAME_CONTRACT_STATUS_Signed_mean_mean,card_NAME_CONTRACT_STATUS_Signed_mean_std,card_NAME_CONTRACT_STATUS_Signed_mean_min,card_NAME_CONTRACT_STATUS_Signed_mean_max,card_CNT_DRAWINGS_OTHER_CURRENT_min_mean,card_CNT_DRAWINGS_OTHER_CURRENT_min_std,card_CNT_DRAWINGS_OTHER_CURRENT_min_min,card_CNT_DRAWINGS_OTHER_CURRENT_min_max,card_NAME_CONTRACT_STATUS_Sent proposal_max_mean,card_NAME_CONTRACT_STATUS_Sent proposal_max_std,card_NAME_CONTRACT_STATUS_Sent proposal_max_min,card_NAME_CONTRACT_STATUS_Sent proposal_max_max,card_SK_DPD_min_mean,card_SK_DPD_min_std,card_SK_DPD_min_min,card_SK_DPD_min_max,card_CNT_DRAWINGS_POS_CURRENT_std_mean,card_CNT_DRAWINGS_POS_CURRENT_std_std,card_CNT_DRAWINGS_POS_CURRENT_std_min,card_CNT_DRAWINGS_POS_CURRENT_std_max,card_SK_DPD_DEF_min_mean,card_SK_DPD_DEF_min_std,card_SK_DPD_DEF_min_min,card_SK_DPD_DEF_min_max,card_CNT_DRAWINGS_CURRENT_min_mean,card_CNT_DRAWINGS_CURRENT_min_std,card_CNT_DRAWINGS_CURRENT_min_min,card_CNT_DRAWINGS_CURRENT_min_max,card_NAME_CONTRACT_STATUS_Sent proposal_std_mean,card_NAME_CONTRACT_STATUS_Sent proposal_std_std,card_NAME_CONTRACT_STATUS_Sent proposal_std_min,card_NAME_CONTRACT_STATUS_Sent proposal_std_max,card_AMT_RECIVABLE_min_mean,card_AMT_RECIVABLE_min_std,card_AMT_RECIVABLE_min_min,card_AMT_RECIVABLE_min_max,card_AMT_PAYMENT_TOTAL_CURRENT_min_mean,card_AMT_PAYMENT_TOTAL_CURRENT_min_std,card_AMT_PAYMENT_TOTAL_CURRENT_min_min,card_AMT_PAYMENT_TOTAL_CURRENT_min_max,card_CNT_DRAWINGS_ATM_CURRENT_min_mean,card_CNT_DRAWINGS_ATM_CURRENT_min_std,card_CNT_DRAWINGS_ATM_CURRENT_min_min,card_CNT_DRAWINGS_ATM_CURRENT_min_max,card_AMT_DRAWINGS_CURRENT_mean_mean,card_AMT_DRAWINGS_CURRENT_mean_std,card_AMT_DRAWINGS_CURRENT_mean_min,card_AMT_DRAWINGS_CURRENT_mean_max,card_AMT_TOTAL_RECEIVABLE_std_mean,card_AMT_TOTAL_RECEIVABLE_std_std,card_AMT_TOTAL_RECEIVABLE_std_min,card_AMT_TOTAL_RECEIVABLE_std_max,card_CNT_DRAWINGS_CURRENT_max_mean,card_CNT_DRAWINGS_CURRENT_max_std,card_CNT_DRAWINGS_CURRENT_max_min,card_CNT_DRAWINGS_CURRENT_max_max,card_NAME_CONTRACT_STATUS_Demand_mean_mean,card_NAME_CONTRACT_STATUS_Demand_mean_std,card_NAME_CONTRACT_STATUS_Demand_mean_min,card_NAME_CONTRACT_STATUS_Demand_mean_max,card_AMT_CREDIT_LIMIT_ACTUAL_std_mean,card_AMT_CREDIT_LIMIT_ACTUAL_std_std,card_AMT_CREDIT_LIMIT_ACTUAL_std_min,card_AMT_CREDIT_LIMIT_ACTUAL_std_max,card_AMT_DRAWINGS_POS_CURRENT_std_mean,card_AMT_DRAWINGS_POS_CURRENT_std_std,card_AMT_DRAWINGS_POS_CURRENT_std_min,card_AMT_DRAWINGS_POS_CURRENT_std_max,card_AMT_RECEIVABLE_PRINCIPAL_mean_mean,card_AMT_RECEIVABLE_PRINCIPAL_mean_std,card_AMT_RECEIVABLE_PRINCIPAL_mean_min,card_AMT_RECEIVABLE_PRINCIPAL_mean_max,card_CNT_INSTALMENT_MATURE_CUM_mean_mean,card_CNT_INSTALMENT_MATURE_CUM_mean_std,card_CNT_INSTALMENT_MATURE_CUM_mean_min,card_CNT_INSTALMENT_MATURE_CUM_mean_max,card_CNT_INSTALMENT_MATURE_CUM_min_mean,card_CNT_INSTALMENT_MATURE_CUM_min_std,card_CNT_INSTALMENT_MATURE_CUM_min_min,card_CNT_INSTALMENT_MATURE_CUM_min_max,card_AMT_DRAWINGS_POS_CURRENT_min_mean,card_AMT_DRAWINGS_POS_CURRENT_min_std,card_AMT_DRAWINGS_POS_CURRENT_min_min,card_AMT_DRAWINGS_POS_CURRENT_min_max,card_SK_DPD_DEF_max_mean,card_SK_DPD_DEF_max_std,card_SK_DPD_DEF_max_min,card_SK_DPD_DEF_max_max,card_AMT_RECIVABLE_std_mean,card_AMT_RECIVABLE_std_std,card_AMT_RECIVABLE_std_min,card_AMT_RECIVABLE_std_max
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1
100006,,,,,,,,,0,,0,0,0.0,,0.0,0.0,0,,0,0,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,,,,,0.0,,0,0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,,,,,6.0,,6,6,,,,,0.0,,0,0,,,,,,,,,,,,,12.506181,,12.506181,12.506181,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,12.506181,,12.506181,12.506181,0,,0,0,0.0,,0.0,0.0,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,12.506181,,12.506181,12.506181,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0,,0,0,,,,,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0.0,,0.0,0.0
100011,10.923886,,10.923886,10.923886,12.100718,,12.100718,12.100718,0,,0,0,12.149508,,12.149508,12.149508,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.40668,,1.40668,1.40668,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,5.813196,,5.813196,5.813196,10.288236,,10.288236,10.288236,0.0,,0.0,0.0,0.0,,0,0,5.481577,,5.481577,5.481577,0.0,,0,0,1.40668,,1.40668,1.40668,7.484197,,7.484197,7.484197,0.054054,,0.054054,0.054054,0.0,,0.0,0.0,0.0,,0.0,0.0,5.481577,,5.481577,5.481577,4.53493,,4.53493,4.53493,0.0,,0.0,0.0,5.143911,,5.143911,5.143911,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,4.574727,,4.574727,4.574727,0.464991,,0.464991,0.464991,12.100718,,12.100718,12.100718,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,12.149508,,12.149508,12.149508,1.663975,,1.663975,1.663975,0.0,,0.0,0.0,0.0,,0.0,0.0,74.0,,74,74,0.054054,,0.054054,0.054054,0.0,,0,0,4.0,,4.0,4.0,0.0,,0.0,0.0,0.0,,0.0,0.0,12.100718,,12.100718,12.100718,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,9.105091,,9.105091,9.105091,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,11.97895,,11.97895,11.97895,0,,0,0,4.082931,,4.082931,4.082931,0.0,,0.0,0.0,0.0,,0.0,0.0,33.0,,33.0,33.0,12.149508,,12.149508,12.149508,0.464991,,0.464991,0.464991,0.0,,0.0,0.0,3.961915,,3.961915,3.961915,0.0,,0.0,0.0,11.407576,,11.407576,11.407576,12.100718,,12.100718,12.100718,0.0,,0.0,0.0,5.806097,,5.806097,5.806097,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,0.163523,,0.163523,0.163523,10.923886,,10.923886,10.923886,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.0,,0.0,0.0,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.163523,,0.163523,0.163523,5.67537,,5.67537,5.67537,4.0,,4,4,0.0,,0.0,0.0,0.265571,,0.265571,0.265571,0.0,,0.0,0.0,5.035465,,5.035465,5.035465,25.767123,,25.767123,25.767123,1.0,,1.0,1.0,0.0,,0.0,0.0,0.0,,0,0,5.67537,,5.67537,5.67537
100013,11.942602,,11.942602,11.942602,11.967187,,11.967187,11.967187,0,,0,0,11.991773,,11.991773,11.991773,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,2.645711,,2.645711,2.645711,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,4.30392,,4.30392,4.30392,5.852328,,5.852328,5.852328,0.0,,0.0,0.0,0.0,,0,0,2.390232,,2.390232,2.390232,0.0,,0,0,2.565483,,2.565483,2.565483,6.068084,,6.068084,6.068084,0.239583,,0.239583,0.239583,0.0,,0.0,0.0,0.0,,0.0,0.0,2.390232,,2.390232,2.390232,3.603271,,3.603271,3.603271,0.102062,,0.102062,0.102062,2.287024,,2.287024,2.287024,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,3.803238,,3.803238,3.803238,1.185693,,1.185693,1.185693,11.967187,,11.967187,11.967187,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,11.991773,,11.991773,11.991773,2.538063,,2.538063,2.538063,0.010417,,0.010417,0.010417,0.0,,0.0,0.0,96.0,,96,96,0.255556,,0.255556,0.255556,0.0,,0,0,7.0,,7.0,7.0,0.0,,0.0,0.0,0.0,,0.0,0.0,11.967187,,11.967187,11.967187,0.0,,0.0,0.0,0.0,,0.0,0.0,0.010417,,0.010417,0.010417,8.971575,,8.971575,8.971575,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,11.680099,,11.680099,11.680099,0,,0,0,2.006524,,2.006524,2.006524,0.0,,0.0,0.0,0.0,,0.0,0.0,22.0,,22.0,22.0,11.991773,,11.991773,11.991773,1.149323,,1.149323,1.149323,0.0,,0.0,0.0,1.622986,,1.622986,1.622986,0.0,,0.0,0.0,10.71444,,10.71444,10.71444,11.967187,,11.967187,11.967187,0.0,,0.0,0.0,4.402373,,4.402373,4.402373,0.0,,0.0,0.0,0.0,,0.0,0.0,0.102062,,0.102062,0.102062,0,,0,0,0.636741,,0.636741,0.636741,11.942602,,11.942602,11.942602,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0,,0,0,0.0,,0.0,0.0,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.596945,,0.596945,0.596945,4.394109,,4.394109,4.394109,7.0,,7,7,0.0,,0.0,0.0,0.529289,,0.529289,0.529289,0.0,,0.0,0.0,1.913403,,1.913403,1.913403,18.719101,,18.719101,18.719101,1.0,,1.0,1.0,0.0,,0.0,0.0,1.0,,1,1,4.394109,,4.394109,4.394109
100021,,,,,,,,,0,,0,0,0.0,,0.0,0.0,0,,0,0,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,1.0,,1,1,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,,,,,0.0,,0,0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,,,,,17.0,,17,17,,,,,0.0,,0,0,,,,,,,,,,,,,13.422469,,13.422469,13.422469,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,13.422469,,13.422469,13.422469,0,,0,0,0.0,,0.0,0.0,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.588235,,0.588235,0.588235,0.0,,0.0,0.0,,,,,13.422469,,13.422469,13.422469,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.5073,,0.5073,0.5073,0.0,,0.0,0.0,0,,0,0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0,,0,0,,,,,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0.0,,0.0,0.0
100023,,,,,,,,,0,,0,0,0.0,,0.0,0.0,0,,0,0,,,,,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,,,,,0.0,,0,0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,,,,,8.0,,8,8,,,,,0.0,,0,0,,,,,,,,,,,,,12.32386,,12.32386,12.32386,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,11.51915,,11.51915,11.51915,0,,0,0,0.0,,0.0,0.0,,,,,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,10.71444,,10.71444,10.71444,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0,,0,0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0,,0,0,,,,,0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0.0,0.0,0.860271,,0.860271,0.860271,,,,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,,0.0,0.0,,,,,0.0,,0,0,0.0,,0.0,0.0


In [59]:
# clear memory
del card

### 4.3.4. PREV DATA

In [60]:
# check card data
prev.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,Y,1,0.0,0.182832,0.867336,XAP,Approved,-73,Cash through the bank,XAP,,Repeater,Mobile,POS,XNA,Country-wide,35,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,Y,1,,,,XNA,Approved,-164,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0
2,2523466,122040,Cash loans,15060.735,112500.0,136444.5,,112500.0,TUESDAY,11,Y,1,,,,XNA,Approved,-301,Cash through the bank,XAP,"Spouse, partner",Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,high,Cash X-Sell: high,365243.0,-271.0,59.0,365243.0,365243.0,1.0
3,2819243,176158,Cash loans,47041.335,450000.0,470790.0,,450000.0,MONDAY,7,Y,1,,,,XNA,Approved,-512,Cash through the bank,XAP,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,12.0,middle,Cash X-Sell: middle,365243.0,-482.0,-152.0,-182.0,-177.0,1.0
4,1784265,202054,Cash loans,31924.395,337500.0,404055.0,,337500.0,THURSDAY,9,Y,1,,,,Repairs,Refused,-781,Cash through the bank,HC,,Repeater,XNA,Cash,walk-in,Credit and cash offices,-1,XNA,24.0,high,Cash Street: high,,,,,,


In [61]:
### FEATURE ENGINEERING

# amount ratios
prev["AMT_GIVEN_RATIO_1"]  = prev["AMT_CREDIT"] / prev["AMT_APPLICATION"]
prev["AMT_GIVEN_RATIO_2"]  = prev["AMT_GOODS_PRICE"] / prev["AMT_APPLICATION"]
prev["DOWN_PAYMENT_RATIO"] = prev["AMT_DOWN_PAYMENT"] / prev["AMT_APPLICATION"]

# logarithms
log_vars = ["AMT_CREDIT", "AMT_ANNUITY", "AMT_APPLICATION", "AMT_DOWN_PAYMENT", "AMT_GOODS_PRICE"]
prev = create_logs(prev, log_vars, replace = True)

# convert days
day_vars = ["DAYS_FIRST_DRAWING", "DAYS_FIRST_DUE", "DAYS_LAST_DUE_1ST_VERSION", 
            "DAYS_LAST_DUE", "DAYS_TERMINATION", "DAYS_DECISION"]
prev = convert_days(prev, day_vars, t = 1, rounding = False, replace = True)

# number of applications 
cnt_prev = prev[["SK_ID_CURR", "SK_ID_PREV"]].groupby(["SK_ID_CURR"], as_index = False).count()
cnt_prev.columns = ["SK_ID_CURR", "CNT_PREV_APPLICATIONS"]
prev = prev.merge(cnt_prev, how = "left", on = "SK_ID_CURR")

# number of contracts
cnt_prev = prev[["SK_ID_CURR", "FLAG_LAST_APPL_PER_CONTRACT"]]
cnt_prev.columns = ["SK_ID_CURR", "CNT_PREV_CONTRACTS"]
cnt_prev = cnt_prev[cnt_prev["CNT_PREV_CONTRACTS"] == "Y"]
cnt_prev = cnt_prev[["SK_ID_CURR", "CNT_PREV_CONTRACTS"]].groupby(["SK_ID_CURR"], as_index = False).count()
prev = prev.merge(cnt_prev, how = "left", on = "SK_ID_CURR")

# number ratio
prev["APPL_PER_CONTRACT_RATIO"] = prev["CNT_PREV_APPLICATIONS"] / prev["CNT_PREV_CONTRACTS"]

# loan decision ratios
prev = compute_accept_reject_ratio(prev, lags = [1, 3, 5])

# day differences
prev["DAYS_DUE_DIFF_1"] = prev["DAYS_LAST_DUE_1ST_VERSION"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_DUE_DIFF_2"] = prev["DAYS_LAST_DUE"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_TERMINATION_DIFF_1"] = prev["DAYS_TERMINATION"] - prev["DAYS_FIRST_DRAWING"]
prev["DAYS_TERMINATION_DIFF_2"] = prev["DAYS_TERMINATION"] - prev["DAYS_FIRST_DUE"]
prev["DAYS_TERMINATION_DIFF_3"] = prev["DAYS_TERMINATION"] - prev["DAYS_LAST_DUE"]

# application dates
prev["DAY_APPR_PROCESS_START"] = "Working day"
prev["DAY_APPR_PROCESS_START"][(prev["WEEKDAY_APPR_PROCESS_START"] == "SATURDAY") |
                               (prev["WEEKDAY_APPR_PROCESS_START"] == "SUNDAY")] = "Weekend"


##### FEATURE REMOVAL
drops = ["NAME_CLIENT_TYPE", "SK_ID_PREV"]
prev = prev.drop(columns = drops)

In [62]:
# dummy encodnig for factors
prev = pd.get_dummies(prev, drop_first = True)

In [63]:
# count missings
nas = count_missings(prev)
nas.head()

Unnamed: 0,Total,Percent
RATE_INTEREST_PRIMARY,1664263,99.643698
RATE_INTEREST_PRIVILEGED,1664263,99.643698
DAYS_TERMINATION_DIFF_1,1661862,99.499944
DAYS_FIRST_DRAWING,1607509,96.245691
DAYS_LAST_DUE_1ST_VERSION,991321,59.352933


In [64]:
### AGGREGATIONS

# aggregate data
agg_prev = aggregate_data(prev, id_var = "SK_ID_CURR", label = "prev")

# clean up
omits = ["APPROVE_RATIO_1", "APPROVE_RATIO_3", "APPROVE_RATIO_5",  
         "REJECT_RATIO_1", "REJECT_RATIO_3",  "REJECT_RATIO_5", 
         "FLAG_LAST_APPL_PER_CONTRACT_Y", "CNT_PREV_CONTRACTS", "CNT_PREV_APPLICATIONS", 
         "APPL_PER_CONTRACT_RATIO"]
for var in omits:
    del agg_prev["prev_" + str(var) + "_std"]
    del agg_prev["prev_" + str(var) + "_min"]
    del agg_prev["prev_" + str(var) + "_max"]

- Preparing the dataset...
- Extracted 0 factors and 161 numerics...
- Aggregating numeric features...
- Final dimensions: (338857, 644)


In [65]:
# count missings
nas = count_missings(agg_prev)
nas.head()

Unnamed: 0,Total,Percent
prev_DAYS_TERMINATION_DIFF_1_std,338827,99.991147
prev_DAYS_FIRST_DRAWING_std,338656,99.940683
prev_RATE_INTEREST_PRIVILEGED_std,338639,99.935666
prev_RATE_INTEREST_PRIMARY_std,338639,99.935666
prev_RATE_INTEREST_PRIMARY_min,333136,98.311677


In [66]:
# check data
agg_prev.head()

Unnamed: 0_level_0,prev_NAME_GOODS_CATEGORY_Mobile_mean,prev_NAME_GOODS_CATEGORY_Mobile_std,prev_NAME_GOODS_CATEGORY_Mobile_min,prev_NAME_GOODS_CATEGORY_Mobile_max,prev_NAME_CASH_LOAN_PURPOSE_Furniture_mean,prev_NAME_CASH_LOAN_PURPOSE_Furniture_std,prev_NAME_CASH_LOAN_PURPOSE_Furniture_min,prev_NAME_CASH_LOAN_PURPOSE_Furniture_max,prev_PRODUCT_COMBINATION_Cash_mean,prev_PRODUCT_COMBINATION_Cash_std,prev_PRODUCT_COMBINATION_Cash_min,prev_PRODUCT_COMBINATION_Cash_max,prev_CNT_PREV_APPLICATIONS_mean,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_mean,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_std,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_min,prev_NAME_CASH_LOAN_PURPOSE_Urgent needs_max,prev_NAME_GOODS_CATEGORY_Jewelry_mean,prev_NAME_GOODS_CATEGORY_Jewelry_std,prev_NAME_GOODS_CATEGORY_Jewelry_min,prev_NAME_GOODS_CATEGORY_Jewelry_max,prev_NAME_SELLER_INDUSTRY_Clothing_mean,prev_NAME_SELLER_INDUSTRY_Clothing_std,prev_NAME_SELLER_INDUSTRY_Clothing_min,prev_NAME_SELLER_INDUSTRY_Clothing_max,prev_DAYS_DUE_DIFF_2_mean,prev_DAYS_DUE_DIFF_2_std,prev_DAYS_DUE_DIFF_2_min,prev_DAYS_DUE_DIFF_2_max,prev_NAME_SELLER_INDUSTRY_Furniture_mean,prev_NAME_SELLER_INDUSTRY_Furniture_std,prev_NAME_SELLER_INDUSTRY_Furniture_min,prev_NAME_SELLER_INDUSTRY_Furniture_max,prev_PRODUCT_COMBINATION_Cash Street: middle_mean,prev_PRODUCT_COMBINATION_Cash Street: middle_std,prev_PRODUCT_COMBINATION_Cash Street: middle_min,prev_PRODUCT_COMBINATION_Cash Street: middle_max,prev_CNT_PREV_CONTRACTS_mean,prev_NAME_GOODS_CATEGORY_Education_mean,prev_NAME_GOODS_CATEGORY_Education_std,prev_NAME_GOODS_CATEGORY_Education_min,prev_NAME_GOODS_CATEGORY_Education_max,prev_NAME_GOODS_CATEGORY_Medical Supplies_mean,prev_NAME_GOODS_CATEGORY_Medical Supplies_std,prev_NAME_GOODS_CATEGORY_Medical Supplies_min,prev_NAME_GOODS_CATEGORY_Medical Supplies_max,prev_PRODUCT_COMBINATION_POS household with interest_mean,prev_PRODUCT_COMBINATION_POS household with interest_std,prev_PRODUCT_COMBINATION_POS household with interest_min,prev_PRODUCT_COMBINATION_POS household with interest_max,prev_PRODUCT_COMBINATION_POS industry with interest_mean,prev_PRODUCT_COMBINATION_POS industry with interest_std,prev_PRODUCT_COMBINATION_POS industry with interest_min,prev_PRODUCT_COMBINATION_POS industry with interest_max,prev_NAME_GOODS_CATEGORY_Audio/Video_mean,prev_NAME_GOODS_CATEGORY_Audio/Video_std,prev_NAME_GOODS_CATEGORY_Audio/Video_min,prev_NAME_GOODS_CATEGORY_Audio/Video_max,prev_REJECT_RATIO_3_mean,prev_NAME_GOODS_CATEGORY_Homewares_mean,prev_NAME_GOODS_CATEGORY_Homewares_std,prev_NAME_GOODS_CATEGORY_Homewares_min,prev_NAME_GOODS_CATEGORY_Homewares_max,prev_CODE_REJECT_REASON_VERIF_mean,prev_CODE_REJECT_REASON_VERIF_std,prev_CODE_REJECT_REASON_VERIF_min,prev_CODE_REJECT_REASON_VERIF_max,prev_DOWN_PAYMENT_RATIO_mean,prev_DOWN_PAYMENT_RATIO_std,prev_DOWN_PAYMENT_RATIO_min,prev_DOWN_PAYMENT_RATIO_max,prev_APPROVE_RATIO_1_mean,prev_CODE_REJECT_REASON_SCOFR_mean,prev_CODE_REJECT_REASON_SCOFR_std,prev_CODE_REJECT_REASON_SCOFR_min,prev_CODE_REJECT_REASON_SCOFR_max,prev_PRODUCT_COMBINATION_Cash Street: high_mean,prev_PRODUCT_COMBINATION_Cash Street: high_std,prev_PRODUCT_COMBINATION_Cash Street: high_min,prev_PRODUCT_COMBINATION_Cash Street: high_max,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_mean,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_std,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_min,prev_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_max,prev_PRODUCT_COMBINATION_POS mobile with interest_mean,prev_PRODUCT_COMBINATION_POS mobile with interest_std,prev_PRODUCT_COMBINATION_POS mobile with interest_min,prev_PRODUCT_COMBINATION_POS mobile with interest_max,prev_CHANNEL_TYPE_Car dealer_mean,prev_CHANNEL_TYPE_Car dealer_std,prev_CHANNEL_TYPE_Car dealer_min,prev_CHANNEL_TYPE_Car dealer_max,prev_NAME_TYPE_SUITE_Other_A_mean,prev_NAME_TYPE_SUITE_Other_A_std,prev_NAME_TYPE_SUITE_Other_A_min,prev_NAME_TYPE_SUITE_Other_A_max,prev_NAME_GOODS_CATEGORY_Consumer Electronics_mean,prev_NAME_GOODS_CATEGORY_Consumer Electronics_std,prev_NAME_GOODS_CATEGORY_Consumer Electronics_min,prev_NAME_GOODS_CATEGORY_Consumer Electronics_max,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_std,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_min,prev_WEEKDAY_APPR_PROCESS_START_SUNDAY_max,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_mean,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_std,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_min,prev_NAME_CASH_LOAN_PURPOSE_Payments on other loans_max,prev_DAYS_TERMINATION_DIFF_1_mean,prev_DAYS_TERMINATION_DIFF_1_std,prev_DAYS_TERMINATION_DIFF_1_min,prev_DAYS_TERMINATION_DIFF_1_max,prev_NAME_SELLER_INDUSTRY_MLM partners_mean,prev_NAME_SELLER_INDUSTRY_MLM partners_std,prev_NAME_SELLER_INDUSTRY_MLM partners_min,prev_NAME_SELLER_INDUSTRY_MLM partners_max,prev_NAME_GOODS_CATEGORY_Weapon_mean,prev_NAME_GOODS_CATEGORY_Weapon_std,prev_NAME_GOODS_CATEGORY_Weapon_min,prev_NAME_GOODS_CATEGORY_Weapon_max,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_std,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_min,prev_WEEKDAY_APPR_PROCESS_START_MONDAY_max,prev_NAME_PRODUCT_TYPE_walk-in_mean,prev_NAME_PRODUCT_TYPE_walk-in_std,prev_NAME_PRODUCT_TYPE_walk-in_min,prev_NAME_PRODUCT_TYPE_walk-in_max,prev_PRODUCT_COMBINATION_POS industry without interest_mean,prev_PRODUCT_COMBINATION_POS industry without interest_std,prev_PRODUCT_COMBINATION_POS industry without interest_min,prev_PRODUCT_COMBINATION_POS industry without interest_max,prev_DAYS_DECISION_mean,prev_DAYS_DECISION_std,prev_DAYS_DECISION_min,prev_DAYS_DECISION_max,prev_NAME_PORTFOLIO_Cars_mean,prev_NAME_PORTFOLIO_Cars_std,prev_NAME_PORTFOLIO_Cars_min,prev_NAME_PORTFOLIO_Cars_max,prev_AMT_DOWN_PAYMENT_mean,prev_AMT_DOWN_PAYMENT_std,prev_AMT_DOWN_PAYMENT_min,prev_AMT_DOWN_PAYMENT_max,prev_PRODUCT_COMBINATION_POS household without interest_mean,prev_PRODUCT_COMBINATION_POS household without interest_std,prev_PRODUCT_COMBINATION_POS household without interest_min,prev_PRODUCT_COMBINATION_POS household without interest_max,prev_CODE_REJECT_REASON_XNA_mean,prev_CODE_REJECT_REASON_XNA_std,prev_CODE_REJECT_REASON_XNA_min,prev_CODE_REJECT_REASON_XNA_max,prev_NAME_CASH_LOAN_PURPOSE_XNA_mean,prev_NAME_CASH_LOAN_PURPOSE_XNA_std,prev_NAME_CASH_LOAN_PURPOSE_XNA_min,prev_NAME_CASH_LOAN_PURPOSE_XNA_max,prev_PRODUCT_COMBINATION_POS other with interest_mean,prev_PRODUCT_COMBINATION_POS other with interest_std,prev_PRODUCT_COMBINATION_POS other with interest_min,prev_PRODUCT_COMBINATION_POS other with interest_max,prev_NAME_GOODS_CATEGORY_Other_mean,prev_NAME_GOODS_CATEGORY_Other_std,prev_NAME_GOODS_CATEGORY_Other_min,prev_NAME_GOODS_CATEGORY_Other_max,prev_NAME_CASH_LOAN_PURPOSE_Education_mean,prev_NAME_CASH_LOAN_PURPOSE_Education_std,prev_NAME_CASH_LOAN_PURPOSE_Education_min,prev_NAME_CASH_LOAN_PURPOSE_Education_max,prev_NFLAG_INSURED_ON_APPROVAL_mean,prev_NFLAG_INSURED_ON_APPROVAL_std,prev_NFLAG_INSURED_ON_APPROVAL_min,prev_NFLAG_INSURED_ON_APPROVAL_max,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_mean,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_std,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_min,prev_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_max,prev_NAME_GOODS_CATEGORY_Sport and Leisure_mean,prev_NAME_GOODS_CATEGORY_Sport and Leisure_std,prev_NAME_GOODS_CATEGORY_Sport and Leisure_min,prev_NAME_GOODS_CATEGORY_Sport and Leisure_max,prev_NAME_GOODS_CATEGORY_XNA_mean,prev_NAME_GOODS_CATEGORY_XNA_std,prev_NAME_GOODS_CATEGORY_XNA_min,prev_NAME_GOODS_CATEGORY_XNA_max,prev_DAY_APPR_PROCESS_START_Working day_mean,prev_DAY_APPR_PROCESS_START_Working day_std,prev_DAY_APPR_PROCESS_START_Working day_min,prev_DAY_APPR_PROCESS_START_Working day_max,prev_NAME_CASH_LOAN_PURPOSE_Other_mean,prev_NAME_CASH_LOAN_PURPOSE_Other_std,prev_NAME_CASH_LOAN_PURPOSE_Other_min,prev_NAME_CASH_LOAN_PURPOSE_Other_max,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_mean,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_std,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_min,prev_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_max,prev_NAME_SELLER_INDUSTRY_Tourism_mean,prev_NAME_SELLER_INDUSTRY_Tourism_std,prev_NAME_SELLER_INDUSTRY_Tourism_min,prev_NAME_SELLER_INDUSTRY_Tourism_max,prev_APPROVE_RATIO_5_mean,prev_NAME_PORTFOLIO_XNA_mean,prev_NAME_PORTFOLIO_XNA_std,prev_NAME_PORTFOLIO_XNA_min,prev_NAME_PORTFOLIO_XNA_max,prev_PRODUCT_COMBINATION_Cash X-Sell: high_mean,prev_PRODUCT_COMBINATION_Cash X-Sell: high_std,prev_PRODUCT_COMBINATION_Cash X-Sell: high_min,prev_PRODUCT_COMBINATION_Cash X-Sell: high_max,prev_NAME_YIELD_GROUP_low_action_mean,prev_NAME_YIELD_GROUP_low_action_std,prev_NAME_YIELD_GROUP_low_action_min,prev_NAME_YIELD_GROUP_low_action_max,prev_CODE_REJECT_REASON_HC_mean,prev_CODE_REJECT_REASON_HC_std,prev_CODE_REJECT_REASON_HC_min,prev_CODE_REJECT_REASON_HC_max,prev_CHANNEL_TYPE_Credit and cash offices_mean,prev_CHANNEL_TYPE_Credit and cash offices_std,prev_CHANNEL_TYPE_Credit and cash offices_min,prev_CHANNEL_TYPE_Credit and cash offices_max,prev_NAME_GOODS_CATEGORY_Office Appliances_mean,prev_NAME_GOODS_CATEGORY_Office Appliances_std,prev_NAME_GOODS_CATEGORY_Office Appliances_min,prev_NAME_GOODS_CATEGORY_Office Appliances_max,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_std,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_min,prev_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_max,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_mean,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_std,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_min,prev_NAME_CASH_LOAN_PURPOSE_Car repairs_max,prev_PRODUCT_COMBINATION_POS mobile without interest_mean,prev_PRODUCT_COMBINATION_POS mobile without interest_std,prev_PRODUCT_COMBINATION_POS mobile without interest_min,prev_PRODUCT_COMBINATION_POS mobile without interest_max,prev_CODE_REJECT_REASON_SYSTEM_mean,prev_CODE_REJECT_REASON_SYSTEM_std,prev_CODE_REJECT_REASON_SYSTEM_min,prev_CODE_REJECT_REASON_SYSTEM_max,prev_CODE_REJECT_REASON_XAP_mean,prev_CODE_REJECT_REASON_XAP_std,prev_CODE_REJECT_REASON_XAP_min,prev_CODE_REJECT_REASON_XAP_max,prev_PRODUCT_COMBINATION_Cash Street: low_mean,prev_PRODUCT_COMBINATION_Cash Street: low_std,prev_PRODUCT_COMBINATION_Cash Street: low_min,prev_PRODUCT_COMBINATION_Cash Street: low_max,prev_DAYS_TERMINATION_DIFF_3_mean,prev_DAYS_TERMINATION_DIFF_3_std,prev_DAYS_TERMINATION_DIFF_3_min,prev_DAYS_TERMINATION_DIFF_3_max,prev_APPL_PER_CONTRACT_RATIO_mean,prev_NAME_CASH_LOAN_PURPOSE_Repairs_mean,prev_NAME_CASH_LOAN_PURPOSE_Repairs_std,prev_NAME_CASH_LOAN_PURPOSE_Repairs_min,prev_NAME_CASH_LOAN_PURPOSE_Repairs_max,prev_NAME_CASH_LOAN_PURPOSE_Medicine_mean,prev_NAME_CASH_LOAN_PURPOSE_Medicine_std,prev_NAME_CASH_LOAN_PURPOSE_Medicine_min,prev_NAME_CASH_LOAN_PURPOSE_Medicine_max,prev_NAME_GOODS_CATEGORY_Vehicles_mean,prev_NAME_GOODS_CATEGORY_Vehicles_std,prev_NAME_GOODS_CATEGORY_Vehicles_min,prev_NAME_GOODS_CATEGORY_Vehicles_max,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_mean,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_std,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_min,prev_NAME_CASH_LOAN_PURPOSE_Everyday expenses_max,prev_DAYS_FIRST_DUE_mean,prev_DAYS_FIRST_DUE_std,prev_DAYS_FIRST_DUE_min,prev_DAYS_FIRST_DUE_max,prev_RATE_INTEREST_PRIMARY_mean,prev_RATE_INTEREST_PRIMARY_std,prev_RATE_INTEREST_PRIMARY_min,prev_RATE_INTEREST_PRIMARY_max,prev_NAME_GOODS_CATEGORY_Direct Sales_mean,prev_NAME_GOODS_CATEGORY_Direct Sales_std,prev_NAME_GOODS_CATEGORY_Direct Sales_min,prev_NAME_GOODS_CATEGORY_Direct Sales_max,prev_RATE_DOWN_PAYMENT_mean,prev_RATE_DOWN_PAYMENT_std,prev_RATE_DOWN_PAYMENT_min,prev_RATE_DOWN_PAYMENT_max,prev_NAME_PRODUCT_TYPE_x-sell_mean,prev_NAME_PRODUCT_TYPE_x-sell_std,prev_NAME_PRODUCT_TYPE_x-sell_min,prev_NAME_PRODUCT_TYPE_x-sell_max,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a new car_max,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_max,prev_NAME_PORTFOLIO_Cash_mean,prev_NAME_PORTFOLIO_Cash_std,prev_NAME_PORTFOLIO_Cash_min,prev_NAME_PORTFOLIO_Cash_max,prev_REJECT_RATIO_1_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a garage_max,prev_PRODUCT_COMBINATION_Card X-Sell_mean,prev_PRODUCT_COMBINATION_Card X-Sell_std,prev_PRODUCT_COMBINATION_Card X-Sell_min,prev_PRODUCT_COMBINATION_Card X-Sell_max,prev_NAME_SELLER_INDUSTRY_Consumer electronics_mean,prev_NAME_SELLER_INDUSTRY_Consumer electronics_std,prev_NAME_SELLER_INDUSTRY_Consumer electronics_min,prev_NAME_SELLER_INDUSTRY_Consumer electronics_max,prev_DAYS_DUE_DIFF_1_mean,prev_DAYS_DUE_DIFF_1_std,prev_DAYS_DUE_DIFF_1_min,prev_DAYS_DUE_DIFF_1_max,prev_NAME_GOODS_CATEGORY_Construction Materials_mean,prev_NAME_GOODS_CATEGORY_Construction Materials_std,prev_NAME_GOODS_CATEGORY_Construction Materials_min,prev_NAME_GOODS_CATEGORY_Construction Materials_max,prev_CHANNEL_TYPE_Channel of corporate sales_mean,prev_CHANNEL_TYPE_Channel of corporate sales_std,prev_CHANNEL_TYPE_Channel of corporate sales_min,prev_CHANNEL_TYPE_Channel of corporate sales_max,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_std,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_min,prev_WEEKDAY_APPR_PROCESS_START_TUESDAY_max,prev_NAME_TYPE_SUITE_Group of people_mean,prev_NAME_TYPE_SUITE_Group of people_std,prev_NAME_TYPE_SUITE_Group of people_min,prev_NAME_TYPE_SUITE_Group of people_max,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a home_max,prev_NAME_CASH_LOAN_PURPOSE_XAP_mean,prev_NAME_CASH_LOAN_PURPOSE_XAP_std,prev_NAME_CASH_LOAN_PURPOSE_XAP_min,prev_NAME_CASH_LOAN_PURPOSE_XAP_max,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_mean,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_std,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_min,prev_NAME_CASH_LOAN_PURPOSE_Buying a used car_max,prev_AMT_GIVEN_RATIO_2_mean,prev_AMT_GIVEN_RATIO_2_std,prev_AMT_GIVEN_RATIO_2_min,prev_AMT_GIVEN_RATIO_2_max,prev_NAME_GOODS_CATEGORY_Insurance_mean,prev_NAME_GOODS_CATEGORY_Insurance_std,prev_NAME_GOODS_CATEGORY_Insurance_min,prev_NAME_GOODS_CATEGORY_Insurance_max,prev_NAME_CONTRACT_TYPE_Revolving loans_mean,prev_NAME_CONTRACT_TYPE_Revolving loans_std,prev_NAME_CONTRACT_TYPE_Revolving loans_min,prev_NAME_CONTRACT_TYPE_Revolving loans_max,prev_NAME_GOODS_CATEGORY_Animals_mean,prev_NAME_GOODS_CATEGORY_Animals_std,prev_NAME_GOODS_CATEGORY_Animals_min,prev_NAME_GOODS_CATEGORY_Animals_max,prev_PRODUCT_COMBINATION_Cash X-Sell: low_mean,prev_PRODUCT_COMBINATION_Cash X-Sell: low_std,prev_PRODUCT_COMBINATION_Cash X-Sell: low_min,prev_PRODUCT_COMBINATION_Cash X-Sell: low_max,prev_CNT_PAYMENT_mean,prev_CNT_PAYMENT_std,prev_CNT_PAYMENT_min,prev_CNT_PAYMENT_max,prev_APPROVE_RATIO_3_mean,prev_DAYS_LAST_DUE_mean,prev_DAYS_LAST_DUE_std,prev_DAYS_LAST_DUE_min,prev_DAYS_LAST_DUE_max,prev_FLAG_LAST_APPL_PER_CONTRACT_Y_mean,prev_REJECT_RATIO_5_mean,prev_NAME_PORTFOLIO_POS_mean,prev_NAME_PORTFOLIO_POS_std,prev_NAME_PORTFOLIO_POS_min,prev_NAME_PORTFOLIO_POS_max,prev_AMT_APPLICATION_mean,prev_AMT_APPLICATION_std,prev_AMT_APPLICATION_min,prev_AMT_APPLICATION_max,prev_NAME_TYPE_SUITE_Other_B_mean,prev_NAME_TYPE_SUITE_Other_B_std,prev_NAME_TYPE_SUITE_Other_B_min,prev_NAME_TYPE_SUITE_Other_B_max,"prev_NAME_TYPE_SUITE_Spouse, partner_mean","prev_NAME_TYPE_SUITE_Spouse, partner_std","prev_NAME_TYPE_SUITE_Spouse, partner_min","prev_NAME_TYPE_SUITE_Spouse, partner_max",prev_NAME_CONTRACT_TYPE_XNA_mean,prev_NAME_CONTRACT_TYPE_XNA_std,prev_NAME_CONTRACT_TYPE_XNA_min,prev_NAME_CONTRACT_TYPE_XNA_max,prev_NAME_CASH_LOAN_PURPOSE_Hobby_mean,prev_NAME_CASH_LOAN_PURPOSE_Hobby_std,prev_NAME_CASH_LOAN_PURPOSE_Hobby_min,prev_NAME_CASH_LOAN_PURPOSE_Hobby_max,prev_NAME_SELLER_INDUSTRY_Industry_mean,prev_NAME_SELLER_INDUSTRY_Industry_std,prev_NAME_SELLER_INDUSTRY_Industry_min,prev_NAME_SELLER_INDUSTRY_Industry_max,prev_HOUR_APPR_PROCESS_START_mean,prev_HOUR_APPR_PROCESS_START_std,prev_HOUR_APPR_PROCESS_START_min,prev_HOUR_APPR_PROCESS_START_max,prev_NAME_CONTRACT_TYPE_Consumer loans_mean,prev_NAME_CONTRACT_TYPE_Consumer loans_std,prev_NAME_CONTRACT_TYPE_Consumer loans_min,prev_NAME_CONTRACT_TYPE_Consumer loans_max,prev_CODE_REJECT_REASON_LIMIT_mean,prev_CODE_REJECT_REASON_LIMIT_std,prev_CODE_REJECT_REASON_LIMIT_min,prev_CODE_REJECT_REASON_LIMIT_max,prev_NAME_GOODS_CATEGORY_Furniture_mean,prev_NAME_GOODS_CATEGORY_Furniture_std,prev_NAME_GOODS_CATEGORY_Furniture_min,prev_NAME_GOODS_CATEGORY_Furniture_max,prev_CHANNEL_TYPE_Stone_mean,prev_CHANNEL_TYPE_Stone_std,prev_CHANNEL_TYPE_Stone_min,prev_CHANNEL_TYPE_Stone_max,prev_NAME_SELLER_INDUSTRY_Jewelry_mean,prev_NAME_SELLER_INDUSTRY_Jewelry_std,prev_NAME_SELLER_INDUSTRY_Jewelry_min,prev_NAME_SELLER_INDUSTRY_Jewelry_max,prev_NFLAG_LAST_APPL_IN_DAY_mean,prev_NFLAG_LAST_APPL_IN_DAY_std,prev_NFLAG_LAST_APPL_IN_DAY_min,prev_NFLAG_LAST_APPL_IN_DAY_max,prev_NAME_CONTRACT_STATUS_Refused_mean,prev_NAME_CONTRACT_STATUS_Refused_std,prev_NAME_CONTRACT_STATUS_Refused_min,prev_NAME_CONTRACT_STATUS_Refused_max,prev_NAME_GOODS_CATEGORY_Fitness_mean,prev_NAME_GOODS_CATEGORY_Fitness_std,prev_NAME_GOODS_CATEGORY_Fitness_min,prev_NAME_GOODS_CATEGORY_Fitness_max,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_mean,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_std,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_min,prev_NAME_GOODS_CATEGORY_Clothing and Accessories_max,prev_NAME_YIELD_GROUP_middle_mean,prev_NAME_YIELD_GROUP_middle_std,prev_NAME_YIELD_GROUP_middle_min,prev_NAME_YIELD_GROUP_middle_max,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_mean,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_std,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_min,prev_NAME_PAYMENT_TYPE_Cashless from the account of the employer_max,prev_NAME_TYPE_SUITE_Unaccompanied_mean,prev_NAME_TYPE_SUITE_Unaccompanied_std,prev_NAME_TYPE_SUITE_Unaccompanied_min,prev_NAME_TYPE_SUITE_Unaccompanied_max,prev_RATE_INTEREST_PRIVILEGED_mean,prev_RATE_INTEREST_PRIVILEGED_std,prev_RATE_INTEREST_PRIVILEGED_min,prev_RATE_INTEREST_PRIVILEGED_max,prev_NAME_YIELD_GROUP_high_mean,prev_NAME_YIELD_GROUP_high_std,prev_NAME_YIELD_GROUP_high_min,prev_NAME_YIELD_GROUP_high_max,prev_CODE_REJECT_REASON_SCO_mean,prev_CODE_REJECT_REASON_SCO_std,prev_CODE_REJECT_REASON_SCO_min,prev_CODE_REJECT_REASON_SCO_max,prev_PRODUCT_COMBINATION_POS others without interest_mean,prev_PRODUCT_COMBINATION_POS others without interest_std,prev_PRODUCT_COMBINATION_POS others without interest_min,prev_PRODUCT_COMBINATION_POS others without interest_max,prev_NAME_GOODS_CATEGORY_House Construction_mean,prev_NAME_GOODS_CATEGORY_House Construction_std,prev_NAME_GOODS_CATEGORY_House Construction_min,prev_NAME_GOODS_CATEGORY_House Construction_max,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_mean,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_std,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_min,prev_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_max,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_mean,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_std,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_min,prev_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_max,prev_NAME_PAYMENT_TYPE_XNA_mean,prev_NAME_PAYMENT_TYPE_XNA_std,prev_NAME_PAYMENT_TYPE_XNA_min,prev_NAME_PAYMENT_TYPE_XNA_max,prev_CHANNEL_TYPE_Contact center_mean,prev_CHANNEL_TYPE_Contact center_std,prev_CHANNEL_TYPE_Contact center_min,prev_CHANNEL_TYPE_Contact center_max,prev_NAME_GOODS_CATEGORY_Auto Accessories_mean,prev_NAME_GOODS_CATEGORY_Auto Accessories_std,prev_NAME_GOODS_CATEGORY_Auto Accessories_min,prev_NAME_GOODS_CATEGORY_Auto Accessories_max,prev_DAYS_TERMINATION_DIFF_2_mean,prev_DAYS_TERMINATION_DIFF_2_std,prev_DAYS_TERMINATION_DIFF_2_min,prev_DAYS_TERMINATION_DIFF_2_max,prev_CHANNEL_TYPE_Regional / Local_mean,prev_CHANNEL_TYPE_Regional / Local_std,prev_CHANNEL_TYPE_Regional / Local_min,prev_CHANNEL_TYPE_Regional / Local_max,prev_DAYS_FIRST_DRAWING_mean,prev_DAYS_FIRST_DRAWING_std,prev_DAYS_FIRST_DRAWING_min,prev_DAYS_FIRST_DRAWING_max,prev_AMT_GIVEN_RATIO_1_mean,prev_AMT_GIVEN_RATIO_1_std,prev_AMT_GIVEN_RATIO_1_min,prev_AMT_GIVEN_RATIO_1_max,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_std,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_min,prev_WEEKDAY_APPR_PROCESS_START_THURSDAY_max,prev_NAME_SELLER_INDUSTRY_Connectivity_mean,prev_NAME_SELLER_INDUSTRY_Connectivity_std,prev_NAME_SELLER_INDUSTRY_Connectivity_min,prev_NAME_SELLER_INDUSTRY_Connectivity_max,prev_NAME_CONTRACT_STATUS_Unused offer_mean,prev_NAME_CONTRACT_STATUS_Unused offer_std,prev_NAME_CONTRACT_STATUS_Unused offer_min,prev_NAME_CONTRACT_STATUS_Unused offer_max,prev_AMT_CREDIT_mean,prev_AMT_CREDIT_std,prev_AMT_CREDIT_min,prev_AMT_CREDIT_max,prev_CHANNEL_TYPE_Country-wide_mean,prev_CHANNEL_TYPE_Country-wide_std,prev_CHANNEL_TYPE_Country-wide_min,prev_CHANNEL_TYPE_Country-wide_max,prev_NAME_SELLER_INDUSTRY_Construction_mean,prev_NAME_SELLER_INDUSTRY_Construction_std,prev_NAME_SELLER_INDUSTRY_Construction_min,prev_NAME_SELLER_INDUSTRY_Construction_max,prev_DAYS_LAST_DUE_1ST_VERSION_mean,prev_DAYS_LAST_DUE_1ST_VERSION_std,prev_DAYS_LAST_DUE_1ST_VERSION_min,prev_DAYS_LAST_DUE_1ST_VERSION_max,prev_NAME_CONTRACT_STATUS_Canceled_mean,prev_NAME_CONTRACT_STATUS_Canceled_std,prev_NAME_CONTRACT_STATUS_Canceled_min,prev_NAME_CONTRACT_STATUS_Canceled_max,prev_NAME_GOODS_CATEGORY_Gardening_mean,prev_NAME_GOODS_CATEGORY_Gardening_std,prev_NAME_GOODS_CATEGORY_Gardening_min,prev_NAME_GOODS_CATEGORY_Gardening_max,prev_AMT_ANNUITY_mean,prev_AMT_ANNUITY_std,prev_AMT_ANNUITY_min,prev_AMT_ANNUITY_max,prev_DAYS_TERMINATION_mean,prev_DAYS_TERMINATION_std,prev_DAYS_TERMINATION_min,prev_DAYS_TERMINATION_max,prev_NAME_CASH_LOAN_PURPOSE_Business development_mean,prev_NAME_CASH_LOAN_PURPOSE_Business development_std,prev_NAME_CASH_LOAN_PURPOSE_Business development_min,prev_NAME_CASH_LOAN_PURPOSE_Business development_max,prev_AMT_GOODS_PRICE_mean,prev_AMT_GOODS_PRICE_std,prev_AMT_GOODS_PRICE_min,prev_AMT_GOODS_PRICE_max,prev_NAME_PAYMENT_TYPE_Non-cash from your account_mean,prev_NAME_PAYMENT_TYPE_Non-cash from your account_std,prev_NAME_PAYMENT_TYPE_Non-cash from your account_min,prev_NAME_PAYMENT_TYPE_Non-cash from your account_max,prev_NAME_CASH_LOAN_PURPOSE_Journey_mean,prev_NAME_CASH_LOAN_PURPOSE_Journey_std,prev_NAME_CASH_LOAN_PURPOSE_Journey_min,prev_NAME_CASH_LOAN_PURPOSE_Journey_max,prev_SELLERPLACE_AREA_mean,prev_SELLERPLACE_AREA_std,prev_SELLERPLACE_AREA_min,prev_SELLERPLACE_AREA_max,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_mean,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_std,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_min,prev_NAME_CASH_LOAN_PURPOSE_Money for a third person_max,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_mean,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_std,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_min,prev_WEEKDAY_APPR_PROCESS_START_SATURDAY_max,prev_NAME_GOODS_CATEGORY_Medicine_mean,prev_NAME_GOODS_CATEGORY_Medicine_std,prev_NAME_GOODS_CATEGORY_Medicine_min,prev_NAME_GOODS_CATEGORY_Medicine_max,prev_NAME_GOODS_CATEGORY_Tourism_mean,prev_NAME_GOODS_CATEGORY_Tourism_std,prev_NAME_GOODS_CATEGORY_Tourism_min,prev_NAME_GOODS_CATEGORY_Tourism_max,prev_NAME_YIELD_GROUP_low_normal_mean,prev_NAME_YIELD_GROUP_low_normal_std,prev_NAME_YIELD_GROUP_low_normal_min,prev_NAME_YIELD_GROUP_low_normal_max,prev_NAME_TYPE_SUITE_Family_mean,prev_NAME_TYPE_SUITE_Family_std,prev_NAME_TYPE_SUITE_Family_min,prev_NAME_TYPE_SUITE_Family_max,prev_NAME_SELLER_INDUSTRY_XNA_mean,prev_NAME_SELLER_INDUSTRY_XNA_std,prev_NAME_SELLER_INDUSTRY_XNA_min,prev_NAME_SELLER_INDUSTRY_XNA_max,prev_NAME_GOODS_CATEGORY_Computers_mean,prev_NAME_GOODS_CATEGORY_Computers_std,prev_NAME_GOODS_CATEGORY_Computers_min,prev_NAME_GOODS_CATEGORY_Computers_max,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_mean,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_std,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_min,prev_PRODUCT_COMBINATION_Cash X-Sell: middle_max
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1,Unnamed: 502_level_1,Unnamed: 503_level_1,Unnamed: 504_level_1,Unnamed: 505_level_1,Unnamed: 506_level_1,Unnamed: 507_level_1,Unnamed: 508_level_1,Unnamed: 509_level_1,Unnamed: 510_level_1,Unnamed: 511_level_1,Unnamed: 512_level_1,Unnamed: 513_level_1,Unnamed: 514_level_1,Unnamed: 515_level_1,Unnamed: 516_level_1,Unnamed: 517_level_1,Unnamed: 518_level_1,Unnamed: 519_level_1,Unnamed: 520_level_1,Unnamed: 521_level_1,Unnamed: 522_level_1,Unnamed: 523_level_1,Unnamed: 524_level_1,Unnamed: 525_level_1,Unnamed: 526_level_1,Unnamed: 527_level_1,Unnamed: 528_level_1,Unnamed: 529_level_1,Unnamed: 530_level_1,Unnamed: 531_level_1,Unnamed: 532_level_1,Unnamed: 533_level_1,Unnamed: 534_level_1,Unnamed: 535_level_1,Unnamed: 536_level_1,Unnamed: 537_level_1,Unnamed: 538_level_1,Unnamed: 539_level_1,Unnamed: 540_level_1,Unnamed: 541_level_1,Unnamed: 542_level_1,Unnamed: 543_level_1,Unnamed: 544_level_1,Unnamed: 545_level_1,Unnamed: 546_level_1,Unnamed: 547_level_1,Unnamed: 548_level_1,Unnamed: 549_level_1,Unnamed: 550_level_1,Unnamed: 551_level_1,Unnamed: 552_level_1,Unnamed: 553_level_1,Unnamed: 554_level_1,Unnamed: 555_level_1,Unnamed: 556_level_1,Unnamed: 557_level_1,Unnamed: 558_level_1,Unnamed: 559_level_1,Unnamed: 560_level_1,Unnamed: 561_level_1,Unnamed: 562_level_1,Unnamed: 563_level_1,Unnamed: 564_level_1,Unnamed: 565_level_1,Unnamed: 566_level_1,Unnamed: 567_level_1,Unnamed: 568_level_1,Unnamed: 569_level_1,Unnamed: 570_level_1,Unnamed: 571_level_1,Unnamed: 572_level_1,Unnamed: 573_level_1,Unnamed: 574_level_1,Unnamed: 575_level_1,Unnamed: 576_level_1,Unnamed: 577_level_1,Unnamed: 578_level_1,Unnamed: 579_level_1,Unnamed: 580_level_1,Unnamed: 581_level_1,Unnamed: 582_level_1,Unnamed: 583_level_1,Unnamed: 584_level_1,Unnamed: 585_level_1,Unnamed: 586_level_1,Unnamed: 587_level_1,Unnamed: 588_level_1,Unnamed: 589_level_1,Unnamed: 590_level_1,Unnamed: 591_level_1,Unnamed: 592_level_1,Unnamed: 593_level_1,Unnamed: 594_level_1,Unnamed: 595_level_1,Unnamed: 596_level_1,Unnamed: 597_level_1,Unnamed: 598_level_1,Unnamed: 599_level_1,Unnamed: 600_level_1,Unnamed: 601_level_1,Unnamed: 602_level_1,Unnamed: 603_level_1,Unnamed: 604_level_1,Unnamed: 605_level_1,Unnamed: 606_level_1,Unnamed: 607_level_1,Unnamed: 608_level_1,Unnamed: 609_level_1,Unnamed: 610_level_1,Unnamed: 611_level_1,Unnamed: 612_level_1,Unnamed: 613_level_1,Unnamed: 614_level_1
100001,1.0,,1,1,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,-90.0,,-90.0,-90.0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.101468,,0.101468,0.101468,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1740.0,,1740.0,1740.0,0.0,,0,0,7.832411,,7.832411,7.832411,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,-7.0,,-7.0,-7.0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1709.0,,1709.0,1709.0,,,,,0.0,,0,0,0.104326,,0.104326,0.104326,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,-210.0,,-210.0,-210.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1.0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,8.0,,8.0,8.0,1,1619.0,,1619.0,1619.0,1.0,0,1.0,,1,1,10.12007,,10.12007,10.12007,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,13.0,,13,13,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,-97.0,,-97.0,-97.0,0.0,,0,0,,,,,0.957782,,0.957782,0.957782,0.0,,0,0,1.0,,1,1,0.0,,0,0,10.076937,,10.076937,10.076937,1.0,,1,1,0.0,,0,0,1499.0,,1499.0,1499.0,0.0,,0,0,0.0,,0,0,8.281977,,8.281977,8.281977,1612.0,,1612.0,1612.0,0.0,,0,0,10.12007,,10.12007,10.12007,0.0,,0,0,0.0,,0,0,23.0,,23,23,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0
100002,0.0,,0,0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,-540.0,,-540.0,-540.0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,606.0,,606.0,606.0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,-8.0,,-8.0,-8.0,1.0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,565.0,,565.0,565.0,,,,,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1.0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,24.0,,24.0,24.0,1,25.0,,25.0,25.0,1.0,0,1.0,,1,1,12.095454,,12.095454,12.095454,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,9.0,,9,9,1.0,,1,1,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,-548.0,,-548.0,-548.0,0.0,,0,0,,,,,1.0,,1.0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,12.095454,,12.095454,12.095454,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,9.132679,,9.132679,9.132679,17.0,,17.0,17.0,0.0,,0,0,12.095454,,12.095454,12.095454,0.0,,0,0,0.0,,0,0,500.0,,500,500,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0
100003,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,3,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,-220.0,96.436508,-330.0,-150.0,0.333333,0.57735,0,1,0.0,0.0,0,0,3,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.333333,0.57735,0,1,0.0,0.0,0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.050029,0.070752,0.0,0.100059,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.333333,0.57735,0,1,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1305.0,898.138631,746.0,2341.0,0.0,0.0,0,0,4.418623,6.248876,0.0,8.837246,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.666667,0.57735,0.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,-7.0,2.645751,-9.0,-4.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1274.333333,897.827563,716.0,2310.0,,,,,0.0,0.0,0,0,0.05003,0.070754,0.0,0.100061,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,-270.0,103.923048,-330.0,-150.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.666667,0.57735,0,1,0.0,0.0,0,0,1.0,0.0,1.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,10.0,3.464102,6.0,12.0,1,1054.333333,803.569744,536.0,1980.0,1.0,0,0.666667,0.57735,0,1,12.526196,1.2975,11.139112,13.710151,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,14.666667,2.516611,12,17,0.666667,0.57735,0,1,0.0,0.0,0,0,0.333333,0.57735,0,1,0.333333,0.57735,0,1,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.666667,0.57735,0,1,0.0,0.0,0,0,0.333333,0.57735,0,1,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,-227.0,93.952115,-334.0,-158.0,0.0,0.0,0,0,,,,,1.057664,0.083753,0.989013,1.15098,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,12.580207,1.370403,11.128064,13.850765,0.333333,0.57735,0,1,0.0,0.0,0,0,1004.333333,854.97037,386.0,1980.0,0.0,0.0,0,0,0.0,0.0,0,0,10.462473,1.441706,8.815564,11.496369,1047.333333,806.196213,527.0,1976.0,0.0,0.0,0,0,12.526196,1.2975,11.139112,13.710151,0.0,0.0,0,0,0.0,0.0,0,0,533.0,757.540098,-1,1400,0.0,0.0,0,0,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.333333,0.57735,0,1,0.666667,0.57735,0,1,0.333333,0.57735,0,1,0.0,0.0,0,0,0.0,0.0,0,0
100004,1.0,,1,1,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,-60.0,,-60.0,-60.0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.200148,,0.200148,0.200148,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,815.0,,815.0,815.0,0.0,,0,0,8.488999,,8.488999,8.488999,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0.0,0.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1,1,0.0,,0,0,-10.0,,-10.0,-10.0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,784.0,,784.0,784.0,,,,,0.0,,0,0,0.212008,,0.212008,0.212008,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,-90.0,,-90.0,-90.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1.0,1.0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,4.0,,4.0,4.0,1,724.0,,724.0,724.0,1.0,0,1.0,,1,1,10.097532,,10.097532,10.097532,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,5.0,,5,5,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,0.0,,0,0,0.0,,0,0,1.0,,1,1,0.0,,0,0,1.0,,1,1,,,,,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,-70.0,,-70.0,-70.0,1.0,,1,1,,,,,0.828021,,0.828021,0.828021,0.0,,0,0,1.0,,1,1,0.0,,0,0,9.908823,,9.908823,9.908823,0.0,,0,0,0.0,,0,0,694.0,,694.0,694.0,0.0,,0,0,0.0,,0,0,8.586393,,8.586393,8.586393,714.0,,714.0,714.0,0.0,,0,0,10.097532,,10.097532,10.097532,0.0,,0,0,0.0,,0,0,30.0,,30,30,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0,0.0,,0,0
100005,0.5,0.707107,0,1,0.0,0.0,0,0,0.5,0.707107,0,1,2,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,-240.0,,-240.0,-240.0,0.0,0.0,0,0,0.0,0.0,0,0,2,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.10005,,0.10005,0.10005,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,536.0,312.541197,315.0,757.0,0.0,0.0,0,0,8.404024,,8.404024,8.404024,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,-6.0,,-6.0,-6.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,706.0,,706.0,706.0,,,,,0.0,0.0,0,0,0.108964,,0.108964,0.108964,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,-330.0,,-330.0,-330.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,1.0,,1.0,1.0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,12.0,,12.0,12.0,1,466.0,,466.0,466.0,1.0,0,0.5,0.707107,0,1,5.352952,7.570217,0.0,10.705904,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,10.5,0.707107,10,11,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,1.0,0.0,1,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,,,,,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0,-246.0,,-246.0,-246.0,0.0,0.0,0,0,,,,,0.89995,,0.89995,0.89995,0.5,0.707107,0,1,0.5,0.707107,0,1,0.0,0.0,0,0,5.300245,7.495678,0.0,10.60049,0.5,0.707107,0,1,0.0,0.0,0,0,376.0,,376.0,376.0,0.5,0.707107,0,1,0.0,0.0,0,0,8.479325,,8.479325,8.479325,460.0,,460.0,460.0,0.0,0.0,0,0,10.705904,,10.705904,10.705904,0.0,0.0,0,0,0.0,0.0,0,0,18.0,26.870058,-1,37,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.5,0.707107,0,1,0.0,0.0,0,0,0.0,0.0,0,0


In [67]:
# clear memory
del prev

# 5. MERGE

In [68]:
# merge data
print(appl.shape)
appl = appl.merge(right = agg_buro.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
del agg_buro
appl = appl.merge(right = agg_prev.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
del agg_prev
appl = appl.merge(right = agg_inst.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
del agg_inst
appl = appl.merge(right = agg_poca.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
del agg_poca
appl = appl.merge(right = agg_card.reset_index(), how = "left", on = "SK_ID_CURR")
print(appl.shape)
del agg_card

(356255, 109)
(356255, 339)
(356255, 953)
(356255, 1101)
(356255, 1313)
(356255, 1717)


In [69]:
##### CROSS-TABLE FEATURE ENGINEERING

# credit ratios
appl["mix_AMT_PREV_ANNUITY_RATIO"]     = appl["app_AMT_ANNUITY"] / appl["prev_AMT_ANNUITY_mean"]
appl["mix_AMT_PREV_CREDIT_RATIO"]      = appl["app_AMT_CREDIT"] / appl["prev_AMT_CREDIT_mean"]
appl["mix_AMT_PREV_GOODS_PRICE_RATIO"] = appl["app_AMT_GOODS_PRICE"] / appl["prev_AMT_GOODS_PRICE_mean"]
appl["mix_AMT_BURO_ANNUITY_RATIO"]     = appl["app_AMT_ANNUITY"] / appl["buro_AMT_ANNUITY_mean"]
appl["mix_AMT_BURO_CREDIT_RATIO"]      = appl["app_AMT_CREDIT"] / appl["buro_AMT_CREDIT_SUM_mean"]

In [70]:
# dummy encodnig for factors
appl = pd.get_dummies(appl, drop_first = True)

In [71]:
# label encoder for factors
#data_factors = [f for f in appl.columns if appl[f].dtype == "object"]
#for var in data_factors:
#    appl[var], _ = pd.factorize(appl[var])

In [9]:
# merge target
appl = appl.merge(y, how = "left", on = "SK_ID_CURR")
appl.rename(index = str, columns = {"TARGET": "BAD"})

In [7]:
# exclude features
excluded_feats = ["SK_ID_CURR"]
features = [f for f in appl.columns if f not in excluded_feats]
appl = appl[features]

In [8]:
# check dimensions
print(appl.shape)

(307511, 1829)


In [None]:
# partitioning
train = appl[appl.BAD.isnull() == False]
del appl

In [None]:
# target labels
train["BAD"][train.BAD == 1] = "BAD"
train["BAD"][train.BAD != "BAD"] = "GOOD"

# 6. MISSINGS

In [72]:
# count missings
nas = count_missings(train)
nas.head()

Unnamed: 0,Total,Percent
prev_DAYS_TERMINATION_DIFF_1_std,356225,99.991579
prev_DAYS_FIRST_DRAWING_std,356054,99.94358
prev_RATE_INTEREST_PRIVILEGED_std,356037,99.938808
prev_RATE_INTEREST_PRIMARY_std,356037,99.938808
card_AMT_DRAWINGS_ATM_CURRENT_std_std,355945,99.912984


In [11]:
# impute infinity
feats = train.columns
for var in feats:
    train[var][np.isinf(train[var])] = None

In [12]:
# count missings
nas = count_missings(train)
nas.head()

Unnamed: 0,Total,Percent
prev_DAYS_TERMINATION_DIFF_1_std,307490,99.993171
prev_DAYS_FIRST_DRAWING_std,307363,99.951872
prev_RATE_INTEREST_PRIMARY_std,307337,99.943417
prev_RATE_INTEREST_PRIVILEGED_std,307337,99.943417
card_AMT_DRAWINGS_ATM_CURRENT_std_std,307285,99.926507


In [13]:
# exclude features
na_feats = list(nas.index[nas.Percent > 66])
for var in na_feats:
    train[var][train[var].isnull() == False] = 1
    train[var][train[var].isnull() == True] = 0

In [14]:
# impute missings
na_feats = list(nas.index)
for var in na_feats:
    train[var].fillna(train[var].mean(), inplace = True)

In [15]:
# count missings
nas = count_missings(train)
nas.head()

Unnamed: 0,Total,Percent


# 7. EXPORT

In [16]:
# dimensions
print(train.shape)

(307511, 1831)


In [17]:
# export CSV
train.to_csv("../data/prepared/homecredit.csv", index = False, float_format = "%.8f")