第一波特征：lr 0.71。categorical feature 做target、onehot、label encoding，加上normal features，60维

第二波特征：lr 0.75。对连续型，数值型变量做离散化处理。998维

第三波特征：lr 0.76。引入bureau特征。1468维

第四波特征：lr 0.77。引入bureau、cash、credit、installment、previous(only)特征。3039维

In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import re
import category_encoders as ce
from sklearn.preprocessing import LabelBinarizer, StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest, chi2, SelectFromModel, mutual_info_classif
from sklearn.ensemble import ExtraTreesClassifier

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
train = pd.read_csv('./data/rawdata/application_train.csv.zip',compression='zip')
test = pd.read_csv('./data/rawdata/application_test.csv.zip',compression='zip')

train.shape, test.shape

((307511, 122), (48744, 121))

In [3]:
#  add features from other source
bureau_features = pd.read_csv('./data/rawdata/bureau_feature.csv')
bureau_features.columns = ['bureau_' + re.sub(',|:| ','_',c) if c != 'SK_ID_CURR' else c for c in bureau_features.columns]

train = train.merge(bureau_features,how='left',on='SK_ID_CURR')
test = test.merge(bureau_features,how='left',on='SK_ID_CURR')
del bureau_features
train.shape, test.shape

((307511, 152), (48744, 151))

In [4]:
#  add features from other source
cash_features = pd.read_csv('./data/rawdata/cash_features.csv')
cash_features.columns = ['cash_' + re.sub(',|:| ','_',c) if c != 'SK_ID_CURR' else c for c in cash_features.columns]

train = train.merge(cash_features,how='left',on='SK_ID_CURR')
test = test.merge(cash_features,how='left',on='SK_ID_CURR')
del cash_features
train.shape, test.shape

((307511, 169), (48744, 168))

In [5]:
#  add features from other source
credit_features = pd.read_csv('./data/rawdata/credit_features.csv')
credit_features.columns = ['credit_' + re.sub(',|:| ','_',c) if c != 'SK_ID_CURR' else c for c in credit_features.columns]

train = train.merge(credit_features,how='left',on='SK_ID_CURR')
test = test.merge(credit_features,how='left',on='SK_ID_CURR')

del credit_features
train.shape, test.shape

((307511, 240), (48744, 239))

In [6]:
#  add features from other source
installment_features = pd.read_csv('./data/rawdata/installment_features.csv')
installment_features.columns = ['installment_' + re.sub(',|:| ','_',c) if c != 'SK_ID_CURR' else c for c in installment_features.columns]

train = train.merge(installment_features,how='left',on='SK_ID_CURR')
test = test.merge(installment_features,how='left',on='SK_ID_CURR')

del installment_features
train.shape, test.shape

((307511, 284), (48744, 283))

In [7]:
#  add features from other source
only_prev_features = pd.read_csv('./data/rawdata/only_prev_features.csv')
only_prev_features.columns = ['prev_' + re.sub(',|:| ','_',c) if c != 'SK_ID_CURR' else c for c in only_prev_features.columns]

train = train.merge(only_prev_features,how='left',on='SK_ID_CURR')
test = test.merge(only_prev_features,how='left',on='SK_ID_CURR')

del only_prev_features
train.shape, test.shape

((307511, 483), (48744, 482))

In [8]:
def duplicate_columns(df, return_dataframe = False, verbose = False):
    '''
        a function to detect and possibly remove duplicated columns for a pandas dataframe
    '''
    from pandas.core.common import array_equivalent
    # group columns by dtypes, only the columns of the same dtypes can be duplicate of each other
    groups = df.columns.to_series().groupby(df.dtypes).groups
    duplicated_columns = []

    for dtype, col_names in groups.items():
        column_values = df[col_names]
        num_columns = len(col_names)

        # find duplicated columns by checking pairs of columns, store first column name if duplicate exist 
        for i in range(num_columns):
            column_i = column_values.iloc[:,i].values
            for j in range(i + 1, num_columns):
                column_j = column_values.iloc[:,j].values
                if array_equivalent(column_i, column_j):
                    if verbose: 
                        print("column {} is a duplicate of column {}".format(col_names[i], col_names[j]))
                    duplicated_columns.append(col_names[i])
                    break
    if not return_dataframe:
        # return the column names of those duplicated exists
        return duplicated_columns
    else:
        # return a dataframe with duplicated columns dropped 
        return df.drop(labels = duplicated_columns, axis = 1)

In [9]:
train_s = duplicate_columns(train, return_dataframe=True,verbose = True)
train_s.shape



column prev_NAME_CONTRACT_STATUS_Unused_offer is a duplicate of column prev_CODE_REJECT_REASON_CLIENT


(307511, 482)

In [10]:
feature_dict = {'NAME_CONTRACT_TYPE':'category_features', 'CODE_GENDER':'category_features', 'FLAG_OWN_CAR':'boolean_features', 'FLAG_OWN_REALTY':'boolean_features', 'CNT_CHILDREN':'count_features', 'AMT_INCOME_TOTAL':'amount_feautres', 'AMT_CREDIT':'amount_feautres', 'AMT_ANNUITY':'amount_feautres', 'AMT_GOODS_PRICE':'amount_feautres', 'NAME_TYPE_SUITE':'category_features', 'NAME_INCOME_TYPE':'category_features', 'NAME_EDUCATION_TYPE':'category_features', 'NAME_FAMILY_STATUS':'category_features', 'NAME_HOUSING_TYPE':'category_features', 'REGION_POPULATION_RELATIVE':'normal_features', 'DAYS_BIRTH':'amount_feautres', 'DAYS_EMPLOYED':'amount_feautres', 'DAYS_REGISTRATION':'amount_feautres', 'DAYS_ID_PUBLISH':'amount_feautres', 'OWN_CAR_AGE':'amount_feautres', 'FLAG_MOBIL':'boolean_features', 'FLAG_EMP_PHONE':'boolean_features', 'FLAG_WORK_PHONE':'boolean_features', 'FLAG_CONT_MOBILE':'boolean_features', 'FLAG_PHONE':'boolean_features', 'FLAG_EMAIL':'boolean_features', 'OCCUPATION_TYPE':'category_features', 'CNT_FAM_MEMBERS':'count_features', 'REGION_RATING_CLIENT':'category_features', 'REGION_RATING_CLIENT_W_CITY':'category_features', 'WEEKDAY_APPR_PROCESS_START':'category_features', 'HOUR_APPR_PROCESS_START':'category_features', 'REG_REGION_NOT_LIVE_REGION':'boolean_features', 'REG_REGION_NOT_WORK_REGION':'boolean_features', 'LIVE_REGION_NOT_WORK_REGION':'boolean_features', 'REG_CITY_NOT_LIVE_CITY':'boolean_features', 'REG_CITY_NOT_WORK_CITY':'boolean_features', 'LIVE_CITY_NOT_WORK_CITY':'boolean_features', 'ORGANIZATION_TYPE':'category_features', 'EXT_SOURCE_1':'normal_features', 'EXT_SOURCE_2':'normal_features', 'EXT_SOURCE_3':'normal_features', 'APARTMENTS_AVG':'normal_features', 'BASEMENTAREA_AVG':'normal_features', 'YEARS_BEGINEXPLUATATION_AVG':'normal_features', 'YEARS_BUILD_AVG':'normal_features', 'COMMONAREA_AVG':'normal_features', 'ELEVATORS_AVG':'normal_features', 'ENTRANCES_AVG':'normal_features', 'FLOORSMAX_AVG':'normal_features', 'FLOORSMIN_AVG':'normal_features', 'LANDAREA_AVG':'normal_features', 'LIVINGAPARTMENTS_AVG':'normal_features', 'LIVINGAREA_AVG':'normal_features', 'NONLIVINGAPARTMENTS_AVG':'normal_features', 'NONLIVINGAREA_AVG':'normal_features', 'APARTMENTS_MODE':'normal_features', 'BASEMENTAREA_MODE':'normal_features', 'YEARS_BEGINEXPLUATATION_MODE':'normal_features', 'YEARS_BUILD_MODE':'normal_features', 'COMMONAREA_MODE':'normal_features', 'ELEVATORS_MODE':'normal_features', 'ENTRANCES_MODE':'normal_features', 'FLOORSMAX_MODE':'normal_features', 'FLOORSMIN_MODE':'normal_features', 'LANDAREA_MODE':'normal_features', 'LIVINGAPARTMENTS_MODE':'normal_features', 'LIVINGAREA_MODE':'normal_features', 'NONLIVINGAPARTMENTS_MODE':'normal_features', 'NONLIVINGAREA_MODE':'normal_features', 'APARTMENTS_MEDI':'normal_features', 'BASEMENTAREA_MEDI':'normal_features', 'YEARS_BEGINEXPLUATATION_MEDI':'normal_features', 'YEARS_BUILD_MEDI':'normal_features', 'COMMONAREA_MEDI':'normal_features', 'ELEVATORS_MEDI':'normal_features', 'ENTRANCES_MEDI':'normal_features', 'FLOORSMAX_MEDI':'normal_features', 'FLOORSMIN_MEDI':'normal_features', 'LANDAREA_MEDI':'normal_features', 'LIVINGAPARTMENTS_MEDI':'normal_features', 'LIVINGAREA_MEDI':'normal_features', 'NONLIVINGAPARTMENTS_MEDI':'normal_features', 'NONLIVINGAREA_MEDI':'normal_features', 'FONDKAPREMONT_MODE':'category_features', 'HOUSETYPE_MODE':'category_features', 'TOTALAREA_MODE':'normal_features', 'WALLSMATERIAL_MODE':'category_features', 'EMERGENCYSTATE_MODE':'category_features', 'OBS_30_CNT_SOCIAL_CIRCLE':'count_features', 'DEF_30_CNT_SOCIAL_CIRCLE':'count_features', 'OBS_60_CNT_SOCIAL_CIRCLE':'count_features', 'DEF_60_CNT_SOCIAL_CIRCLE':'count_features', 'DAYS_LAST_PHONE_CHANGE':'count_features', 'FLAG_DOCUMENT_2':'boolean_features', 'FLAG_DOCUMENT_3':'boolean_features', 'FLAG_DOCUMENT_4':'boolean_features', 'FLAG_DOCUMENT_5':'boolean_features', 'FLAG_DOCUMENT_6':'boolean_features', 'FLAG_DOCUMENT_7':'boolean_features', 'FLAG_DOCUMENT_8':'boolean_features', 'FLAG_DOCUMENT_9':'boolean_features', 'FLAG_DOCUMENT_10':'boolean_features', 'FLAG_DOCUMENT_11':'boolean_features', 'FLAG_DOCUMENT_12':'boolean_features', 'FLAG_DOCUMENT_13':'boolean_features', 'FLAG_DOCUMENT_14':'boolean_features', 'FLAG_DOCUMENT_15':'boolean_features', 'FLAG_DOCUMENT_16':'boolean_features', 'FLAG_DOCUMENT_17':'boolean_features', 'FLAG_DOCUMENT_18':'boolean_features', 'FLAG_DOCUMENT_19':'boolean_features', 'FLAG_DOCUMENT_20':'boolean_features', 'FLAG_DOCUMENT_21':'boolean_features', 'AMT_REQ_CREDIT_BUREAU_HOUR':'count_features', 'AMT_REQ_CREDIT_BUREAU_DAY':'count_features', 'AMT_REQ_CREDIT_BUREAU_WEEK':'count_features', 'AMT_REQ_CREDIT_BUREAU_MON':'count_features', 'AMT_REQ_CREDIT_BUREAU_QRT':'count_features', 'AMT_REQ_CREDIT_BUREAU_YEAR':'count_features',
                'bureau_bureau_cnt':'count_features', 'bureau_CREDIT_DAY_OVERDUE_sum':'amount_feautres', 'bureau_DAYS_CREDIT_ENDDATE_sum':'amount_feautres', 'bureau_DAYS_ENDDATE_FACT_sum':'amount_feautres', 'bureau_DAYS_CREDIT_sum':'amount_feautres', 'bureau_DAYS_CREDIT_UPDATE_sum':'amount_feautres', 'bureau_CNT_CREDIT_PROLONG_sum':'amount_feautres', 'bureau_AMT_CREDIT_SUM_sum':'amount_feautres', 'bureau_AMT_ANNUITY_sum':'amount_feautres', 'bureau_AMT_CREDIT_SUM_DEBT_sum':'amount_feautres', 'bureau_AMT_CREDIT_SUM_LIMIT_sum':'amount_feautres', 'bureau_AMT_CREDIT_MAX_OVERDUE_sum':'amount_feautres', 'bureau_MONTHS_BALANCE_max_sum':'amount_feautres', 'bureau_MONTHS_BALANCE_cnt_sum':'amount_feautres', 'bureau_STATUS_cntd_sum':'amount_feautres', 'bureau_AMT_CREDIT_MAX_OVERDUE_max':'amount_feautres', 'bureau_CNT_CREDIT_PROLONG_max':'amount_feautres', 'bureau_DAYS_CREDIT_max':'amount_feautres', 'bureau_MONTHS_BALANCE_max_max':'amount_feautres', 'bureau_STATUS_cntd_max':'amount_feautres', 'bureau_CREDIT_CURRENCY_cnt':'amount_feautres', 'bureau_CREDIT_ACTIVE_cnt':'amount_feautres', 'bureau_CREDIT_TYPE_cnt':'amount_feautres', 'bureau_STATUS_set_cnt':'amount_feautres', 'bureau_latest_STATUS_cnt':'amount_feautres', 'bureau_CREDIT_CURRENCY_set':'category_features', 'bureau_CREDIT_ACTIVE_set':'category_features', 'bureau_CREDIT_TYPE_set':'category_features', 'bureau_latest_STATUS_set':'category_features', 'bureau_STATUS_set':'category_features',
                'cash_MONTHS_BALANCE_min':'count_features', 'cash_MONTHS_BALANCE_max':'count_features', 'cash_MONTHS_BALANCE_cnt':'count_features', 'cash_CNT_INSTALMENT_min':'count_features', 'cash_CNT_INSTALMENT_max':'count_features', 'cash_CNT_INSTALMENT_set':'category_features', 'cash_CNT_INSTALMENT_FUTURE_min':'count_features', 'cash_CNT_INSTALMENT_FUTURE_max':'count_features', 'cash_NAME_CONTRACT_STATUS_cntd':'count_features', 'cash_NAME_CONTRACT_STATUS_set':'category_features', 'cash_NAME_CONTRACT_STATUS_latest':'category_features', 'cash_SK_DPD_max':'count_features', 'cash_SK_DPD_cnt0':'count_features', 'cash_SK_DPD_DEF_max':'count_features', 'cash_SK_DPD_DEF_cnt0':'count_features', 'cash_SK_DPD_diff_max':'count_features', 'cash_SK_DPD_diff_cnt0':'count_features',
               'prev_PREV_cnt':'count_features', 'prev_AMT_DOWN_PAYMENT_cnt_negative':'count_features', 'prev_AMT_DOWN_PAYMENT_cnt_positive':'count_features', 'prev_HOUR_APPR_PROCESS_START_median':'count_features', 'prev_FLAG_LAST_APPL_PER_CONTRACT_1':'boolean_features', 'prev_NFLAG_LAST_APPL_IN_DAY_1':'boolean_features', 'prev_NFLAG_INSURED_ON_APPROVAL':'boolean_features', 'prev_NAME_CONTRACT_STATUS_Approved':'boolean_features', 'prev_NAME_CONTRACT_STATUS_Canceled':'boolean_features', 'prev_NAME_CONTRACT_STATUS_Refused':'boolean_features', 'prev_NAME_CONTRACT_STATUS_Unused_offer':'boolean_features', 'prev_NAME_PAYMENT_TYPE_Cash_through_the_bank':'boolean_features', 'prev_NAME_PAYMENT_TYPE_Cashless_from_the_account_of_the_employer':'boolean_features', 'prev_NAME_PAYMENT_TYPE_Non-cash_from_your_account':'boolean_features', 'prev_NAME_PAYMENT_TYPE_XNA':'boolean_features', 'prev_CODE_REJECT_REASON_CLIENT':'boolean_features', 'prev_CODE_REJECT_REASON_HC':'boolean_features', 'prev_CODE_REJECT_REASON_LIMIT':'boolean_features', 'prev_CODE_REJECT_REASON_SCO':'boolean_features', 'prev_CODE_REJECT_REASON_SCOFR':'boolean_features', 'prev_CODE_REJECT_REASON_SYSTEM':'boolean_features', 'prev_CODE_REJECT_REASON_VERIF':'boolean_features', 'prev_CODE_REJECT_REASON_XAP':'boolean_features', 'prev_CODE_REJECT_REASON_XNA':'boolean_features', 'prev_NAME_TYPE_SUITE_Children':'boolean_features', 'prev_NAME_TYPE_SUITE_Family':'boolean_features', 'prev_NAME_TYPE_SUITE_Group_of_people':'boolean_features', 'prev_NAME_TYPE_SUITE_Other_A':'boolean_features', 'prev_NAME_TYPE_SUITE_Other_B':'boolean_features', 'prev_NAME_TYPE_SUITE_Spouse__partner':'boolean_features', 'prev_NAME_TYPE_SUITE_Unaccompanied':'boolean_features', 'prev_NAME_CLIENT_TYPE_New':'boolean_features', 'prev_NAME_CLIENT_TYPE_Refreshed':'boolean_features', 'prev_NAME_CLIENT_TYPE_Repeater':'boolean_features', 'prev_NAME_CLIENT_TYPE_XNA':'boolean_features', 'prev_NAME_PORTFOLIO_Cards':'boolean_features', 'prev_NAME_PORTFOLIO_Cars':'boolean_features', 'prev_NAME_PORTFOLIO_Cash':'boolean_features', 'prev_NAME_PORTFOLIO_POS':'boolean_features', 'prev_NAME_PORTFOLIO_XNA':'boolean_features', 'prev_NAME_PRODUCT_TYPE_XNA':'boolean_features', 'prev_NAME_PRODUCT_TYPE_walk-in':'boolean_features', 'prev_NAME_PRODUCT_TYPE_x-sell':'boolean_features', 'prev_CHANNEL_TYPE_AP+_(Cash_loan)':'boolean_features', 'prev_CHANNEL_TYPE_Car_dealer':'boolean_features', 'prev_CHANNEL_TYPE_Channel_of_corporate_sales':'boolean_features', 'prev_CHANNEL_TYPE_Contact_center':'boolean_features', 'prev_CHANNEL_TYPE_Country-wide':'boolean_features', 'prev_CHANNEL_TYPE_Credit_and_cash_offices':'boolean_features', 'prev_CHANNEL_TYPE_Regional_/_Local':'boolean_features', 'prev_CHANNEL_TYPE_Stone':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Auto_technology':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Clothing':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Connectivity':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Construction':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Consumer_electronics':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Furniture':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Industry':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Jewelry':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_MLM_partners':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_Tourism':'boolean_features', 'prev_NAME_SELLER_INDUSTRY_XNA':'boolean_features', 'prev_NAME_YIELD_GROUP_XNA':'boolean_features', 'prev_NAME_YIELD_GROUP_high':'boolean_features', 'prev_NAME_YIELD_GROUP_low_action':'boolean_features', 'prev_NAME_YIELD_GROUP_low_normal':'boolean_features', 'prev_NAME_YIELD_GROUP_middle':'boolean_features', 'prev_PRODUCT_COMBINATION_Card_Street':'boolean_features', 'prev_PRODUCT_COMBINATION_Card_X-Sell':'boolean_features', 'prev_PRODUCT_COMBINATION_Cash':'boolean_features', 'prev_PRODUCT_COMBINATION_Cash_Street__high':'boolean_features', 'prev_PRODUCT_COMBINATION_Cash_Street__low':'boolean_features', 'prev_PRODUCT_COMBINATION_Cash_Street__middle':'boolean_features', 'prev_PRODUCT_COMBINATION_Cash_X-Sell__high':'boolean_features', 'prev_PRODUCT_COMBINATION_Cash_X-Sell__low':'boolean_features', 'prev_PRODUCT_COMBINATION_Cash_X-Sell__middle':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_household_with_interest':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_household_without_interest':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_industry_with_interest':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_industry_without_interest':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_mobile_with_interest':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_mobile_without_interest':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_other_with_interest':'boolean_features', 'prev_PRODUCT_COMBINATION_POS_others_without_interest':'boolean_features', 'prev_RATE_DOWN_PAYMENT_min':'normal_features', 'prev_RATE_DOWN_PAYMENT_max':'normal_features', 'prev_RATE_DOWN_PAYMENT_mean':'normal_features', 'prev_RATE_DOWN_PAYMENT_median':'normal_features', 'prev_RATE_DOWN_PAYMENT_latest':'normal_features', 'prev_RATE_INTEREST_PRIMARY_min':'normal_features', 'prev_RATE_INTEREST_PRIMARY_max':'normal_features', 'prev_RATE_INTEREST_PRIMARY_mean':'normal_features', 'prev_RATE_INTEREST_PRIMARY_median':'normal_features', 'prev_RATE_INTEREST_PRIMARY_latest':'normal_features', 'prev_RATE_INTEREST_PRIVILEGED_min':'normal_features', 'prev_RATE_INTEREST_PRIVILEGED_max':'normal_features', 'prev_RATE_INTEREST_PRIVILEGED_mean':'normal_features', 'prev_RATE_INTEREST_PRIVILEGED_median':'normal_features', 'prev_RATE_INTEREST_PRIVILEGED_latest':'normal_features', 'prev_DAYS_DECISION_min':'amount_feautres', 'prev_DAYS_DECISION_max':'amount_feautres', 'prev_DAYS_DECISION_mean':'amount_feautres', 'prev_DAYS_DECISION_median':'amount_feautres', 'prev_DAYS_DECISION_latest':'amount_feautres', 'prev_SELLERPLACE_AREA_min':'amount_feautres', 'prev_SELLERPLACE_AREA_max':'amount_feautres', 'prev_SELLERPLACE_AREA_mean':'amount_feautres', 'prev_SELLERPLACE_AREA_median':'amount_feautres', 'prev_SELLERPLACE_AREA_latest':'amount_feautres', 'prev_DAYS_FIRST_DRAWING_min':'amount_feautres', 'prev_DAYS_FIRST_DRAWING_max':'amount_feautres', 'prev_DAYS_FIRST_DRAWING_mean':'amount_feautres', 'prev_DAYS_FIRST_DRAWING_median':'amount_feautres', 'prev_DAYS_FIRST_DRAWING_latest':'amount_feautres', 'prev_DAYS_FIRST_DUE_min':'amount_feautres', 'prev_DAYS_FIRST_DUE_max':'amount_feautres', 'prev_DAYS_FIRST_DUE_mean':'amount_feautres', 'prev_DAYS_FIRST_DUE_median':'amount_feautres', 'prev_DAYS_FIRST_DUE_latest':'amount_feautres', 'prev_DAYS_LAST_DUE_1ST_VERSION_min':'amount_feautres', 'prev_DAYS_LAST_DUE_1ST_VERSION_max':'amount_feautres', 'prev_DAYS_LAST_DUE_1ST_VERSION_mean':'amount_feautres', 'prev_DAYS_LAST_DUE_1ST_VERSION_median':'amount_feautres', 'prev_DAYS_LAST_DUE_1ST_VERSION_latest':'amount_feautres', 'prev_DAYS_LAST_DUE_min':'amount_feautres', 'prev_DAYS_LAST_DUE_max':'amount_feautres', 'prev_DAYS_LAST_DUE_mean':'amount_feautres', 'prev_DAYS_LAST_DUE_median':'amount_feautres', 'prev_DAYS_LAST_DUE_latest':'amount_feautres', 'prev_DAYS_TERMINATION_min':'amount_feautres', 'prev_DAYS_TERMINATION_max':'amount_feautres', 'prev_DAYS_TERMINATION_mean':'amount_feautres', 'prev_DAYS_TERMINATION_median':'amount_feautres', 'prev_DAYS_TERMINATION_latest':'amount_feautres', 'prev_cash_MONTHS_BALANCE_min_min':'count_features', 'prev_cash_MONTHS_BALANCE_min_max':'count_features', 'prev_cash_MONTHS_BALANCE_min_mean':'count_features', 'prev_cash_MONTHS_BALANCE_min_median':'count_features', 'prev_cash_MONTHS_BALANCE_min_latest':'count_features', 'prev_cash_MONTHS_BALANCE_max_min':'count_features', 'prev_cash_MONTHS_BALANCE_max_max':'count_features', 'prev_cash_MONTHS_BALANCE_max_mean':'count_features', 'prev_cash_MONTHS_BALANCE_max_median':'count_features', 'prev_cash_MONTHS_BALANCE_max_latest':'count_features', 'prev_cash_MONTHS_BALANCE_cnt_min':'count_features', 'prev_cash_MONTHS_BALANCE_cnt_max':'count_features', 'prev_cash_MONTHS_BALANCE_cnt_mean':'count_features', 'prev_cash_MONTHS_BALANCE_cnt_median':'count_features', 'prev_cash_MONTHS_BALANCE_cnt_latest':'count_features', 'prev_cash_CNT_INSTALMENT_min_min':'count_features', 'prev_cash_CNT_INSTALMENT_min_max':'count_features', 'prev_cash_CNT_INSTALMENT_min_mean':'count_features', 'prev_cash_CNT_INSTALMENT_min_median':'count_features', 'prev_cash_CNT_INSTALMENT_min_latest':'count_features', 'prev_cash_CNT_INSTALMENT_max_min':'count_features', 'prev_cash_CNT_INSTALMENT_max_max':'count_features', 'prev_cash_CNT_INSTALMENT_max_mean':'count_features', 'prev_cash_CNT_INSTALMENT_max_median':'count_features', 'prev_cash_CNT_INSTALMENT_max_latest':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_min_min':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_min_max':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_min_mean':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_min_median':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_min_latest':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_max_min':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_max_max':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_max_mean':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_max_median':'count_features', 'prev_cash_CNT_INSTALMENT_FUTURE_max_latest':'count_features', 'prev_cash_NAME_CONTRACT_STATUS_cntd_min':'count_features', 'prev_cash_NAME_CONTRACT_STATUS_cntd_max':'count_features', 'prev_cash_NAME_CONTRACT_STATUS_cntd_mean':'count_features', 'prev_cash_NAME_CONTRACT_STATUS_cntd_median':'count_features', 'prev_cash_NAME_CONTRACT_STATUS_cntd_latest':'count_features', 'prev_cash_SK_DPD_max_min':'count_features', 'prev_cash_SK_DPD_max_max':'count_features', 'prev_cash_SK_DPD_max_mean':'count_features', 'prev_cash_SK_DPD_max_median':'count_features', 'prev_cash_SK_DPD_max_latest':'count_features', 'prev_cash_SK_DPD_cnt0_min':'count_features', 'prev_cash_SK_DPD_cnt0_max':'count_features', 'prev_cash_SK_DPD_cnt0_mean':'count_features', 'prev_cash_SK_DPD_cnt0_median':'count_features', 'prev_cash_SK_DPD_cnt0_latest':'count_features', 'prev_cash_SK_DPD_DEF_max_min':'count_features', 'prev_cash_SK_DPD_DEF_max_max':'count_features', 'prev_cash_SK_DPD_DEF_max_mean':'count_features', 'prev_cash_SK_DPD_DEF_max_median':'count_features', 'prev_cash_SK_DPD_DEF_max_latest':'count_features', 'prev_cash_SK_DPD_DEF_cnt0_min':'count_features', 'prev_cash_SK_DPD_DEF_cnt0_max':'count_features', 'prev_cash_SK_DPD_DEF_cnt0_mean':'count_features', 'prev_cash_SK_DPD_DEF_cnt0_median':'count_features', 'prev_cash_SK_DPD_DEF_cnt0_latest':'count_features', 'prev_cash_SK_DPD_diff_max_min':'count_features', 'prev_cash_SK_DPD_diff_max_max':'count_features', 'prev_cash_SK_DPD_diff_max_mean':'count_features', 'prev_cash_SK_DPD_diff_max_median':'count_features', 'prev_cash_SK_DPD_diff_max_latest':'count_features', 'prev_cash_SK_DPD_diff_cnt0_min':'count_features', 'prev_cash_SK_DPD_diff_cnt0_max':'count_features', 'prev_cash_SK_DPD_diff_cnt0_mean':'count_features', 'prev_cash_SK_DPD_diff_cnt0_median':'count_features', 'prev_cash_SK_DPD_diff_cnt0_latest':'count_features', 'prev_credit_MONTHS_BALANCE_min_min':'count_features', 'prev_credit_MONTHS_BALANCE_min_max':'count_features', 'prev_credit_MONTHS_BALANCE_min_mean':'count_features', 'prev_credit_MONTHS_BALANCE_min_median':'count_features', 'prev_credit_MONTHS_BALANCE_min_latest':'count_features', 'prev_credit_MONTHS_BALANCE_max_min':'count_features', 'prev_credit_MONTHS_BALANCE_max_max':'count_features', 'prev_credit_MONTHS_BALANCE_max_mean':'count_features', 'prev_credit_MONTHS_BALANCE_max_median':'count_features', 'prev_credit_MONTHS_BALANCE_max_latest':'count_features', 'prev_credit_MONTHS_BALANCE_count_min':'count_features', 'prev_credit_MONTHS_BALANCE_count_max':'count_features', 'prev_credit_MONTHS_BALANCE_count_mean':'count_features', 'prev_credit_MONTHS_BALANCE_count_median':'count_features', 'prev_credit_MONTHS_BALANCE_count_latest':'count_features', 'prev_credit_AMT_BALANCE_min_min':'amount_feautres', 'prev_credit_AMT_BALANCE_min_max':'amount_feautres', 'prev_credit_AMT_BALANCE_min_mean':'amount_feautres', 'prev_credit_AMT_BALANCE_min_median':'amount_feautres', 'prev_credit_AMT_BALANCE_min_latest':'amount_feautres', 'prev_credit_AMT_BALANCE_max_min':'amount_feautres', 'prev_credit_AMT_BALANCE_max_max':'amount_feautres', 'prev_credit_AMT_BALANCE_max_mean':'amount_feautres', 'prev_credit_AMT_BALANCE_max_median':'amount_feautres', 'prev_credit_AMT_BALANCE_max_latest':'amount_feautres', 'prev_credit_AMT_BALANCE_sum_min':'amount_feautres', 'prev_credit_AMT_BALANCE_sum_max':'amount_feautres', 'prev_credit_AMT_BALANCE_sum_mean':'amount_feautres', 'prev_credit_AMT_BALANCE_sum_median':'amount_feautres', 'prev_credit_AMT_BALANCE_sum_latest':'amount_feautres', 'prev_credit_AMT_BALANCE_mean_min':'amount_feautres', 'prev_credit_AMT_BALANCE_mean_max':'amount_feautres', 'prev_credit_AMT_BALANCE_mean_mean':'amount_feautres', 'prev_credit_AMT_BALANCE_mean_median':'amount_feautres', 'prev_credit_AMT_BALANCE_mean_latest':'amount_feautres', 'prev_credit_AMT_BALANCE_size_min':'count_features', 'prev_credit_AMT_BALANCE_size_max':'count_features', 'prev_credit_AMT_BALANCE_size_mean':'count_features', 'prev_credit_AMT_BALANCE_size_median':'count_features', 'prev_credit_AMT_BALANCE_size_latest':'count_features', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_min_min':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_min_max':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_min_mean':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_min_median':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_min_latest':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_max_min':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_max_max':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_max_mean':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_max_median':'amount_feautres', 'prev_credit_AMT_CREDIT_LIMIT_ACTUAL_max_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_min_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_min_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_min_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_min_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_min_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_size_min':'count_features', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_size_max':'count_features', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_size_mean':'count_features', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_size_median':'count_features', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_size_latest':'count_features', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_mean_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_mean_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_mean_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_mean_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_mean_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_sum_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_sum_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_sum_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_sum_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_sum_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_max_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_max_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_max_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_max_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_ATM_CURRENT_max_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_min_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_min_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_min_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_min_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_min_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_size_min':'count_features', 'prev_credit_AMT_DRAWINGS_CURRENT_size_max':'count_features', 'prev_credit_AMT_DRAWINGS_CURRENT_size_mean':'count_features', 'prev_credit_AMT_DRAWINGS_CURRENT_size_median':'count_features', 'prev_credit_AMT_DRAWINGS_CURRENT_size_latest':'count_features', 'prev_credit_AMT_DRAWINGS_CURRENT_mean_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_mean_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_mean_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_mean_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_mean_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_sum_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_sum_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_sum_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_sum_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_sum_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_max_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_max_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_max_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_max_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_CURRENT_max_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_min_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_min_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_min_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_min_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_min_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_size_min':'count_features', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_size_max':'count_features', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_size_mean':'count_features', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_size_median':'count_features', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_size_latest':'count_features', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_mean_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_mean_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_mean_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_mean_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_mean_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_sum_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_sum_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_sum_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_sum_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_sum_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_max_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_max_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_max_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_max_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_OTHER_CURRENT_max_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_min_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_min_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_min_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_min_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_min_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_size_min':'count_features', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_size_max':'count_features', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_size_mean':'count_features', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_size_median':'count_features', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_size_latest':'count_features', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_mean_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_mean_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_mean_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_mean_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_mean_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_sum_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_sum_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_sum_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_sum_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_sum_latest':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_max_min':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_max_max':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_max_mean':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_max_median':'amount_feautres', 'prev_credit_AMT_DRAWINGS_POS_CURRENT_max_latest':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_min_min':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_min_max':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_min_mean':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_min_median':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_min_latest':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_max_min':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_max_max':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_max_mean':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_max_median':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_max_latest':'amount_feautres', 'prev_credit_AMT_INST_MIN_REGULARITY_cntd_min':'count_features', 'prev_credit_AMT_INST_MIN_REGULARITY_cntd_max':'count_features', 'prev_credit_AMT_INST_MIN_REGULARITY_cntd_mean':'count_features', 'prev_credit_AMT_INST_MIN_REGULARITY_cntd_median':'count_features', 'prev_credit_AMT_INST_MIN_REGULARITY_cntd_latest':'count_features', 'prev_credit_AMT_PAYMENT_CURRENT_sum_min':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_sum_max':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_sum_mean':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_sum_median':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_sum_latest':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_max_min':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_max_max':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_max_mean':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_max_median':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_max_latest':'amount_feautres', 'prev_credit_AMT_PAYMENT_CURRENT_cntd_min':'count_features', 'prev_credit_AMT_PAYMENT_CURRENT_cntd_max':'count_features', 'prev_credit_AMT_PAYMENT_CURRENT_cntd_mean':'count_features', 'prev_credit_AMT_PAYMENT_CURRENT_cntd_median':'count_features', 'prev_credit_AMT_PAYMENT_CURRENT_cntd_latest':'count_features', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_sum_min':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_sum_max':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_sum_mean':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_sum_median':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_sum_latest':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_max_min':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_max_max':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_max_mean':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_max_median':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_max_latest':'amount_feautres', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_cntd_min':'count_features', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_cntd_max':'count_features', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_cntd_mean':'count_features', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_cntd_median':'count_features', 'prev_credit_AMT_PAYMENT_TOTAL_CURRENT_cntd_latest':'count_features', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_sum_min':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_sum_max':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_sum_mean':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_sum_median':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_sum_latest':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_max_min':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_max_max':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_max_mean':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_max_median':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_max_latest':'amount_feautres', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_cntd_min':'count_features', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_cntd_max':'count_features', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_cntd_mean':'count_features', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_cntd_median':'count_features', 'prev_credit_AMT_RECEIVABLE_PRINCIPAL_cntd_latest':'count_features', 'prev_credit_AMT_RECIVABLE_sum_min':'amount_feautres', 'prev_credit_AMT_RECIVABLE_sum_max':'amount_feautres', 'prev_credit_AMT_RECIVABLE_sum_mean':'amount_feautres', 'prev_credit_AMT_RECIVABLE_sum_median':'amount_feautres', 'prev_credit_AMT_RECIVABLE_sum_latest':'amount_feautres', 'prev_credit_AMT_RECIVABLE_max_min':'amount_feautres', 'prev_credit_AMT_RECIVABLE_max_max':'amount_feautres', 'prev_credit_AMT_RECIVABLE_max_mean':'amount_feautres', 'prev_credit_AMT_RECIVABLE_max_median':'amount_feautres', 'prev_credit_AMT_RECIVABLE_max_latest':'amount_feautres', 'prev_credit_AMT_RECIVABLE_cntd_min':'count_features', 'prev_credit_AMT_RECIVABLE_cntd_max':'count_features', 'prev_credit_AMT_RECIVABLE_cntd_mean':'count_features', 'prev_credit_AMT_RECIVABLE_cntd_median':'count_features', 'prev_credit_AMT_RECIVABLE_cntd_latest':'count_features', 'prev_credit_AMT_TOTAL_RECEIVABLE_sum_min':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_sum_max':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_sum_mean':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_sum_median':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_sum_latest':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_max_min':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_max_max':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_max_mean':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_max_median':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_max_latest':'amount_feautres', 'prev_credit_AMT_TOTAL_RECEIVABLE_cntd_min':'count_features', 'prev_credit_AMT_TOTAL_RECEIVABLE_cntd_max':'count_features', 'prev_credit_AMT_TOTAL_RECEIVABLE_cntd_mean':'count_features', 'prev_credit_AMT_TOTAL_RECEIVABLE_cntd_median':'count_features', 'prev_credit_AMT_TOTAL_RECEIVABLE_cntd_latest':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_sum_min':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_sum_max':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_sum_mean':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_sum_median':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_sum_latest':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_max_min':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_max_max':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_max_mean':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_max_median':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_max_latest':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_mean_min':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_mean_max':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_mean_mean':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_mean_median':'count_features', 'prev_credit_CNT_DRAWINGS_ATM_CURRENT_mean_latest':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_sum_min':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_sum_max':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_sum_mean':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_sum_median':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_sum_latest':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_max_min':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_max_max':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_max_mean':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_max_median':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_max_latest':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_mean_min':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_mean_max':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_mean_mean':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_mean_median':'count_features', 'prev_credit_CNT_DRAWINGS_CURRENT_mean_latest':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_sum_min':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_sum_max':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_sum_mean':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_sum_median':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_sum_latest':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_max_min':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_max_max':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_max_mean':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_max_median':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_max_latest':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_mean_min':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_mean_max':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_mean_mean':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_mean_median':'count_features', 'prev_credit_CNT_DRAWINGS_OTHER_CURRENT_mean_latest':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_sum_min':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_sum_max':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_sum_mean':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_sum_median':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_sum_latest':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_max_min':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_max_max':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_max_mean':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_max_median':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_max_latest':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_mean_min':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_mean_max':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_mean_mean':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_mean_median':'count_features', 'prev_credit_CNT_DRAWINGS_POS_CURRENT_mean_latest':'count_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_cntd_min':'count_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_cntd_max':'count_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_cntd_mean':'count_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_cntd_median':'count_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_cntd_latest':'count_features', 'prev_credit_SK_DPD_max_min':'count_features', 'prev_credit_SK_DPD_max_max':'count_features', 'prev_credit_SK_DPD_max_mean':'count_features', 'prev_credit_SK_DPD_max_median':'count_features', 'prev_credit_SK_DPD_max_latest':'count_features', 'prev_credit_SK_DPD_size_min':'count_features', 'prev_credit_SK_DPD_size_max':'count_features', 'prev_credit_SK_DPD_size_mean':'count_features', 'prev_credit_SK_DPD_size_median':'count_features', 'prev_credit_SK_DPD_size_latest':'count_features', 'prev_credit_SK_DPD_DEF_max_min':'count_features', 'prev_credit_SK_DPD_DEF_max_max':'count_features', 'prev_credit_SK_DPD_DEF_max_mean':'count_features', 'prev_credit_SK_DPD_DEF_max_median':'count_features', 'prev_credit_SK_DPD_DEF_max_latest':'count_features', 'prev_credit_SK_DPD_DEF_size_min':'count_features', 'prev_credit_SK_DPD_DEF_size_max':'count_features', 'prev_credit_SK_DPD_DEF_size_mean':'count_features', 'prev_credit_SK_DPD_DEF_size_median':'count_features', 'prev_credit_SK_DPD_DEF_size_latest':'count_features', 'prev_credit_SK_DPD_diff_max_min':'count_features', 'prev_credit_SK_DPD_diff_max_max':'count_features', 'prev_credit_SK_DPD_diff_max_mean':'count_features', 'prev_credit_SK_DPD_diff_max_median':'count_features', 'prev_credit_SK_DPD_diff_max_latest':'count_features', 'prev_credit_SK_DPD_diff_size_min':'count_features', 'prev_credit_SK_DPD_diff_size_max':'count_features', 'prev_credit_SK_DPD_diff_size_mean':'count_features', 'prev_credit_SK_DPD_diff_size_median':'count_features', 'prev_credit_SK_DPD_diff_size_latest':'count_features', 'prev_installment_NUM_INSTALMENT_VERSION_nunique_min':'count_features', 'prev_installment_NUM_INSTALMENT_VERSION_nunique_max':'count_features', 'prev_installment_NUM_INSTALMENT_VERSION_nunique_mean':'count_features', 'prev_installment_NUM_INSTALMENT_VERSION_nunique_median':'count_features', 'prev_installment_NUM_INSTALMENT_VERSION_nunique_latest':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_min_min':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_min_max':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_min_mean':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_min_median':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_min_latest':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_max_min':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_max_max':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_max_mean':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_max_median':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_max_latest':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_count_min':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_count_max':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_count_mean':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_count_median':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_count_latest':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_nunique_min':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_nunique_max':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_nunique_mean':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_nunique_median':'count_features', 'prev_installment_NUM_INSTALMENT_NUMBER_nunique_latest':'count_features', 'prev_installment_DAYS_INSTALMENT_min_min':'count_features', 'prev_installment_DAYS_INSTALMENT_min_max':'count_features', 'prev_installment_DAYS_INSTALMENT_min_mean':'count_features', 'prev_installment_DAYS_INSTALMENT_min_median':'count_features', 'prev_installment_DAYS_INSTALMENT_min_latest':'count_features', 'prev_installment_DAYS_INSTALMENT_max_min':'count_features', 'prev_installment_DAYS_INSTALMENT_max_max':'count_features', 'prev_installment_DAYS_INSTALMENT_max_mean':'count_features', 'prev_installment_DAYS_INSTALMENT_max_median':'count_features', 'prev_installment_DAYS_INSTALMENT_max_latest':'count_features', 'prev_installment_DAYS_INSTALMENT_median_min':'count_features', 'prev_installment_DAYS_INSTALMENT_median_max':'count_features', 'prev_installment_DAYS_INSTALMENT_median_mean':'count_features', 'prev_installment_DAYS_INSTALMENT_median_median':'count_features', 'prev_installment_DAYS_INSTALMENT_median_latest':'count_features', 'prev_installment_DAYS_INSTALMENT_mean_min':'count_features', 'prev_installment_DAYS_INSTALMENT_mean_max':'count_features', 'prev_installment_DAYS_INSTALMENT_mean_mean':'count_features', 'prev_installment_DAYS_INSTALMENT_mean_median':'count_features', 'prev_installment_DAYS_INSTALMENT_mean_latest':'count_features', 'prev_installment_DAYS_INSTALMENT_range_min':'count_features', 'prev_installment_DAYS_INSTALMENT_range_max':'count_features', 'prev_installment_DAYS_INSTALMENT_range_mean':'count_features', 'prev_installment_DAYS_INSTALMENT_range_median':'count_features', 'prev_installment_DAYS_INSTALMENT_range_latest':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_min_min':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_min_max':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_min_mean':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_min_median':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_min_latest':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_max_min':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_max_max':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_max_mean':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_max_median':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_max_latest':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_median_min':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_median_max':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_median_mean':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_median_median':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_median_latest':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_mean_min':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_mean_max':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_mean_mean':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_mean_median':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_mean_latest':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_range_min':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_range_max':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_range_mean':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_range_median':'count_features', 'prev_installment_DAYS_ENTRY_PAYMENT_range_latest':'count_features', 'prev_installment_DPD_min_min':'count_features', 'prev_installment_DPD_min_max':'count_features', 'prev_installment_DPD_min_mean':'count_features', 'prev_installment_DPD_min_median':'count_features', 'prev_installment_DPD_min_latest':'count_features', 'prev_installment_DPD_max_min':'count_features', 'prev_installment_DPD_max_max':'count_features', 'prev_installment_DPD_max_mean':'count_features', 'prev_installment_DPD_max_median':'count_features', 'prev_installment_DPD_max_latest':'count_features', 'prev_installment_DPD_median_min':'count_features', 'prev_installment_DPD_median_max':'count_features', 'prev_installment_DPD_median_mean':'count_features', 'prev_installment_DPD_median_median':'count_features', 'prev_installment_DPD_median_latest':'count_features', 'prev_installment_DPD_mean_min':'count_features', 'prev_installment_DPD_mean_max':'count_features', 'prev_installment_DPD_mean_mean':'count_features', 'prev_installment_DPD_mean_median':'count_features', 'prev_installment_DPD_mean_latest':'count_features', 'prev_installment_DPD_sum_min':'count_features', 'prev_installment_DPD_sum_max':'count_features', 'prev_installment_DPD_sum_mean':'count_features', 'prev_installment_DPD_sum_median':'count_features', 'prev_installment_DPD_sum_latest':'count_features', 'prev_installment_DPD_std_min':'count_features', 'prev_installment_DPD_std_max':'count_features', 'prev_installment_DPD_std_mean':'count_features', 'prev_installment_DPD_std_median':'count_features', 'prev_installment_DPD_std_latest':'count_features', 'prev_installment_AMT_INSTALMENT_min_min':'amount_feautres', 'prev_installment_AMT_INSTALMENT_min_max':'amount_feautres', 'prev_installment_AMT_INSTALMENT_min_mean':'amount_feautres', 'prev_installment_AMT_INSTALMENT_min_median':'amount_feautres', 'prev_installment_AMT_INSTALMENT_min_latest':'amount_feautres', 'prev_installment_AMT_INSTALMENT_max_min':'amount_feautres', 'prev_installment_AMT_INSTALMENT_max_max':'amount_feautres', 'prev_installment_AMT_INSTALMENT_max_mean':'amount_feautres', 'prev_installment_AMT_INSTALMENT_max_median':'amount_feautres', 'prev_installment_AMT_INSTALMENT_max_latest':'amount_feautres', 'prev_installment_AMT_INSTALMENT_mean_min':'amount_feautres', 'prev_installment_AMT_INSTALMENT_mean_max':'amount_feautres', 'prev_installment_AMT_INSTALMENT_mean_mean':'amount_feautres', 'prev_installment_AMT_INSTALMENT_mean_median':'amount_feautres', 'prev_installment_AMT_INSTALMENT_mean_latest':'amount_feautres', 'prev_installment_AMT_INSTALMENT_sum_min':'amount_feautres', 'prev_installment_AMT_INSTALMENT_sum_max':'amount_feautres', 'prev_installment_AMT_INSTALMENT_sum_mean':'amount_feautres', 'prev_installment_AMT_INSTALMENT_sum_median':'amount_feautres', 'prev_installment_AMT_INSTALMENT_sum_latest':'amount_feautres', 'prev_installment_AMT_INSTALMENT_median_min':'amount_feautres', 'prev_installment_AMT_INSTALMENT_median_max':'amount_feautres', 'prev_installment_AMT_INSTALMENT_median_mean':'amount_feautres', 'prev_installment_AMT_INSTALMENT_median_median':'amount_feautres', 'prev_installment_AMT_INSTALMENT_median_latest':'amount_feautres', 'prev_installment_AMT_INSTALMENT_std_min':'amount_feautres', 'prev_installment_AMT_INSTALMENT_std_max':'amount_feautres', 'prev_installment_AMT_INSTALMENT_std_mean':'amount_feautres', 'prev_installment_AMT_INSTALMENT_std_median':'amount_feautres', 'prev_installment_AMT_INSTALMENT_std_latest':'amount_feautres', 'prev_installment_AMT_INSTALMENT_nunique_min':'count_features', 'prev_installment_AMT_INSTALMENT_nunique_max':'count_features', 'prev_installment_AMT_INSTALMENT_nunique_mean':'count_features', 'prev_installment_AMT_INSTALMENT_nunique_median':'count_features', 'prev_installment_AMT_INSTALMENT_nunique_latest':'count_features', 'prev_installment_AMT_PAYMENT_min_min':'amount_feautres', 'prev_installment_AMT_PAYMENT_min_max':'amount_feautres', 'prev_installment_AMT_PAYMENT_min_mean':'amount_feautres', 'prev_installment_AMT_PAYMENT_min_median':'amount_feautres', 'prev_installment_AMT_PAYMENT_min_latest':'amount_feautres', 'prev_installment_AMT_PAYMENT_max_min':'amount_feautres', 'prev_installment_AMT_PAYMENT_max_max':'amount_feautres', 'prev_installment_AMT_PAYMENT_max_mean':'amount_feautres', 'prev_installment_AMT_PAYMENT_max_median':'amount_feautres', 'prev_installment_AMT_PAYMENT_max_latest':'amount_feautres', 'prev_installment_AMT_PAYMENT_mean_min':'amount_feautres', 'prev_installment_AMT_PAYMENT_mean_max':'amount_feautres', 'prev_installment_AMT_PAYMENT_mean_mean':'amount_feautres', 'prev_installment_AMT_PAYMENT_mean_median':'amount_feautres', 'prev_installment_AMT_PAYMENT_mean_latest':'amount_feautres', 'prev_installment_AMT_PAYMENT_sum_min':'amount_feautres', 'prev_installment_AMT_PAYMENT_sum_max':'amount_feautres', 'prev_installment_AMT_PAYMENT_sum_mean':'amount_feautres', 'prev_installment_AMT_PAYMENT_sum_median':'amount_feautres', 'prev_installment_AMT_PAYMENT_sum_latest':'amount_feautres', 'prev_installment_AMT_PAYMENT_median_min':'amount_feautres', 'prev_installment_AMT_PAYMENT_median_max':'amount_feautres', 'prev_installment_AMT_PAYMENT_median_mean':'amount_feautres', 'prev_installment_AMT_PAYMENT_median_median':'amount_feautres', 'prev_installment_AMT_PAYMENT_median_latest':'amount_feautres', 'prev_installment_AMT_PAYMENT_std_min':'amount_feautres', 'prev_installment_AMT_PAYMENT_std_max':'amount_feautres', 'prev_installment_AMT_PAYMENT_std_mean':'amount_feautres', 'prev_installment_AMT_PAYMENT_std_median':'amount_feautres', 'prev_installment_AMT_PAYMENT_std_latest':'amount_feautres', 'prev_installment_AMT_PAYMENT_nunique_min':'count_features', 'prev_installment_AMT_PAYMENT_nunique_max':'count_features', 'prev_installment_AMT_PAYMENT_nunique_mean':'count_features', 'prev_installment_AMT_PAYMENT_nunique_median':'count_features', 'prev_installment_AMT_PAYMENT_nunique_latest':'count_features', 'prev_installment_AMT_DPD_min_min':'count_features', 'prev_installment_AMT_DPD_min_max':'count_features', 'prev_installment_AMT_DPD_min_mean':'count_features', 'prev_installment_AMT_DPD_min_median':'count_features', 'prev_installment_AMT_DPD_min_latest':'count_features', 'prev_installment_AMT_DPD_max_min':'count_features', 'prev_installment_AMT_DPD_max_max':'count_features', 'prev_installment_AMT_DPD_max_mean':'count_features', 'prev_installment_AMT_DPD_max_median':'count_features', 'prev_installment_AMT_DPD_max_latest':'count_features', 'prev_installment_AMT_DPD_mean_min':'count_features', 'prev_installment_AMT_DPD_mean_max':'count_features', 'prev_installment_AMT_DPD_mean_mean':'count_features', 'prev_installment_AMT_DPD_mean_median':'count_features', 'prev_installment_AMT_DPD_mean_latest':'count_features', 'prev_installment_AMT_DPD_sum_min':'count_features', 'prev_installment_AMT_DPD_sum_max':'count_features', 'prev_installment_AMT_DPD_sum_mean':'count_features', 'prev_installment_AMT_DPD_sum_median':'count_features', 'prev_installment_AMT_DPD_sum_latest':'count_features', 'prev_installment_AMT_DPD_median_min':'count_features', 'prev_installment_AMT_DPD_median_max':'count_features', 'prev_installment_AMT_DPD_median_mean':'count_features', 'prev_installment_AMT_DPD_median_median':'count_features', 'prev_installment_AMT_DPD_median_latest':'count_features', 'prev_installment_AMT_DPD_std_min':'count_features', 'prev_installment_AMT_DPD_std_max':'count_features', 'prev_installment_AMT_DPD_std_mean':'count_features', 'prev_installment_AMT_DPD_std_median':'count_features', 'prev_installment_AMT_DPD_std_latest':'count_features', 'prev_installment_AMT_DPD_nunique_min':'count_features', 'prev_installment_AMT_DPD_nunique_max':'count_features', 'prev_installment_AMT_DPD_nunique_mean':'count_features', 'prev_installment_AMT_DPD_nunique_median':'count_features', 'prev_installment_AMT_DPD_nunique_latest':'count_features', 'prev_WEEKDAY_APPR_PROCESS_START_collect_set':'category_features', 'prev_WEEKDAY_APPR_PROCESS_START_countd':'count_features', 'prev_WEEKDAY_APPR_PROCESS_START_latest':'category_features', 'prev_HOUR_APPR_PROCESS_START_collect_set':'category_features', 'prev_HOUR_APPR_PROCESS_START_countd':'count_features', 'prev_HOUR_APPR_PROCESS_START_latest':'category_features', 'prev_NAME_CASH_LOAN_PURPOSE_collect_set':'category_features', 'prev_NAME_CASH_LOAN_PURPOSE_countd':'count_features', 'prev_NAME_CASH_LOAN_PURPOSE_latest':'category_features', 'prev_NAME_GOODS_CATEGORY_collect_set':'category_features', 'prev_NAME_GOODS_CATEGORY_countd':'count_features', 'prev_NAME_GOODS_CATEGORY_latest':'category_features', 'prev_cash_CNT_INSTALMENT_set_collect_set':'category_features', 'prev_cash_CNT_INSTALMENT_set_countd':'count_features', 'prev_cash_CNT_INSTALMENT_set_latest':'category_features', 'prev_cash_NAME_CONTRACT_STATUS_set_collect_set':'category_features', 'prev_cash_NAME_CONTRACT_STATUS_set_countd':'count_features', 'prev_cash_NAME_CONTRACT_STATUS_set_latest':'category_features', 'prev_cash_NAME_CONTRACT_STATUS_latest_collect_set':'category_features', 'prev_cash_NAME_CONTRACT_STATUS_latest_countd':'count_features', 'prev_cash_NAME_CONTRACT_STATUS_latest_latest':'category_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_set_collect_set':'category_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_set_countd':'count_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_set_latest':'category_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_latest_collect_set':'category_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_latest_countd':'count_features', 'prev_credit_CNT_INSTALMENT_MATURE_CUM_latest_latest':'category_features', 'prev_credit_NAME_CONTRACT_STATUS_cntd_collect_set':'category_features', 'prev_credit_NAME_CONTRACT_STATUS_cntd_countd':'count_features', 'prev_credit_NAME_CONTRACT_STATUS_cntd_latest':'category_features', 'prev_credit_NAME_CONTRACT_STATUS_set_collect_set':'category_features', 'prev_credit_NAME_CONTRACT_STATUS_set_countd':'count_features', 'prev_credit_NAME_CONTRACT_STATUS_set_latest':'category_features', 'prev_installment_NUM_INSTALMENT_VERSION_set_collect_set':'category_features', 'prev_installment_NUM_INSTALMENT_VERSION_set_countd':'count_features', 'prev_installment_NUM_INSTALMENT_VERSION_set_latest':'category_features', 'prev_installment_NUM_INSTALMENT_VERSION_latest_collect_set':'category_features', 'prev_installment_NUM_INSTALMENT_VERSION_latest_countd''prev_installment_NUM_INSTALMENT_VERSION_latest_latest':'category_features', 'prev_NAME_CONTRACT_STATUS_collect_set':'category_features', 'prev_NAME_CONTRACT_STATUS_countd':'count_features', 'prev_NAME_CONTRACT_STATUS_latest':'category_features', 'prev_NAME_PAYMENT_TYPE_collect_set':'category_features', 'prev_NAME_PAYMENT_TYPE_countd':'count_features', 'prev_NAME_PAYMENT_TYPE_latest':'category_features', 'prev_CODE_REJECT_REASON_collect_set':'category_features', 'prev_CODE_REJECT_REASON_countd':'count_features', 'prev_CODE_REJECT_REASON_latest':'category_features', 'prev_NAME_TYPE_SUITE_collect_set':'category_features', 'prev_NAME_TYPE_SUITE_countd':'count_features', 'prev_NAME_TYPE_SUITE_latest':'category_features', 'prev_NAME_CLIENT_TYPE_collect_set':'category_features', 'prev_NAME_CLIENT_TYPE_countd':'count_features', 'prev_NAME_CLIENT_TYPE_latest':'category_features', 'prev_NAME_PORTFOLIO_collect_set':'category_features', 'prev_NAME_PORTFOLIO_countd':'count_features', 'prev_NAME_PORTFOLIO_latest':'category_features', 'prev_NAME_PRODUCT_TYPE_collect_set':'category_features', 'prev_NAME_PRODUCT_TYPE_countd':'count_features', 'prev_NAME_PRODUCT_TYPE_latest':'category_features', 'prev_CHANNEL_TYPE_collect_set':'category_features', 'prev_CHANNEL_TYPE_countd':'count_features', 'prev_CHANNEL_TYPE_latest':'category_features', 'prev_NAME_SELLER_INDUSTRY_collect_set':'category_features', 'prev_NAME_SELLER_INDUSTRY_countd':'count_features', 'prev_NAME_SELLER_INDUSTRY_latest':'category_features', 'prev_NAME_YIELD_GROUP_collect_set':'category_features', 'prev_NAME_YIELD_GROUP_countd':'count_features', 'prev_NAME_YIELD_GROUP_latest':'category_features', 'prev_PRODUCT_COMBINATION_collect_set':'category_features', 'prev_PRODUCT_COMBINATION_countd':'count_features', 'prev_PRODUCT_COMBINATION_latest':'category_features', 'prev_AMT_ANNUITY_sum':'amount_feautres', 'prev_AMT_ANNUITY_mean':'amount_feautres', 'prev_AMT_ANNUITY_max':'amount_feautres', 'prev_AMT_ANNUITY_min':'amount_feautres', 'prev_AMT_APPLICATION_sum':'amount_feautres', 'prev_AMT_APPLICATION_mean':'amount_feautres', 'prev_AMT_APPLICATION_max':'amount_feautres', 'prev_AMT_APPLICATION_min':'amount_feautres', 'prev_AMT_CREDIT_sum':'amount_feautres', 'prev_AMT_CREDIT_mean':'amount_feautres', 'prev_AMT_CREDIT_max':'amount_feautres', 'prev_AMT_CREDIT_min':'amount_feautres', 'prev_AMT_GOODS_PRICE_sum':'amount_feautres', 'prev_AMT_GOODS_PRICE_mean':'amount_feautres', 'prev_AMT_GOODS_PRICE_max':'amount_feautres', 'prev_AMT_GOODS_PRICE_min':'amount_feautres', 'prev_AMT_DOWN_PAYMENT_sum':'amount_feautres', 'prev_AMT_DOWN_PAYMENT_mean':'amount_feautres', 'prev_AMT_DOWN_PAYMENT_max':'amount_feautres', 'prev_AMT_DOWN_PAYMENT_min':'amount_feautres',
               }

In [11]:
category_features = [f for f in feature_dict if feature_dict[f] == 'category_features' and f in train_s.columns ]
boolean_features = [f for f in feature_dict if feature_dict[f] == 'boolean_features'  and f in train_s.columns ]
amount_feautres = [f for f in feature_dict if feature_dict[f] == 'amount_feautres'  and f in train_s.columns ]
normal_features = [f for f in feature_dict if feature_dict[f] == 'normal_features'  and f in train_s.columns ]
count_features = [f for f in feature_dict if feature_dict[f] == 'count_features'  and f in train_s.columns ]

del train_s

In [12]:
len(category_features)

55

In [13]:
minmax = MinMaxScaler().fit(pd.concat([train[amount_feautres+count_features],test[amount_feautres+count_features]],axis = 0).fillna(0))

train[amount_feautres+count_features] = pd.DataFrame(minmax.transform(train[amount_feautres+count_features].fillna(0)), columns = amount_feautres+count_features)
test[amount_feautres+count_features] = pd.DataFrame(minmax.transform(test[amount_feautres+count_features].fillna(0)), columns = amount_feautres+count_features)


In [14]:
# bin
bins = 5
for rate in normal_features+amount_feautres+count_features:
    qcut = pd.qcut(train[rate],bins,retbins=True,duplicates='drop')[1]
    if len(qcut) <=2:
        continue
    qcut[0] = 0
    qcut[-1] = 2
    train[rate +'_bin'] = [rate + '_' + str(np.where(qcut>= r)[0][0]) if not np.isnan(r) else 'miss' for r in train[rate]]
    test[rate +'_bin'] =  [rate + '_' +  str(np.where(qcut>= r)[0][0]) if not np.isnan(r) else 'miss' for r in test[rate]]
    category_features.append(rate +'_bin')


In [15]:

category_features_cardinal = {}
category_features_low_cardinal = []
category_features_high_cardinal = []
for f in category_features:
    l = train[f].unique().shape[0]
    category_features_cardinal[f] = train[f].unique().shape[0]
    if l <= 128 :
        category_features_low_cardinal.append(f)
    else :
        category_features_high_cardinal.append(f)

import operator
sorted(category_features_cardinal.items(), key=operator.itemgetter(1), reverse = True)

[('cash_CNT_INSTALMENT_set', 47629),
 ('prev_PRODUCT_COMBINATION_collect_set', 17934),
 ('prev_HOUR_APPR_PROCESS_START_collect_set', 12774),
 ('prev_NAME_GOODS_CATEGORY_collect_set', 4714),
 ('prev_NAME_CASH_LOAN_PURPOSE_collect_set', 1133),
 ('prev_NAME_SELLER_INDUSTRY_collect_set', 551),
 ('prev_NAME_TYPE_SUITE_collect_set', 245),
 ('prev_WEEKDAY_APPR_PROCESS_START_collect_set', 214),
 ('prev_CODE_REJECT_REASON_collect_set', 206),
 ('prev_CHANNEL_TYPE_collect_set', 178),
 ('bureau_CREDIT_TYPE_set', 165),
 ('bureau_latest_STATUS_set', 114),
 ('ORGANIZATION_TYPE', 58),
 ('cash_NAME_CONTRACT_STATUS_set', 47),
 ('prev_NAME_YIELD_GROUP_collect_set', 41),
 ('prev_NAME_PORTFOLIO_collect_set', 37),
 ('cash_NAME_CONTRACT_STATUS_latest', 28),
 ('prev_NAME_GOODS_CATEGORY_latest', 27),
 ('prev_NAME_CASH_LOAN_PURPOSE_latest', 26),
 ('prev_HOUR_APPR_PROCESS_START_latest', 25),
 ('HOUR_APPR_PROCESS_START', 24),
 ('prev_NAME_CLIENT_TYPE_collect_set', 20),
 ('OCCUPATION_TYPE', 19),
 ('prev_PRODUCT_CO

In [16]:
taget_encoder = ce.TargetEncoder(cols=category_features_low_cardinal + boolean_features)
taget_encoder.fit(train[category_features_low_cardinal + boolean_features], train.TARGET)

onehot_encoder = ce.OneHotEncoder(cols = category_features_low_cardinal + boolean_features)
onehot_encoder.fit(train[category_features_low_cardinal + boolean_features])

ordinal_encoder = ce.OrdinalEncoder(cols = category_features_low_cardinal)
ordinal_encoder.fit(train[category_features_low_cardinal])

X_train = pd.concat([
    train[normal_features+amount_feautres+count_features].fillna(0),
    taget_encoder.transform(train[category_features_low_cardinal + boolean_features]),
    onehot_encoder.transform(train[category_features_low_cardinal + boolean_features]),
    ordinal_encoder.transform(train[category_features_low_cardinal]),
], axis = 1)
y_train = train.TARGET


X_test = pd.concat([
    test[normal_features+amount_feautres+count_features].fillna(0),
    taget_encoder.transform(test[category_features_low_cardinal + boolean_features]),
    onehot_encoder.transform(test[category_features_low_cardinal + boolean_features]),
    ordinal_encoder.transform(test[category_features_low_cardinal]),
], axis = 1)

TargetEncoder(cols=['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE', 'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY', 'WEEKDAY_APPR_PROCESS_START', 'HOUR_APPR_PROCESS_START', 'ORGANIZATION_TYPE', ...RODUCT_COMBINATION_POS_other_with_interest', 'prev_PRODUCT_COMBINATION_POS_others_without_interest'],
       drop_invariant=False, handle_unknown='impute', impute_missing=True,
       min_samples_leaf=1, return_df=True, smoothing=1, verbose=0)

OneHotEncoder(cols=['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE', 'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY', 'WEEKDAY_APPR_PROCESS_START', 'HOUR_APPR_PROCESS_START', 'ORGANIZATION_TYPE', ...RODUCT_COMBINATION_POS_other_with_interest', 'prev_PRODUCT_COMBINATION_POS_others_without_interest'],
       drop_invariant=False, handle_unknown='impute', impute_missing=True,
       return_df=True, verbose=0)

OrdinalEncoder(cols=['NAME_CONTRACT_TYPE', 'CODE_GENDER', 'NAME_TYPE_SUITE', 'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE', 'NAME_FAMILY_STATUS', 'NAME_HOUSING_TYPE', 'OCCUPATION_TYPE', 'REGION_RATING_CLIENT', 'REGION_RATING_CLIENT_W_CITY', 'WEEKDAY_APPR_PROCESS_START', 'HOUR_APPR_PROCESS_START', 'ORGANIZATION_TYPE', ...LER_INDUSTRY_countd_bin', 'prev_NAME_YIELD_GROUP_countd_bin', 'prev_PRODUCT_COMBINATION_countd_bin'],
        drop_invariant=False, handle_unknown='impute', impute_missing=True,
        mapping=[{'col': 'NAME_CONTRACT_TYPE', 'mapping': [('Cash loans', 0), ('Revolving loans', 1)]}, {'col': 'CODE_GENDER', 'mapping': [('M', 0), ('F', 1), ('XNA', 2)]}, {'col': 'NAME_TYPE_SUITE', 'mapping': [('Unaccompanied', 0), ('Family', 1), ('Spouse, partner', 2), ('Children', 3), ('Other_A', 4), (n...countd_4', 3), ('prev_PRODUCT_COMBINATION_countd_2', 4), ('prev_PRODUCT_COMBINATION_countd_0', 5)]}],
        return_df=True, verbose=0)

In [17]:
pd.concat([X_train,pd.DataFrame(y_train, columns = ['TARGET'])], axis = 1).to_csv('./data/input/train.csv', index = False)
# y_train.to_csv('./data/input/y_train.csv', index = False)
X_test.to_csv('./data/input/test.csv', index = False)


In [18]:
X_train.shape
len(np.unique(X_train.columns))
X_train.head()

(307511, 3039)

2821

Unnamed: 0,REGION_POPULATION_RELATIVE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,...,prev_NAME_PAYMENT_TYPE_countd_bin,prev_CODE_REJECT_REASON_countd_bin,prev_NAME_TYPE_SUITE_countd_bin,prev_NAME_CLIENT_TYPE_countd_bin,prev_NAME_PORTFOLIO_countd_bin,prev_NAME_PRODUCT_TYPE_countd_bin,prev_CHANNEL_TYPE_countd_bin,prev_NAME_SELLER_INDUSTRY_countd_bin,prev_NAME_YIELD_GROUP_countd_bin,prev_PRODUCT_COMBINATION_countd_bin
0,0.018801,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,...,0,0,0,0,0,0,0,0,0,0
1,0.003541,0.311267,0.622246,0.0,0.0959,0.0529,0.9851,0.796,0.0605,0.08,...,1,0,1,1,1,1,1,1,1,1
2,0.010032,0.0,0.555912,0.729567,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,0.008019,0.0,0.650442,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,1,2,1,2,1,1,1,2,2
4,0.028663,0.0,0.322738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,0,2,1,1,2,2,1,1,3


In [19]:
X_train_cv, X_test_cv, y_train_cv, y_test_cv = train_test_split(X_train, y_train, test_size = 0.2, random_state = 42)

lr = LogisticRegression(C = 0.1, penalty = 'l1', random_state = 42)

# pipeline = Pipeline([
#     ('select', SelectKBest(chi2, k=300)),
#     ('clf', lr)])

lr.fit(X_train_cv, y_train_cv)
roc_auc_score(y_train_cv, lr.predict_proba(X_train_cv)[:,1]), roc_auc_score(y_test_cv, lr.predict_proba(X_test_cv)[:,1])

LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=42, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

(0.77632119247234432, 0.76845013393429484)

In [20]:
lr = LogisticRegression(C = 0.1, penalty = 'l1', random_state = 42)
lr.fit(X_train, y_train)
roc_auc_score(y_train, lr.predict_proba(X_train)[:,1])

LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l1', random_state=42, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

0.77622416208935252

In [21]:
sub = pd.concat([
    test.SK_ID_CURR,
    pd.DataFrame({'TARGET':lr.predict_proba(X_test)[:,1]})
], axis = 1)


In [22]:
sub.head()
sub.to_csv('./data/submission/forth_20180609.csv', index = False)

Unnamed: 0,SK_ID_CURR,TARGET
0,100001,0.039642
1,100005,0.104532
2,100013,0.018295
3,100028,0.028609
4,100038,0.196711
