In [1]:
import pandas as pd
import os
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)


In [2]:
%cd data
os.getcwd()

d:\Documents\Github\projs\home-credit-default-risk\data


'd:\\Documents\\Github\\projs\\home-credit-default-risk\\data'

In [3]:
def show_cat_col_values(df):
    cats = df.select_dtypes(include = ['object'])

    for col in cats:
        ratios = []
        values = pd.DataFrame(df[col].value_counts())
        for value in values[col].values:
            total = np.sum(values[col].values)
            ratios.append(value/total)

        values['Ratio'] = ratios

        print(values.sort_values(by = 'Ratio', ascending = False))
        print("\n")

def show_missing_values(df):
    na_dict = {}
    for col in df.columns:
        na = df[col].isna().sum()
        total = len(df[col])

        na_dict[col] = f"{round(100*na/total,2)}%"

    print(pd.Series(na_dict).sort_values(ascending = False))

def remove_cat_col_values(df, threshold):
    """
    If one single value dominates the entire column, then we remove the column entirely, as there is too little variation in the column values
    """
    assert 0 <= threshold <= 1, "threshold must be between 0 and 1"
    cats = df.select_dtypes(include = ['object'])
    for col in cats:
        print(col)
        values = pd.DataFrame(df[col].value_counts())
        for value in values[col].values:
            total = np.sum(values[col].values)
            if value/total >= threshold:
                df = df.drop([col], axis = 1)
    return df


def remove_missing_values(df, threshold):
    assert 0 <= threshold <= 1, "threshold must be between 0 and 1"

    for col in df.columns:
        na = df[col].isna().sum()
        total = len(df[col])

        if na/total > threshold:
            df = df.drop([col], axis = 1)

    return df

def remove_bad_rows(df, col, value):
    rows = df.loc[df[col] == value]
    
    df = df.drop(rows.index, axis = 0)
    
    return df

def show_ratio(df, target, col):
    dict_ = {}
    for value in set(df[col].values):
        
        target_1 = df.loc[(df[target] == 1) & (df[col] == value)]
        target_0 = df.loc[(df[target] == 0) & (df[col] == value)]

        if len(target_1) == len(target_0) == 0:
            dict_[value] = -1
        elif len(target_0) == 0:
            dict_[value] = -2
        else:
            dict_[value] = len(target_1)/len(target_0)
    
    series = pd.Series(dict_).sort_values(ascending = False)
    series = series.replace({-1: "no values found", -2: f"{value} has all target values of 1"})
    print(col)
    print(series)
    print("\n")

def show_ratio_by_target(df, target, cols = None):
    """
    This can take either a list, string, or all of the object dataframes in the dataframe
    """
    if isinstance(cols, str):
        show_ratio(df, target, cols)

    elif isinstance(cols, list):
        for col in cols:
            if col != target:
                show_ratio(df, target, col)

    elif isinstance(df, pd.DataFrame):
        for col in df.select_dtypes('object'):
            if col != target:
                show_ratio(df, target, col)

    else:
        return "Invalid type for parameter col"


def show_unique_keys(df):
    for col in df.columns:
        print(col, df[col].is_unique)

def get_cat_agg(df, col, type):
    cat_agg = {}
    for i in range(len(df.columns)):
        if col in df.columns[i]:
            cat_agg[df.columns[i]] = type
    
    return cat_agg


Bureau

In [4]:
raw_bureau = pd.read_csv(os.getcwd()+"\\bureau.csv")

Here we can see that the unique key is SK_ID_BUREAU, which we can use to combine data from the bureau_balance.csv file

In [5]:
show_unique_keys(raw_bureau)

SK_ID_CURR False
SK_ID_BUREAU True
CREDIT_ACTIVE False
CREDIT_CURRENCY False
DAYS_CREDIT False
CREDIT_DAY_OVERDUE False
DAYS_CREDIT_ENDDATE False
DAYS_ENDDATE_FACT False
AMT_CREDIT_MAX_OVERDUE False
CNT_CREDIT_PROLONG False
AMT_CREDIT_SUM False
AMT_CREDIT_SUM_DEBT False
AMT_CREDIT_SUM_LIMIT False
AMT_CREDIT_SUM_OVERDUE False
CREDIT_TYPE False
DAYS_CREDIT_UPDATE False
AMT_ANNUITY False


In [6]:
raw_bureau_dropped = raw_bureau.drop(['SK_ID_BUREAU', 'CREDIT_ACTIVE', 'CREDIT_CURRENCY'], axis = 1)

Dataset Analysis for Bureau

In [7]:
raw_bureau_dropped.head()

Unnamed: 0,SK_ID_CURR,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,
2,215354,-203,0,528.0,,,0,464323.5,,,0.0,Consumer credit,-16,
3,215354,-203,0,,,,0,90000.0,,,0.0,Credit card,-16,
4,215354,-629,0,1197.0,,77674.5,0,2700000.0,,,0.0,Consumer credit,-21,


In [8]:
show_missing_values(raw_bureau_dropped)

AMT_ANNUITY               71.47%
AMT_CREDIT_MAX_OVERDUE    65.51%
DAYS_CREDIT_ENDDATE        6.15%
DAYS_ENDDATE_FACT         36.92%
AMT_CREDIT_SUM_LIMIT      34.48%
AMT_CREDIT_SUM_DEBT       15.01%
SK_ID_CURR                  0.0%
DAYS_CREDIT                 0.0%
CREDIT_DAY_OVERDUE          0.0%
CNT_CREDIT_PROLONG          0.0%
AMT_CREDIT_SUM              0.0%
AMT_CREDIT_SUM_OVERDUE      0.0%
CREDIT_TYPE                 0.0%
DAYS_CREDIT_UPDATE          0.0%
dtype: object


In [9]:
raw_bureau_dropped.dtypes

SK_ID_CURR                  int64
DAYS_CREDIT                 int64
CREDIT_DAY_OVERDUE          int64
DAYS_CREDIT_ENDDATE       float64
DAYS_ENDDATE_FACT         float64
AMT_CREDIT_MAX_OVERDUE    float64
CNT_CREDIT_PROLONG          int64
AMT_CREDIT_SUM            float64
AMT_CREDIT_SUM_DEBT       float64
AMT_CREDIT_SUM_LIMIT      float64
AMT_CREDIT_SUM_OVERDUE    float64
CREDIT_TYPE                object
DAYS_CREDIT_UPDATE          int64
AMT_ANNUITY               float64
dtype: object

In [10]:
show_cat_col_values(raw_bureau_dropped)

                                              CREDIT_TYPE         Ratio
Consumer credit                                   1251615  7.291975e-01
Credit card                                        402195  2.343209e-01
Car loan                                            27690  1.613234e-02
Mortgage                                            18391  1.071469e-02
Microloan                                           12413  7.231879e-03
Loan for business development                        1975  1.150645e-03
Another type of loan                                 1017  5.925096e-04
Unknown type of loan                                  555  3.233459e-04
Loan for working capital replenishment                469  2.732419e-04
Cash loan (non-earmarked)                              56  3.262590e-05
Real estate loan                                       27  1.573034e-05
Loan for the purchase of equipment                     19  1.106950e-05
Loan for purchase of shares (margin lending)            4  2.330

Feature Engineering for Bureau

In [11]:
raw_bureau_dropped['DAYS_CREDIT'].replace(365243, np.nan)
raw_bureau_dropped['DAYS_CREDIT_ENDDATE'].replace(365243, np.nan)
raw_bureau_dropped['DAYS_ENDDATE_FACT'].replace(365243, np.nan)
raw_bureau_dropped['DAYS_CREDIT_UPDATE'].replace(365243, np.nan)

0          -131
1           -20
2           -16
3           -16
4           -21
           ... 
1716423     -19
1716424   -2493
1716425    -967
1716426   -1508
1716427    -387
Name: DAYS_CREDIT_UPDATE, Length: 1716428, dtype: int64

In [12]:
raw_bureau_cat = pd.get_dummies(raw_bureau_dropped)

Aggregating Data by SK_ID_CURR in the Bureau Dataset

In [13]:
bureau_agg_num = {
    "DAYS_CREDIT":["mean"],
    "CREDIT_DAY_OVERDUE":["mean"],
    "DAYS_CREDIT_ENDDATE":["min"],
    "DAYS_ENDDATE_FACT":["max"],
    "AMT_CREDIT_MAX_OVERDUE":["max"],
    "CNT_CREDIT_PROLONG":["max"],
    "AMT_CREDIT_SUM":["min","mean", "max", "var"],
    "AMT_CREDIT_SUM_DEBT":["mean"],
    "AMT_CREDIT_SUM_LIMIT":["max"],
    "AMT_CREDIT_SUM_OVERDUE":["max"],
    "DAYS_CREDIT_UPDATE":["max"],
    "AMT_ANNUITY":["min","mean", "max", "var"],
}

bureau_agg_cat = get_cat_agg(raw_bureau_cat, "CREDIT_TYPE", 'sum')

bureau_agg_num.update(bureau_agg_cat)

bureau_agg_num

{'DAYS_CREDIT': ['mean'],
 'CREDIT_DAY_OVERDUE': ['mean'],
 'DAYS_CREDIT_ENDDATE': ['min'],
 'DAYS_ENDDATE_FACT': ['max'],
 'AMT_CREDIT_MAX_OVERDUE': ['max'],
 'CNT_CREDIT_PROLONG': ['max'],
 'AMT_CREDIT_SUM': ['min', 'mean', 'max', 'var'],
 'AMT_CREDIT_SUM_DEBT': ['mean'],
 'AMT_CREDIT_SUM_LIMIT': ['max'],
 'AMT_CREDIT_SUM_OVERDUE': ['max'],
 'DAYS_CREDIT_UPDATE': ['max'],
 'AMT_ANNUITY': ['min', 'mean', 'max', 'var'],
 'CREDIT_TYPE_Another type of loan': 'sum',
 'CREDIT_TYPE_Car loan': 'sum',
 'CREDIT_TYPE_Cash loan (non-earmarked)': 'sum',
 'CREDIT_TYPE_Consumer credit': 'sum',
 'CREDIT_TYPE_Credit card': 'sum',
 'CREDIT_TYPE_Interbank credit': 'sum',
 'CREDIT_TYPE_Loan for business development': 'sum',
 'CREDIT_TYPE_Loan for purchase of shares (margin lending)': 'sum',
 'CREDIT_TYPE_Loan for the purchase of equipment': 'sum',
 'CREDIT_TYPE_Loan for working capital replenishment': 'sum',
 'CREDIT_TYPE_Microloan': 'sum',
 'CREDIT_TYPE_Mobile operator loan': 'sum',
 'CREDIT_TYPE_Mortg

In [14]:
bureau_agg = raw_bureau_cat.groupby('SK_ID_CURR').agg(bureau_agg_num)

bureau_agg.columns = pd.Index(['bureau_' + col[0] + "_" + col[1] for col in bureau_agg.columns.tolist() if col[0] != 'SK_ID_CURR'])

bureau_agg = bureau_agg.reset_index()

In [15]:
bureau_agg.head()

Unnamed: 0,SK_ID_CURR,bureau_DAYS_CREDIT_mean,bureau_CREDIT_DAY_OVERDUE_mean,bureau_DAYS_CREDIT_ENDDATE_min,bureau_DAYS_ENDDATE_FACT_max,bureau_AMT_CREDIT_MAX_OVERDUE_max,bureau_CNT_CREDIT_PROLONG_max,bureau_AMT_CREDIT_SUM_min,bureau_AMT_CREDIT_SUM_mean,bureau_AMT_CREDIT_SUM_max,bureau_AMT_CREDIT_SUM_var,bureau_AMT_CREDIT_SUM_DEBT_mean,bureau_AMT_CREDIT_SUM_LIMIT_max,bureau_AMT_CREDIT_SUM_OVERDUE_max,bureau_DAYS_CREDIT_UPDATE_max,bureau_AMT_ANNUITY_min,bureau_AMT_ANNUITY_mean,bureau_AMT_ANNUITY_max,bureau_AMT_ANNUITY_var,bureau_CREDIT_TYPE_Another type of loan_sum,bureau_CREDIT_TYPE_Car loan_sum,bureau_CREDIT_TYPE_Cash loan (non-earmarked)_sum,bureau_CREDIT_TYPE_Consumer credit_sum,bureau_CREDIT_TYPE_Credit card_sum,bureau_CREDIT_TYPE_Interbank credit_sum,bureau_CREDIT_TYPE_Loan for business development_sum,bureau_CREDIT_TYPE_Loan for purchase of shares (margin lending)_sum,bureau_CREDIT_TYPE_Loan for the purchase of equipment_sum,bureau_CREDIT_TYPE_Loan for working capital replenishment_sum,bureau_CREDIT_TYPE_Microloan_sum,bureau_CREDIT_TYPE_Mobile operator loan_sum,bureau_CREDIT_TYPE_Mortgage_sum,bureau_CREDIT_TYPE_Real estate loan_sum,bureau_CREDIT_TYPE_Unknown type of loan_sum
0,100001,-735.0,0.0,-1329.0,-544.0,,0,85500.0,207623.571429,378000.0,15017170000.0,85240.928571,0.0,0.0,-6,0.0,3545.357143,10822.5,23045830.0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0
1,100002,-874.0,0.0,-1072.0,-36.0,5043.645,0,0.0,108131.945625,450000.0,21338070000.0,49156.2,31988.565,0.0,-7,0.0,0.0,0.0,0.0,0,0,0,4,4,0,0,0,0,0,0,0,0,0,0
2,100003,-1400.75,0.0,-2434.0,-540.0,0.0,0,22248.0,254350.125,810000.0,138584600000.0,0.0,810000.0,0.0,-43,,,,,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0
3,100004,-867.0,0.0,-595.0,-382.0,0.0,0,94500.0,94518.9,94537.8,714.42,0.0,0.0,0.0,-382,,,,,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0
4,100005,-190.666667,0.0,-128.0,-123.0,0.0,0,29826.0,219042.0,568800.0,91953540000.0,189469.5,0.0,0.0,-11,0.0,1420.5,4261.5,6053461.0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0


In [16]:
print(len(bureau_agg))

305811


Bureau Balance

In [17]:
raw_bureau_balance = pd.read_csv(os.getcwd()+"\\bureau_balance.csv")

In [18]:
show_unique_keys(raw_bureau_balance)

SK_ID_BUREAU False
MONTHS_BALANCE False
STATUS False


Here we deem the bureau_balance to not be of any interest for our model, we will omit this dataset

POS Cash Balance

In [19]:
raw_cash_balance = pd.read_csv(os.getcwd()+"\\POS_CASH_balance.csv")

In [20]:
show_unique_keys(raw_cash_balance)

SK_ID_PREV False
SK_ID_CURR False
MONTHS_BALANCE False
CNT_INSTALMENT False
CNT_INSTALMENT_FUTURE False
NAME_CONTRACT_STATUS False
SK_DPD False
SK_DPD_DEF False


Data Analysis for POS Cash Balance

In [21]:
raw_cash_balance.dtypes

SK_ID_PREV                 int64
SK_ID_CURR                 int64
MONTHS_BALANCE             int64
CNT_INSTALMENT           float64
CNT_INSTALMENT_FUTURE    float64
NAME_CONTRACT_STATUS      object
SK_DPD                     int64
SK_DPD_DEF                 int64
dtype: object

In [22]:
show_missing_values(raw_cash_balance)

CNT_INSTALMENT           0.26%
CNT_INSTALMENT_FUTURE    0.26%
SK_ID_PREV                0.0%
SK_ID_CURR                0.0%
MONTHS_BALANCE            0.0%
NAME_CONTRACT_STATUS      0.0%
SK_DPD                    0.0%
SK_DPD_DEF                0.0%
dtype: object


In [23]:
show_cat_col_values(raw_cash_balance)

                       NAME_CONTRACT_STATUS         Ratio
Active                              9151119  9.149876e-01
Completed                            744883  7.447819e-02
Signed                                87260  8.724815e-03
Demand                                 7065  7.064041e-04
Returned to the store                  5461  5.460258e-04
Approved                               4917  4.916332e-04
Amortized debt                          636  6.359136e-05
Canceled                                 15  1.499796e-06
XNA                                       2  1.999728e-07




In [24]:
raw_cash_balance_dropped = raw_cash_balance.drop(['SK_ID_PREV', 'MONTHS_BALANCE', 'NAME_CONTRACT_STATUS'], axis = 1)

In [25]:
raw_cash_balance_dropped = raw_cash_balance_dropped.replace('XNA', np.nan)

Data Aggregation for POS Cash Balance

In [26]:
cash_balance_agg_num = {
"CNT_INSTALMENT":["min","mean", "max"],
"CNT_INSTALMENT_FUTURE":["min","mean", "max"],
"SK_DPD":["min","mean", "max"],
"SK_DPD_DEF":["min","mean", "max"],
}

In [27]:
cash_balance_agg = raw_cash_balance_dropped.groupby('SK_ID_CURR').agg(cash_balance_agg_num)

cash_balance_agg.columns = pd.Index(['cash_balance_' + col[0] + "_" + col[1] for col in cash_balance_agg.columns.tolist() if col[0] != 'SK_ID_CURR'])

cash_balance_agg = cash_balance_agg.reset_index()

In [28]:
cash_balance_agg.head()

Unnamed: 0,SK_ID_CURR,cash_balance_CNT_INSTALMENT_min,cash_balance_CNT_INSTALMENT_mean,cash_balance_CNT_INSTALMENT_max,cash_balance_CNT_INSTALMENT_FUTURE_min,cash_balance_CNT_INSTALMENT_FUTURE_mean,cash_balance_CNT_INSTALMENT_FUTURE_max,cash_balance_SK_DPD_min,cash_balance_SK_DPD_mean,cash_balance_SK_DPD_max,cash_balance_SK_DPD_DEF_min,cash_balance_SK_DPD_DEF_mean,cash_balance_SK_DPD_DEF_max
0,100001,4.0,4.0,4.0,0.0,1.444444,4.0,0,0.777778,7,0,0.777778,7
1,100002,24.0,24.0,24.0,6.0,15.0,24.0,0,0.0,0,0,0.0,0
2,100003,6.0,10.107143,12.0,0.0,5.785714,12.0,0,0.0,0,0,0.0,0
3,100004,3.0,3.75,4.0,0.0,2.25,4.0,0,0.0,0,0,0.0,0
4,100005,9.0,11.7,12.0,0.0,7.2,12.0,0,0.0,0,0,0.0,0


In [29]:
len(cash_balance_agg)

337252

Credit Card Balance

In [30]:
raw_credit_card_balance = pd.read_csv(os.getcwd()+"\\credit_card_balance.csv")

Data Analysis for Credit Card Balance

In [31]:
raw_credit_card_balance.head()

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,1800.0,1800.0,0.0,0.0,0.0,0.0,1,0.0,1.0,35.0,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.555,64875.555,1.0,1,0.0,0.0,69.0,Active,0,0
2,1740877,371185,-7,31815.225,450000,0.0,0.0,0.0,0.0,2250.0,2250.0,2250.0,26926.425,31460.085,31460.085,0.0,0,0.0,0.0,30.0,Active,0,0
3,1389973,337855,-4,236572.11,225000,2250.0,2250.0,0.0,0.0,11795.76,11925.0,11925.0,224949.285,233048.97,233048.97,1.0,1,0.0,0.0,10.0,Active,0,0
4,1891521,126868,-1,453919.455,450000,0.0,11547.0,0.0,11547.0,22924.89,27000.0,27000.0,443044.395,453919.455,453919.455,0.0,1,0.0,1.0,101.0,Active,0,0


In [32]:
show_missing_values(raw_credit_card_balance)

CNT_INSTALMENT_MATURE_CUM      7.95%
AMT_INST_MIN_REGULARITY        7.95%
AMT_PAYMENT_CURRENT            20.0%
AMT_DRAWINGS_ATM_CURRENT      19.52%
CNT_DRAWINGS_POS_CURRENT      19.52%
AMT_DRAWINGS_OTHER_CURRENT    19.52%
AMT_DRAWINGS_POS_CURRENT      19.52%
CNT_DRAWINGS_OTHER_CURRENT    19.52%
CNT_DRAWINGS_ATM_CURRENT      19.52%
SK_ID_PREV                      0.0%
AMT_TOTAL_RECEIVABLE            0.0%
SK_DPD                          0.0%
NAME_CONTRACT_STATUS            0.0%
CNT_DRAWINGS_CURRENT            0.0%
AMT_PAYMENT_TOTAL_CURRENT       0.0%
AMT_RECIVABLE                   0.0%
AMT_RECEIVABLE_PRINCIPAL        0.0%
SK_ID_CURR                      0.0%
AMT_DRAWINGS_CURRENT            0.0%
AMT_CREDIT_LIMIT_ACTUAL         0.0%
AMT_BALANCE                     0.0%
MONTHS_BALANCE                  0.0%
SK_DPD_DEF                      0.0%
dtype: object


In [33]:
show_cat_col_values(raw_credit_card_balance)

               NAME_CONTRACT_STATUS     Ratio
Active                      3698436  0.963056
Completed                    128918  0.033570
Signed                        11058  0.002879
Demand                         1365  0.000355
Sent proposal                   513  0.000134
Refused                          17  0.000004
Approved                          5  0.000001




In [34]:
raw_credit_card_balance_dropped = raw_credit_card_balance.drop(['SK_ID_PREV', 'NAME_CONTRACT_STATUS'], axis = 1)

In [35]:
raw_credit_card_balance_dropped.dtypes

SK_ID_CURR                      int64
MONTHS_BALANCE                  int64
AMT_BALANCE                   float64
AMT_CREDIT_LIMIT_ACTUAL         int64
AMT_DRAWINGS_ATM_CURRENT      float64
AMT_DRAWINGS_CURRENT          float64
AMT_DRAWINGS_OTHER_CURRENT    float64
AMT_DRAWINGS_POS_CURRENT      float64
AMT_INST_MIN_REGULARITY       float64
AMT_PAYMENT_CURRENT           float64
AMT_PAYMENT_TOTAL_CURRENT     float64
AMT_RECEIVABLE_PRINCIPAL      float64
AMT_RECIVABLE                 float64
AMT_TOTAL_RECEIVABLE          float64
CNT_DRAWINGS_ATM_CURRENT      float64
CNT_DRAWINGS_CURRENT            int64
CNT_DRAWINGS_OTHER_CURRENT    float64
CNT_DRAWINGS_POS_CURRENT      float64
CNT_INSTALMENT_MATURE_CUM     float64
SK_DPD                          int64
SK_DPD_DEF                      int64
dtype: object

Feature Engineering for Credit Card Balance

In [36]:
raw_credit_card_balance_dropped['PNT_BALANCE_DRAWINGS_ATM'] = raw_credit_card_balance_dropped['AMT_DRAWINGS_ATM_CURRENT']/raw_credit_card_balance_dropped['AMT_BALANCE']
raw_credit_card_balance_dropped['PNT_BALANCE_DRAWINGS_CURRENT'] = raw_credit_card_balance_dropped['AMT_DRAWINGS_ATM_CURRENT']/raw_credit_card_balance_dropped['AMT_BALANCE']
raw_credit_card_balance_dropped['AVG_DRAWINGS_ATM_CURRENT'] = raw_credit_card_balance_dropped['AMT_DRAWINGS_CURRENT']/raw_credit_card_balance_dropped['CNT_DRAWINGS_ATM_CURRENT']
raw_credit_card_balance_dropped['AVG_DRAWINGS_CURRENT'] = raw_credit_card_balance_dropped['AMT_DRAWINGS_CURRENT']/raw_credit_card_balance_dropped['CNT_DRAWINGS_CURRENT']
raw_credit_card_balance_dropped['AVG_DRAWINGS_OTHER_CURRENT'] = raw_credit_card_balance_dropped['AMT_DRAWINGS_OTHER_CURRENT']/raw_credit_card_balance_dropped['CNT_DRAWINGS_OTHER_CURRENT']
raw_credit_card_balance_dropped['AVG_DRAWINGS_POS_CURRENT'] = raw_credit_card_balance_dropped['AMT_DRAWINGS_POS_CURRENT']/raw_credit_card_balance_dropped['CNT_DRAWINGS_POS_CURRENT']

In [37]:
credit_card_balance_dropped_agg_num = {
"MONTHS_BALANCE":['max', 'mean', 'sum', 'median', 'std'],
"AMT_BALANCE": ['max', 'mean', 'sum', 'median', 'std'],
"AMT_CREDIT_LIMIT_ACTUAL": ['max', 'mean', 'sum', 'median', 'std'],
"AMT_DRAWINGS_ATM_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AMT_DRAWINGS_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AMT_DRAWINGS_OTHER_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AMT_DRAWINGS_POS_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AMT_INST_MIN_REGULARITY":['max', 'mean', 'sum', 'median', 'std'],
"AMT_PAYMENT_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AMT_PAYMENT_TOTAL_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AMT_RECEIVABLE_PRINCIPAL":['max', 'mean', 'sum', 'median', 'std'],
"AMT_RECIVABLE":['max', 'mean', 'sum', 'median', 'std'],
"AMT_TOTAL_RECEIVABLE":['max', 'mean', 'sum', 'median', 'std'],
"CNT_DRAWINGS_ATM_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"CNT_DRAWINGS_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"CNT_DRAWINGS_OTHER_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"CNT_DRAWINGS_POS_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"CNT_INSTALMENT_MATURE_CUM":['max', 'mean', 'sum', 'median', 'std'],
"SK_DPD":['max', 'mean', 'sum', 'median', 'std'],
"SK_DPD_DEF":['max', 'mean', 'sum', 'median', 'std'],
"PNT_BALANCE_DRAWINGS_ATM":['max', 'mean', 'sum', 'median', 'std'],
"PNT_BALANCE_DRAWINGS_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AVG_DRAWINGS_ATM_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AVG_DRAWINGS_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AVG_DRAWINGS_OTHER_CURRENT":['max', 'mean', 'sum', 'median', 'std'],
"AVG_DRAWINGS_POS_CURRENT":['max', 'mean', 'sum', 'median', 'std'],

}

In [38]:
credit_card_agg = raw_credit_card_balance_dropped.groupby('SK_ID_CURR').agg(credit_card_balance_dropped_agg_num)

credit_card_agg.columns = pd.Index(['credit_card_' + col[0] + "_" + col[1] for col in credit_card_agg.columns.tolist() if col[0] != 'SK_ID_CURR'])

credit_card_agg = credit_card_agg.reset_index()

In [39]:
credit_card_agg

Unnamed: 0,SK_ID_CURR,credit_card_MONTHS_BALANCE_max,credit_card_MONTHS_BALANCE_mean,credit_card_MONTHS_BALANCE_sum,credit_card_MONTHS_BALANCE_median,credit_card_MONTHS_BALANCE_std,credit_card_AMT_BALANCE_max,credit_card_AMT_BALANCE_mean,credit_card_AMT_BALANCE_sum,credit_card_AMT_BALANCE_median,credit_card_AMT_BALANCE_std,credit_card_AMT_CREDIT_LIMIT_ACTUAL_max,credit_card_AMT_CREDIT_LIMIT_ACTUAL_mean,credit_card_AMT_CREDIT_LIMIT_ACTUAL_sum,credit_card_AMT_CREDIT_LIMIT_ACTUAL_median,credit_card_AMT_CREDIT_LIMIT_ACTUAL_std,credit_card_AMT_DRAWINGS_ATM_CURRENT_max,credit_card_AMT_DRAWINGS_ATM_CURRENT_mean,credit_card_AMT_DRAWINGS_ATM_CURRENT_sum,credit_card_AMT_DRAWINGS_ATM_CURRENT_median,credit_card_AMT_DRAWINGS_ATM_CURRENT_std,credit_card_AMT_DRAWINGS_CURRENT_max,credit_card_AMT_DRAWINGS_CURRENT_mean,credit_card_AMT_DRAWINGS_CURRENT_sum,credit_card_AMT_DRAWINGS_CURRENT_median,credit_card_AMT_DRAWINGS_CURRENT_std,credit_card_AMT_DRAWINGS_OTHER_CURRENT_max,credit_card_AMT_DRAWINGS_OTHER_CURRENT_mean,credit_card_AMT_DRAWINGS_OTHER_CURRENT_sum,credit_card_AMT_DRAWINGS_OTHER_CURRENT_median,credit_card_AMT_DRAWINGS_OTHER_CURRENT_std,credit_card_AMT_DRAWINGS_POS_CURRENT_max,credit_card_AMT_DRAWINGS_POS_CURRENT_mean,credit_card_AMT_DRAWINGS_POS_CURRENT_sum,credit_card_AMT_DRAWINGS_POS_CURRENT_median,credit_card_AMT_DRAWINGS_POS_CURRENT_std,credit_card_AMT_INST_MIN_REGULARITY_max,credit_card_AMT_INST_MIN_REGULARITY_mean,credit_card_AMT_INST_MIN_REGULARITY_sum,credit_card_AMT_INST_MIN_REGULARITY_median,credit_card_AMT_INST_MIN_REGULARITY_std,credit_card_AMT_PAYMENT_CURRENT_max,credit_card_AMT_PAYMENT_CURRENT_mean,credit_card_AMT_PAYMENT_CURRENT_sum,credit_card_AMT_PAYMENT_CURRENT_median,credit_card_AMT_PAYMENT_CURRENT_std,credit_card_AMT_PAYMENT_TOTAL_CURRENT_max,credit_card_AMT_PAYMENT_TOTAL_CURRENT_mean,credit_card_AMT_PAYMENT_TOTAL_CURRENT_sum,credit_card_AMT_PAYMENT_TOTAL_CURRENT_median,credit_card_AMT_PAYMENT_TOTAL_CURRENT_std,credit_card_AMT_RECEIVABLE_PRINCIPAL_max,credit_card_AMT_RECEIVABLE_PRINCIPAL_mean,credit_card_AMT_RECEIVABLE_PRINCIPAL_sum,credit_card_AMT_RECEIVABLE_PRINCIPAL_median,credit_card_AMT_RECEIVABLE_PRINCIPAL_std,credit_card_AMT_RECIVABLE_max,credit_card_AMT_RECIVABLE_mean,credit_card_AMT_RECIVABLE_sum,credit_card_AMT_RECIVABLE_median,credit_card_AMT_RECIVABLE_std,credit_card_AMT_TOTAL_RECEIVABLE_max,credit_card_AMT_TOTAL_RECEIVABLE_mean,credit_card_AMT_TOTAL_RECEIVABLE_sum,credit_card_AMT_TOTAL_RECEIVABLE_median,credit_card_AMT_TOTAL_RECEIVABLE_std,credit_card_CNT_DRAWINGS_ATM_CURRENT_max,credit_card_CNT_DRAWINGS_ATM_CURRENT_mean,credit_card_CNT_DRAWINGS_ATM_CURRENT_sum,credit_card_CNT_DRAWINGS_ATM_CURRENT_median,credit_card_CNT_DRAWINGS_ATM_CURRENT_std,credit_card_CNT_DRAWINGS_CURRENT_max,credit_card_CNT_DRAWINGS_CURRENT_mean,credit_card_CNT_DRAWINGS_CURRENT_sum,credit_card_CNT_DRAWINGS_CURRENT_median,credit_card_CNT_DRAWINGS_CURRENT_std,credit_card_CNT_DRAWINGS_OTHER_CURRENT_max,credit_card_CNT_DRAWINGS_OTHER_CURRENT_mean,credit_card_CNT_DRAWINGS_OTHER_CURRENT_sum,credit_card_CNT_DRAWINGS_OTHER_CURRENT_median,credit_card_CNT_DRAWINGS_OTHER_CURRENT_std,credit_card_CNT_DRAWINGS_POS_CURRENT_max,credit_card_CNT_DRAWINGS_POS_CURRENT_mean,credit_card_CNT_DRAWINGS_POS_CURRENT_sum,credit_card_CNT_DRAWINGS_POS_CURRENT_median,credit_card_CNT_DRAWINGS_POS_CURRENT_std,credit_card_CNT_INSTALMENT_MATURE_CUM_max,credit_card_CNT_INSTALMENT_MATURE_CUM_mean,credit_card_CNT_INSTALMENT_MATURE_CUM_sum,credit_card_CNT_INSTALMENT_MATURE_CUM_median,credit_card_CNT_INSTALMENT_MATURE_CUM_std,credit_card_SK_DPD_max,credit_card_SK_DPD_mean,credit_card_SK_DPD_sum,credit_card_SK_DPD_median,credit_card_SK_DPD_std,credit_card_SK_DPD_DEF_max,credit_card_SK_DPD_DEF_mean,credit_card_SK_DPD_DEF_sum,credit_card_SK_DPD_DEF_median,credit_card_SK_DPD_DEF_std,credit_card_PNT_BALANCE_DRAWINGS_ATM_max,credit_card_PNT_BALANCE_DRAWINGS_ATM_mean,credit_card_PNT_BALANCE_DRAWINGS_ATM_sum,credit_card_PNT_BALANCE_DRAWINGS_ATM_median,credit_card_PNT_BALANCE_DRAWINGS_ATM_std,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_max,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_mean,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_sum,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_median,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_std,credit_card_AVG_DRAWINGS_ATM_CURRENT_max,credit_card_AVG_DRAWINGS_ATM_CURRENT_mean,credit_card_AVG_DRAWINGS_ATM_CURRENT_sum,credit_card_AVG_DRAWINGS_ATM_CURRENT_median,credit_card_AVG_DRAWINGS_ATM_CURRENT_std,credit_card_AVG_DRAWINGS_CURRENT_max,credit_card_AVG_DRAWINGS_CURRENT_mean,credit_card_AVG_DRAWINGS_CURRENT_sum,credit_card_AVG_DRAWINGS_CURRENT_median,credit_card_AVG_DRAWINGS_CURRENT_std,credit_card_AVG_DRAWINGS_OTHER_CURRENT_max,credit_card_AVG_DRAWINGS_OTHER_CURRENT_mean,credit_card_AVG_DRAWINGS_OTHER_CURRENT_sum,credit_card_AVG_DRAWINGS_OTHER_CURRENT_median,credit_card_AVG_DRAWINGS_OTHER_CURRENT_std,credit_card_AVG_DRAWINGS_POS_CURRENT_max,credit_card_AVG_DRAWINGS_POS_CURRENT_mean,credit_card_AVG_DRAWINGS_POS_CURRENT_sum,credit_card_AVG_DRAWINGS_POS_CURRENT_median,credit_card_AVG_DRAWINGS_POS_CURRENT_std
0,100006,-1,-3.5,-21,-3.5,1.870829,0.000,0.000000,0.000,0.0000,0.000000,270000,270000.000000,1620000,270000.0,0.000000,,,0.0,,,0.00,0.000000,0.000,0.000,0.000000,,,0.0,,,,,0.000,,,0.000,0.000000,0.000,0.0000,0.000000,,,0.000,,,0.00,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.000,0.000000,0.000,0.000000,0.000,0.000,0.000000,,,0.0,,,0,0.000000,0,0.0,0.000000,,,0.0,,,,,0.0,,,0.0,0.000000,0.0,0.0,0.000000,0,0.000000,0,0.0,0.000000,0,0.000000,0,0.0,0.000000,,,0.000000,,,,,0.000000,,,,,0.000000,,,,,0.000000e+00,,,,,0.0,,,,,0.00000,,
1,100011,-2,-38.5,-2849,-38.5,21.505813,189000.000,54482.111149,4031676.225,0.0000,68127.238270,180000,164189.189189,12150000,180000.0,34482.743620,180000.0,2432.432432,180000.0,0.0,20924.574974,180000.00,2432.432432,180000.000,0.000,20924.574974,0.0,0.0,0.0,0.0,0.0,0.000,0.000000,0.000,0.000,0.000000,9000.000,3956.221849,288804.195,0.0000,4487.750710,55485.00,4843.064189,358386.750,563.355,7279.601955,55485.00,4520.067568,334485.000,0.0000,7473.872687,180000.000,52402.088919,3877754.580,0.0000,65758.823328,189000.000,54433.179122,4028055.255,0.000,68166.970922,189000.000,54433.179122,4028055.255,0.000,68166.970922,4.0,0.054054,4.0,0.0,0.464991,4,0.054054,4,0.0,0.464991,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,33.0,25.767123,1881.0,33.0,10.288236,0,0.000000,0,0.0,0.000000,0,0.000000,0,0.0,0.000000,0.952381,0.028860,0.952381,0.00000,0.165788,0.952381,0.028860,0.952381,0.00000,0.165788,4.500000e+04,45000.000000,45000.000000,4.500000e+04,,4.500000e+04,4.500000e+04,4.500000e+04,45000.000000,,,,0.0,,,,,0.00000,,
2,100013,-1,-48.5,-4656,-48.5,27.856777,161420.220,18159.919219,1743352.245,0.0000,43237.406997,157500,131718.750000,12645000,157500.0,47531.585759,157500.0,6350.000000,571500.0,0.0,28722.270457,157500.00,5953.125000,571500.000,0.000,27843.366225,0.0,0.0,0.0,0.0,0.0,0.000,0.000000,0.000,0.000,0.000000,7875.000,1454.539551,129454.020,0.0000,3028.409304,153675.00,7168.346250,688161.240,274.320,21626.144325,153675.00,6817.172344,654448.545,0.0000,21730.655260,157500.000,17255.559844,1656533.745,0.0000,41279.745434,161420.220,18101.079844,1737703.665,0.000,43262.026330,161420.220,18101.079844,1737703.665,0.000,43262.026330,7.0,0.255556,23.0,0.0,1.185693,7,0.239583,23,0.0,1.149323,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,22.0,18.719101,1666.0,22.0,5.852328,1,0.010417,1,0.0,0.102062,1,0.010417,1,0.0,0.102062,0.975714,0.180555,3.972203,0.00000,0.375506,0.975714,0.180555,3.972203,0.00000,0.375506,3.712500e+04,25778.571429,128892.857143,2.250000e+04,7187.900388,3.712500e+04,2.577857e+04,1.288929e+05,22500.000000,7187.900388,,,0.0,,,,,0.00000,,
3,100021,-2,-10.0,-170,-10.0,5.049752,0.000,0.000000,0.000,0.0000,0.000000,675000,675000.000000,11475000,675000.0,0.000000,,,0.0,,,0.00,0.000000,0.000,0.000,0.000000,,,0.0,,,,,0.000,,,0.000,0.000000,0.000,0.0000,0.000000,,,0.000,,,0.00,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.000,0.000000,0.000,0.000000,0.000,0.000,0.000000,,,0.0,,,0,0.000000,0,0.0,0.000000,,,0.0,,,,,0.0,,,0.0,0.000000,0.0,0.0,0.000000,0,0.000000,0,0.0,0.000000,0,0.000000,0,0.0,0.000000,,,0.000000,,,,,0.000000,,,,,0.000000,,,,,0.000000e+00,,,,,0.0,,,,,0.00000,,
4,100023,-4,-7.5,-60,-7.5,2.449490,0.000,0.000000,0.000,0.0000,0.000000,225000,135000.000000,1080000,135000.0,96214.047088,,,0.0,,,0.00,0.000000,0.000,0.000,0.000000,,,0.0,,,,,0.000,,,0.000,0.000000,0.000,0.0000,0.000000,,,0.000,,,0.00,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.000,0.000000,0.000,0.000000,0.000,0.000,0.000000,,,0.0,,,0,0.000000,0,0.0,0.000000,,,0.0,,,,,0.0,,,0.0,0.000000,0.0,0.0,0.000000,0,0.000000,0,0.0,0.000000,0,0.000000,0,0.0,0.000000,,,0.000000,,,,,0.000000,,,,,0.000000,,,,,0.000000e+00,,,,,0.0,,,,,0.00000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103553,456244,-1,-21.0,-861,-21.0,11.979149,453627.675,131834.730732,5405223.960,0.0000,181540.706436,450000,296341.463415,12150000,450000.0,216041.154616,279000.0,24475.609756,1003500.0,0.0,54198.747820,307953.45,26842.388049,1100537.910,0.000,58950.161279,0.0,0.0,0.0,0.0,0.0,30911.850,2363.015854,96883.650,0.000,7177.457716,23343.165,6514.200000,260568.000,0.0000,9325.388156,482329.62,32720.544878,1341542.340,154.260,95852.088187,482329.62,32720.544878,1341542.340,154.2600,95852.088187,442113.570,127608.373537,5231943.315,0.0000,176058.396403,453627.675,130767.060732,5361449.490,0.000,180546.703695,453627.675,130767.060732,5361449.490,0.000,180546.703695,6.0,1.048780,43.0,0.0,1.745726,8,1.365854,56,0.0,2.277675,0.0,0.0,0.0,0.0,0.0,4.0,0.317073,13.0,0.0,0.960183,17.0,13.600000,544.0,17.0,5.148064,0,0.000000,0,0.0,0.000000,0,0.000000,0,0.0,0.000000,inf,,,0.11462,,inf,,,0.11462,,inf,,,2.328750e+04,,inf,inf,inf,17316.365625,,,,0.0,,,17100.000,8405.100000,42025.50000,7238.362500,5052.882211
103554,456246,-2,-5.5,-44,-5.5,2.449490,43490.115,13136.731875,105093.855,753.4125,18263.381378,135000,135000.000000,1080000,135000.0,0.000000,0.0,0.000000,0.0,0.0,0.000000,48929.85,15199.256250,121594.050,526.275,21710.636111,0.0,0.0,0.0,0.0,0.0,48929.850,15199.256250,121594.050,526.275,21710.636111,2250.000,1439.150625,11513.205,2250.0000,1119.815335,43669.71,18778.275000,131447.925,18000.000,17913.738243,41419.71,15554.340000,124434.720,9783.1800,17012.766963,43437.555,12883.016250,103064.130,752.0625,17998.915570,43490.115,12897.894375,103183.155,753.165,18021.227861,43490.115,12897.894375,103183.155,753.165,18021.227861,0.0,0.000000,0.0,0.0,0.000000,8,2.500000,20,0.5,3.545621,0.0,0.0,0.0,0.0,0.0,8.0,2.500000,20.0,0.5,3.545621,7.0,3.500000,28.0,3.5,2.449490,0,0.000000,0,0.0,0.000000,0,0.000000,0,0.0,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,inf,,,inf,,1.630995e+04,6.578489e+03,2.631396e+04,4475.728125,6758.252494,,,0.0,,,16309.950,6578.489062,26313.95625,4475.728125,6758.252494
103555,456247,-2,-49.0,-4655,-49.0,27.568098,190202.130,23216.396211,2205557.640,0.0000,56576.242229,180000,144000.000000,13680000,180000.0,72381.965545,96750.0,2136.315789,202950.0,0.0,13508.768709,96750.00,2149.506474,204203.115,0.000,13507.272226,0.0,0.0,0.0,0.0,0.0,1253.115,13.190684,1253.115,0.000,128.566886,9000.000,1414.704789,134396.955,0.0000,3067.353655,99990.00,4883.755263,463956.750,520.605,16818.866002,99990.00,4115.878105,391008.420,0.0000,16973.145046,178205.220,22100.653895,2099562.120,0.0000,54103.235131,190202.130,23128.243105,2197183.095,0.000,56612.512602,190202.130,23128.243105,2197183.095,0.000,56612.512602,4.0,0.115789,11.0,0.0,0.543103,4,0.147368,14,0.0,0.618310,0.0,0.0,0.0,0.0,0.0,3.0,0.031579,3.0,0.0,0.307794,32.0,26.494737,2517.0,32.0,8.517658,1,0.031579,3,0.0,0.175804,1,0.021053,2,0.0,0.144321,1.629089,0.117642,3.529262,0.00000,0.349272,1.629089,0.117642,3.529262,0.00000,0.349272,inf,,,1.434375e+04,,4.500000e+04,1.298837e+04,7.793021e+04,3937.500000,18054.731329,,,0.0,,,417.705,417.705000,417.70500,417.705000,
103556,456248,-2,-13.0,-299,-13.0,6.782330,0.000,0.000000,0.000,0.0000,0.000000,900000,900000.000000,20700000,900000.0,0.000000,,,0.0,,,0.00,0.000000,0.000,0.000,0.000000,,,0.0,,,,,0.000,,,0.000,0.000000,0.000,0.0000,0.000000,,,0.000,,,0.00,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.0000,0.000000,0.000,0.000000,0.000,0.000,0.000000,0.000,0.000000,0.000,0.000,0.000000,,,0.0,,,0,0.000000,0,0.0,0.000000,,,0.0,,,,,0.0,,,0.0,0.000000,0.0,0.0,0.000000,0,0.000000,0,0.0,0.000000,0,0.000000,0,0.0,0.000000,,,0.000000,,,,,0.000000,,,,,0.000000,,,,,0.000000e+00,,,,,0.0,,,,,0.00000,,


In [40]:
len(credit_card_agg)

103558

Previous Application

In [41]:
raw_previous_application = pd.read_csv(os.getcwd()+"\\previous_application.csv")

In [42]:
raw_previous_application.columns

Index(['SK_ID_PREV', 'SK_ID_CURR', 'NAME_CONTRACT_TYPE', 'AMT_ANNUITY',
       'AMT_APPLICATION', 'AMT_CREDIT', 'AMT_DOWN_PAYMENT', 'AMT_GOODS_PRICE',
       'WEEKDAY_APPR_PROCESS_START', 'HOUR_APPR_PROCESS_START',
       'FLAG_LAST_APPL_PER_CONTRACT', 'NFLAG_LAST_APPL_IN_DAY',
       'RATE_DOWN_PAYMENT', 'RATE_INTEREST_PRIMARY',
       'RATE_INTEREST_PRIVILEGED', 'NAME_CASH_LOAN_PURPOSE',
       'NAME_CONTRACT_STATUS', 'DAYS_DECISION', 'NAME_PAYMENT_TYPE',
       'CODE_REJECT_REASON', 'NAME_TYPE_SUITE', 'NAME_CLIENT_TYPE',
       'NAME_GOODS_CATEGORY', 'NAME_PORTFOLIO', 'NAME_PRODUCT_TYPE',
       'CHANNEL_TYPE', 'SELLERPLACE_AREA', 'NAME_SELLER_INDUSTRY',
       'CNT_PAYMENT', 'NAME_YIELD_GROUP', 'PRODUCT_COMBINATION',
       'DAYS_FIRST_DRAWING', 'DAYS_FIRST_DUE', 'DAYS_LAST_DUE_1ST_VERSION',
       'DAYS_LAST_DUE', 'DAYS_TERMINATION', 'NFLAG_INSURED_ON_APPROVAL'],
      dtype='object')

Previous Application Data Analysis

In [43]:
show_missing_values(raw_previous_application)

RATE_INTEREST_PRIVILEGED       99.64%
RATE_INTEREST_PRIMARY          99.64%
RATE_DOWN_PAYMENT              53.64%
AMT_DOWN_PAYMENT               53.64%
NAME_TYPE_SUITE                49.12%
NFLAG_INSURED_ON_APPROVAL       40.3%
DAYS_FIRST_DRAWING              40.3%
DAYS_FIRST_DUE                  40.3%
DAYS_LAST_DUE_1ST_VERSION       40.3%
DAYS_LAST_DUE                   40.3%
DAYS_TERMINATION                40.3%
AMT_GOODS_PRICE                23.08%
AMT_ANNUITY                    22.29%
CNT_PAYMENT                    22.29%
PRODUCT_COMBINATION             0.02%
CHANNEL_TYPE                     0.0%
NAME_PRODUCT_TYPE                0.0%
NAME_YIELD_GROUP                 0.0%
SELLERPLACE_AREA                 0.0%
NAME_SELLER_INDUSTRY             0.0%
NAME_GOODS_CATEGORY              0.0%
NAME_PORTFOLIO                   0.0%
SK_ID_PREV                       0.0%
NAME_CLIENT_TYPE                 0.0%
CODE_REJECT_REASON               0.0%
SK_ID_CURR                       0.0%
DAYS_DECISIO

In [44]:
show_cat_col_values(raw_previous_application)

                 NAME_CONTRACT_TYPE     Ratio
Cash loans                   747553  0.447579
Consumer loans               729151  0.436561
Revolving loans              193164  0.115652
XNA                             346  0.000207


           WEEKDAY_APPR_PROCESS_START     Ratio
TUESDAY                        255118  0.152746
WEDNESDAY                      255010  0.152681
MONDAY                         253557  0.151811
FRIDAY                         252048  0.150908
THURSDAY                       249099  0.149142
SATURDAY                       240631  0.144072
SUNDAY                         164751  0.098641


   FLAG_LAST_APPL_PER_CONTRACT     Ratio
Y                      1661739  0.994926
N                         8475  0.005074


                                  NAME_CASH_LOAN_PURPOSE     Ratio
XAP                                               922661  0.552421
XNA                                               677918  0.405887
Repairs                                            23765

In [45]:
raw_previous_application_dropped = raw_previous_application.drop(['SK_ID_PREV', 'HOUR_APPR_PROCESS_START','FLAG_LAST_APPL_PER_CONTRACT', 'NFLAG_LAST_APPL_IN_DAY','RATE_INTEREST_PRIVILEGED', 'RATE_INTEREST_PRIMARY'], axis = 1)

In [46]:
raw_previous_application_dropped.dtypes

SK_ID_CURR                      int64
NAME_CONTRACT_TYPE             object
AMT_ANNUITY                   float64
AMT_APPLICATION               float64
AMT_CREDIT                    float64
AMT_DOWN_PAYMENT              float64
AMT_GOODS_PRICE               float64
WEEKDAY_APPR_PROCESS_START     object
RATE_DOWN_PAYMENT             float64
NAME_CASH_LOAN_PURPOSE         object
NAME_CONTRACT_STATUS           object
DAYS_DECISION                   int64
NAME_PAYMENT_TYPE              object
CODE_REJECT_REASON             object
NAME_TYPE_SUITE                object
NAME_CLIENT_TYPE               object
NAME_GOODS_CATEGORY            object
NAME_PORTFOLIO                 object
NAME_PRODUCT_TYPE              object
CHANNEL_TYPE                   object
SELLERPLACE_AREA                int64
NAME_SELLER_INDUSTRY           object
CNT_PAYMENT                   float64
NAME_YIELD_GROUP               object
PRODUCT_COMBINATION            object
DAYS_FIRST_DRAWING            float64
DAYS_FIRST_D

Previous Application Feature Engineering

In [47]:
raw_previous_application_dropped['DAYS_DECISION'].replace(365243, np.nan, inplace= True)
raw_previous_application_dropped['DAYS_FIRST_DRAWING'].replace(365243, np.nan, inplace= True)
raw_previous_application_dropped['DAYS_FIRST_DUE'].replace(365243, np.nan, inplace= True)
raw_previous_application_dropped['DAYS_LAST_DUE_1ST_VERSION'].replace(365243, np.nan, inplace= True)
raw_previous_application_dropped['DAYS_LAST_DUE'].replace(365243, np.nan, inplace= True)
raw_previous_application_dropped['DAYS_TERMINATION'].replace(365243, np.nan, inplace= True)

In [48]:
raw_previous_application_dropped['DOWN_PAYMENT_PNT'] = raw_previous_application_dropped['AMT_DOWN_PAYMENT']/raw_previous_application_dropped['AMT_CREDIT']
raw_previous_application_dropped['ANNUITY_CREDIT_RATIO'] = raw_previous_application_dropped['AMT_ANNUITY']/raw_previous_application_dropped['AMT_CREDIT']

In [49]:
raw_previous_application_cats = pd.get_dummies(raw_previous_application_dropped)

In [50]:
raw_previous_application_dropped.dtypes

SK_ID_CURR                      int64
NAME_CONTRACT_TYPE             object
AMT_ANNUITY                   float64
AMT_APPLICATION               float64
AMT_CREDIT                    float64
AMT_DOWN_PAYMENT              float64
AMT_GOODS_PRICE               float64
WEEKDAY_APPR_PROCESS_START     object
RATE_DOWN_PAYMENT             float64
NAME_CASH_LOAN_PURPOSE         object
NAME_CONTRACT_STATUS           object
DAYS_DECISION                   int64
NAME_PAYMENT_TYPE              object
CODE_REJECT_REASON             object
NAME_TYPE_SUITE                object
NAME_CLIENT_TYPE               object
NAME_GOODS_CATEGORY            object
NAME_PORTFOLIO                 object
NAME_PRODUCT_TYPE              object
CHANNEL_TYPE                   object
SELLERPLACE_AREA                int64
NAME_SELLER_INDUSTRY           object
CNT_PAYMENT                   float64
NAME_YIELD_GROUP               object
PRODUCT_COMBINATION            object
DAYS_FIRST_DRAWING            float64
DAYS_FIRST_D

Previous Application Data Aggregation

In [51]:
raw_previous_application_agg_num = {
"AMT_ANNUITY":['max', 'mean','min','std'],
"AMT_APPLICATION":['max', 'mean','min','std'],
"AMT_CREDIT":['max', 'mean','min','std'],
"AMT_DOWN_PAYMENT":['max', 'mean','min','std'],
"AMT_GOODS_PRICE":['max', 'mean','min','std'],
"RATE_DOWN_PAYMENT":['max', 'mean','min','std'],
"DAYS_DECISION":['max', 'mean','min','std'],
"CNT_PAYMENT":['max', 'mean','min','std'],
"DAYS_FIRST_DRAWING":['max', 'mean','min','std'],
"DAYS_FIRST_DUE":['max', 'mean','min','std'],
"DAYS_LAST_DUE_1ST_VERSION":['max', 'mean','min','std'],
"DAYS_LAST_DUE":['max', 'mean','min','std'],
"DAYS_TERMINATION":['max', 'mean','min','std'],
"DOWN_PAYMENT_PNT":['max', 'mean','min','std', 'var'],
"ANNUITY_CREDIT_RATIO":['max', 'mean','min','std', 'var'],
}

raw_previous_application_cat_cols = [
    "NAME_CASH_LOAN_PURPOSE", 
    "NAME_PAYMENT_TYPE",
    "WEEKDAY_APPR_PROCESS_START",
    "CODE_REJECT_REASON",
    "NAME_CLIENT_TYPE",
    "NAME_GOODS_CATEGORY",
    "NAME_PORTFOLIO",
    "NAME_PRODUCT_TYPE",
    "CHANNEL_TYPE",
    "SELLERPLACE_AREA",
    "NAME_SELLER_INDUSTRY",
    "NAME_YIELD_GROUP",
]

for col in raw_previous_application_cat_cols:
    dict_ = get_cat_agg(raw_previous_application_cats, col, 'sum')
    raw_previous_application_agg_num.update(dict_)

raw_previous_application_agg_num

{'AMT_ANNUITY': ['max', 'mean', 'min', 'std'],
 'AMT_APPLICATION': ['max', 'mean', 'min', 'std'],
 'AMT_CREDIT': ['max', 'mean', 'min', 'std'],
 'AMT_DOWN_PAYMENT': ['max', 'mean', 'min', 'std'],
 'AMT_GOODS_PRICE': ['max', 'mean', 'min', 'std'],
 'RATE_DOWN_PAYMENT': ['max', 'mean', 'min', 'std'],
 'DAYS_DECISION': ['max', 'mean', 'min', 'std'],
 'CNT_PAYMENT': ['max', 'mean', 'min', 'std'],
 'DAYS_FIRST_DRAWING': ['max', 'mean', 'min', 'std'],
 'DAYS_FIRST_DUE': ['max', 'mean', 'min', 'std'],
 'DAYS_LAST_DUE_1ST_VERSION': ['max', 'mean', 'min', 'std'],
 'DAYS_LAST_DUE': ['max', 'mean', 'min', 'std'],
 'DAYS_TERMINATION': ['max', 'mean', 'min', 'std'],
 'DOWN_PAYMENT_PNT': ['max', 'mean', 'min', 'std', 'var'],
 'ANNUITY_CREDIT_RATIO': ['max', 'mean', 'min', 'std', 'var'],
 'NAME_CASH_LOAN_PURPOSE_Building a house or an annex': 'sum',
 'NAME_CASH_LOAN_PURPOSE_Business development': 'sum',
 'NAME_CASH_LOAN_PURPOSE_Buying a garage': 'sum',
 'NAME_CASH_LOAN_PURPOSE_Buying a holiday home /

In [52]:
previous_application_agg = raw_previous_application_cats.groupby('SK_ID_CURR').agg(raw_previous_application_agg_num)

previous_application_agg.columns = pd.Index(['prev_app_' + col[0] + "_" + col[1] for col in previous_application_agg.columns.tolist() if col[0] != 'SK_ID_CURR'])

previous_application_agg = previous_application_agg.reset_index()

In [53]:
len(previous_application_agg)

338857

Installment Payments

In [54]:
raw_installment_payments = pd.read_csv(os.getcwd()+"\\installments_payments.csv")

Installment Payments Data Analysis

In [55]:
show_missing_values(raw_installment_payments)

DAYS_ENTRY_PAYMENT        0.02%
AMT_PAYMENT               0.02%
SK_ID_PREV                 0.0%
SK_ID_CURR                 0.0%
NUM_INSTALMENT_VERSION     0.0%
NUM_INSTALMENT_NUMBER      0.0%
DAYS_INSTALMENT            0.0%
AMT_INSTALMENT             0.0%
dtype: object


In [56]:
raw_installment_payments.dtypes

SK_ID_PREV                  int64
SK_ID_CURR                  int64
NUM_INSTALMENT_VERSION    float64
NUM_INSTALMENT_NUMBER       int64
DAYS_INSTALMENT           float64
DAYS_ENTRY_PAYMENT        float64
AMT_INSTALMENT            float64
AMT_PAYMENT               float64
dtype: object

In [57]:
raw_installment_payments_dropped = raw_installment_payments.drop(['SK_ID_PREV', 'NUM_INSTALMENT_VERSION', 'NUM_INSTALMENT_NUMBER'], axis = 1)

Installment Payments Feature Engineering

In [58]:
raw_installment_payments_dropped['DAYS_INSTALMENT'].replace(365243, np.nan, inplace= True)
raw_installment_payments_dropped['DAYS_ENTRY_PAYMENT'].replace(365243, np.nan, inplace= True)

In [59]:
raw_installment_payments_agg_num = {
"DAYS_INSTALMENT": "mean",
"DAYS_ENTRY_PAYMENT": "mean",
"AMT_INSTALMENT": "mean",
"AMT_PAYMENT": "mean",
}

In [60]:
installment_payments_agg = raw_installment_payments_dropped.groupby('SK_ID_CURR').agg(raw_installment_payments_agg_num)

installment_payments_agg.columns = pd.Index(['install_payments_' + col + "_" + raw_installment_payments_agg_num[col] for col in installment_payments_agg.columns.tolist() if col != 'SK_ID_CURR'])

installment_payments_agg = installment_payments_agg.reset_index()

In [61]:
installment_payments_agg

Unnamed: 0,SK_ID_CURR,install_payments_DAYS_INSTALMENT_mean,install_payments_DAYS_ENTRY_PAYMENT_mean,install_payments_AMT_INSTALMENT_mean,install_payments_AMT_PAYMENT_mean
0,100001,-2187.714286,-2195.000000,5885.132143,5885.132143
1,100002,-295.000000,-315.421053,11559.247105,11559.247105
2,100003,-1378.160000,-1385.320000,64754.586000,64754.586000
3,100004,-754.000000,-761.666667,7096.155000,7096.155000
4,100005,-586.000000,-609.555556,6240.205000,6240.205000
...,...,...,...,...,...
339582,456251,-120.000000,-156.285714,7492.924286,7492.924286
339583,456252,-2391.000000,-2393.833333,10069.867500,10069.867500
339584,456253,-2372.928571,-2387.428571,4399.707857,4115.915357
339585,456254,-142.263158,-161.263158,10239.832895,10239.832895


In [62]:
len(installment_payments_agg)

339587

train_application

In [63]:
raw_train = pd.read_csv(os.getcwd()+"\\application_train.csv")

Data Analysis for the aggregated dataset will be performed in the main Exploratory Data Analysis File

In [64]:
print("application_train: ", len(raw_train))
print("bureau: ",len(bureau_agg))
print("credit_card_balance: ",len(credit_card_agg))
print("POS_CASH_balance: ",len(cash_balance_agg))
print("previous_application: ",len(previous_application_agg))
print("installments_payments: ",len(installment_payments_agg))


application_train:  307511
bureau:  305811
credit_card_balance:  103558
POS_CASH_balance:  337252
previous_application:  338857
installments_payments:  339587


In [65]:
train_agg = raw_train.merge(bureau_agg, how = 'left', on='SK_ID_CURR')

train_agg = train_agg.merge(credit_card_agg, how = 'left', on='SK_ID_CURR')

train_agg = train_agg.merge(cash_balance_agg, how = 'left', on='SK_ID_CURR')

train_agg = train_agg.merge(previous_application_agg, how = 'left', on='SK_ID_CURR')

train_agg = train_agg.merge(installment_payments_agg, how = 'left', on='SK_ID_CURR')

In [66]:
%cd home-credit-default-risk

raw_test = pd.read_csv(os.getcwd()+"\\application_test.csv")

test_agg = raw_test.merge(bureau_agg, how = 'left', on='SK_ID_CURR')

test_agg = test_agg.merge(credit_card_agg, how = 'left', on='SK_ID_CURR')

test_agg = test_agg.merge(cash_balance_agg, how = 'left', on='SK_ID_CURR')

test_agg = test_agg.merge(previous_application_agg, how = 'left', on='SK_ID_CURR')

test_agg = test_agg.merge(installment_payments_agg, how = 'left', on='SK_ID_CURR')

test_agg.to_csv("test_agg.csv", index = False)

[WinError 2] The system cannot find the file specified: 'home-credit-default-risk'
d:\Documents\Github\projs\home-credit-default-risk\data


In [67]:
print(len(train_agg))
print(len(train_agg.columns))
train_agg.head()

307511
473


Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,bureau_DAYS_CREDIT_mean,bureau_CREDIT_DAY_OVERDUE_mean,bureau_DAYS_CREDIT_ENDDATE_min,bureau_DAYS_ENDDATE_FACT_max,bureau_AMT_CREDIT_MAX_OVERDUE_max,bureau_CNT_CREDIT_PROLONG_max,bureau_AMT_CREDIT_SUM_min,bureau_AMT_CREDIT_SUM_mean,bureau_AMT_CREDIT_SUM_max,bureau_AMT_CREDIT_SUM_var,bureau_AMT_CREDIT_SUM_DEBT_mean,bureau_AMT_CREDIT_SUM_LIMIT_max,bureau_AMT_CREDIT_SUM_OVERDUE_max,bureau_DAYS_CREDIT_UPDATE_max,bureau_AMT_ANNUITY_min,bureau_AMT_ANNUITY_mean,bureau_AMT_ANNUITY_max,bureau_AMT_ANNUITY_var,bureau_CREDIT_TYPE_Another type of loan_sum,bureau_CREDIT_TYPE_Car loan_sum,bureau_CREDIT_TYPE_Cash loan (non-earmarked)_sum,bureau_CREDIT_TYPE_Consumer credit_sum,bureau_CREDIT_TYPE_Credit card_sum,bureau_CREDIT_TYPE_Interbank credit_sum,bureau_CREDIT_TYPE_Loan for business development_sum,bureau_CREDIT_TYPE_Loan for purchase of shares (margin lending)_sum,bureau_CREDIT_TYPE_Loan for the purchase of equipment_sum,bureau_CREDIT_TYPE_Loan for working capital replenishment_sum,bureau_CREDIT_TYPE_Microloan_sum,bureau_CREDIT_TYPE_Mobile operator loan_sum,bureau_CREDIT_TYPE_Mortgage_sum,bureau_CREDIT_TYPE_Real estate loan_sum,bureau_CREDIT_TYPE_Unknown type of loan_sum,credit_card_MONTHS_BALANCE_max,credit_card_MONTHS_BALANCE_mean,credit_card_MONTHS_BALANCE_sum,credit_card_MONTHS_BALANCE_median,credit_card_MONTHS_BALANCE_std,credit_card_AMT_BALANCE_max,credit_card_AMT_BALANCE_mean,credit_card_AMT_BALANCE_sum,credit_card_AMT_BALANCE_median,credit_card_AMT_BALANCE_std,credit_card_AMT_CREDIT_LIMIT_ACTUAL_max,credit_card_AMT_CREDIT_LIMIT_ACTUAL_mean,credit_card_AMT_CREDIT_LIMIT_ACTUAL_sum,credit_card_AMT_CREDIT_LIMIT_ACTUAL_median,credit_card_AMT_CREDIT_LIMIT_ACTUAL_std,credit_card_AMT_DRAWINGS_ATM_CURRENT_max,credit_card_AMT_DRAWINGS_ATM_CURRENT_mean,credit_card_AMT_DRAWINGS_ATM_CURRENT_sum,credit_card_AMT_DRAWINGS_ATM_CURRENT_median,credit_card_AMT_DRAWINGS_ATM_CURRENT_std,credit_card_AMT_DRAWINGS_CURRENT_max,credit_card_AMT_DRAWINGS_CURRENT_mean,credit_card_AMT_DRAWINGS_CURRENT_sum,credit_card_AMT_DRAWINGS_CURRENT_median,credit_card_AMT_DRAWINGS_CURRENT_std,credit_card_AMT_DRAWINGS_OTHER_CURRENT_max,credit_card_AMT_DRAWINGS_OTHER_CURRENT_mean,credit_card_AMT_DRAWINGS_OTHER_CURRENT_sum,credit_card_AMT_DRAWINGS_OTHER_CURRENT_median,credit_card_AMT_DRAWINGS_OTHER_CURRENT_std,credit_card_AMT_DRAWINGS_POS_CURRENT_max,credit_card_AMT_DRAWINGS_POS_CURRENT_mean,credit_card_AMT_DRAWINGS_POS_CURRENT_sum,credit_card_AMT_DRAWINGS_POS_CURRENT_median,credit_card_AMT_DRAWINGS_POS_CURRENT_std,credit_card_AMT_INST_MIN_REGULARITY_max,credit_card_AMT_INST_MIN_REGULARITY_mean,credit_card_AMT_INST_MIN_REGULARITY_sum,credit_card_AMT_INST_MIN_REGULARITY_median,credit_card_AMT_INST_MIN_REGULARITY_std,credit_card_AMT_PAYMENT_CURRENT_max,credit_card_AMT_PAYMENT_CURRENT_mean,credit_card_AMT_PAYMENT_CURRENT_sum,credit_card_AMT_PAYMENT_CURRENT_median,credit_card_AMT_PAYMENT_CURRENT_std,credit_card_AMT_PAYMENT_TOTAL_CURRENT_max,credit_card_AMT_PAYMENT_TOTAL_CURRENT_mean,credit_card_AMT_PAYMENT_TOTAL_CURRENT_sum,credit_card_AMT_PAYMENT_TOTAL_CURRENT_median,credit_card_AMT_PAYMENT_TOTAL_CURRENT_std,credit_card_AMT_RECEIVABLE_PRINCIPAL_max,credit_card_AMT_RECEIVABLE_PRINCIPAL_mean,credit_card_AMT_RECEIVABLE_PRINCIPAL_sum,credit_card_AMT_RECEIVABLE_PRINCIPAL_median,credit_card_AMT_RECEIVABLE_PRINCIPAL_std,credit_card_AMT_RECIVABLE_max,credit_card_AMT_RECIVABLE_mean,credit_card_AMT_RECIVABLE_sum,credit_card_AMT_RECIVABLE_median,credit_card_AMT_RECIVABLE_std,credit_card_AMT_TOTAL_RECEIVABLE_max,credit_card_AMT_TOTAL_RECEIVABLE_mean,credit_card_AMT_TOTAL_RECEIVABLE_sum,credit_card_AMT_TOTAL_RECEIVABLE_median,credit_card_AMT_TOTAL_RECEIVABLE_std,credit_card_CNT_DRAWINGS_ATM_CURRENT_max,credit_card_CNT_DRAWINGS_ATM_CURRENT_mean,credit_card_CNT_DRAWINGS_ATM_CURRENT_sum,credit_card_CNT_DRAWINGS_ATM_CURRENT_median,credit_card_CNT_DRAWINGS_ATM_CURRENT_std,credit_card_CNT_DRAWINGS_CURRENT_max,credit_card_CNT_DRAWINGS_CURRENT_mean,credit_card_CNT_DRAWINGS_CURRENT_sum,credit_card_CNT_DRAWINGS_CURRENT_median,credit_card_CNT_DRAWINGS_CURRENT_std,credit_card_CNT_DRAWINGS_OTHER_CURRENT_max,credit_card_CNT_DRAWINGS_OTHER_CURRENT_mean,credit_card_CNT_DRAWINGS_OTHER_CURRENT_sum,credit_card_CNT_DRAWINGS_OTHER_CURRENT_median,credit_card_CNT_DRAWINGS_OTHER_CURRENT_std,credit_card_CNT_DRAWINGS_POS_CURRENT_max,credit_card_CNT_DRAWINGS_POS_CURRENT_mean,credit_card_CNT_DRAWINGS_POS_CURRENT_sum,credit_card_CNT_DRAWINGS_POS_CURRENT_median,credit_card_CNT_DRAWINGS_POS_CURRENT_std,credit_card_CNT_INSTALMENT_MATURE_CUM_max,credit_card_CNT_INSTALMENT_MATURE_CUM_mean,credit_card_CNT_INSTALMENT_MATURE_CUM_sum,credit_card_CNT_INSTALMENT_MATURE_CUM_median,credit_card_CNT_INSTALMENT_MATURE_CUM_std,credit_card_SK_DPD_max,credit_card_SK_DPD_mean,credit_card_SK_DPD_sum,credit_card_SK_DPD_median,credit_card_SK_DPD_std,credit_card_SK_DPD_DEF_max,credit_card_SK_DPD_DEF_mean,credit_card_SK_DPD_DEF_sum,credit_card_SK_DPD_DEF_median,credit_card_SK_DPD_DEF_std,credit_card_PNT_BALANCE_DRAWINGS_ATM_max,credit_card_PNT_BALANCE_DRAWINGS_ATM_mean,credit_card_PNT_BALANCE_DRAWINGS_ATM_sum,credit_card_PNT_BALANCE_DRAWINGS_ATM_median,credit_card_PNT_BALANCE_DRAWINGS_ATM_std,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_max,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_mean,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_sum,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_median,credit_card_PNT_BALANCE_DRAWINGS_CURRENT_std,credit_card_AVG_DRAWINGS_ATM_CURRENT_max,credit_card_AVG_DRAWINGS_ATM_CURRENT_mean,credit_card_AVG_DRAWINGS_ATM_CURRENT_sum,credit_card_AVG_DRAWINGS_ATM_CURRENT_median,credit_card_AVG_DRAWINGS_ATM_CURRENT_std,credit_card_AVG_DRAWINGS_CURRENT_max,credit_card_AVG_DRAWINGS_CURRENT_mean,credit_card_AVG_DRAWINGS_CURRENT_sum,credit_card_AVG_DRAWINGS_CURRENT_median,credit_card_AVG_DRAWINGS_CURRENT_std,credit_card_AVG_DRAWINGS_OTHER_CURRENT_max,credit_card_AVG_DRAWINGS_OTHER_CURRENT_mean,credit_card_AVG_DRAWINGS_OTHER_CURRENT_sum,credit_card_AVG_DRAWINGS_OTHER_CURRENT_median,credit_card_AVG_DRAWINGS_OTHER_CURRENT_std,credit_card_AVG_DRAWINGS_POS_CURRENT_max,credit_card_AVG_DRAWINGS_POS_CURRENT_mean,credit_card_AVG_DRAWINGS_POS_CURRENT_sum,credit_card_AVG_DRAWINGS_POS_CURRENT_median,credit_card_AVG_DRAWINGS_POS_CURRENT_std,cash_balance_CNT_INSTALMENT_min,cash_balance_CNT_INSTALMENT_mean,cash_balance_CNT_INSTALMENT_max,cash_balance_CNT_INSTALMENT_FUTURE_min,cash_balance_CNT_INSTALMENT_FUTURE_mean,cash_balance_CNT_INSTALMENT_FUTURE_max,cash_balance_SK_DPD_min,cash_balance_SK_DPD_mean,cash_balance_SK_DPD_max,cash_balance_SK_DPD_DEF_min,cash_balance_SK_DPD_DEF_mean,cash_balance_SK_DPD_DEF_max,prev_app_AMT_ANNUITY_max,prev_app_AMT_ANNUITY_mean,prev_app_AMT_ANNUITY_min,prev_app_AMT_ANNUITY_std,prev_app_AMT_APPLICATION_max,prev_app_AMT_APPLICATION_mean,prev_app_AMT_APPLICATION_min,prev_app_AMT_APPLICATION_std,prev_app_AMT_CREDIT_max,prev_app_AMT_CREDIT_mean,prev_app_AMT_CREDIT_min,prev_app_AMT_CREDIT_std,prev_app_AMT_DOWN_PAYMENT_max,prev_app_AMT_DOWN_PAYMENT_mean,prev_app_AMT_DOWN_PAYMENT_min,prev_app_AMT_DOWN_PAYMENT_std,prev_app_AMT_GOODS_PRICE_max,prev_app_AMT_GOODS_PRICE_mean,prev_app_AMT_GOODS_PRICE_min,prev_app_AMT_GOODS_PRICE_std,prev_app_RATE_DOWN_PAYMENT_max,prev_app_RATE_DOWN_PAYMENT_mean,prev_app_RATE_DOWN_PAYMENT_min,prev_app_RATE_DOWN_PAYMENT_std,prev_app_DAYS_DECISION_max,prev_app_DAYS_DECISION_mean,prev_app_DAYS_DECISION_min,prev_app_DAYS_DECISION_std,prev_app_CNT_PAYMENT_max,prev_app_CNT_PAYMENT_mean,prev_app_CNT_PAYMENT_min,prev_app_CNT_PAYMENT_std,prev_app_DAYS_FIRST_DRAWING_max,prev_app_DAYS_FIRST_DRAWING_mean,prev_app_DAYS_FIRST_DRAWING_min,prev_app_DAYS_FIRST_DRAWING_std,prev_app_DAYS_FIRST_DUE_max,prev_app_DAYS_FIRST_DUE_mean,prev_app_DAYS_FIRST_DUE_min,prev_app_DAYS_FIRST_DUE_std,prev_app_DAYS_LAST_DUE_1ST_VERSION_max,prev_app_DAYS_LAST_DUE_1ST_VERSION_mean,prev_app_DAYS_LAST_DUE_1ST_VERSION_min,prev_app_DAYS_LAST_DUE_1ST_VERSION_std,prev_app_DAYS_LAST_DUE_max,prev_app_DAYS_LAST_DUE_mean,prev_app_DAYS_LAST_DUE_min,prev_app_DAYS_LAST_DUE_std,prev_app_DAYS_TERMINATION_max,prev_app_DAYS_TERMINATION_mean,prev_app_DAYS_TERMINATION_min,prev_app_DAYS_TERMINATION_std,prev_app_DOWN_PAYMENT_PNT_max,prev_app_DOWN_PAYMENT_PNT_mean,prev_app_DOWN_PAYMENT_PNT_min,prev_app_DOWN_PAYMENT_PNT_std,prev_app_DOWN_PAYMENT_PNT_var,prev_app_ANNUITY_CREDIT_RATIO_max,prev_app_ANNUITY_CREDIT_RATIO_mean,prev_app_ANNUITY_CREDIT_RATIO_min,prev_app_ANNUITY_CREDIT_RATIO_std,prev_app_ANNUITY_CREDIT_RATIO_var,prev_app_NAME_CASH_LOAN_PURPOSE_Building a house or an annex_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Business development_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Buying a garage_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Buying a holiday home / land_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Buying a home_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Buying a new car_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Buying a used car_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Car repairs_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Education_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Everyday expenses_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Furniture_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Gasification / water supply_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Hobby_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Journey_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Medicine_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Money for a third person_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Other_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Payments on other loans_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Purchase of electronic equipment_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Refusal to name the goal_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Repairs_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Urgent needs_sum,prev_app_NAME_CASH_LOAN_PURPOSE_Wedding / gift / holiday_sum,prev_app_NAME_CASH_LOAN_PURPOSE_XAP_sum,prev_app_NAME_CASH_LOAN_PURPOSE_XNA_sum,prev_app_NAME_PAYMENT_TYPE_Cash through the bank_sum,prev_app_NAME_PAYMENT_TYPE_Cashless from the account of the employer_sum,prev_app_NAME_PAYMENT_TYPE_Non-cash from your account_sum,prev_app_NAME_PAYMENT_TYPE_XNA_sum,prev_app_WEEKDAY_APPR_PROCESS_START_FRIDAY_sum,prev_app_WEEKDAY_APPR_PROCESS_START_MONDAY_sum,prev_app_WEEKDAY_APPR_PROCESS_START_SATURDAY_sum,prev_app_WEEKDAY_APPR_PROCESS_START_SUNDAY_sum,prev_app_WEEKDAY_APPR_PROCESS_START_THURSDAY_sum,prev_app_WEEKDAY_APPR_PROCESS_START_TUESDAY_sum,prev_app_WEEKDAY_APPR_PROCESS_START_WEDNESDAY_sum,prev_app_CODE_REJECT_REASON_CLIENT_sum,prev_app_CODE_REJECT_REASON_HC_sum,prev_app_CODE_REJECT_REASON_LIMIT_sum,prev_app_CODE_REJECT_REASON_SCO_sum,prev_app_CODE_REJECT_REASON_SCOFR_sum,prev_app_CODE_REJECT_REASON_SYSTEM_sum,prev_app_CODE_REJECT_REASON_VERIF_sum,prev_app_CODE_REJECT_REASON_XAP_sum,prev_app_CODE_REJECT_REASON_XNA_sum,prev_app_NAME_CLIENT_TYPE_New_sum,prev_app_NAME_CLIENT_TYPE_Refreshed_sum,prev_app_NAME_CLIENT_TYPE_Repeater_sum,prev_app_NAME_CLIENT_TYPE_XNA_sum,prev_app_NAME_GOODS_CATEGORY_Additional Service_sum,prev_app_NAME_GOODS_CATEGORY_Animals_sum,prev_app_NAME_GOODS_CATEGORY_Audio/Video_sum,prev_app_NAME_GOODS_CATEGORY_Auto Accessories_sum,prev_app_NAME_GOODS_CATEGORY_Clothing and Accessories_sum,prev_app_NAME_GOODS_CATEGORY_Computers_sum,prev_app_NAME_GOODS_CATEGORY_Construction Materials_sum,prev_app_NAME_GOODS_CATEGORY_Consumer Electronics_sum,prev_app_NAME_GOODS_CATEGORY_Direct Sales_sum,prev_app_NAME_GOODS_CATEGORY_Education_sum,prev_app_NAME_GOODS_CATEGORY_Fitness_sum,prev_app_NAME_GOODS_CATEGORY_Furniture_sum,prev_app_NAME_GOODS_CATEGORY_Gardening_sum,prev_app_NAME_GOODS_CATEGORY_Homewares_sum,prev_app_NAME_GOODS_CATEGORY_House Construction_sum,prev_app_NAME_GOODS_CATEGORY_Insurance_sum,prev_app_NAME_GOODS_CATEGORY_Jewelry_sum,prev_app_NAME_GOODS_CATEGORY_Medical Supplies_sum,prev_app_NAME_GOODS_CATEGORY_Medicine_sum,prev_app_NAME_GOODS_CATEGORY_Mobile_sum,prev_app_NAME_GOODS_CATEGORY_Office Appliances_sum,prev_app_NAME_GOODS_CATEGORY_Other_sum,prev_app_NAME_GOODS_CATEGORY_Photo / Cinema Equipment_sum,prev_app_NAME_GOODS_CATEGORY_Sport and Leisure_sum,prev_app_NAME_GOODS_CATEGORY_Tourism_sum,prev_app_NAME_GOODS_CATEGORY_Vehicles_sum,prev_app_NAME_GOODS_CATEGORY_Weapon_sum,prev_app_NAME_GOODS_CATEGORY_XNA_sum,prev_app_NAME_PORTFOLIO_Cards_sum,prev_app_NAME_PORTFOLIO_Cars_sum,prev_app_NAME_PORTFOLIO_Cash_sum,prev_app_NAME_PORTFOLIO_POS_sum,prev_app_NAME_PORTFOLIO_XNA_sum,prev_app_NAME_PRODUCT_TYPE_XNA_sum,prev_app_NAME_PRODUCT_TYPE_walk-in_sum,prev_app_NAME_PRODUCT_TYPE_x-sell_sum,prev_app_CHANNEL_TYPE_AP+ (Cash loan)_sum,prev_app_CHANNEL_TYPE_Car dealer_sum,prev_app_CHANNEL_TYPE_Channel of corporate sales_sum,prev_app_CHANNEL_TYPE_Contact center_sum,prev_app_CHANNEL_TYPE_Country-wide_sum,prev_app_CHANNEL_TYPE_Credit and cash offices_sum,prev_app_CHANNEL_TYPE_Regional / Local_sum,prev_app_CHANNEL_TYPE_Stone_sum,prev_app_SELLERPLACE_AREA_sum,prev_app_NAME_SELLER_INDUSTRY_Auto technology_sum,prev_app_NAME_SELLER_INDUSTRY_Clothing_sum,prev_app_NAME_SELLER_INDUSTRY_Connectivity_sum,prev_app_NAME_SELLER_INDUSTRY_Construction_sum,prev_app_NAME_SELLER_INDUSTRY_Consumer electronics_sum,prev_app_NAME_SELLER_INDUSTRY_Furniture_sum,prev_app_NAME_SELLER_INDUSTRY_Industry_sum,prev_app_NAME_SELLER_INDUSTRY_Jewelry_sum,prev_app_NAME_SELLER_INDUSTRY_MLM partners_sum,prev_app_NAME_SELLER_INDUSTRY_Tourism_sum,prev_app_NAME_SELLER_INDUSTRY_XNA_sum,prev_app_NAME_YIELD_GROUP_XNA_sum,prev_app_NAME_YIELD_GROUP_high_sum,prev_app_NAME_YIELD_GROUP_low_action_sum,prev_app_NAME_YIELD_GROUP_low_normal_sum,prev_app_NAME_YIELD_GROUP_middle_sum,install_payments_DAYS_INSTALMENT_mean,install_payments_DAYS_ENTRY_PAYMENT_mean,install_payments_AMT_INSTALMENT_mean,install_payments_AMT_PAYMENT_mean
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,-9461,-637,-3648.0,-2120,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.0252,0.0383,0.9722,0.6341,0.0144,0.0,0.069,0.0833,0.125,0.0377,0.022,0.0198,0.0,0.0,0.025,0.0369,0.9722,0.6243,0.0144,0.0,0.069,0.0833,0.125,0.0375,0.0205,0.0193,0.0,0.0,reg oper account,block of flats,0.0149,"Stone, brick",No,2.0,2.0,2.0,2.0,-1134.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,-874.0,0.0,-1072.0,-36.0,5043.645,0.0,0.0,108131.945625,450000.0,21338070000.0,49156.2,31988.565,0.0,-7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24.0,24.0,24.0,6.0,15.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,9251.775,9251.775,9251.775,,179055.0,179055.0,179055.0,,179055.0,179055.0,179055.0,,0.0,0.0,0.0,,179055.0,179055.0,179055.0,,0.0,0.0,0.0,,-606.0,-606.0,-606.0,,24.0,24.0,24.0,,,,,,-565.0,-565.0,-565.0,,125.0,125.0,125.0,,-25.0,-25.0,-25.0,,-17.0,-17.0,-17.0,,0.0,0.0,0.0,,,0.05167,0.05167,0.05167,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,500.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,-295.0,-315.421053,11559.247105,11559.247105
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.003541,-16765,-1188,-1186.0,-291,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.804,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.079,0.0554,0.0,0.0,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.01,reg oper account,block of flats,0.0714,Block,No,1.0,0.0,1.0,0.0,-828.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,-1400.75,0.0,-2434.0,-540.0,0.0,0.0,22248.0,254350.125,810000.0,138584600000.0,0.0,810000.0,0.0,-43.0,,,,,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.0,10.107143,12.0,0.0,5.785714,12.0,0.0,0.0,0.0,0.0,0.0,0.0,98356.995,56553.99,6737.31,46332.557777,900000.0,435436.5,68809.5,424161.620549,1035882.0,484191.0,68053.5,497949.861808,6885.0,3442.5,0.0,4868.430188,900000.0,435436.5,68809.5,424161.620549,0.100061,0.05003,0.0,0.070754,-746.0,-1305.0,-2341.0,898.138631,12.0,10.0,6.0,3.464102,,,,,-716.0,-1274.333333,-2310.0,897.827563,-386.0,-1004.333333,-1980.0,854.97037,-536.0,-1054.333333,-1980.0,803.569744,-527.0,-1047.333333,-1976.0,806.196213,0.10117,0.050585,0.0,0.071538,0.005118,0.1852,0.126383,0.09495,0.050977,0.002599,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,2.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1599.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,2.0,-1378.16,-1385.32,64754.586,64754.586
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,-19046,-225,-4260.0,-2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,0.555912,0.729567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,-867.0,0.0,-595.0,-382.0,0.0,0.0,94500.0,94518.9,94537.8,714.42,0.0,0.0,0.0,-382.0,,,,,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.0,3.75,4.0,0.0,2.25,4.0,0.0,0.0,0.0,0.0,0.0,0.0,5357.25,5357.25,5357.25,,24282.0,24282.0,24282.0,,20106.0,20106.0,20106.0,,4860.0,4860.0,4860.0,,24282.0,24282.0,24282.0,,0.212008,0.212008,0.212008,,-815.0,-815.0,-815.0,,4.0,4.0,4.0,,,,,,-784.0,-784.0,-784.0,,-694.0,-694.0,-694.0,,-724.0,-724.0,-724.0,,-714.0,-714.0,-714.0,,0.241719,0.241719,0.241719,,,0.26645,0.26645,0.26645,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,30.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-754.0,-761.666667,7096.155,7096.155
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,-19005,-3039,-9833.0,-2437,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,0.650442,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,-617.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-1.0,-3.5,-21.0,-3.5,1.870829,0.0,0.0,0.0,0.0,0.0,270000.0,270000.0,1620000.0,270000.0,0.0,,,0.0,,,0.0,0.0,0.0,0.0,0.0,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,0.0,,,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,,,0.0,0.0,0.0,0.0,0.0,,,0.0,,,,,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,,,,,0.0,,,,,0.0,,,,,0.0,,,,,0.0,,,,,0.0,,,1.0,12.0,48.0,0.0,8.65,48.0,0.0,0.0,0.0,0.0,0.0,0.0,39954.51,23651.175,2482.92,13623.580119,688500.0,272203.26,0.0,286175.129541,906615.0,291695.5,0.0,333337.354853,66987.0,34840.17,2693.34,45462.482973,688500.0,408304.89,26912.34,253670.602059,0.21783,0.163412,0.108994,0.076958,-181.0,-272.444444,-617.0,157.236375,48.0,23.0,0.0,20.228692,,,,,-151.0,-325.666667,-545.0,200.761882,1259.0,364.333333,-215.0,785.967769,-151.0,-288.0,-425.0,193.747258,-143.0,-279.5,-416.0,193.040151,0.250017,0.180612,0.111208,0.098153,0.009634,0.10834,0.069304,0.03592,0.032882,0.001081,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0,4.0,0.0,0.0,5.0,0.0,0.0,1.0,1.0,6.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,8.0,0.0,1.0,0.0,8.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,1.0,0.0,3.0,2.0,3.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,1.0,7.0,0.0,1.0,8048.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,7.0,4.0,2.0,0.0,2.0,1.0,-252.25,-271.625,62947.088438,62947.088438
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,-19932,-3038,-4311.0,-3458,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,0.322738,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,-1149.0,0.0,-783.0,-783.0,0.0,0.0,146250.0,146250.0,146250.0,,0.0,0.0,0.0,-783.0,,,,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.0,15.333333,24.0,0.0,8.969697,24.0,0.0,0.0,0.0,0.0,0.0,0.0,22678.785,12278.805,1834.29,8063.586466,247500.0,150530.25,17176.5,100585.988276,284400.0,166638.75,14616.0,118032.409509,3676.5,3390.75,3105.0,404.111525,247500.0,150530.25,17176.5,100585.988276,0.21889,0.159516,0.100143,0.083967,-374.0,-1222.833333,-2357.0,717.268546,48.0,20.666667,10.0,14.348054,,,,,-344.0,-1263.2,-2326.0,777.378415,346.0,-837.2,-2056.0,932.826458,-354.0,-1140.5,-2056.0,748.964396,-347.0,-1131.0,-2041.0,745.469874,0.251539,0.176401,0.101262,0.106262,0.011292,0.125499,0.090659,0.045749,0.032229,0.001039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4.0,5.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,1.0,0.0,5.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,4.0,2.0,0.0,2.0,1.0,3.0,1.0,0.0,0.0,0.0,3.0,1.0,1.0,0.0,2455.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,3.0,-1028.606061,-1032.242424,12666.444545,12214.060227


In [68]:
print(len(train_agg[train_agg['TARGET']==1]))
print(len(train_agg[train_agg['TARGET']==0]))

24825
282686


In [69]:
train_agg.to_csv("train_agg.csv", index = False)