In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline 

#for save & load pickle
import sys
import os
import pickle

def save_as_pickled_object(obj, filepath):
    max_bytes = 2**31 - 1
    bytes_out = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    n_bytes = sys.getsizeof(bytes_out)
    with open(filepath, 'wb') as f_out:
        for idx in range(0, n_bytes, max_bytes):
            f_out.write(bytes_out[idx:idx+max_bytes])


def try_to_load_as_pickled_object_or_None(filepath):
    max_bytes = 2**31 - 1
    try:
        input_size = os.path.getsize(filepath)
        bytes_in = bytearray(0)
        with open(filepath, 'rb') as f_in:
            for _ in range(0, input_size, max_bytes):
                bytes_in += f_in.read(max_bytes)
        obj = pickle.loads(bytes_in)
    except:
        return None
    return obj

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory


import os
PATH = "../csv/"
print(os.listdir(PATH))

['application_test.csv', 'POS_CASH_balance.csv', 'credit_card_balance.csv', 'installments_payments.csv', 'application_train.csv', 'bureau.csv', 'previous_application.csv', 'bureau_balance.csv', 'sample_submission.csv']


In [2]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df


def import_data(file):
    """create a dataframe and optimize its memory usage"""
    df = pd.read_csv(file, parse_dates=True, keep_date_col=True)
    df = reduce_mem_usage(df)
    return df

In [3]:
#read csv
bureau_balance = import_data(PATH+'bureau_balance.csv')
bureau = import_data(PATH+'bureau.csv')

Memory usage of dataframe is 624.85 MB
Memory usage after optimization is: 156.21 MB
Decreased by 75.0%
Memory usage of dataframe is 222.62 MB
Memory usage after optimization is: 78.57 MB
Decreased by 64.7%


In [4]:
#bureau balance
df1=bureau_balance.sort_values("SK_ID_BUREAU")
df1["COUNT"] = 1
df2 = df1.groupby(["SK_ID_BUREAU","STATUS"]).agg(['sum','max','mean'])
df2.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,MONTHS_BALANCE,MONTHS_BALANCE,MONTHS_BALANCE,COUNT,COUNT,COUNT
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,max,mean,sum,max,mean
SK_ID_BUREAU,STATUS,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
5001709,C,-3655.0,0,-42.5,86,1,1
5001709,X,-1001.0,-86,-91.0,11,1,1
5001710,0,-254.0,-48,-50.8,5,1,1
5001710,C,-1128.0,0,-23.5,48,1,1
5001710,X,-2021.0,-49,-67.366667,30,1,1
5001711,0,-6.0,-1,-2.0,3,1,1
5001711,X,0.0,0,0.0,1,1,1
5001712,0,-135.0,-9,-13.5,10,1,1
5001712,C,-36.0,0,-4.0,9,1,1
5001713,X,-231.0,0,-10.5,22,1,1


In [5]:
columns_df2 = list(df2.columns)
varname = []
for i in range(0,len(columns_df2)):
    varname.append("bb_" + "_".join(columns_df2[i]))
print(varname)
df2.columns =varname
df2.head()

['bb_MONTHS_BALANCE_sum', 'bb_MONTHS_BALANCE_max', 'bb_MONTHS_BALANCE_mean', 'bb_COUNT_sum', 'bb_COUNT_max', 'bb_COUNT_mean']


Unnamed: 0_level_0,Unnamed: 1_level_0,bb_MONTHS_BALANCE_sum,bb_MONTHS_BALANCE_max,bb_MONTHS_BALANCE_mean,bb_COUNT_sum,bb_COUNT_max,bb_COUNT_mean
SK_ID_BUREAU,STATUS,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
5001709,C,-3655.0,0,-42.5,86,1,1
5001709,X,-1001.0,-86,-91.0,11,1,1
5001710,0,-254.0,-48,-50.8,5,1,1
5001710,C,-1128.0,0,-23.5,48,1,1
5001710,X,-2021.0,-49,-67.366667,30,1,1


In [6]:
df2_0 = df2.xs(key="0",level=1)
df2_0.columns = df2_0.columns + "_0"

df2_1 = df2.xs(key="1",level=1)
df2_1.columns = df2_1.columns + "_1"

df2_2 = df2.xs(key="2",level=1)
df2_2.columns = df2_2.columns + "_2"

df2_3 = df2.xs(key="3",level=1)
df2_3.columns = df2_3.columns + "_3"

df2_4 = df2.xs(key="4",level=1)
df2_4.columns = df2_4.columns + "_4"

df2_5 = df2.xs(key="5",level=1)
df2_5.columns = df2_5.columns + "_5"

df2_C = df2.xs(key="C",level=1)
df2_C.columns = df2_C.columns + "_C"

df2_X = df2.xs(key="X",level=1)
df2_X.columns = df2_X.columns + "_X"

In [7]:
df3= pd.merge(df2_0, df2_1, left_index=True, right_index=True)
df3= pd.merge(df3, df2_2, left_index=True, right_index=True)
df3= pd.merge(df3, df2_3, left_index=True, right_index=True)
df3= pd.merge(df3, df2_4, left_index=True, right_index=True)
df3= pd.merge(df3, df2_5, left_index=True, right_index=True)
df3= pd.merge(df3, df2_C, left_index=True, right_index=True)
df3= pd.merge(df3, df2_X, left_index=True, right_index=True)

In [8]:
pd.set_option('display.max_columns', 300)
df_bureau_balance = df3.reset_index()
df_bureau_balance.head()

Unnamed: 0,SK_ID_BUREAU,bb_MONTHS_BALANCE_sum_0,bb_MONTHS_BALANCE_max_0,bb_MONTHS_BALANCE_mean_0,bb_COUNT_sum_0,bb_COUNT_max_0,bb_COUNT_mean_0,bb_MONTHS_BALANCE_sum_1,bb_MONTHS_BALANCE_max_1,bb_MONTHS_BALANCE_mean_1,bb_COUNT_sum_1,bb_COUNT_max_1,bb_COUNT_mean_1,bb_MONTHS_BALANCE_sum_2,bb_MONTHS_BALANCE_max_2,bb_MONTHS_BALANCE_mean_2,bb_COUNT_sum_2,bb_COUNT_max_2,bb_COUNT_mean_2,bb_MONTHS_BALANCE_sum_3,bb_MONTHS_BALANCE_max_3,bb_MONTHS_BALANCE_mean_3,bb_COUNT_sum_3,bb_COUNT_max_3,bb_COUNT_mean_3,bb_MONTHS_BALANCE_sum_4,bb_MONTHS_BALANCE_max_4,bb_MONTHS_BALANCE_mean_4,bb_COUNT_sum_4,bb_COUNT_max_4,bb_COUNT_mean_4,bb_MONTHS_BALANCE_sum_5,bb_MONTHS_BALANCE_max_5,bb_MONTHS_BALANCE_mean_5,bb_COUNT_sum_5,bb_COUNT_max_5,bb_COUNT_mean_5,bb_MONTHS_BALANCE_sum_C,bb_MONTHS_BALANCE_max_C,bb_MONTHS_BALANCE_mean_C,bb_COUNT_sum_C,bb_COUNT_max_C,bb_COUNT_mean_C,bb_MONTHS_BALANCE_sum_X,bb_MONTHS_BALANCE_max_X,bb_MONTHS_BALANCE_mean_X,bb_COUNT_sum_X,bb_COUNT_max_X,bb_COUNT_mean_X
0,5004027,-390.0,-72,-78.0,5,1,1,-77.0,-77,-77.0,1,1,1,-147.0,-71,-73.5,2,1,1,-145.0,-70,-72.5,2,1,1,-143.0,-69,-71.5,2,1,1,-403.0,-64,-67.166667,6,1,1,-2016.0,0,-31.5,64,1,1,-82.0,-82,-82.0,1,1,1
1,5017063,-287.0,-26,-31.888889,9,1,1,-199.0,-21,-28.428571,7,1,1,-65.0,-20,-21.666667,3,1,1,-19.0,-19,-19.0,1,1,1,-18.0,-18,-18.0,1,1,1,-17.0,-17,-17.0,1,1,1,-136.0,0,-8.0,17,1,1,-3915.0,-39,-67.5,58,1,1
2,5023458,-1663.0,-7,-57.344828,29,1,1,-78.0,-14,-39.0,2,1,1,-76.0,-13,-38.0,2,1,1,-62.0,-62,-62.0,1,1,1,-61.0,-61,-61.0,1,1,1,-1694.0,-17,-38.5,44,1,1,-18.0,-3,-4.5,4,1,1,-86.0,-86,-86.0,1,1,1
3,5024940,-89.0,-89,-89.0,1,1,1,-175.0,-87,-87.5,2,1,1,-86.0,-86,-86.0,1,1,1,-85.0,-85,-85.0,1,1,1,-167.0,-83,-83.5,2,1,1,-82.0,-82,-82.0,1,1,1,-1653.0,0,-28.5,58,1,1,-1668.0,-58,-69.5,24,1,1
4,5025070,-2445.0,-15,-50.9375,48,1,1,-177.0,-44,-59.0,3,1,1,-68.0,-68,-68.0,1,1,1,-67.0,-67,-67.0,1,1,1,-66.0,-66,-66.0,1,1,1,-65.0,-65,-65.0,1,1,1,-91.0,0,-6.5,14,1,1,-424.0,-14,-30.285714,14,1,1


In [9]:
#bureau
df1 = pd.merge(bureau, df_bureau_balance, on="SK_ID_BUREAU",how='left')
df1 = df1.drop(columns="SK_ID_BUREAU")
df1 = df1.sort_values("SK_ID_CURR")
df1["COUNT"] = 1

In [10]:
df2 = df1.groupby(["SK_ID_CURR","CREDIT_ACTIVE"]).agg(['sum','max','mean'])

In [11]:
columns_df2 = list(df2.columns)
varname = []
for i in range(0,len(columns_df2)):
    varname.append("bureau_" + "_".join(columns_df2[i]))
print(varname)
df2.columns =varname
df2.head()

['bureau_DAYS_CREDIT_sum', 'bureau_DAYS_CREDIT_max', 'bureau_DAYS_CREDIT_mean', 'bureau_CREDIT_DAY_OVERDUE_sum', 'bureau_CREDIT_DAY_OVERDUE_max', 'bureau_CREDIT_DAY_OVERDUE_mean', 'bureau_DAYS_CREDIT_ENDDATE_sum', 'bureau_DAYS_CREDIT_ENDDATE_max', 'bureau_DAYS_CREDIT_ENDDATE_mean', 'bureau_DAYS_ENDDATE_FACT_sum', 'bureau_DAYS_ENDDATE_FACT_max', 'bureau_DAYS_ENDDATE_FACT_mean', 'bureau_AMT_CREDIT_MAX_OVERDUE_sum', 'bureau_AMT_CREDIT_MAX_OVERDUE_max', 'bureau_AMT_CREDIT_MAX_OVERDUE_mean', 'bureau_CNT_CREDIT_PROLONG_sum', 'bureau_CNT_CREDIT_PROLONG_max', 'bureau_CNT_CREDIT_PROLONG_mean', 'bureau_AMT_CREDIT_SUM_sum', 'bureau_AMT_CREDIT_SUM_max', 'bureau_AMT_CREDIT_SUM_mean', 'bureau_AMT_CREDIT_SUM_DEBT_sum', 'bureau_AMT_CREDIT_SUM_DEBT_max', 'bureau_AMT_CREDIT_SUM_DEBT_mean', 'bureau_AMT_CREDIT_SUM_LIMIT_sum', 'bureau_AMT_CREDIT_SUM_LIMIT_max', 'bureau_AMT_CREDIT_SUM_LIMIT_mean', 'bureau_AMT_CREDIT_SUM_OVERDUE_sum', 'bureau_AMT_CREDIT_SUM_OVERDUE_max', 'bureau_AMT_CREDIT_SUM_OVERDUE_mean',

Unnamed: 0_level_0,Unnamed: 1_level_0,bureau_DAYS_CREDIT_sum,bureau_DAYS_CREDIT_max,bureau_DAYS_CREDIT_mean,bureau_CREDIT_DAY_OVERDUE_sum,bureau_CREDIT_DAY_OVERDUE_max,bureau_CREDIT_DAY_OVERDUE_mean,bureau_DAYS_CREDIT_ENDDATE_sum,bureau_DAYS_CREDIT_ENDDATE_max,bureau_DAYS_CREDIT_ENDDATE_mean,bureau_DAYS_ENDDATE_FACT_sum,bureau_DAYS_ENDDATE_FACT_max,bureau_DAYS_ENDDATE_FACT_mean,bureau_AMT_CREDIT_MAX_OVERDUE_sum,bureau_AMT_CREDIT_MAX_OVERDUE_max,bureau_AMT_CREDIT_MAX_OVERDUE_mean,bureau_CNT_CREDIT_PROLONG_sum,bureau_CNT_CREDIT_PROLONG_max,bureau_CNT_CREDIT_PROLONG_mean,bureau_AMT_CREDIT_SUM_sum,bureau_AMT_CREDIT_SUM_max,bureau_AMT_CREDIT_SUM_mean,bureau_AMT_CREDIT_SUM_DEBT_sum,bureau_AMT_CREDIT_SUM_DEBT_max,bureau_AMT_CREDIT_SUM_DEBT_mean,bureau_AMT_CREDIT_SUM_LIMIT_sum,bureau_AMT_CREDIT_SUM_LIMIT_max,bureau_AMT_CREDIT_SUM_LIMIT_mean,bureau_AMT_CREDIT_SUM_OVERDUE_sum,bureau_AMT_CREDIT_SUM_OVERDUE_max,bureau_AMT_CREDIT_SUM_OVERDUE_mean,bureau_DAYS_CREDIT_UPDATE_sum,bureau_DAYS_CREDIT_UPDATE_max,bureau_DAYS_CREDIT_UPDATE_mean,bureau_AMT_ANNUITY_sum,bureau_AMT_ANNUITY_max,bureau_AMT_ANNUITY_mean,bureau_bb_MONTHS_BALANCE_sum_0_sum,bureau_bb_MONTHS_BALANCE_sum_0_max,bureau_bb_MONTHS_BALANCE_sum_0_mean,bureau_bb_MONTHS_BALANCE_max_0_sum,bureau_bb_MONTHS_BALANCE_max_0_max,bureau_bb_MONTHS_BALANCE_max_0_mean,bureau_bb_MONTHS_BALANCE_mean_0_sum,bureau_bb_MONTHS_BALANCE_mean_0_max,bureau_bb_MONTHS_BALANCE_mean_0_mean,bureau_bb_COUNT_sum_0_sum,bureau_bb_COUNT_sum_0_max,bureau_bb_COUNT_sum_0_mean,bureau_bb_COUNT_max_0_sum,bureau_bb_COUNT_max_0_max,bureau_bb_COUNT_max_0_mean,bureau_bb_COUNT_mean_0_sum,bureau_bb_COUNT_mean_0_max,bureau_bb_COUNT_mean_0_mean,bureau_bb_MONTHS_BALANCE_sum_1_sum,bureau_bb_MONTHS_BALANCE_sum_1_max,bureau_bb_MONTHS_BALANCE_sum_1_mean,bureau_bb_MONTHS_BALANCE_max_1_sum,bureau_bb_MONTHS_BALANCE_max_1_max,bureau_bb_MONTHS_BALANCE_max_1_mean,bureau_bb_MONTHS_BALANCE_mean_1_sum,bureau_bb_MONTHS_BALANCE_mean_1_max,bureau_bb_MONTHS_BALANCE_mean_1_mean,bureau_bb_COUNT_sum_1_sum,bureau_bb_COUNT_sum_1_max,bureau_bb_COUNT_sum_1_mean,bureau_bb_COUNT_max_1_sum,bureau_bb_COUNT_max_1_max,bureau_bb_COUNT_max_1_mean,bureau_bb_COUNT_mean_1_sum,bureau_bb_COUNT_mean_1_max,bureau_bb_COUNT_mean_1_mean,bureau_bb_MONTHS_BALANCE_sum_2_sum,bureau_bb_MONTHS_BALANCE_sum_2_max,bureau_bb_MONTHS_BALANCE_sum_2_mean,bureau_bb_MONTHS_BALANCE_max_2_sum,bureau_bb_MONTHS_BALANCE_max_2_max,bureau_bb_MONTHS_BALANCE_max_2_mean,bureau_bb_MONTHS_BALANCE_mean_2_sum,bureau_bb_MONTHS_BALANCE_mean_2_max,bureau_bb_MONTHS_BALANCE_mean_2_mean,bureau_bb_COUNT_sum_2_sum,bureau_bb_COUNT_sum_2_max,bureau_bb_COUNT_sum_2_mean,bureau_bb_COUNT_max_2_sum,bureau_bb_COUNT_max_2_max,bureau_bb_COUNT_max_2_mean,bureau_bb_COUNT_mean_2_sum,bureau_bb_COUNT_mean_2_max,bureau_bb_COUNT_mean_2_mean,bureau_bb_MONTHS_BALANCE_sum_3_sum,bureau_bb_MONTHS_BALANCE_sum_3_max,bureau_bb_MONTHS_BALANCE_sum_3_mean,bureau_bb_MONTHS_BALANCE_max_3_sum,bureau_bb_MONTHS_BALANCE_max_3_max,bureau_bb_MONTHS_BALANCE_max_3_mean,bureau_bb_MONTHS_BALANCE_mean_3_sum,bureau_bb_MONTHS_BALANCE_mean_3_max,bureau_bb_MONTHS_BALANCE_mean_3_mean,bureau_bb_COUNT_sum_3_sum,bureau_bb_COUNT_sum_3_max,bureau_bb_COUNT_sum_3_mean,bureau_bb_COUNT_max_3_sum,bureau_bb_COUNT_max_3_max,bureau_bb_COUNT_max_3_mean,bureau_bb_COUNT_mean_3_sum,bureau_bb_COUNT_mean_3_max,bureau_bb_COUNT_mean_3_mean,bureau_bb_MONTHS_BALANCE_sum_4_sum,bureau_bb_MONTHS_BALANCE_sum_4_max,bureau_bb_MONTHS_BALANCE_sum_4_mean,bureau_bb_MONTHS_BALANCE_max_4_sum,bureau_bb_MONTHS_BALANCE_max_4_max,bureau_bb_MONTHS_BALANCE_max_4_mean,bureau_bb_MONTHS_BALANCE_mean_4_sum,bureau_bb_MONTHS_BALANCE_mean_4_max,bureau_bb_MONTHS_BALANCE_mean_4_mean,bureau_bb_COUNT_sum_4_sum,bureau_bb_COUNT_sum_4_max,bureau_bb_COUNT_sum_4_mean,bureau_bb_COUNT_max_4_sum,bureau_bb_COUNT_max_4_max,bureau_bb_COUNT_max_4_mean,bureau_bb_COUNT_mean_4_sum,bureau_bb_COUNT_mean_4_max,bureau_bb_COUNT_mean_4_mean,bureau_bb_MONTHS_BALANCE_sum_5_sum,bureau_bb_MONTHS_BALANCE_sum_5_max,bureau_bb_MONTHS_BALANCE_sum_5_mean,bureau_bb_MONTHS_BALANCE_max_5_sum,bureau_bb_MONTHS_BALANCE_max_5_max,bureau_bb_MONTHS_BALANCE_max_5_mean,bureau_bb_MONTHS_BALANCE_mean_5_sum,bureau_bb_MONTHS_BALANCE_mean_5_max,bureau_bb_MONTHS_BALANCE_mean_5_mean,bureau_bb_COUNT_sum_5_sum,bureau_bb_COUNT_sum_5_max,bureau_bb_COUNT_sum_5_mean,bureau_bb_COUNT_max_5_sum,bureau_bb_COUNT_max_5_max,bureau_bb_COUNT_max_5_mean,bureau_bb_COUNT_mean_5_sum,bureau_bb_COUNT_mean_5_max,bureau_bb_COUNT_mean_5_mean,bureau_bb_MONTHS_BALANCE_sum_C_sum,bureau_bb_MONTHS_BALANCE_sum_C_max,bureau_bb_MONTHS_BALANCE_sum_C_mean,bureau_bb_MONTHS_BALANCE_max_C_sum,bureau_bb_MONTHS_BALANCE_max_C_max,bureau_bb_MONTHS_BALANCE_max_C_mean,bureau_bb_MONTHS_BALANCE_mean_C_sum,bureau_bb_MONTHS_BALANCE_mean_C_max,bureau_bb_MONTHS_BALANCE_mean_C_mean,bureau_bb_COUNT_sum_C_sum,bureau_bb_COUNT_sum_C_max,bureau_bb_COUNT_sum_C_mean,bureau_bb_COUNT_max_C_sum,bureau_bb_COUNT_max_C_max,bureau_bb_COUNT_max_C_mean,bureau_bb_COUNT_mean_C_sum,bureau_bb_COUNT_mean_C_max,bureau_bb_COUNT_mean_C_mean,bureau_bb_MONTHS_BALANCE_sum_X_sum,bureau_bb_MONTHS_BALANCE_sum_X_max,bureau_bb_MONTHS_BALANCE_sum_X_mean,bureau_bb_MONTHS_BALANCE_max_X_sum,bureau_bb_MONTHS_BALANCE_max_X_max,bureau_bb_MONTHS_BALANCE_max_X_mean,bureau_bb_MONTHS_BALANCE_mean_X_sum,bureau_bb_MONTHS_BALANCE_mean_X_max,bureau_bb_MONTHS_BALANCE_mean_X_mean,bureau_bb_COUNT_sum_X_sum,bureau_bb_COUNT_sum_X_max,bureau_bb_COUNT_sum_X_mean,bureau_bb_COUNT_max_X_sum,bureau_bb_COUNT_max_X_max,bureau_bb_COUNT_max_X_mean,bureau_bb_COUNT_mean_X_sum,bureau_bb_COUNT_mean_X_max,bureau_bb_COUNT_mean_X_mean,bureau_COUNT_sum,bureau_COUNT_max,bureau_COUNT_mean
SK_ID_CURR,CREDIT_ACTIVE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1
100001,Active,-928.0,-49,-309.333333,0,0,0.0,3092.0,1778.0,1030.0,0.0,,,0.0,,,0,0,0.0,884025.0,378000.0,294675.0,596686.5,373239.0,198895.5,0.0,0.0,0.0,0.0,0.0,0.0,-32,-6,-10.666667,24817.5,10822.5,8272.5,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,3,1,1
100001,Closed,-4217.0,-857,-1054.25,0,0,0.0,-2514.0,-179.0,-628.5,-3302.0,-544.0,-825.5,0.0,,,0,0,0.0,569340.0,279720.0,142335.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-620,-155,-155.0,0.0,0.0,0.0,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,4,1,1
100002,Active,-1145.0,-103,-572.5,0,0,0.0,780.0,780.0,780.0,0.0,,,40.5,40.5,40.5,0,0,0.0,481988.5625,450000.0,240994.28125,245781.0,245781.0,122890.5,31988.564453,31988.564453,15994.282227,0.0,0.0,0.0,-31,-7,-15.5,0.0,0.0,0.0,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,2,1,1
100002,Closed,-5847.0,-476,-974.5,0,0,0.0,-2874.0,85.0,-575.0,-4184.0,-36.0,-697.5,8364.644531,5043.64502,2091.161133,0,0,0.0,383067.0,135000.0,63844.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-3968,-34,-661.333333,0.0,0.0,0.0,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,6,1,1
100003,Active,-606.0,-606,-606.0,0,0,0.0,1216.0,1216.0,1216.0,0.0,,,0.0,0.0,0.0,0,0,0.0,810000.0,810000.0,810000.0,0.0,0.0,0.0,810000.0,810000.0,810000.0,0.0,0.0,0.0,-43,-43,-43.0,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,1,1,1


In [12]:
df1['CREDIT_ACTIVE'].value_counts()

Closed      1079273
Active       630607
Sold           6527
Bad debt         21
Name: CREDIT_ACTIVE, dtype: int64

In [13]:
df2_Active = df2.xs(key="Active",level=1)
df2_Active.columns = df2_Active.columns + "_Ac"

df2_Closed = df2.xs(key="Closed",level=1)
df2_Closed.columns = df2_Closed.columns + "_Cl"

df2_Sold= df2.xs(key="Sold",level=1)
df2_Sold.columns = df2_Sold.columns + "_So"

df2_Bad = df2.xs(key="Bad debt",level=1)
df2_Bad.columns = df2_Bad.columns + "_Ba"

In [14]:
df3= pd.merge(df2_Active, df2_Closed, left_index=True, right_index=True, how='outer')
df3= pd.merge(df3, df2_Sold, left_index=True, right_index=True, how='outer')
df3= pd.merge(df3, df2_Bad, left_index=True, right_index=True, how='outer')

In [15]:
bureau_data = df3.reset_index()
bureau_data.head()

Unnamed: 0,SK_ID_CURR,bureau_DAYS_CREDIT_sum_Ac,bureau_DAYS_CREDIT_max_Ac,bureau_DAYS_CREDIT_mean_Ac,bureau_CREDIT_DAY_OVERDUE_sum_Ac,bureau_CREDIT_DAY_OVERDUE_max_Ac,bureau_CREDIT_DAY_OVERDUE_mean_Ac,bureau_DAYS_CREDIT_ENDDATE_sum_Ac,bureau_DAYS_CREDIT_ENDDATE_max_Ac,bureau_DAYS_CREDIT_ENDDATE_mean_Ac,bureau_DAYS_ENDDATE_FACT_sum_Ac,bureau_DAYS_ENDDATE_FACT_max_Ac,bureau_DAYS_ENDDATE_FACT_mean_Ac,bureau_AMT_CREDIT_MAX_OVERDUE_sum_Ac,bureau_AMT_CREDIT_MAX_OVERDUE_max_Ac,bureau_AMT_CREDIT_MAX_OVERDUE_mean_Ac,bureau_CNT_CREDIT_PROLONG_sum_Ac,bureau_CNT_CREDIT_PROLONG_max_Ac,bureau_CNT_CREDIT_PROLONG_mean_Ac,bureau_AMT_CREDIT_SUM_sum_Ac,bureau_AMT_CREDIT_SUM_max_Ac,bureau_AMT_CREDIT_SUM_mean_Ac,bureau_AMT_CREDIT_SUM_DEBT_sum_Ac,bureau_AMT_CREDIT_SUM_DEBT_max_Ac,bureau_AMT_CREDIT_SUM_DEBT_mean_Ac,bureau_AMT_CREDIT_SUM_LIMIT_sum_Ac,bureau_AMT_CREDIT_SUM_LIMIT_max_Ac,bureau_AMT_CREDIT_SUM_LIMIT_mean_Ac,bureau_AMT_CREDIT_SUM_OVERDUE_sum_Ac,bureau_AMT_CREDIT_SUM_OVERDUE_max_Ac,bureau_AMT_CREDIT_SUM_OVERDUE_mean_Ac,bureau_DAYS_CREDIT_UPDATE_sum_Ac,bureau_DAYS_CREDIT_UPDATE_max_Ac,bureau_DAYS_CREDIT_UPDATE_mean_Ac,bureau_AMT_ANNUITY_sum_Ac,bureau_AMT_ANNUITY_max_Ac,bureau_AMT_ANNUITY_mean_Ac,bureau_bb_MONTHS_BALANCE_sum_0_sum_Ac,bureau_bb_MONTHS_BALANCE_sum_0_max_Ac,bureau_bb_MONTHS_BALANCE_sum_0_mean_Ac,bureau_bb_MONTHS_BALANCE_max_0_sum_Ac,bureau_bb_MONTHS_BALANCE_max_0_max_Ac,bureau_bb_MONTHS_BALANCE_max_0_mean_Ac,bureau_bb_MONTHS_BALANCE_mean_0_sum_Ac,bureau_bb_MONTHS_BALANCE_mean_0_max_Ac,bureau_bb_MONTHS_BALANCE_mean_0_mean_Ac,bureau_bb_COUNT_sum_0_sum_Ac,bureau_bb_COUNT_sum_0_max_Ac,bureau_bb_COUNT_sum_0_mean_Ac,bureau_bb_COUNT_max_0_sum_Ac,bureau_bb_COUNT_max_0_max_Ac,bureau_bb_COUNT_max_0_mean_Ac,bureau_bb_COUNT_mean_0_sum_Ac,bureau_bb_COUNT_mean_0_max_Ac,bureau_bb_COUNT_mean_0_mean_Ac,bureau_bb_MONTHS_BALANCE_sum_1_sum_Ac,bureau_bb_MONTHS_BALANCE_sum_1_max_Ac,bureau_bb_MONTHS_BALANCE_sum_1_mean_Ac,bureau_bb_MONTHS_BALANCE_max_1_sum_Ac,bureau_bb_MONTHS_BALANCE_max_1_max_Ac,bureau_bb_MONTHS_BALANCE_max_1_mean_Ac,bureau_bb_MONTHS_BALANCE_mean_1_sum_Ac,bureau_bb_MONTHS_BALANCE_mean_1_max_Ac,bureau_bb_MONTHS_BALANCE_mean_1_mean_Ac,bureau_bb_COUNT_sum_1_sum_Ac,bureau_bb_COUNT_sum_1_max_Ac,bureau_bb_COUNT_sum_1_mean_Ac,bureau_bb_COUNT_max_1_sum_Ac,bureau_bb_COUNT_max_1_max_Ac,bureau_bb_COUNT_max_1_mean_Ac,bureau_bb_COUNT_mean_1_sum_Ac,bureau_bb_COUNT_mean_1_max_Ac,bureau_bb_COUNT_mean_1_mean_Ac,bureau_bb_MONTHS_BALANCE_sum_2_sum_Ac,bureau_bb_MONTHS_BALANCE_sum_2_max_Ac,bureau_bb_MONTHS_BALANCE_sum_2_mean_Ac,bureau_bb_MONTHS_BALANCE_max_2_sum_Ac,bureau_bb_MONTHS_BALANCE_max_2_max_Ac,bureau_bb_MONTHS_BALANCE_max_2_mean_Ac,bureau_bb_MONTHS_BALANCE_mean_2_sum_Ac,bureau_bb_MONTHS_BALANCE_mean_2_max_Ac,bureau_bb_MONTHS_BALANCE_mean_2_mean_Ac,bureau_bb_COUNT_sum_2_sum_Ac,bureau_bb_COUNT_sum_2_max_Ac,bureau_bb_COUNT_sum_2_mean_Ac,bureau_bb_COUNT_max_2_sum_Ac,bureau_bb_COUNT_max_2_max_Ac,bureau_bb_COUNT_max_2_mean_Ac,bureau_bb_COUNT_mean_2_sum_Ac,bureau_bb_COUNT_mean_2_max_Ac,bureau_bb_COUNT_mean_2_mean_Ac,bureau_bb_MONTHS_BALANCE_sum_3_sum_Ac,bureau_bb_MONTHS_BALANCE_sum_3_max_Ac,bureau_bb_MONTHS_BALANCE_sum_3_mean_Ac,bureau_bb_MONTHS_BALANCE_max_3_sum_Ac,bureau_bb_MONTHS_BALANCE_max_3_max_Ac,bureau_bb_MONTHS_BALANCE_max_3_mean_Ac,bureau_bb_MONTHS_BALANCE_mean_3_sum_Ac,bureau_bb_MONTHS_BALANCE_mean_3_max_Ac,bureau_bb_MONTHS_BALANCE_mean_3_mean_Ac,bureau_bb_COUNT_sum_3_sum_Ac,bureau_bb_COUNT_sum_3_max_Ac,bureau_bb_COUNT_sum_3_mean_Ac,bureau_bb_COUNT_max_3_sum_Ac,bureau_bb_COUNT_max_3_max_Ac,bureau_bb_COUNT_max_3_mean_Ac,bureau_bb_COUNT_mean_3_sum_Ac,bureau_bb_COUNT_mean_3_max_Ac,bureau_bb_COUNT_mean_3_mean_Ac,bureau_bb_MONTHS_BALANCE_sum_4_sum_Ac,bureau_bb_MONTHS_BALANCE_sum_4_max_Ac,bureau_bb_MONTHS_BALANCE_sum_4_mean_Ac,bureau_bb_MONTHS_BALANCE_max_4_sum_Ac,bureau_bb_MONTHS_BALANCE_max_4_max_Ac,bureau_bb_MONTHS_BALANCE_max_4_mean_Ac,bureau_bb_MONTHS_BALANCE_mean_4_sum_Ac,bureau_bb_MONTHS_BALANCE_mean_4_max_Ac,bureau_bb_MONTHS_BALANCE_mean_4_mean_Ac,bureau_bb_COUNT_sum_4_sum_Ac,bureau_bb_COUNT_sum_4_max_Ac,bureau_bb_COUNT_sum_4_mean_Ac,bureau_bb_COUNT_max_4_sum_Ac,bureau_bb_COUNT_max_4_max_Ac,bureau_bb_COUNT_max_4_mean_Ac,bureau_bb_COUNT_mean_4_sum_Ac,bureau_bb_COUNT_mean_4_max_Ac,bureau_bb_COUNT_mean_4_mean_Ac,bureau_bb_MONTHS_BALANCE_sum_5_sum_Ac,bureau_bb_MONTHS_BALANCE_sum_5_max_Ac,bureau_bb_MONTHS_BALANCE_sum_5_mean_Ac,bureau_bb_MONTHS_BALANCE_max_5_sum_Ac,bureau_bb_MONTHS_BALANCE_max_5_max_Ac,bureau_bb_MONTHS_BALANCE_max_5_mean_Ac,bureau_bb_MONTHS_BALANCE_mean_5_sum_Ac,bureau_bb_MONTHS_BALANCE_mean_5_max_Ac,bureau_bb_MONTHS_BALANCE_mean_5_mean_Ac,bureau_bb_COUNT_sum_5_sum_Ac,bureau_bb_COUNT_sum_5_max_Ac,bureau_bb_COUNT_sum_5_mean_Ac,bureau_bb_COUNT_max_5_sum_Ac,bureau_bb_COUNT_max_5_max_Ac,bureau_bb_COUNT_max_5_mean_Ac,bureau_bb_COUNT_mean_5_sum_Ac,bureau_bb_COUNT_mean_5_max_Ac,bureau_bb_COUNT_mean_5_mean_Ac,bureau_bb_MONTHS_BALANCE_sum_C_sum_Ac,bureau_bb_MONTHS_BALANCE_sum_C_max_Ac,bureau_bb_MONTHS_BALANCE_sum_C_mean_Ac,bureau_bb_MONTHS_BALANCE_max_C_sum_Ac,bureau_bb_MONTHS_BALANCE_max_C_max_Ac,...,bureau_AMT_ANNUITY_sum_Ba,bureau_AMT_ANNUITY_max_Ba,bureau_AMT_ANNUITY_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_0_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_0_max_Ba,bureau_bb_MONTHS_BALANCE_sum_0_mean_Ba,bureau_bb_MONTHS_BALANCE_max_0_sum_Ba,bureau_bb_MONTHS_BALANCE_max_0_max_Ba,bureau_bb_MONTHS_BALANCE_max_0_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_0_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_0_max_Ba,bureau_bb_MONTHS_BALANCE_mean_0_mean_Ba,bureau_bb_COUNT_sum_0_sum_Ba,bureau_bb_COUNT_sum_0_max_Ba,bureau_bb_COUNT_sum_0_mean_Ba,bureau_bb_COUNT_max_0_sum_Ba,bureau_bb_COUNT_max_0_max_Ba,bureau_bb_COUNT_max_0_mean_Ba,bureau_bb_COUNT_mean_0_sum_Ba,bureau_bb_COUNT_mean_0_max_Ba,bureau_bb_COUNT_mean_0_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_1_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_1_max_Ba,bureau_bb_MONTHS_BALANCE_sum_1_mean_Ba,bureau_bb_MONTHS_BALANCE_max_1_sum_Ba,bureau_bb_MONTHS_BALANCE_max_1_max_Ba,bureau_bb_MONTHS_BALANCE_max_1_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_1_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_1_max_Ba,bureau_bb_MONTHS_BALANCE_mean_1_mean_Ba,bureau_bb_COUNT_sum_1_sum_Ba,bureau_bb_COUNT_sum_1_max_Ba,bureau_bb_COUNT_sum_1_mean_Ba,bureau_bb_COUNT_max_1_sum_Ba,bureau_bb_COUNT_max_1_max_Ba,bureau_bb_COUNT_max_1_mean_Ba,bureau_bb_COUNT_mean_1_sum_Ba,bureau_bb_COUNT_mean_1_max_Ba,bureau_bb_COUNT_mean_1_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_2_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_2_max_Ba,bureau_bb_MONTHS_BALANCE_sum_2_mean_Ba,bureau_bb_MONTHS_BALANCE_max_2_sum_Ba,bureau_bb_MONTHS_BALANCE_max_2_max_Ba,bureau_bb_MONTHS_BALANCE_max_2_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_2_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_2_max_Ba,bureau_bb_MONTHS_BALANCE_mean_2_mean_Ba,bureau_bb_COUNT_sum_2_sum_Ba,bureau_bb_COUNT_sum_2_max_Ba,bureau_bb_COUNT_sum_2_mean_Ba,bureau_bb_COUNT_max_2_sum_Ba,bureau_bb_COUNT_max_2_max_Ba,bureau_bb_COUNT_max_2_mean_Ba,bureau_bb_COUNT_mean_2_sum_Ba,bureau_bb_COUNT_mean_2_max_Ba,bureau_bb_COUNT_mean_2_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_3_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_3_max_Ba,bureau_bb_MONTHS_BALANCE_sum_3_mean_Ba,bureau_bb_MONTHS_BALANCE_max_3_sum_Ba,bureau_bb_MONTHS_BALANCE_max_3_max_Ba,bureau_bb_MONTHS_BALANCE_max_3_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_3_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_3_max_Ba,bureau_bb_MONTHS_BALANCE_mean_3_mean_Ba,bureau_bb_COUNT_sum_3_sum_Ba,bureau_bb_COUNT_sum_3_max_Ba,bureau_bb_COUNT_sum_3_mean_Ba,bureau_bb_COUNT_max_3_sum_Ba,bureau_bb_COUNT_max_3_max_Ba,bureau_bb_COUNT_max_3_mean_Ba,bureau_bb_COUNT_mean_3_sum_Ba,bureau_bb_COUNT_mean_3_max_Ba,bureau_bb_COUNT_mean_3_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_4_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_4_max_Ba,bureau_bb_MONTHS_BALANCE_sum_4_mean_Ba,bureau_bb_MONTHS_BALANCE_max_4_sum_Ba,bureau_bb_MONTHS_BALANCE_max_4_max_Ba,bureau_bb_MONTHS_BALANCE_max_4_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_4_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_4_max_Ba,bureau_bb_MONTHS_BALANCE_mean_4_mean_Ba,bureau_bb_COUNT_sum_4_sum_Ba,bureau_bb_COUNT_sum_4_max_Ba,bureau_bb_COUNT_sum_4_mean_Ba,bureau_bb_COUNT_max_4_sum_Ba,bureau_bb_COUNT_max_4_max_Ba,bureau_bb_COUNT_max_4_mean_Ba,bureau_bb_COUNT_mean_4_sum_Ba,bureau_bb_COUNT_mean_4_max_Ba,bureau_bb_COUNT_mean_4_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_5_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_5_max_Ba,bureau_bb_MONTHS_BALANCE_sum_5_mean_Ba,bureau_bb_MONTHS_BALANCE_max_5_sum_Ba,bureau_bb_MONTHS_BALANCE_max_5_max_Ba,bureau_bb_MONTHS_BALANCE_max_5_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_5_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_5_max_Ba,bureau_bb_MONTHS_BALANCE_mean_5_mean_Ba,bureau_bb_COUNT_sum_5_sum_Ba,bureau_bb_COUNT_sum_5_max_Ba,bureau_bb_COUNT_sum_5_mean_Ba,bureau_bb_COUNT_max_5_sum_Ba,bureau_bb_COUNT_max_5_max_Ba,bureau_bb_COUNT_max_5_mean_Ba,bureau_bb_COUNT_mean_5_sum_Ba,bureau_bb_COUNT_mean_5_max_Ba,bureau_bb_COUNT_mean_5_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_C_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_C_max_Ba,bureau_bb_MONTHS_BALANCE_sum_C_mean_Ba,bureau_bb_MONTHS_BALANCE_max_C_sum_Ba,bureau_bb_MONTHS_BALANCE_max_C_max_Ba,bureau_bb_MONTHS_BALANCE_max_C_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_C_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_C_max_Ba,bureau_bb_MONTHS_BALANCE_mean_C_mean_Ba,bureau_bb_COUNT_sum_C_sum_Ba,bureau_bb_COUNT_sum_C_max_Ba,bureau_bb_COUNT_sum_C_mean_Ba,bureau_bb_COUNT_max_C_sum_Ba,bureau_bb_COUNT_max_C_max_Ba,bureau_bb_COUNT_max_C_mean_Ba,bureau_bb_COUNT_mean_C_sum_Ba,bureau_bb_COUNT_mean_C_max_Ba,bureau_bb_COUNT_mean_C_mean_Ba,bureau_bb_MONTHS_BALANCE_sum_X_sum_Ba,bureau_bb_MONTHS_BALANCE_sum_X_max_Ba,bureau_bb_MONTHS_BALANCE_sum_X_mean_Ba,bureau_bb_MONTHS_BALANCE_max_X_sum_Ba,bureau_bb_MONTHS_BALANCE_max_X_max_Ba,bureau_bb_MONTHS_BALANCE_max_X_mean_Ba,bureau_bb_MONTHS_BALANCE_mean_X_sum_Ba,bureau_bb_MONTHS_BALANCE_mean_X_max_Ba,bureau_bb_MONTHS_BALANCE_mean_X_mean_Ba,bureau_bb_COUNT_sum_X_sum_Ba,bureau_bb_COUNT_sum_X_max_Ba,bureau_bb_COUNT_sum_X_mean_Ba,bureau_bb_COUNT_max_X_sum_Ba,bureau_bb_COUNT_max_X_max_Ba,bureau_bb_COUNT_max_X_mean_Ba,bureau_bb_COUNT_mean_X_sum_Ba,bureau_bb_COUNT_mean_X_max_Ba,bureau_bb_COUNT_mean_X_mean_Ba,bureau_COUNT_sum_Ba,bureau_COUNT_max_Ba,bureau_COUNT_mean_Ba
0,100001,-928.0,-49.0,-309.333333,0.0,0.0,0.0,3092.0,1778.0,1030.0,0.0,,,0.0,,,0.0,0.0,0.0,884025.0,378000.0,294675.0,596686.5,373239.0,198895.5,0.0,0.0,0.0,0.0,0.0,0.0,-32.0,-6.0,-10.666667,24817.5,10822.5,8272.5,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,100002,-1145.0,-103.0,-572.5,0.0,0.0,0.0,780.0,780.0,780.0,0.0,,,40.5,40.5,40.5,0.0,0.0,0.0,481988.5625,450000.0,240994.28125,245781.0,245781.0,122890.5,31988.564453,31988.564453,15994.282227,0.0,0.0,0.0,-31.0,-7.0,-15.5,0.0,0.0,0.0,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,100003,-606.0,-606.0,-606.0,0.0,0.0,0.0,1216.0,1216.0,1216.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,810000.0,810000.0,810000.0,0.0,0.0,0.0,810000.0,810000.0,810000.0,0.0,0.0,0.0,-43.0,-43.0,-43.0,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,100004,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,100005,-199.0,-62.0,-99.5,0.0,0.0,0.0,1446.0,1324.0,723.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,598626.0,568800.0,299313.0,568408.5,543087.0,284204.25,0.0,0.0,0.0,0.0,0.0,0.0,-42.0,-11.0,-21.0,4261.5,4261.5,2130.75,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,,0.0,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [16]:
#save pkl
save_as_pickled_object(bureau_data, '/Volumes/sub/kaggle/pkl/Home Credit Default Risk/bureau_data.pkl')