In [1]:
# Full width
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os, math, subprocess
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from IPython.display import display

# 
from lib_modeling import *
from lib_feature_engineering import *

# some settings for displaying Pandas results
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.precision', 4)
pd.set_option('display.max_colwidth', -1)

# Load data

In [4]:
# load train data
data_path = "home-credit-default-risk/application_train.csv"
pdf_train = pd.read_csv(data_path)

# filter by tvt code
pdf_tvt_extend = pd.read_pickle("pdf_tvt_extend.pkl", compression="bz2")
pdf_train_filtered = (pdf_tvt_extend.query("tvt_code == 'train'")
                      .merge(pdf_train[["SK_ID_CURR"]], on="SK_ID_CURR")
                      .drop(columns=["tvt_code"]))
pdf_train_filtered.head()

Unnamed: 0,SK_ID_CURR,TARGET
0,100002,1
1,100003,0
2,100004,0
3,100006,0
4,100007,0


In [5]:
# load bureau
data_path = "home-credit-default-risk/bureau.csv"
pdf_data = pd.read_csv(data_path)

# Preprocess data

## onehot and frequency

- CREDIT_ACTIVE
- CREDIT_CURRENCY

In [6]:
%%time
dict_feat = {
    "CREDIT_ACTIVE": ['Closed', 'Active', 'Sold', 'Bad debt'],
    "CREDIT_CURRENCY": ['currency 1', 'currency 2', 'currency 3', 'currency 4'],
    "CREDIT_TYPE": ['Consumer credit', 'Credit card', 'Car loan', 'Mortgage', 'Microloan', 'Loan for business development', 'Another type of loan', 'Unknown type of loan', 'Loan for working capital replenishment', 'Cash loan (non-earmarked)', 'Real estate loan', 'Loan for the purchase of equipment', 'Loan for purchase of shares (margin lending)', 'Interbank credit', 'Mobile operator loan'],
}
pdf_onehot = gen_one_hot_feat(pdf_data, dict_feat)

CPU times: user 55 s, sys: 1.99 s, total: 57 s
Wall time: 15.9 s


In [23]:
pdf_agg01 = agg_common_data(pdf_onehot, ["max", "sum", "mean", "std"])

{'CREDIT_ACTIVE_Active': ['max', 'sum', 'mean', 'std'],
 'CREDIT_ACTIVE_Bad_debt': ['max', 'sum', 'mean', 'std'],
 'CREDIT_ACTIVE_Closed': ['max', 'sum', 'mean', 'std'],
 'CREDIT_ACTIVE_Sold': ['max', 'sum', 'mean', 'std'],
 'CREDIT_CURRENCY_currency_1': ['max', 'sum', 'mean', 'std'],
 'CREDIT_CURRENCY_currency_2': ['max', 'sum', 'mean', 'std'],
 'CREDIT_CURRENCY_currency_3': ['max', 'sum', 'mean', 'std'],
 'CREDIT_CURRENCY_currency_4': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Another_type_of_loan': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Car_loan': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Cash_loan_(non_earmarked)': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Consumer_credit': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Credit_card': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Interbank_credit': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Loan_for_business_development': ['max', 'sum', 'mean', 'std'],
 'CREDIT_TYPE_Loan_for_purchase_of_shares_(margin_lending)': ['max',

After agg: (305811, 92)


In [24]:
%%time
eval_agg01 = feature_evaluate(pdf_train_filtered, pdf_agg01.reset_index())
display(eval_agg01)

Unnamed: 0,name,auc,corr,coverage
6,CREDIT_ACTIVE_Closed_mean,0.5881,-0.081219,1.0
74,CREDIT_ACTIVE_Active_mean,0.5856,0.078816,1.0
73,CREDIT_ACTIVE_Active_sum,0.56,0.066235,1.0
5,CREDIT_ACTIVE_Closed_sum,0.549,-0.031939,1.0
11,CREDIT_TYPE_Credit_card_std,0.5381,0.031412,0.8631
10,CREDIT_TYPE_Credit_card_mean,0.5377,0.035883,1.0
83,CREDIT_TYPE_Consumer_credit_std,0.5322,0.026608,0.8631
9,CREDIT_TYPE_Credit_card_sum,0.5306,0.035684,1.0
82,CREDIT_TYPE_Consumer_credit_mean,0.5303,-0.026911,1.0
4,CREDIT_ACTIVE_Closed_max,0.5303,-0.048711,1.0


CPU times: user 8.73 s, sys: 586 ms, total: 9.32 s
Wall time: 5.02 s


In [25]:
eval_agg01.query("auc <= 0.501").shape

(59, 4)

In [26]:
sel_feat = eval_agg01.query("auc > 0.501")["name"].tolist()
pdf_agg01 = pdf_agg01[sel_feat]
print(pdf_agg01.shape)

(305811, 33)


## days to years

- DAYS_CREDIT
- CREDIT_DAY_OVERDUE
- DAYS_CREDIT_UPDATE
- DAYS_CREDIT_ENDDATE
- DAYS_ENDDATE_FACT

In [27]:
def days_to_years(pdf_input, ls_cols):
    pdf_out = pdf_input[["SK_ID_CURR"] + ls_cols].copy()
    col_out = []
    for cname in ls_cols:
        new_name = "{}_TO_YEARS".format(cname)
        col_out.append(new_name)
        pdf_out[new_name] = pdf_out["DAYS_CREDIT"] / -365
        
    return pdf_out[["SK_ID_CURR"] + col_out]

In [28]:
ls_cols = ["DAYS_CREDIT", "CREDIT_DAY_OVERDUE", "DAYS_CREDIT_UPDATE", "DAYS_CREDIT_ENDDATE", "DAYS_ENDDATE_FACT"]
pdf_years = days_to_years(pdf_data, ls_cols)
pdf_years.head()

Unnamed: 0,SK_ID_CURR,DAYS_CREDIT_TO_YEARS,CREDIT_DAY_OVERDUE_TO_YEARS,DAYS_CREDIT_UPDATE_TO_YEARS,DAYS_CREDIT_ENDDATE_TO_YEARS,DAYS_ENDDATE_FACT_TO_YEARS
0,215354,1.3616,1.3616,1.3616,1.3616,1.3616
1,215354,0.5699,0.5699,0.5699,0.5699,0.5699
2,215354,0.5562,0.5562,0.5562,0.5562,0.5562
3,215354,0.5562,0.5562,0.5562,0.5562,0.5562
4,215354,1.7233,1.7233,1.7233,1.7233,1.7233


In [29]:
pdf_agg02 = agg_common_data(pdf_years, ["min", "max", "mean", "std", "median"])
pdf_agg02.head()

{'CREDIT_DAY_OVERDUE_TO_YEARS': ['min', 'max', 'mean', 'std', 'median'],
 'DAYS_CREDIT_ENDDATE_TO_YEARS': ['min', 'max', 'mean', 'std', 'median'],
 'DAYS_CREDIT_TO_YEARS': ['min', 'max', 'mean', 'std', 'median'],
 'DAYS_CREDIT_UPDATE_TO_YEARS': ['min', 'max', 'mean', 'std', 'median'],
 'DAYS_ENDDATE_FACT_TO_YEARS': ['min', 'max', 'mean', 'std', 'median']}

After agg: (305811, 25)


Unnamed: 0_level_0,DAYS_CREDIT_UPDATE_TO_YEARS_min,DAYS_CREDIT_UPDATE_TO_YEARS_max,DAYS_CREDIT_UPDATE_TO_YEARS_mean,DAYS_CREDIT_UPDATE_TO_YEARS_std,DAYS_CREDIT_UPDATE_TO_YEARS_median,DAYS_CREDIT_ENDDATE_TO_YEARS_min,DAYS_CREDIT_ENDDATE_TO_YEARS_max,DAYS_CREDIT_ENDDATE_TO_YEARS_mean,DAYS_CREDIT_ENDDATE_TO_YEARS_std,DAYS_CREDIT_ENDDATE_TO_YEARS_median,DAYS_ENDDATE_FACT_TO_YEARS_min,DAYS_ENDDATE_FACT_TO_YEARS_max,DAYS_ENDDATE_FACT_TO_YEARS_mean,DAYS_ENDDATE_FACT_TO_YEARS_std,DAYS_ENDDATE_FACT_TO_YEARS_median,DAYS_CREDIT_TO_YEARS_min,DAYS_CREDIT_TO_YEARS_max,DAYS_CREDIT_TO_YEARS_mean,DAYS_CREDIT_TO_YEARS_std,DAYS_CREDIT_TO_YEARS_median,CREDIT_DAY_OVERDUE_TO_YEARS_min,CREDIT_DAY_OVERDUE_TO_YEARS_max,CREDIT_DAY_OVERDUE_TO_YEARS_mean,CREDIT_DAY_OVERDUE_TO_YEARS_std,CREDIT_DAY_OVERDUE_TO_YEARS_median
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
100001,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479
100002,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562
100003,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027
100004,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753
100005,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753


In [30]:
eval_agg02 = feature_evaluate(pdf_train_filtered, pdf_agg02.reset_index())
display(eval_agg02)

Unnamed: 0,name,auc,corr,coverage
12,DAYS_ENDDATE_FACT_TO_YEARS_mean,0.6043,-0.0912,1.0
2,DAYS_CREDIT_UPDATE_TO_YEARS_mean,0.6043,-0.0912,1.0
22,CREDIT_DAY_OVERDUE_TO_YEARS_mean,0.6043,-0.0912,1.0
17,DAYS_CREDIT_TO_YEARS_mean,0.6043,-0.0912,1.0
7,DAYS_CREDIT_ENDDATE_TO_YEARS_mean,0.6043,-0.0912,1.0
9,DAYS_CREDIT_ENDDATE_TO_YEARS_median,0.6042,-0.0877,1.0
19,DAYS_CREDIT_TO_YEARS_median,0.6042,-0.0877,1.0
14,DAYS_ENDDATE_FACT_TO_YEARS_median,0.6042,-0.0877,1.0
24,CREDIT_DAY_OVERDUE_TO_YEARS_median,0.6042,-0.0877,1.0
4,DAYS_CREDIT_UPDATE_TO_YEARS_median,0.6042,-0.0877,1.0


In [31]:
eval_agg02.query("auc <= 0.501").shape

(0, 4)

## keep columns

In [32]:
ls_cols = [cname for cname in pdf_data.columns if "AMT" in cname] + ["CNT_CREDIT_PROLONG"]
pdf_amt = pdf_data[["SK_ID_CURR"] + ls_cols]
pdf_amt.head()

Unnamed: 0,SK_ID_CURR,AMT_CREDIT_MAX_OVERDUE,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,AMT_ANNUITY,CNT_CREDIT_PROLONG
0,215354,,91323.0,0.0,,0.0,,0
1,215354,,225000.0,171342.0,,0.0,,0
2,215354,,464323.5,,,0.0,,0
3,215354,,90000.0,,,0.0,,0
4,215354,77674.5,2700000.0,,,0.0,,0


In [33]:
pdf_agg03 = agg_common_data(pdf_amt, ["min", "max", "mean", "std", "median"])

{'AMT_ANNUITY': ['min', 'max', 'mean', 'std', 'median'],
 'AMT_CREDIT_MAX_OVERDUE': ['min', 'max', 'mean', 'std', 'median'],
 'AMT_CREDIT_SUM': ['min', 'max', 'mean', 'std', 'median'],
 'AMT_CREDIT_SUM_DEBT': ['min', 'max', 'mean', 'std', 'median'],
 'AMT_CREDIT_SUM_LIMIT': ['min', 'max', 'mean', 'std', 'median'],
 'AMT_CREDIT_SUM_OVERDUE': ['min', 'max', 'mean', 'std', 'median'],
 'CNT_CREDIT_PROLONG': ['min', 'max', 'mean', 'std', 'median']}

After agg: (305811, 35)


In [34]:
eval_agg03 = feature_evaluate(pdf_train_filtered, pdf_agg03.reset_index())
display(eval_agg03)

Unnamed: 0,name,auc,corr,coverage
24,AMT_CREDIT_SUM_DEBT_median,0.5582,0.0022,0.9721
22,AMT_CREDIT_SUM_DEBT_mean,0.5578,-0.0004,0.9721
13,AMT_CREDIT_MAX_OVERDUE_std,0.5506,0.0004,0.4506
23,AMT_CREDIT_SUM_DEBT_std,0.5497,-0.0039,0.8123
12,AMT_CREDIT_MAX_OVERDUE_mean,0.5477,0.008,0.6988
11,AMT_CREDIT_MAX_OVERDUE_max,0.5456,0.0048,0.6988
21,AMT_CREDIT_SUM_DEBT_max,0.5452,-0.0029,0.9721
14,AMT_CREDIT_MAX_OVERDUE_median,0.5331,0.0077,0.6988
18,AMT_CREDIT_SUM_LIMIT_std,0.5309,-0.013,0.7283
34,AMT_ANNUITY_median,0.5283,-0.0011,0.3042


In [36]:
eval_agg03.query("auc <= 0.501").shape

(4, 4)

In [37]:
sel_feat = eval_agg03.query("auc > 0.501")["name"].tolist()
pdf_agg03 = pdf_agg03[sel_feat]
print(pdf_agg03.shape)

(305811, 31)


# save features

In [38]:
pdf_feat = pdf_agg01.join(pdf_agg02).join(pdf_agg03)
print(pdf_feat.shape)
pdf_feat.head()

(305811, 89)


Unnamed: 0_level_0,CREDIT_ACTIVE_Closed_mean,CREDIT_ACTIVE_Active_mean,CREDIT_ACTIVE_Active_sum,CREDIT_ACTIVE_Closed_sum,CREDIT_TYPE_Credit_card_std,CREDIT_TYPE_Credit_card_mean,CREDIT_TYPE_Consumer_credit_std,CREDIT_TYPE_Credit_card_sum,CREDIT_TYPE_Consumer_credit_mean,CREDIT_ACTIVE_Closed_max,CREDIT_ACTIVE_Active_max,CREDIT_TYPE_Consumer_credit_sum,CREDIT_ACTIVE_Active_std,CREDIT_TYPE_Credit_card_max,CREDIT_ACTIVE_Closed_std,CREDIT_TYPE_Microloan_std,CREDIT_TYPE_Car_loan_std,CREDIT_TYPE_Microloan_sum,CREDIT_TYPE_Microloan_mean,CREDIT_TYPE_Car_loan_mean,CREDIT_TYPE_Microloan_max,CREDIT_TYPE_Car_loan_sum,CREDIT_TYPE_Car_loan_max,CREDIT_TYPE_Mortgage_std,CREDIT_TYPE_Mortgage_mean,CREDIT_TYPE_Mortgage_sum,CREDIT_TYPE_Mortgage_max,CREDIT_CURRENCY_currency_1_sum,CREDIT_TYPE_Consumer_credit_max,CREDIT_ACTIVE_Sold_mean,CREDIT_ACTIVE_Sold_std,CREDIT_ACTIVE_Sold_max,CREDIT_ACTIVE_Sold_sum,DAYS_CREDIT_UPDATE_TO_YEARS_min,DAYS_CREDIT_UPDATE_TO_YEARS_max,DAYS_CREDIT_UPDATE_TO_YEARS_mean,DAYS_CREDIT_UPDATE_TO_YEARS_std,DAYS_CREDIT_UPDATE_TO_YEARS_median,DAYS_CREDIT_ENDDATE_TO_YEARS_min,DAYS_CREDIT_ENDDATE_TO_YEARS_max,DAYS_CREDIT_ENDDATE_TO_YEARS_mean,DAYS_CREDIT_ENDDATE_TO_YEARS_std,DAYS_CREDIT_ENDDATE_TO_YEARS_median,DAYS_ENDDATE_FACT_TO_YEARS_min,DAYS_ENDDATE_FACT_TO_YEARS_max,DAYS_ENDDATE_FACT_TO_YEARS_mean,DAYS_ENDDATE_FACT_TO_YEARS_std,DAYS_ENDDATE_FACT_TO_YEARS_median,DAYS_CREDIT_TO_YEARS_min,DAYS_CREDIT_TO_YEARS_max,DAYS_CREDIT_TO_YEARS_mean,DAYS_CREDIT_TO_YEARS_std,DAYS_CREDIT_TO_YEARS_median,CREDIT_DAY_OVERDUE_TO_YEARS_min,CREDIT_DAY_OVERDUE_TO_YEARS_max,CREDIT_DAY_OVERDUE_TO_YEARS_mean,CREDIT_DAY_OVERDUE_TO_YEARS_std,CREDIT_DAY_OVERDUE_TO_YEARS_median,AMT_CREDIT_SUM_DEBT_median,AMT_CREDIT_SUM_DEBT_mean,AMT_CREDIT_MAX_OVERDUE_std,AMT_CREDIT_SUM_DEBT_std,AMT_CREDIT_MAX_OVERDUE_mean,AMT_CREDIT_MAX_OVERDUE_max,AMT_CREDIT_SUM_DEBT_max,AMT_CREDIT_MAX_OVERDUE_median,AMT_CREDIT_SUM_LIMIT_std,AMT_ANNUITY_median,AMT_CREDIT_SUM_DEBT_min,AMT_CREDIT_SUM_LIMIT_max,AMT_CREDIT_SUM_LIMIT_mean,AMT_ANNUITY_mean,AMT_CREDIT_SUM_median,AMT_CREDIT_SUM_min,AMT_ANNUITY_std,AMT_ANNUITY_min,AMT_CREDIT_SUM_mean,AMT_CREDIT_MAX_OVERDUE_min,AMT_ANNUITY_max,AMT_CREDIT_SUM_max,AMT_CREDIT_SUM_OVERDUE_std,AMT_CREDIT_SUM_OVERDUE_mean,AMT_CREDIT_SUM_OVERDUE_max,AMT_CREDIT_SUM_LIMIT_median,AMT_CREDIT_SUM_std,CNT_CREDIT_PROLONG_std,CNT_CREDIT_PROLONG_max,CNT_CREDIT_PROLONG_mean,AMT_CREDIT_SUM_OVERDUE_median
SK_ID_CURR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1
100001,0.5714,0.4286,3,4,0.0,0.0,0.0,0,1.0,1,1,7,0.5345,0,0.5345,0.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,7,1,0.0,0.0,0,0,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479,0.1342,4.3068,2.0137,1.3423,2.3479,0.0,85240.9286,,137485.6311,,,373239.0,,0.0,0.0,0.0,0.0,0.0,3545.3571,168345.0,85500.0,4800.6075,0.0,207623.5714,,10822.5,378000.0,0.0,0.0,0.0,0.0,122544.5445,0.0,0,0.0,0.0
100002,0.75,0.25,2,6,0.5345,0.5,0.5345,4,0.5,1,1,4,0.4629,1,0.4629,0.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,8,1,0.0,0.0,0,0,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562,0.2822,3.937,2.3945,1.1821,2.8562,0.0,49156.2,2363.2469,109916.6047,1681.029,5043.645,245781.0,40.5,15994.2825,0.0,0.0,31988.565,7997.1412,0.0,54130.5,0.0,0.0,0.0,108131.9456,0.0,0.0,450000.0,0.0,0.0,0.0,0.0,146075.5574,0.0,0,0.0,0.0
100003,0.75,0.25,1,3,0.5774,0.5,0.5774,2,0.5,1,1,2,0.5,1,0.5,0.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,4,1,0.0,0.0,0,0,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027,1.6603,7.0849,3.8377,2.4927,3.3027,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,405000.0,,0.0,810000.0,202500.0,,92576.25,22248.0,,,254350.125,0.0,,810000.0,0.0,0.0,0.0,0.0,372269.4655,0.0,0,0.0,0.0
100004,1.0,0.0,0,2,0.0,0.0,0.0,0,1.0,1,0,2,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,2,1,0.0,0.0,0,0,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753,1.1178,3.6329,2.3753,1.7784,2.3753,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,,94518.9,94500.0,,,94518.9,0.0,,94537.8,0.0,0.0,0.0,0.0,26.7286,0.0,0,0.0,0.0
100005,0.3333,0.6667,2,1,0.5774,0.3333,0.5774,1,0.6667,1,1,2,0.5774,1,0.5774,0.0,0.0,0,0.0,0.0,0,0,0,0.0,0.0,0,0,3,1,0.0,0.0,0,0,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753,0.1699,1.0219,0.5224,0.4446,0.3753,25321.5,189469.5,,306503.339,0.0,0.0,543087.0,0.0,0.0,0.0,0.0,0.0,0.0,1420.5,58500.0,29826.0,2460.3782,0.0,219042.0,0.0,4261.5,568800.0,0.0,0.0,0.0,0.0,303238.4268,0.0,0,0.0,0.0


In [39]:
%%time
fname = "bureau"
fname = os.path.join("features", "{}.pkl.bz2".format(fname))
pdf_feat.to_pickle(fname, compression="bz2")
print("Store features completed!")


Store features completed!
CPU times: user 10.7 s, sys: 224 ms, total: 10.9 s
Wall time: 11 s
