In [None]:
# Standard library.
import gc
from warnings import filterwarnings as f_w

# Third-party.
import numpy as np
import pandas as pd

# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas deprecation / indexing warnings.
f_w('ignore')

# Raise the display limits so wide / long frames are not truncated when shown.
pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 500)

In [None]:
# `locator` is a lookup sheet indexed by its first column; later cells read
# its 'initial' and 'feat_eng' columns to find where each table lives.
locator = pd.read_excel('locator.xlsx', index_col=0)

# Row label in `locator` for the first table to be loaded.
# NOTE(review): 'burb' vs. the later 'buro' / 'BUROB' prefix -- confirm this
# label matches the locator sheet.
table = 'burb'

In [None]:
# Load the table selected by `table` and order it by bureau record, then by
# month, so that order-dependent aggregations (e.g. 'last') see rows in
# ascending MONTHS_BALANCE within each SK_ID_BUREAU.
bb = (
    pd.read_csv(locator.loc[table, 'initial'])
    .sort_values(by=['SK_ID_BUREAU', 'MONTHS_BALANCE'])
)

In [None]:
# Turn the two non-numeric codes into missing values (applied frame-wide),
# then cast STATUS to float so it can be aggregated numerically.
# NOTE(review): presumably 'C' / 'X' are the only non-numeric STATUS codes --
# any other one would make the cast below fail.
bb = bb.replace(dict.fromkeys(('C', 'X'), np.nan))
bb['STATUS'] = bb['STATUS'].astype(float)

In [None]:
# Collapse the monthly rows to one row per SK_ID_BUREAU. 'last' depends on
# row order, i.e. on `bb` having been sorted by MONTHS_BALANCE beforehand.
bb_agg = (
    bb
    .groupby('SK_ID_BUREAU')
    .agg({'STATUS': ['max', 'mean', 'std', 'last']})
)

# Flatten the (column, statistic) MultiIndex into upper-case names such as
# BUROB_STATUS_MAX.
bb_agg.columns = pd.Index([
    f'BUROB_{column}_{stat}'.upper()
    for column, stat in bb_agg.columns
])

In [None]:
# Switch the locator row label to the main table and load it; `table` is
# reused at the end of the notebook to pick the output location.
table = 'buro'
buro = pd.read_csv(filepath_or_buffer=locator.loc[table, 'initial'])

In [None]:
# Remove rows that are flagged 'Active' but whose DAYS_CREDIT_UPDATE is below
# -60.
# NOTE(review): presumably this means "active credit whose record has not been
# refreshed for more than 60 days" -- confirm the unit/sign convention and
# consider moving the -60 cut-off to a named constant.
#
# Both conditions are combined into ONE boolean mask aligned with `buro`.
# Chaining two boolean selections (`buro[a][b]`) applies a full-length mask to
# an already filtered frame, which only works through implicit reindexing and
# raises a "Boolean Series key will be reindexed" UserWarning.
stale_active = (buro['CREDIT_ACTIVE'] == 'Active') & (buro['DAYS_CREDIT_UPDATE'] < -60)
buro = buro.drop(index=buro.index[stale_active])
del stale_active

In [None]:
# One-hot encode CREDIT_ACTIVE. Passing a one-column frame makes pandas name
# the indicator columns 'CREDIT_ACTIVE_<value>', which is what the
# aggregation spec refers to. The original column is kept alongside them.
credit_active_flags = pd.get_dummies(buro[['CREDIT_ACTIVE']])
buro = buro.join(credit_active_flags)
del credit_active_flags

In [None]:
# Derived per-row features, each defined by an expression over existing
# columns. None of the expressions uses another derived column, so the
# evaluation order only determines column order. Divisions by zero can
# produce +/-inf here.
for new_column, expression in (
    ('DEBT_CREDIT', 'AMT_CREDIT_SUM_DEBT / AMT_CREDIT_SUM'),
    ('DEBT_LIMIT', 'AMT_CREDIT_SUM_DEBT / AMT_CREDIT_SUM_LIMIT'),
    ('OVERDUE_CREDIT', 'AMT_CREDIT_SUM_OVERDUE / AMT_CREDIT_SUM'),
    ('OVERDUE_LIMIT', 'AMT_CREDIT_SUM_OVERDUE / AMT_CREDIT_SUM_LIMIT'),
    ('OVERDUE_DEBT', 'AMT_CREDIT_SUM_OVERDUE / AMT_CREDIT_SUM_DEBT'),
    ('END_FACT_PLAN', 'DAYS_CREDIT_ENDDATE - DAYS_ENDDATE_FACT'),
):
    buro[new_column] = buro.eval(expression)
del new_column, expression

In [None]:
# Treat infinite values (e.g. from the ratio features' zero denominators) as
# missing so they do not dominate max / mean / std aggregations.
buro = buro.replace([np.inf, -np.inf], np.nan)

In [None]:
# Attach the per-SK_ID_BUREAU STATUS statistics. `bb_agg` is keyed by its
# index, `buro` by a regular column; the left join keeps every `buro` row and
# leaves the BUROB_* columns missing where there is no match.
buro = pd.merge(
    left=buro,
    right=bb_agg,
    how='left',
    left_on='SK_ID_BUREAU',
    right_index=True,
)

In [None]:
# Order rows by DAYS_CREDIT (ascending) so that the order-dependent 'last'
# aggregation of CREDIT_ACTIVE picks the row with the largest DAYS_CREDIT in
# each group.
# NOTE(review): rows with equal DAYS_CREDIT are ordered by the default
# (non-stable) sort -- confirm that tie order is irrelevant.
buro = buro.sort_values('DAYS_CREDIT')

In [None]:
# Aggregation spec: column -> statistic name, or list of statistic names.
# Insertion order is significant because it fixes the column order of the
# aggregated frames. Every list is a fresh object, never shared between keys.

# Categorical / date / count columns with individually chosen statistics.
aggregations = {
    'CREDIT_ACTIVE': ['size', 'last'],
    'CREDIT_CURRENCY': 'nunique',
    'DAYS_CREDIT': ['min', 'max'],
    'CREDIT_DAY_OVERDUE': ['max', 'mean', 'std'],
    'DAYS_CREDIT_ENDDATE': ['min', 'max'],
    'DAYS_ENDDATE_FACT': ['min', 'max'],
    'AMT_CREDIT_MAX_OVERDUE': ['max', 'mean'],
    'CNT_CREDIT_PROLONG': ['max', 'mean'],
}

# Monetary amounts: total, largest and average.
aggregations.update({
    amount: ['sum', 'max', 'mean']
    for amount in (
        'AMT_CREDIT_SUM',
        'AMT_CREDIT_SUM_DEBT',
        'AMT_CREDIT_SUM_LIMIT',
        'AMT_CREDIT_SUM_OVERDUE',
    )
})

aggregations['CREDIT_TYPE'] = 'nunique'
aggregations['AMT_ANNUITY'] = ['min', 'max', 'mean', 'std']

# One-hot CREDIT_ACTIVE indicators: count and share per group.
aggregations.update({
    'CREDIT_ACTIVE_' + status: ['sum', 'mean']
    for status in ('Active', 'Bad debt', 'Closed', 'Sold')
})

# Derived ratio / difference features: full spread statistics.
aggregations.update({
    derived: ['min', 'max', 'mean', 'std']
    for derived in (
        'DEBT_CREDIT',
        'DEBT_LIMIT',
        'OVERDUE_CREDIT',
        'OVERDUE_LIMIT',
        'OVERDUE_DEBT',
        'END_FACT_PLAN',
    )
})

# Per-bureau-record STATUS statistics, averaged over each group.
aggregations.update({
    'BUROB_STATUS_' + stat: 'mean'
    for stat in ('MAX', 'MEAN', 'STD', 'LAST')
})

In [None]:
# One row per SK_ID_CURR over all remaining `buro` rows.
buro_agg = buro.groupby('SK_ID_CURR').agg(aggregations)

# Flatten (column, statistic) pairs to 'BURO_<column>_<STATISTIC>'; only the
# statistic part is upper-cased, the column part keeps its original case.
buro_agg.columns = pd.Index([
    f'BURO_{column}_{stat.upper()}'
    for column, stat in buro_agg.columns.tolist()
])

In [None]:
# For the per-status subsets that follow, keep only the row count for
# CREDIT_ACTIVE (the 'last' statistic is dropped).
# NOTE: this mutates the shared spec in place, so re-running the full-table
# aggregation after this cell would no longer produce the 'last' column.
aggregations.update({'CREDIT_ACTIVE': 'size'})

In [None]:
# Same aggregation spec, restricted to rows whose CREDIT_ACTIVE is 'Active'.
active = buro.loc[buro['CREDIT_ACTIVE'] == 'Active']
active_agg = active.groupby('SK_ID_CURR').agg(aggregations)

# Prefix the flattened names with 'ACTIVE_' to keep them distinct from the
# full-table BURO_* columns.
active_agg.columns = pd.Index([
    f'ACTIVE_{column}_{stat.upper()}'
    for column, stat in active_agg.columns.tolist()
])

# Left join: SK_ID_CURR values without any 'Active' row get missing values.
buro_agg = buro_agg.join(active_agg, how='left', on='SK_ID_CURR')

# Release the intermediates before the next subset is built.
del active, active_agg
gc.collect()

In [None]:
# Same aggregation spec, restricted to rows whose CREDIT_ACTIVE is 'Closed'.
closed = buro.loc[buro['CREDIT_ACTIVE'] == 'Closed']
closed_agg = closed.groupby('SK_ID_CURR').agg(aggregations)

# Prefix the flattened names with 'CLOSED_'.
closed_agg.columns = pd.Index([
    f'CLOSED_{column}_{stat.upper()}'
    for column, stat in closed_agg.columns.tolist()
])

# Left join: SK_ID_CURR values without any 'Closed' row get missing values.
buro_agg = buro_agg.join(closed_agg, how='left', on='SK_ID_CURR')

# `buro` itself is released here as well: nothing after this cell reads it.
del closed, closed_agg, buro
gc.collect()

In [None]:
# Write the per-SK_ID_CURR feature table to the location that `locator`
# lists under 'feat_eng' for the current `table` label ('buro' at this point).
buro_agg.to_csv(path_or_buf=locator.loc[table, 'feat_eng'])