In [1]:
!cat /proc/meminfo

MemTotal:       32949048 kB
MemFree:        30109136 kB
MemAvailable:   31748544 kB
Buffers:          660712 kB
Cached:          1180768 kB
SwapCached:            0 kB
Active:          1706788 kB
Inactive:         762344 kB
Active(anon):     627656 kB
Inactive(anon):       52 kB
Active(file):    1079132 kB
Inactive(file):   762292 kB
Unevictable:           0 kB
Mlocked:               0 kB
SwapTotal:             0 kB
SwapFree:              0 kB
Dirty:                72 kB
Writeback:             0 kB
AnonPages:        628084 kB
Mapped:           150216 kB
Shmem:                60 kB
Slab:             286020 kB
SReclaimable:     261892 kB
SUnreclaim:        24128 kB
KernelStack:        4448 kB
PageTables:         7764 kB
NFS_Unstable:          0 kB
Bounce:                0 kB
WritebackTmp:          0 kB
CommitLimit:    16474524 kB
Committed_AS:    2300540 kB
VmallocTotal:   34359738367 kB
VmallocUsed:           0 kB
VmallocChunk:          0 kB
AnonHugePag

In [2]:
!pip install xgboost

Collecting xgboost
[?25l  Downloading https://files.pythonhosted.org/packages/8f/15/606f81a2b8a8e82eaa10683cb3f3074905ec65d3bcef949e3f0909f165a5/xgboost-0.80-py2.py3-none-manylinux1_x86_64.whl (15.8MB)
[K    100% |████████████████████████████████| 15.8MB 3.2MB/s eta 0:00:01
[31mdistributed 1.21.8 requires msgpack, which is not installed.[0m
Installing collected packages: xgboost
Successfully installed xgboost-0.80
[33mYou are using pip version 10.0.1, however version 18.0 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io
import time
import json
import gc

from sklearn import preprocessing as prp
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import (accuracy_score, r2_score, confusion_matrix,
f1_score, recall_score, precision_score)

import os
import boto3
import re

import xgboost as xgb

# ruta de trabajo en s3
bucket = 'boi-banregio'
target = 'BMI'
feat_part_number = 40

# s3 resource
s3_bucket_resource = boto3.resource('s3').Bucket(bucket)

In [3]:
# Aquí colocamos la ruta de nuestro directorio, el nombre de nuestro archivo y si tiene o no cabecera
prefix = 'datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION'

In [4]:
target_file_name = "TARGETS.csv"
target_file_key = "{}/{}".format(prefix, target_file_name)
target_obj = s3_bucket_resource.Object(target_file_key).get()
data_target = pd.read_csv(io.BytesIO(target_obj['Body'].read())).loc[:,[target]]

In [5]:
data_feature_list = []
for k in range(0, 10):
    data_file_name = "DATA_PART{}.csv".format(k)
    data_file_key = "{}/{}".format(prefix, data_file_name)
    print(data_file_key)
    
    feat_imp_file_name = "FEATURE_SELECTION/FEATURE_IMPORTANCE_PART{}.csv".format(k)
    feat_imp_file_key = "{}/{}".format(prefix, feat_imp_file_name)
    print(feat_imp_file_key)    
    
    print('Loading data')
    data_part_obj = s3_bucket_resource.Object(data_file_key).get()
    data_part = pd.read_csv(io.BytesIO(data_part_obj['Body'].read()))
    display(data_part)
    
    feat_imp_part_obj = s3_bucket_resource.Object(feat_imp_file_key).get()
    feat_imp_part = pd.read_csv(io.BytesIO(feat_imp_part_obj['Body'].read()))
    display(feat_imp_part)
    #break
    features = feat_imp_part.loc[:feat_part_number-1, 'Feature'].values
    display(features)
    #break
    
    data_feat_imp_part = data_part[features]
    data_feature_list.append(data_feat_imp_part) 
    print('Step: ', k)

print('Uploading raw feature importance')
model_features = pd.concat(data_feature_list, axis=1)
model_features_file_name = "MODEL_DATASET/RAW_MODEL_FEATURES.csv"
model_features_obj_key =  "{}/{}".format(prefix, model_features_file_name)    

f_str = io.StringIO()
model_features.to_csv(f_str, index=False)
s3_bucket_resource.Object(model_features_obj_key).put(Body=f_str.getvalue())
print('Model raw features saved!')

datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART0.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART0.csv
Loading data


Unnamed: 0,AVG_PND_MENOS_BAL_12M,AVG_PND_MAS_BAL_12M,AVG_BAL_12M,MAX_D_CRED_LIM_TO_BAL_03M,AVG_D_CRED_LIM_TO_BAL_06M,AVG_R_BAL_BY_CRED_LIM_12M,MIN_R_BAL_BY_MAX_BAL_03M,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_06M,AVG_R_BAL_BY_MAX_BAL_12M,BK12_MAX_CREDIT_AMT,...,PCT_BK_CS_CL_ACC,PCT_BK_CS_OP_ACC,PCT_BK_LSG_OP_ACC,PCT_BRG_CS_OP_ACC,PCT_GBN_CF_CL_ACC,PCT_GBN_PQ_CL_ACC,PCT_GBN_PQ_OP_ACC,PCT_LSE_CS_OP_ACC,PCT_UN_CRE_CF_CL_ACC,PCT_UN_CRE_CL_ACC
0,2.784000e+03,2.784000e+03,2.784000e+03,147216.00,1.472160e+05,0.018560,1.000000,1.000000,1.000000,12.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0,0.0,0.0,0.0
1,2.986513e+03,3.661556e+03,3.492795e+03,145798.41,1.465072e+05,0.023285,1.000000,1.000000,1.000000,13.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0,0.0,0.0,0.0
2,3.357264e+03,4.492689e+03,4.222350e+03,145798.41,1.457777e+05,0.028149,1.000000,1.000000,1.000000,13.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0,0.0,0.0,0.0
3,3.668540e+03,3.586834e+03,3.616015e+03,148202.99,1.461066e+05,0.024107,0.316294,0.931629,0.829073,11.0,...,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0,0.0,0.0,0.0
4,2.502814e+06,2.456664e+06,2.479739e+06,481183.33,5.400732e+04,0.991896,0.991803,0.996503,0.995100,72.0,...,0.052632,0.210526,0.000000,0.0,0.052632,0.0,0,0.0,0.0,0.0
5,2.497031e+06,2.461169e+06,2.479100e+06,481183.33,5.672383e+04,0.991640,0.995920,0.994761,0.994845,72.0,...,0.052632,0.210526,0.000000,0.0,0.052632,0.0,0,0.0,0.0,0.0
6,2.491855e+06,2.468175e+06,2.480015e+06,-4137.56,5.694844e+04,0.992006,0.995920,0.992948,0.995255,68.0,...,0.055556,0.166667,0.000000,0.0,0.055556,0.0,0,0.0,0.0,0.0
7,2.486672e+06,2.472040e+06,2.479356e+06,-4137.56,5.808758e+04,0.991742,0.991669,0.992709,0.994991,68.0,...,0.055556,0.166667,0.000000,0.0,0.055556,0.0,0,0.0,0.0,0.0
8,2.483435e+06,2.476818e+06,2.480127e+06,-7073.66,5.762273e+04,0.992051,0.991669,0.993274,0.995339,68.0,...,0.055556,0.166667,0.000000,0.0,0.055556,0.0,0,0.0,0.0,0.0
9,2.428275e+06,2.296312e+06,2.362294e+06,481150.00,1.974057e+05,0.944917,0.804104,0.986187,0.974312,74.0,...,0.050000,0.150000,0.000000,0.0,0.050000,0.0,0,0.0,0.0,0.0


Unnamed: 0,Feature,Importance
0,AVG_R_BAL_BY_CRED_LIM_12M,0.037700
1,DEUDA_TOT,0.037007
2,MIN_R_BAL_BY_MAX_BAL_03M,0.028920
3,MAX_D_CRED_LIM_TO_BAL_03M,0.027226
4,AVG_D_CRED_LIM_TO_BAL_06M,0.026533
5,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_06M,0.025801
6,AVG_R_BAL_BY_MAX_BAL_12M,0.025416
7,AVG_PND_MENOS_BAL_12M,0.024954
8,BK12_MAX_CREDIT_AMT,0.021526
9,AVG_PND_MAS_BAL_12M,0.021411


array(['AVG_R_BAL_BY_CRED_LIM_12M', 'DEUDA_TOT',
       'MIN_R_BAL_BY_MAX_BAL_03M', 'MAX_D_CRED_LIM_TO_BAL_03M',
       'AVG_D_CRED_LIM_TO_BAL_06M', 'AVG_PND_MENOS_R_BAL_BY_MAX_BAL_06M',
       'AVG_R_BAL_BY_MAX_BAL_12M', 'AVG_PND_MENOS_BAL_12M',
       'BK12_MAX_CREDIT_AMT', 'AVG_PND_MAS_BAL_12M',
       'MAX_VIGENTEC_BRG_R_OP_ACC_03M', 'AVG_PND_MENOS_VIGENTEH_06M',
       'MIN_SALINI_OP_ACC', 'AVG_VIGENTEC_OP_ACC_06M',
       'AVG_VIGENTEC_BRG_OP_ACC', 'AVG_MAX_DIAS_PARA_PAGO_U06M',
       'AVG_AVG_HI_CALCAR_L_12M', 'AVG_PCT_VENC29_12M',
       'AVG_AVG_HI_CALCAR_CEIL_12M', 'AVG_PND_MENOS_MAX_HI_CALCAR_L_03M',
       'MAX_AVG2_VP_MES_U06M', 'AVG_BAL_12M',
       'AVG_PND_MENOS_AVG_HI_CALCAR_FLOOR_06M', 'PCT_BK_CS_CL_ACC',
       'PCT_BK_CS_OP_ACC', 'AVG_AVG2_VP_MES_U06M_2',
       'MIN_SALINI_BRG_CL_ACC', 'MIN_PCT_VIGENTEH_12M',
       'AVG_AVG1_VP_PER_U06M_2', 'AVG2_VP_MES_2',
       'AVG_AVG1_VP_PER_U03M_2', 'AVG_AVG2_VP_MES_U03M_2',
       'AVG_VIGENTEC_BK_R_OP_ACC_06M', 'AVG_VIGE

Step:  0
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART1.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART1.csv
Loading data


Unnamed: 0,MIN_CRED_LIM_06M,AVG_PND_MAS_CRED_LIM_06M,AVG_PND_MAS_D_CRED_LIM_TO_BAL_12M,MAX_R_BAL_BY_CRED_LIM_03M,AVG_PND_MENOS_R_BAL_BY_CRED_LIM_03M,AVG_PND_MENOS_R_BAL_BY_CRED_LIM_06M,AVG_PND_MAS_R_BAL_BY_CRED_LIM_06M,MAX_R_BAL_BY_CRED_LIM_12M,MIN_R_BAL_BY_CRED_LIM_12M,AVG_R_BAL_BY_MAX_BAL_06M,...,PCT_BK_R_CL_ACC,PCT_BRG_LSE_LSG_CL_ACC,PCT_BRG_LSE_R_CL_ACC,PCT_BRG_LSG_CL_ACC,PCT_BRG_PQ_OP_ACC,PCT_GBN_CS_CL_ACC,PCT_LSE_PQ_CL_ACC,PCT_UN_CRE_OP_ACC,PCT_UN_CRE_OTHER_OP_ACC,PCT_UN_CRE_R_OP_ACC
0,150000.00,1.500000e+05,1.472160e+05,0.018560,0.018560,0.018560,0.018560,0.018560,0.018560,1.000000,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
1,150000.00,1.500000e+05,1.463384e+05,0.028011,0.028011,0.019910,0.025910,0.028011,0.018560,1.000000,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
2,150000.00,1.500000e+05,1.455073e+05,0.037876,0.031299,0.022382,0.032268,0.037876,0.018560,1.000000,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
3,150000.00,1.500000e+05,1.464132e+05,0.037876,0.032697,0.030354,0.022814,0.037876,0.011980,0.772098,...,0.000000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
4,2500000.00,2.500000e+06,4.333593e+04,1.005758,0.945103,0.987044,0.969750,1.012127,0.807527,0.994815,...,0.052632,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
5,2500000.00,2.500000e+06,3.883068e+04,1.005758,0.925520,0.978343,0.976278,1.012127,0.807527,0.993602,...,0.052632,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
6,2500000.00,2.500000e+06,3.182547e+04,1.011255,1.004827,0.969677,0.984764,1.012127,0.807527,0.993642,...,0.055556,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
7,2500000.00,2.500000e+06,2.795994e+04,1.011255,1.004549,0.962364,0.991166,1.012127,0.807527,0.993259,...,0.055556,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
8,2500000.00,2.500000e+06,2.318155e+04,1.011255,1.006259,0.955646,0.998255,1.012127,0.807527,0.993616,...,0.055556,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0
9,2500000.00,2.500000e+06,2.036880e+05,1.005122,0.886690,0.963577,0.878499,1.011884,0.807527,0.955509,...,0.050000,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0,0.0


Unnamed: 0,Feature,Importance
0,AVG_PND_MAS_D_CRED_LIM_TO_BAL_12M,0.033299
1,MIN_R_BAL_BY_CRED_LIM_12M,0.032181
2,AVG_R_BAL_BY_MAX_BAL_06M,0.030441
3,MAX_R_BAL_BY_CRED_LIM_03M,0.026010
4,MAX_R_BAL_BY_CRED_LIM_12M,0.025927
5,MIN_MS_OP_BRG_OP_ACC,0.024187
6,MIN_R_BAL_BY_MAX_BAL_12M,0.023939
7,AVG_PND_MENOS_R_BAL_BY_CRED_LIM_06M,0.021661
8,AVG_PND_MAS_AVG_HI_CALCAR_FLOOR_06M,0.019383
9,MAX_AVG1_VP_MES_U06M,0.018472


array(['AVG_PND_MAS_D_CRED_LIM_TO_BAL_12M', 'MIN_R_BAL_BY_CRED_LIM_12M',
       'AVG_R_BAL_BY_MAX_BAL_06M', 'MAX_R_BAL_BY_CRED_LIM_03M',
       'MAX_R_BAL_BY_CRED_LIM_12M', 'MIN_MS_OP_BRG_OP_ACC',
       'MIN_R_BAL_BY_MAX_BAL_12M', 'AVG_PND_MENOS_R_BAL_BY_CRED_LIM_06M',
       'AVG_PND_MAS_AVG_HI_CALCAR_FLOOR_06M', 'MAX_AVG1_VP_MES_U06M',
       'AVG_PND_MAS_R_BAL_BY_CRED_LIM_06M',
       'AVG_PND_MENOS_R_BAL_BY_CRED_LIM_03M',
       'MAX_VIGENTEC_BRG_R_OP_ACC_06M', 'MIN_VIGENTEH_06M',
       'AVG_SALINI_OP_ACC_03M', 'MIN_SALINI_OP_ACC_06M',
       'MAX_VIGENTEC_OP_ACC', 'AVG_SALINI_BK_CL_ACC',
       'AVG_AVG2_VP_PER_U06M', 'NUM_CL_ACC', 'MIN_MS_OP_BK_OP_ACC',
       'MAX_MAX_VP_PER_U12M_2', 'MIN_CRED_LIM_06M',
       'MAX_VIGENTEC_BRG_OP_ACC', 'AVG_PND_MAS_CRED_LIM_06M',
       'MAX_VIGENTEC_BK_R_OP_ACC_06M', 'MAX_SALINI_CL_ACC_12M',
       'AVG_SALINI_BRG_OP_ACC_03M', 'AVG_SALINI_BRG_R_OP_ACC',
       'AVG_SALINI_BK_OP_ACC_06M', 'MAX_MS_CL_BRG_CL_ACC',
       'MAX_SALINI_BK_OP_ACC',

Step:  1
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART2.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART2.csv
Loading data


Unnamed: 0,AVG_PND_MENOS_CRED_LIM_12M,AVG_PND_MENOS_D_CRED_LIM_TO_BAL_12M,MIN_R_BAL_BY_CRED_LIM_03M,AVG_R_BAL_BY_CRED_LIM_03M,MIN_R_BAL_BY_CRED_LIM_06M,AVG_PND_MAS_R_BAL_BY_MAX_BAL_06M,MAX_R_BAL_BY_MAX_BAL_12M,BK12_DPD_PROM,BK24_IND_EXP,MONTHS_ON_FILE_BANKING,...,PCT_GBN_CL_ACC,PCT_GBN_LSG_CL_ACC,PCT_GBN_OTHER_CL_ACC,PCT_LSE_CS_CL_ACC,PCT_LSE_R_OP_ACC,PCT_OP_ACC,PCT_OTHER_FIN_CL_ACC,PCT_UN_CRE_CF_OP_ACC,PCT_UN_CRE_OTHER_CL_ACC,PCT_UN_CRE_PQ_OP_ACC
0,1.500000e+05,1.472160e+05,0.018560,0.018560,0.018560,1.000000,1.000000,2.0,0.00,1.0,...,0.000000,0.0,0.0,0.0,0.0,0.600000,0.000000,0,0.0,0.0
1,1.500000e+05,1.470135e+05,0.028011,0.028011,0.018560,1.000000,1.000000,2.0,0.00,1.0,...,0.000000,0.0,0.0,0.0,0.0,0.600000,0.000000,0,0.0,0.0
2,1.500000e+05,1.466427e+05,0.028011,0.032944,0.018560,1.000000,1.000000,2.0,0.00,1.0,...,0.000000,0.0,0.0,0.0,0.0,0.600000,0.000000,0,0.0,0.0
3,1.500000e+05,1.463315e+05,0.011980,0.024928,0.011980,0.658147,1.000000,2.0,,1.0,...,0.000000,0.0,0.0,0.0,0.0,0.600000,0.000000,0,0.0,0.0
4,2.500000e+06,-2.813938e+03,0.807527,0.954690,0.807527,0.992980,0.999990,3.0,0.00,1.0,...,0.052632,0.0,0.0,0.0,0.0,0.631579,0.052632,0,0.0,0.0
5,2.500000e+06,2.968874e+03,0.807527,0.954863,0.807527,0.992443,0.999990,3.0,0.00,1.0,...,0.052632,0.0,0.0,0.0,0.0,0.631579,0.052632,0,0.0,0.0
6,2.500000e+06,8.144617e+03,1.001655,1.005795,0.807527,0.994281,0.999990,3.0,0.10,1.0,...,0.055556,0.0,0.0,0.0,0.0,0.611111,0.055556,0,0.0,0.0
7,2.500000e+06,1.332774e+04,1.001655,1.005062,0.807527,0.993725,0.999990,3.0,0.10,1.0,...,0.055556,0.0,0.0,0.0,0.0,0.611111,0.055556,0,0.0,0.0
8,2.500000e+06,1.656515e+04,1.002829,1.005929,0.807527,0.993883,0.999990,3.0,0.10,1.0,...,0.055556,0.0,0.0,0.0,0.0,0.611111,0.055556,0,0.0,0.0
9,2.500000e+06,7.172478e+04,0.807540,0.857167,0.807540,0.914003,0.999760,3.0,0.14,1.0,...,0.050000,0.0,0.0,0.0,0.0,0.600000,0.050000,0,0.0,0.0


Unnamed: 0,Feature,Importance
0,AVG_PND_MENOS_D_CRED_LIM_TO_BAL_12M,0.042799
1,AVG_PND_MAS_R_BAL_BY_MAX_BAL_06M,0.039717
2,AVG_R_BAL_BY_CRED_LIM_03M,0.034472
3,MIN_R_BAL_BY_CRED_LIM_06M,0.028226
4,MIN_R_BAL_BY_CRED_LIM_03M,0.025824
5,AVG_PND_MENOS_CRED_LIM_12M,0.023662
6,AVG_SALINI_CL_ACC,0.022140
7,PCT_OP_ACC,0.017896
8,AVG_MIN_VP_MES_U06M,0.016735
9,MAX_MAX_VP_MES_U12M,0.016095


array(['AVG_PND_MENOS_D_CRED_LIM_TO_BAL_12M',
       'AVG_PND_MAS_R_BAL_BY_MAX_BAL_06M', 'AVG_R_BAL_BY_CRED_LIM_03M',
       'MIN_R_BAL_BY_CRED_LIM_06M', 'MIN_R_BAL_BY_CRED_LIM_03M',
       'AVG_PND_MENOS_CRED_LIM_12M', 'AVG_SALINI_CL_ACC', 'PCT_OP_ACC',
       'AVG_MIN_VP_MES_U06M', 'MAX_MAX_VP_MES_U12M', 'NBK12_DEUDA_CP',
       'AVG_AVG1_VP_MES_U03M_2', 'MAX_VIGENTEC_BRG_OP_ACC_06M',
       'NBK12_NUM_CRED', 'AVG_PCT_VIGENTEC_OP_ACC_12M',
       'AVG_VIGENTEC_BRG_CS_OP_ACC_06M', 'MAX_AVG1_VP_PER_U12M_2',
       'AVG_VIGENTEH_12M', 'AVG_SALINI_BRG_R_OP_ACC_03M',
       'AVG_VIGENTEC_BRG_OP_ACC_03M', 'AVG_PND_MAS_AVG_HI_CALCAR_L_06M',
       'MIN_VIGENTEH_03M', 'MAX_R_BAL_BY_MAX_BAL_12M',
       'MAX_MS_CL_BK_CS_CL_ACC', 'AVG_AVG1_VP_PER_U03M',
       'MIN_SALINI_BRG_OP_ACC', 'AVG_PND_MENOS_AVG_HI_CALCAR_CEIL_12M',
       'AVG_PND_MENOS_AVG_HI_CALCAR_L_12M', 'MAX_SALINI_BK_OP_ACC_06M',
       'AVG_PND_MENOS_AVG_HI_CALCAR_FLOOR_12M',
       'AVG_PND_MENOS_VENCIDO1MAS_03M', 'AVG_PCT_VEN

Step:  2
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART3.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART3.csv
Loading data


Unnamed: 0,AVG_PND_MENOS_BAL_06M,MIN_D_CRED_LIM_TO_BAL_06M,AVG_D_CRED_LIM_TO_BAL_12M,MS_OP,AVG_PND_MENOS_R_BAL_BY_CRED_LIM_12M,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_12M,BK12_NUM_TC_ACT,AVG_FD_CALCAR_FLOOR,AVG_PND_MENOS_VENC29_12M,AVG_VENC59_06M,...,NUM_LSE_CS_CL_ACC,NUM_OTHER_FIN_OP_ACC,NUM_OTHER_FIN_R_OP_ACC,PCT_BK_CF_CL_ACC,PCT_BRG_CF_OP_ACC,PCT_BRG_CS_CL_ACC,PCT_BRG_PQ_CL_ACC,PCT_LSE_LSG_CL_ACC,PCT_OTHER_FIN_CS_CL_ACC,PCT_OTHER_FIN_LSG_CL_ACC
0,2.784000e+03,147216.00,1.472160e+05,73.0,0.018560,1.000000,1.00,,0.000000,0.000000,...,0.0,0,0,0.0,0,0.0,0.0,0.000000,0.000000,0.000000
1,2.986513e+03,145798.41,1.465072e+05,78.0,0.019910,1.000000,1.00,,0.000000,0.000000,...,0.0,0,0,0.0,0,0.0,0.0,0.000000,0.000000,0.000000
2,3.357264e+03,144318.54,1.457777e+05,79.0,0.022382,1.000000,1.00,,0.000000,0.000000,...,0.0,0,0,0.0,0,0.0,0.0,0.000000,0.000000,0.000000
3,4.553080e+03,144318.54,1.463840e+05,82.0,0.024457,0.965815,1.00,,0.000000,0.000000,...,0.0,0,0,0.0,0,0.0,0.0,0.000000,0.000000,0.000000
4,2.467611e+06,-30291.29,2.026100e+04,47.0,1.001126,0.995515,1.00,2.5,256.012821,1.428571,...,0.0,2,1,0.0,0,0.0,0.0,0.052632,0.000000,0.000000
5,2.445857e+06,-29708.97,2.089978e+04,48.0,0.998812,0.995419,1.00,2.5,256.012821,1.428571,...,0.0,2,1,0.0,0,0.0,0.0,0.052632,0.000000,0.000000
6,2.424193e+06,-28136.73,1.998504e+04,49.0,0.996742,0.995577,1.00,2.5,372.653846,0.000000,...,0.0,2,1,0.0,0,0.0,0.0,0.055556,0.000000,0.000000
7,2.405909e+06,-28136.73,2.064384e+04,50.0,0.994669,0.995739,1.00,2.5,372.653846,0.000000,...,0.0,2,1,0.0,0,0.0,0.0,0.055556,0.000000,0.000000
8,2.389116e+06,-28136.73,1.987335e+04,51.0,0.993374,0.996739,1.00,2.5,372.653846,0.000000,...,0.0,2,1,0.0,0,0.0,0.0,0.055556,0.000000,0.000000
9,2.408942e+06,-28136.73,1.377064e+05,54.0,0.971310,0.991606,1.00,2.5,233.487179,0.000000,...,0.0,2,1,0.0,0,0.0,0.0,0.050000,0.000000,0.000000


Unnamed: 0,Feature,Importance
0,AVG_PND_MENOS_BAL_06M,0.039418
1,AVG_PND_MENOS_R_BAL_BY_CRED_LIM_12M,0.036432
2,MIN_D_CRED_LIM_TO_BAL_06M,0.033371
3,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_12M,0.032313
4,AVG_D_CRED_LIM_TO_BAL_12M,0.030839
5,MS_OP,0.030348
6,AVG_PND_MENOS_MAX_HI_CALCAR_L_12M,0.019615
7,MAX_MS_CL_CL_ACC,0.018556
8,AVG_VIGENTEC_BRG_OP_ACC_06M,0.017914
9,AVG_VIGENTEC_BRG_R_OP_ACC,0.016931


array(['AVG_PND_MENOS_BAL_06M', 'AVG_PND_MENOS_R_BAL_BY_CRED_LIM_12M',
       'MIN_D_CRED_LIM_TO_BAL_06M', 'AVG_PND_MENOS_R_BAL_BY_MAX_BAL_12M',
       'AVG_D_CRED_LIM_TO_BAL_12M', 'MS_OP',
       'AVG_PND_MENOS_MAX_HI_CALCAR_L_12M', 'MAX_MS_CL_CL_ACC',
       'AVG_VIGENTEC_BRG_OP_ACC_06M', 'AVG_VIGENTEC_BRG_R_OP_ACC',
       'MAX_SALINI_OP_ACC', 'MAX_MIN_VP_MES_U12M', 'AVG_AVG1_VP_PER_U12M',
       'AVG_AVG1_VP_MES_U06M', 'AVG_AVG2_VP_MES_U12M',
       'AVG_PCT_VIGENTEC_OP_ACC_06M', 'MAX_SALINI_BK_CL_ACC',
       'MAX_MS_OP_BK_R_OP_ACC', 'MAX_AVG1_VP_MES_U03M_2',
       'MIN_MS_CL_CL_ACC', 'MAX_VIGENTEC_BRG_CS_OP_ACC_12M',
       'MAX_VIGENTEH_06M', 'AVG_PND_MAS_AVG_HI_CALCAR_FLOOR_12M',
       'MAX_VIGENTEH_12M', 'AVG_PND_MENOS_VENC29_12M',
       'AVG_PND_MENOS_PCT_VENC29_12M',
       'AVG_PND_MAS_AVG_HI_CALCAR_CEIL_03M', 'MIN_VP_MES',
       'MAX_PCT_VENC1M_12M', 'AVG_PND_MAS_VIGENTEH_03M',
       'AVG_PND_MAS_AVG_HI_CALCAR_CEIL_06M', 'MIN_MS_OP_BK_R_OP_ACC',
       'AVG_MAX_VP_PER

Step:  3
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART4.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART4.csv
Loading data


Unnamed: 0,AVG_PND_MENOS_BAL_03M,AVG_BAL_03M,AVG_CRED_LIM_03M,MIN_CRED_LIM_12M,AVG_CRED_LIM_12M,AVG_PND_MENOS_D_CRED_LIM_TO_BAL_06M,DS_OL_CL,MAX_R_BAL_BY_CRED_LIM_06M,AVG_PND_MAS_R_BAL_BY_MAX_BAL_03M,AVG_PND_MAS_R_BAL_BY_MAX_BAL_12M,...,PCT_BRG_LSE_LSG_OP_ACC,PCT_BRG_LSE_PQ_OP_ACC,PCT_BRG_LSE_R_OP_ACC,PCT_GBN_OTHER_OP_ACC,PCT_GBN_R_OP_ACC,PCT_LSE_CF_OP_ACC,PCT_LSE_CL_ACC,PCT_OTHER_FIN_OP_ACC,PCT_OTHER_FIN_R_OP_ACC,PCT_UN_CRE_CS_CL_ACC
0,2.784000e+03,2.784000e+03,150000.00,150000.00,1.500000e+05,1.472160e+05,4377.0,0.018560,1.000000,1.000000,...,0.000000,0,0.0,0,0.0,0,0.000000,0.000000,0.000000,0.0
1,4.201590e+03,4.201590e+03,150000.00,150000.00,1.500000e+05,1.470135e+05,4377.0,0.028011,1.000000,1.000000,...,0.000000,0,0.0,0,0.0,0,0.000000,0.000000,0.000000,0.0
2,4.694880e+03,4.941525e+03,150000.00,150000.00,1.500000e+05,1.466427e+05,4377.0,0.037876,1.000000,1.000000,...,0.000000,0,0.0,0,0.0,0,0.000000,0.000000,0.000000,0.0
3,4.904570e+03,3.739235e+03,150000.00,150000.00,1.500000e+05,1.454469e+05,4377.0,0.037876,0.453035,0.753106,...,0.000000,0,0.0,0,0.0,0,0.000000,0.000000,0.000000,0.0
4,2.362758e+06,2.386725e+06,2500000.00,2500000.00,2.500000e+06,3.238880e+04,4328.0,1.012117,0.996838,0.994644,...,0.000000,0,0.0,0,0.0,0,0.052632,0.105263,0.052632,0.0
5,2.313800e+06,2.387156e+06,2500000.00,2500000.00,2.500000e+06,5.414258e+04,4328.0,1.011884,0.998031,0.994228,...,0.000000,0,0.0,0,0.0,0,0.052632,0.105263,0.052632,0.0
6,2.512068e+06,2.514486e+06,2500000.00,2500000.00,2.500000e+06,7.580693e+04,4327.0,1.011255,0.998786,0.994916,...,0.000000,0,0.0,0,0.0,0,0.055556,0.111111,0.055556,0.0
7,2.511372e+06,2.512656e+06,2500000.00,2500000.00,2.500000e+06,9.409074e+04,4327.0,1.011255,0.996011,0.994225,...,0.000000,0,0.0,0,0.0,0,0.055556,0.111111,0.055556,0.0
8,2.515647e+06,2.514823e+06,2500000.00,2500000.00,2.500000e+06,1.108841e+05,4329.0,1.011255,0.994951,0.993938,...,0.000000,0,0.0,0,0.0,0,0.055556,0.111111,0.055556,0.0
9,2.216725e+06,2.142918e+06,2500000.00,2500000.00,2.500000e+06,9.105828e+04,4327.0,1.011255,0.842071,0.953775,...,0.000000,0,0.0,0,0.0,0,0.050000,0.100000,0.050000,0.0


Unnamed: 0,Feature,Importance
0,AVG_PND_MENOS_D_CRED_LIM_TO_BAL_06M,0.040896
1,MAX_R_BAL_BY_CRED_LIM_06M,0.039510
2,AVG_PND_MAS_R_BAL_BY_MAX_BAL_12M,0.037431
3,AVG_PND_MAS_R_BAL_BY_MAX_BAL_03M,0.037277
4,MAX_MS_OP_BRG_OP_ACC,0.029074
5,AVG_PND_MENOS_BAL_03M,0.027264
6,DS_OL_CL,0.024684
7,AVG_SALINI_OP_ACC_06M,0.022104
8,AVG_PND_MAS_MAX_HI_CALCAR_FLOOR_12M,0.018831
9,AVG_MAX_VP_PER_U12M,0.018831


array(['AVG_PND_MENOS_D_CRED_LIM_TO_BAL_06M', 'MAX_R_BAL_BY_CRED_LIM_06M',
       'AVG_PND_MAS_R_BAL_BY_MAX_BAL_12M',
       'AVG_PND_MAS_R_BAL_BY_MAX_BAL_03M', 'MAX_MS_OP_BRG_OP_ACC',
       'AVG_PND_MENOS_BAL_03M', 'DS_OL_CL', 'AVG_SALINI_OP_ACC_06M',
       'AVG_PND_MAS_MAX_HI_CALCAR_FLOOR_12M', 'AVG_MAX_VP_PER_U12M',
       'AVG_BAL_03M', 'AVG_PND_MENOS_VIGENTEH_12M',
       'AVG_AVG2_VP_MES_U06M', 'MAX_AVG1_VP_MES_U12M',
       'AVG_SALINI_BK_OP_ACC_12M', 'AVG_MS_CL_BK_CL_ACC',
       'MAX_VIGENTEC_BRG_CS_OP_ACC_06M', 'MAX_SALINI_BRG_R_OP_ACC_03M',
       'AVG_SALINI_BK_CS_CL_ACC', 'MIN_SALINI_BRG_R_OP_ACC_12M',
       'MAX_MAX_VP_MES_U06M_2', 'AVG_VIGENTEC_BK_R_OP_ACC_12M',
       'MIN_SALINI_BK_R_OP_ACC_06M', 'PCT_BRG_CL_ACC',
       'AVG_PND_MAS_AVG_HI_CALCAR_FLOOR_03M', 'AVG_VIGENTEH_06M',
       'AVG_VENC1M_OP_ACC_03M', 'SUM_VIGENTEH_06M',
       'AVG_MS_CL_BK_CS_CL_ACC', 'AVG_SALINI_BRG_CS_OP_ACC_12M',
       'AVG_CRED_LIM_12M', 'MAX_PCT_VENC29_06M', 'MAX_AVG2_VP_PER_U12M_2'

Step:  4
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART5.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART5.csv
Loading data


Unnamed: 0,REVOLVENTE,MIN_BAL_03M,AVG_BAL_06M,MAX_BAL_12M,MAX_CRED_LIM_12M,D_CRED_LIM_TO_BAL,DS_CL,BK12_CLEAN,BK12_IND_QCRA,BK12_NUM_CRED,...,PCT_BRG_LSE_CF_OP_ACC,PCT_BRG_LSE_PQ_CL_ACC,PCT_BRG_LSG_OP_ACC,PCT_GBN_CF_OP_ACC,PCT_GBN_LSG_OP_ACC,PCT_GBN_OP_ACC,PCT_LSE_OTHER_CL_ACC,PCT_LSE_PQ_OP_ACC,PCT_UN_CRE_LSG_CL_ACC,PCT_UN_CRE_R_CL_ACC
0,1,2784.00,2.784000e+03,2784.00,150000.00,147216.00,2132.0,0.0,0.0,1.00,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
1,1,4201.59,3.492795e+03,4201.59,150000.00,145798.41,1979.0,0.0,0.0,1.00,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
2,1,4201.59,4.222350e+03,5681.46,150000.00,144318.54,1948.0,0.0,0.0,1.00,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
3,1,1797.01,3.893353e+03,5681.46,150000.00,148202.99,1857.0,0.0,0.0,1.00,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
4,1,2018816.67,2.445993e+06,2530317.07,2500000.00,-4137.56,2892.0,4.0,1.0,0.97,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
5,1,2018816.67,2.443276e+06,2530317.07,2500000.00,-11275.74,2864.0,4.0,1.0,0.97,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
6,1,2504137.56,2.443052e+06,2530317.07,2500000.00,-28136.73,2832.0,3.0,1.0,0.96,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
7,1,2504137.56,2.441912e+06,2530317.07,2500000.00,-7073.66,2802.0,3.0,1.0,0.96,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
8,1,2507073.66,2.442377e+06,2530317.07,2500000.00,-12804.87,2773.0,3.0,1.0,0.96,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0
9,1,2018850.00,2.302594e+06,2529708.97,2500000.00,479442.36,2679.0,3.0,0.0,0.95,...,0,0.0,0.0,0.0,0,0.0,0.0,0.000000,0.0,0.0


Unnamed: 0,Feature,Importance
0,D_CRED_LIM_TO_BAL,0.055176
1,MIN_BAL_03M,0.031979
2,DS_CL,0.030867
3,AVG_BAL_06M,0.030675
4,MAX_MS_OP_BRG_R_OP_ACC,0.030598
5,NBK12_PCT_PROMT,0.026419
6,MAX_BAL_12M,0.025844
7,AVG_AVG1_VP_MES_U12M_2,0.020322
8,MAX_SALINI_OP_ACC_06M,0.019057
9,AVG_PND_MENOS_MAX_HI_CALCAR_FLOOR_06M,0.018750


array(['D_CRED_LIM_TO_BAL', 'MIN_BAL_03M', 'DS_CL', 'AVG_BAL_06M',
       'MAX_MS_OP_BRG_R_OP_ACC', 'NBK12_PCT_PROMT', 'MAX_BAL_12M',
       'AVG_AVG1_VP_MES_U12M_2', 'MAX_SALINI_OP_ACC_06M',
       'AVG_PND_MENOS_MAX_HI_CALCAR_FLOOR_06M', 'AVG_MAX_VP_MES_U03M_2',
       'SUM_VIGENTEH_03M', 'MAX_MIN_VP_MES_U06M', 'MIN_SALINI_OP_ACC_03M',
       'MAX_VIGENTEC_BRG_OP_ACC_12M', 'MAX_CRED_LIM_12M',
       'MAX_MS_CL_BK_CL_ACC', 'AVG1_VP_MES', 'AVG_AVG1_VP_PER_U12M_2',
       'AVG_PND_MAS_PCT_VENC1M_12M', 'AVG_AVG2_VP_PER_U06M_2',
       'AVG_PND_MAS_MAX_HI_CALCAR_FLOOR_03M', 'MAX_PCT_VENC29_12M',
       'AVG_MAX_VP_PER_U03M', 'AVG_SALINI_BK_OP_ACC_03M',
       'MIN_SALINI_BRG_R_OP_ACC_06M', 'NUM_OP_ACC', 'MAX_VENC29_06M',
       'BK_DEUDA_CP', 'AVG_PND_MAS_PCT_VENC29_03M',
       'MIN_SALINI_BK_OP_ACC_06M', 'BK12_NUM_CRED',
       'MIN_SALINI_BK_CS_CL_ACC', 'AVG_SALINI_BRG_CS_OP_ACC_06M',
       'MAX_SALINI_BRG_R_OP_ACC', 'MAX_MS_CL_BK_R_CL_ACC',
       'MAX_VENC1M_BRG_OP_ACC', 'NBK12_PCT_

Step:  5
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART6.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART6.csv
Loading data


Unnamed: 0,MAX_BAL_03M,AVG_PND_MAS_BAL_03M,AVG_PND_MENOS_CRED_LIM_06M,MIN_D_CRED_LIM_TO_BAL_03M,AVG_PND_MENOS_D_CRED_LIM_TO_BAL_03M,AVG_PND_MAS_D_CRED_LIM_TO_BAL_03M,MAX_D_CRED_LIM_TO_BAL_06M,AVG_PND_MAS_D_CRED_LIM_TO_BAL_06M,MAX_D_CRED_LIM_TO_BAL_12M,MIN_D_CRED_LIM_TO_BAL_12M,...,PCT_BK_PQ_OP_ACC,PCT_BRG_LSE_CL_ACC,PCT_BRG_LSE_OP_ACC,PCT_BRG_LSE_OTHER_OP_ACC,PCT_LSE_LSG_OP_ACC,PCT_OTHER_FIN_CS_OP_ACC,PCT_OTHER_FIN_LSG_OP_ACC,PCT_OTHER_FIN_PQ_OP_ACC,PCT_OTHER_FIN_R_CL_ACC,PCT_UN_CRE_LSG_OP_ACC
0,2784.00,2.784000e+03,1.500000e+05,147216.00,1.472160e+05,1.472160e+05,147216.00,1.472160e+05,147216.00,147216.00,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
1,4201.59,4.201590e+03,1.500000e+05,145798.41,1.457984e+05,1.457984e+05,145798.41,1.461134e+05,145798.41,145798.41,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
2,5681.46,5.047230e+03,1.500000e+05,144318.54,1.453051e+05,1.449528e+05,144318.54,1.451597e+05,144318.54,144318.54,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
3,5681.46,2.573900e+03,1.500000e+05,144318.54,1.450954e+05,1.474261e+05,148202.99,1.465779e+05,148202.99,144318.54,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0
4,2514395.69,2.410692e+06,2.500000e+06,-14395.69,1.372418e+05,8.930785e+04,-4137.56,7.562585e+04,-4137.56,-30317.07,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.052632,0.0,0.052632,0.0
5,2514395.69,2.460512e+06,2.500000e+06,-14395.69,1.861995e+05,3.948763e+04,-11275.74,5.930508e+04,-11275.74,-30317.07,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.052632,0.0,0.052632,0.0
6,2528136.73,2.516904e+06,2.500000e+06,-28136.73,-1.206837e+04,-1.690450e+04,-28136.73,3.808994e+04,-28136.73,-30317.07,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.055556,0.0,0.055556,0.0
7,2528136.73,2.513939e+06,2.500000e+06,-28136.73,-1.137246e+04,-1.393939e+04,-7073.66,2.208442e+04,-7073.66,-30317.07,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.055556,0.0,0.055556,0.0
8,2528136.73,2.513999e+06,2.500000e+06,-28136.73,-1.564653e+04,-1.399897e+04,-12804.87,4.361307e+03,-12804.87,-30317.07,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.055556,0.0,0.055556,0.0
9,2512804.87,2.069112e+06,2.500000e+06,-12804.87,2.832751e+05,4.308881e+05,479442.36,3.037532e+05,479442.36,-29708.97,...,0.050000,0.000000,0.000000,0.000000,0.000000,0.0,0.050000,0.0,0.050000,0.0


Unnamed: 0,Feature,Importance
0,AVG_R_BAL_BY_CRED_LIM_06M,0.025194
1,AVG_PND_MAS_R_BAL_BY_CRED_LIM_03M,0.023996
2,MIN_D_CRED_LIM_TO_BAL_12M,0.023706
3,AVG_MS_OP_BRG_R_OP_ACC,0.022762
4,MAX_BAL_03M,0.021637
5,MAX_SALINI_CL_ACC,0.019821
6,AVG_PND_MAS_BAL_03M,0.018297
7,AVG_MS_OP_OP_ACC,0.017607
8,MAX_MS_OP_OP_ACC,0.017389
9,MAX_D_CRED_LIM_TO_BAL_06M,0.017135


array(['AVG_R_BAL_BY_CRED_LIM_06M', 'AVG_PND_MAS_R_BAL_BY_CRED_LIM_03M',
       'MIN_D_CRED_LIM_TO_BAL_12M', 'AVG_MS_OP_BRG_R_OP_ACC',
       'MAX_BAL_03M', 'MAX_SALINI_CL_ACC', 'AVG_PND_MAS_BAL_03M',
       'AVG_MS_OP_OP_ACC', 'MAX_MS_OP_OP_ACC',
       'MAX_D_CRED_LIM_TO_BAL_06M', 'AVG_SALINI_OP_ACC_12M',
       'MIN_D_CRED_LIM_TO_BAL_03M', 'MAX_VIGENTEC_BRG_R_OP_ACC',
       'AVG_MAX_DIAS_PARA_PAGO_U12M', 'AVG_PND_MAS_VIGENTEH_06M',
       'MIN_MS_OP_OP_ACC', 'SUM_AMORTIZACIONEXIGIBLE',
       'AVG_MAX_HI_CALCAR_CEIL_12M', 'AVG_PND_MAS_AVG_HI_CALCAR_L_12M',
       'AVG_PND_MENOS_D_CRED_LIM_TO_BAL_03M', 'MAX_R_BAL_BY_MAX_BAL_06M',
       'AVG_AVG2_VP_PER_U12M', 'MAX_VIGENTEC_OP_ACC_12M',
       'AVG_PND_MENOS_CRED_LIM_06M', 'AVG_PND_MAS_D_CRED_LIM_TO_BAL_06M',
       'AVG_SALINI_BRG_OP_ACC_06M', 'SUM_PAGOREALIZADO_MES',
       'AVG_SALINI_BRG_R_OP_ACC_06M', 'AVG_AVG2_VP_MES_U12M_2',
       'AVG_PCT_VENC1M_OP_ACC', 'MAX_SALINI_OP_ACC_12M',
       'AVG_PND_MAS_D_CRED_LIM_TO_BAL_03M', '

Step:  6
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART7.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART7.csv
Loading data


Unnamed: 0,CUR_BAL,AVG_PND_MAS_BAL_06M,MAX_CRED_LIM_03M,AVG_PND_MAS_CRED_LIM_03M,MAX_CRED_LIM_06M,AVG_PND_MAS_CRED_LIM_12M,MIN_R_BAL_BY_MAX_BAL_06M,BK12_PC_SAT,MONTHS_SINCE_LAST_OPEN_BANKING,MIN_VENC29_06M,...,NUM_LSE_R_CL_ACC,NUM_UN_CRE_OP_ACC,PCT_BRG_LSE_CF_CL_ACC,PCT_BRG_OP_ACC,PCT_CL_ACC,PCT_GBN_CS_OP_ACC,PCT_LSE_OP_ACC,PCT_LSE_OTHER_OP_ACC,PCT_OTHER_FIN_CF_CL_ACC,PCT_OTHER_FIN_OTHER_CL_ACC
0,2784.00,2.784000e+03,150000.00,1.500000e+05,150000.00,1.500000e+05,1.000000,,1.0,0.0,...,0.0,0,0.0,0.400000,0.400000,0.0,0.000000,0.000000,0.0,0.000000
1,4201.59,3.886570e+03,150000.00,1.500000e+05,150000.00,1.500000e+05,1.000000,,1.0,0.0,...,0.0,0,0.0,0.400000,0.400000,0.0,0.000000,0.000000,0.0,0.000000
2,5681.46,4.840269e+03,150000.00,1.500000e+05,150000.00,1.500000e+05,1.000000,,1.0,0.0,...,0.0,0,0.0,0.400000,0.400000,0.0,0.000000,0.000000,0.0,0.000000
3,1797.01,3.422120e+03,150000.00,1.500000e+05,150000.00,1.500000e+05,0.316294,,1.0,0.0,...,0.0,0,0.0,0.400000,0.400000,0.0,0.000000,0.000000,0.0,0.000000
4,2504137.56,2.424374e+06,2500000.00,2.500000e+06,2500000.00,2.500000e+06,0.989664,0.00,1.0,0.0,...,0.0,0,0.0,0.105263,0.368421,0.0,0.000000,0.000000,0.0,0.000000
5,2511275.74,2.440695e+06,2500000.00,2.500000e+06,2500000.00,2.500000e+06,0.989664,0.00,1.0,0.0,...,0.0,0,0.0,0.105263,0.368421,0.0,0.000000,0.000000,0.0,0.000000
6,2528136.73,2.461910e+06,2500000.00,2.500000e+06,2500000.00,2.500000e+06,0.989664,0.00,1.0,0.0,...,0.0,0,0.0,0.111111,0.388889,0.0,0.000000,0.000000,0.0,0.000000
7,2507073.66,2.477916e+06,2500000.00,2.500000e+06,2500000.00,2.500000e+06,0.989664,0.00,1.0,0.0,...,0.0,0,0.0,0.111111,0.388889,0.0,0.000000,0.000000,0.0,0.000000
8,2512804.87,2.495639e+06,2500000.00,2.500000e+06,2500000.00,2.500000e+06,0.989664,0.00,1.0,0.0,...,0.0,0,0.0,0.111111,0.388889,0.0,0.000000,0.000000,0.0,0.000000
9,2020557.64,2.196247e+06,2500000.00,2.500000e+06,2500000.00,2.500000e+06,0.799228,0.00,1.0,0.0,...,0.0,0,0.0,0.100000,0.400000,0.0,0.000000,0.000000,0.0,0.000000


Unnamed: 0,Feature,Importance
0,MIN_R_BAL_BY_MAX_BAL_06M,0.052627
1,CUR_BAL,0.038506
2,AVG_PND_MAS_BAL_06M,0.032823
3,AVG_AVG1_VP_MES_U03M,0.023706
4,MAX_AVG2_VP_MES_U12M,0.023324
5,AVG_PND_MENOS_MAX_HI_CALCAR_CEIL_12M,0.022815
6,PCT_CL_ACC,0.020652
7,PCT_BRG_OP_ACC,0.020440
8,AVG_PND_MAS_AVG_HI_CALCAR_CEIL_12M,0.019041
9,MIN_SALINI_OP_ACC_12M,0.018871


array(['MIN_R_BAL_BY_MAX_BAL_06M', 'CUR_BAL', 'AVG_PND_MAS_BAL_06M',
       'AVG_AVG1_VP_MES_U03M', 'MAX_AVG2_VP_MES_U12M',
       'AVG_PND_MENOS_MAX_HI_CALCAR_CEIL_12M', 'PCT_CL_ACC',
       'PCT_BRG_OP_ACC', 'AVG_PND_MAS_AVG_HI_CALCAR_CEIL_12M',
       'MIN_SALINI_OP_ACC_12M', 'MAX_VIGENTEH_03M',
       'AVG_AVG1_VP_PER_U06M', 'MAX_VP_MES', 'AVG_VIGENTEH_03M',
       'AVG_PND_MENOS_AVG_HI_CALCAR_L_03M', 'AVG_SALINI_BRG_R_OP_ACC_12M',
       'AVG_MS_OP_BK_CS_OP_ACC', 'AVG_PND_MENOS_PCT_VENC1M_12M',
       'MAX_SALINI_BRG_CL_ACC', 'AVG_VIGENTEC_BK_OP_ACC_12M',
       'MIN_SALINI_BRG_R_OP_ACC', 'AVG_PND_MAS_CRED_LIM_12M',
       'MAX_MIN_VP_PER_U03M_2', 'MIN_SALINI_BK_R_OP_ACC_03M',
       'AVG_PND_MENOS_PCT_VENC29_03M', 'MIN_SALINI_BK_CL_ACC',
       'MAX_CRED_LIM_06M', 'MIN_MS_CL_BRG_R_CL_ACC',
       'AVG_PND_MAS_VENCIDO1MAS_03M', 'MIN_SALINI_BK_CS_OP_ACC_03M',
       'AVG_MS_CL_BK_R_CL_ACC', 'MAX_SALINI_BK_R_OP_ACC_12M',
       'MAX_PCT_VENC1M_06M', 'AVG_PND_MAS_VENCIDO1MAS_12M',
  

Step:  7
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART8.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART8.csv
Loading data


Unnamed: 0,MAX_BAL_06M,CUR_CRED_LIM,MIN_CRED_LIM_03M,AVG_PND_MAS_R_BAL_BY_CRED_LIM_12M,MAX_R_BAL_BY_MAX_BAL_03M,BK_IND_PMOR,MAX_FD_CALCAR_L,MAX_FD_CALCAR_FLOOR,MAX_VENC29_03M,MAX_VENC29_12M,...,PCT_BRG_CF_CL_ACC,PCT_BRG_R_CL_ACC,PCT_BRG_R_OP_ACC,PCT_GBN_R_CL_ACC,PCT_LSE_R_CL_ACC,PCT_OTHER_FIN_CF_OP_ACC,PCT_OTHER_FIN_OTHER_OP_ACC,PCT_OTHER_FIN_PQ_CL_ACC,PCT_UN_CRE_CS_OP_ACC,PCT_UN_CRE_PQ_CL_ACC
0,2784.00,150000.00,150000.00,0.018560,1.000000,150000.0,,,0.0,0.0,...,0.0,0.200000,0.400000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4201.59,150000.00,150000.00,0.024410,1.000000,150000.0,,,0.0,0.0,...,0.0,0.200000,0.400000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5681.46,150000.00,150000.00,0.029951,1.000000,150000.0,,,0.0,0.0,...,0.0,0.200000,0.400000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,5681.46,150000.00,150000.00,0.023912,1.000000,150000.0,,,0.0,0.0,...,0.0,0.200000,0.400000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2530291.29,2500000.00,2500000.00,0.982666,0.999741,4000000.0,5.0,1.133333,19.0,1545.0,...,0.0,0.052632,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2529708.97,2500000.00,2500000.00,0.984468,0.999741,4000000.0,5.0,1.133333,19.0,1545.0,...,0.0,0.052632,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2528136.73,2500000.00,2500000.00,0.987270,1.000000,4000000.0,5.0,1.100000,9.0,1545.0,...,0.0,0.055556,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2528136.73,2500000.00,2500000.00,0.988816,1.000000,4000000.0,5.0,1.100000,9.0,1545.0,...,0.0,0.055556,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,2528136.73,2500000.00,2500000.00,0.990727,1.000000,4000000.0,5.0,1.100000,9.0,1545.0,...,0.0,0.055556,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2528136.73,2500000.00,2500000.00,0.918525,0.993936,4000000.0,5.0,1.666667,8.0,1500.0,...,0.0,0.050000,0.100000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,Feature,Importance
0,AVG_PND_MAS_R_BAL_BY_CRED_LIM_12M,0.067220
1,MAX_BAL_06M,0.041099
2,MAX_R_BAL_BY_MAX_BAL_03M,0.026477
3,AVG_MS_OP_BRG_OP_ACC,0.023750
4,SUM_VIGENTEH_12M,0.023276
5,AVG_VIGENTEC_BRG_R_OP_ACC_12M,0.023157
6,AVG_MS_CL_CL_ACC,0.020944
7,MIN_MS_OP_BRG_R_OP_ACC,0.020391
8,MAX_VIGENTEC_OP_ACC_03M,0.020352
9,PCT_BRG_R_OP_ACC,0.019759


array(['AVG_PND_MAS_R_BAL_BY_CRED_LIM_12M', 'MAX_BAL_06M',
       'MAX_R_BAL_BY_MAX_BAL_03M', 'AVG_MS_OP_BRG_OP_ACC',
       'SUM_VIGENTEH_12M', 'AVG_VIGENTEC_BRG_R_OP_ACC_12M',
       'AVG_MS_CL_CL_ACC', 'MIN_MS_OP_BRG_R_OP_ACC',
       'MAX_VIGENTEC_OP_ACC_03M', 'PCT_BRG_R_OP_ACC',
       'AVG_MIN_VP_PER_U06M_2', 'AVG_AVG1_VP_MES_U12M',
       'MAX_AVG1_VP_MES_U06M_2', 'AVG_MAX_HI_CALCAR_FLOOR_12M',
       'MAX_AVG2_VP_MES_U12M_2', 'BK_IND_PMOR', 'MIN_SALINI_CL_ACC',
       'MAX_MIN_VP_PER_U12M_2', 'AVG_SALINI_CL_ACC_12M',
       'AVG_PND_MENOS_AVG_HI_CALCAR_CEIL_06M',
       'AVG_PND_MAS_PCT_VIGENTEH_12M', 'MAX_MS_OP_BK_CS_OP_ACC',
       'CUR_CRED_LIM', 'AVG_SALINI_BK_OP_ACC', 'PCT_BK_CL_ACC',
       'AVG_PND_MAS_MAX_HI_CALCAR_L_03M', 'MAX_VENC29_12M',
       'AVG_PND_MAS_PCT_VENC29_06M',
       'AVG_PND_MAS_MAX_HI_CALCAR_FLOOR_06M',
       'MAX_VIGENTEC_BRG_CS_OP_ACC_03M', 'AVG_SALINI_BRG_CL_ACC',
       'AVG_VENC1M_OP_ACC', 'AVG_PCT_VENC1M_OP_ACC_12M',
       'AVG_SALINI_BK_R_OP_

Step:  8
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/DATA_PART9.csv
datalake/data/InteligenciaRiesgos/M&M/MCV/DATASETS_3/JAT_MCV_VAR_VARIABLES_SELECCION/PARTITION/FEATURE_SELECTION/FEATURE_IMPORTANCE_PART9.csv
Loading data


Unnamed: 0,MIN_BAL_06M,MIN_BAL_12M,AVG_PND_MENOS_CRED_LIM_03M,AVG_CRED_LIM_06M,AVG_D_CRED_LIM_TO_BAL_03M,R_BAL_BY_CRED_LIM,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_03M,AVG_R_BAL_BY_MAX_BAL_03M,BK12_NUM_EXP_PAIDONTIME,NBK12_COMM_PCT_PLUS,...,NUM_LSE_CF_CL_ACC,NUM_LSE_CS_OP_ACC,NUM_LSE_LSG_OP_ACC,NUM_OTHER_FIN_R_CL_ACC,NUM_UN_CRE_LSG_CL_ACC,NUM_UN_CRE_R_OP_ACC,PCT_BK_OP_ACC,PCT_BRG_LSE_OTHER_CL_ACC,PCT_BRG_OTHER_CL_ACC,PCT_LSE_CF_CL_ACC
0,2784.00,2784.00,1.500000e+05,1.500000e+05,1.472160e+05,0.018560,1.000000,1.000000,,2,...,0.0,0,0,0.0,0.0,0,0.000000,0.0,0.0,0.0
1,2784.00,2784.00,1.500000e+05,1.500000e+05,1.457984e+05,0.028011,1.000000,1.000000,,2,...,0.0,0,0,0.0,0.0,0,0.000000,0.0,0.0,0.0
2,2784.00,2784.00,1.500000e+05,1.500000e+05,1.450585e+05,0.037876,1.000000,1.000000,,2,...,0.0,0,0,0.0,0.0,0,0.000000,0.0,0.0,0.0
3,1797.01,1797.01,1.500000e+05,1.500000e+05,1.462608e+05,0.011980,0.863259,0.658147,,1,...,0.0,0,0,0.0,0.0,0,0.000000,0.0,0.0,0.0
4,2018816.67,2018816.67,2.500000e+06,2.500000e+06,1.132748e+05,1.001655,0.994659,0.995821,1.00,11,...,0.0,0,0,1.0,0.0,0,0.368421,0.0,0.0,0.0
5,2018816.67,2018816.67,2.500000e+06,2.500000e+06,1.128436e+05,1.004510,0.998304,0.998140,1.00,11,...,0.0,0,0,1.0,0.0,0,0.368421,0.0,0.0,0.0
6,2018816.67,2018816.67,2.500000e+06,2.500000e+06,-1.448643e+04,1.011255,0.998424,0.998605,1.00,12,...,0.0,0,0,1.0,0.0,0,0.333333,0.0,0.0,0.0
7,2018816.67,2018816.67,2.500000e+06,2.500000e+06,-1.265592e+04,1.002829,0.997163,0.996587,1.00,12,...,0.0,0,0,1.0,0.0,0,0.333333,0.0,0.0,0.0
8,2018816.67,2018816.67,2.500000e+06,2.500000e+06,-1.482275e+04,1.005122,0.997231,0.996091,1.00,12,...,0.0,0,0,1.0,0.0,0,0.333333,0.0,0.0,0.0
9,2018850.00,2018816.67,2.500000e+06,2.500000e+06,3.570816e+05,0.808223,0.955969,0.899020,1.00,12,...,0.0,0,0,1.0,0.0,0,0.350000,0.0,0.0,0.0


Unnamed: 0,Feature,Importance
0,AVG_D_CRED_LIM_TO_BAL_03M,0.038320
1,R_BAL_BY_CRED_LIM,0.036167
2,NBK12_DEUDA_TOT,0.026029
3,MIN_BAL_06M,0.024307
4,AVG_R_BAL_BY_MAX_BAL_03M,0.023329
5,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_03M,0.022742
6,MIN_BAL_12M,0.022428
7,AVG_VIGENTEC_BRG_R_OP_ACC_03M,0.018553
8,AVG_SALINI_OP_ACC,0.017496
9,MIN_VIGENTEH_12M,0.014952


array(['AVG_D_CRED_LIM_TO_BAL_03M', 'R_BAL_BY_CRED_LIM',
       'NBK12_DEUDA_TOT', 'MIN_BAL_06M', 'AVG_R_BAL_BY_MAX_BAL_03M',
       'AVG_PND_MENOS_R_BAL_BY_MAX_BAL_03M', 'MIN_BAL_12M',
       'AVG_VIGENTEC_BRG_R_OP_ACC_03M', 'AVG_SALINI_OP_ACC',
       'MIN_VIGENTEH_12M', 'AVG_MIN_VP_PER_U12M_2',
       'AVG_MS_OP_BK_R_OP_ACC', 'MIN_SALINI_BK_OP_ACC_12M',
       'AVG_MIN_VP_PER_U06M', 'AVG_MAX_VP_MES_U06M', 'PCT_BK_OP_ACC',
       'MAX_VIGENTEC_BRG_OP_ACC_03M', 'AVG_PCT_VIGENTEH_12M',
       'MIN_SALINI_BRG_OP_ACC_12M', 'MAX_AVG1_VP_MES_U12M_2',
       'AVG_MS_OP_BRG_CS_OP_ACC', 'AVG_AVG2_VP_MES_U03M',
       'MAX_VENCIDO1MAS_12M', 'AVG_MAX_DIAS_PARA_PAGO_U03M',
       'NBK12_COMM_PCT_PLUS', 'AVG_VIGENTEC_OP_ACC', 'AVG_CRED_LIM_06M',
       'MAX_AVG1_VP_MES_U03M', 'AVG_PCT_VIGENTEC_OP_ACC',
       'AVG_PND_MENOS_CRED_LIM_03M', 'AVG_PND_MAS_MAX_HI_CALCAR_L_06M',
       'MIN_SALINI_BRG_CS_OP_ACC_12M', 'MAX_AVG2_VP_PER_U06M_2',
       'AVG_SALINI_BRG_OP_ACC', 'MAX_AVG2_VP_MES_U06M_2',
  

Step:  9
Uploading feature importance
Model features saved!


In [8]:
display(model_features.describe())
display(model_features)

Model features saved!


Unnamed: 0,AVG_R_BAL_BY_CRED_LIM_12M,DEUDA_TOT,MIN_R_BAL_BY_MAX_BAL_03M,MAX_D_CRED_LIM_TO_BAL_03M,AVG_D_CRED_LIM_TO_BAL_06M,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_06M,AVG_R_BAL_BY_MAX_BAL_12M,AVG_PND_MENOS_BAL_12M,BK12_MAX_CREDIT_AMT,AVG_PND_MAS_BAL_12M,...,AVG_PND_MAS_MAX_HI_CALCAR_L_06M,MIN_SALINI_BRG_CS_OP_ACC_12M,MAX_AVG2_VP_PER_U06M_2,AVG_SALINI_BRG_OP_ACC,MAX_AVG2_VP_MES_U06M_2,MIN_AVG_HI_CALCAR_CEIL_12M,AVG_PND_MAS_MAX_HI_CALCAR_L_12M,MAX_MS_CL_BRG_R_CL_ACC,AVG_PND_MAS_MAX_HI_CALCAR_CEIL_12M,SUM_PAGOREALIZADO_PERIODO
count,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,...,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0,425287.0
mean,0.012022,3.243717,-0.483808,0.672008,0.6833,-0.341297,-0.251894,0.483345,3.288619,0.482477,...,0.263964,306757.1,290.9201,0.513216,501.2855,0.855755,0.223056,2.168418,1.402803,6.400931
std,23.915491,350.995352,1.10028,1.825779,1.946918,0.941598,0.768336,1.180457,492.157345,1.177261,...,0.927873,1241058.0,31015.45,1.604958,91498.31,6.743288,0.885903,16.910407,5.052671,124.400862
min,-1.789694,-0.663866,-3.601348,-10.698307,-11.800963,-4.373871,-3.104854,-0.371077,-0.967742,-0.371139,...,-1.25,-499137.0,-8.06148,-0.525291,-0.0910189,-0.944444,-1.377633,-31.0,-1.123769,-0.255973
25%,-0.576496,-0.386555,-0.733688,-0.230336,-0.234195,-0.648744,-0.635807,-0.250403,-0.419355,-0.253856,...,-0.363637,0.0,0.0,-0.319066,-0.04634485,-0.944444,-0.385737,0.0,-0.275705,-0.224162
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.423504,0.613445,0.266312,0.769664,0.765805,0.351256,0.364193,0.749597,0.580645,0.746144,...,0.636363,0.0,1.0,0.680934,0.9536552,0.055556,0.614263,0.0,0.724295,0.775838
max,7368.94422,56249.680672,0.359358,103.564446,131.391148,0.439008,0.486416,20.991531,96773.225806,14.555473,...,4.454544,20500000.0,6484061.0,34.494163,22337030.0,164.222222,4.188006,208.0,70.338397,24488.298307


Unnamed: 0,AVG_R_BAL_BY_CRED_LIM_12M,DEUDA_TOT,MIN_R_BAL_BY_MAX_BAL_03M,MAX_D_CRED_LIM_TO_BAL_03M,AVG_D_CRED_LIM_TO_BAL_06M,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_06M,AVG_R_BAL_BY_MAX_BAL_12M,AVG_PND_MENOS_BAL_12M,BK12_MAX_CREDIT_AMT,AVG_PND_MAS_BAL_12M,...,AVG_PND_MAS_MAX_HI_CALCAR_L_06M,MIN_SALINI_BRG_CS_OP_ACC_12M,MAX_AVG2_VP_PER_U06M_2,AVG_SALINI_BRG_OP_ACC,MAX_AVG2_VP_MES_U06M_2,MIN_AVG_HI_CALCAR_CEIL_12M,AVG_PND_MAS_MAX_HI_CALCAR_L_12M,MAX_MS_CL_BRG_R_CL_ACC,AVG_PND_MAS_MAX_HI_CALCAR_CEIL_12M,SUM_PAGOREALIZADO_PERIODO
0,-1.743006,-0.025210,0.359358,-0.144109,-0.105921,0.439008,0.486416,-0.369388,-0.580645,-0.369483,...,0.000000,0.0,0.000000,-0.350195,0.000000,0.000000,0.000000,12.0,0.000000,0.000000
1,-1.731119,0.016807,0.359358,-0.145578,-0.106867,0.439008,0.486416,-0.369265,-0.548387,-0.368961,...,-1.151515,0.0,0.000000,-0.350195,0.000000,0.055556,-1.262592,17.0,-1.026672,0.000000
2,-1.718884,0.016807,0.359358,-0.145578,-0.107841,0.439008,0.486416,-0.369040,-0.548387,-0.368467,...,-1.151515,0.0,0.000000,-0.350195,0.000000,0.055556,-1.262592,17.0,-1.026672,0.000000
3,-1.729053,0.058824,-2.348602,-0.143087,-0.107402,0.109949,-0.127429,-0.368852,-0.612903,-0.369006,...,-0.848484,0.0,0.000000,-0.350195,0.000000,0.055556,-1.021059,21.0,-0.817812,0.000000
4,0.705472,-0.142857,0.326892,0.201820,-0.230332,0.422180,0.468819,1.147149,1.354839,1.089671,...,1.909090,0.0,-2.741131,0.933852,1.213713,1.222222,1.708266,15.0,2.435759,0.285833
5,0.704829,-0.142857,0.343199,0.201820,-0.226706,0.413795,0.467903,1.143641,1.354839,1.092350,...,1.909090,0.0,0.000000,0.933852,1.213713,1.222222,1.708266,15.0,2.435759,-0.255973
6,0.705749,-0.117647,0.343199,-0.300884,-0.226406,0.405068,0.469374,1.140501,1.225806,1.096515,...,1.909090,0.0,0.000000,0.933852,1.213713,1.013889,1.708266,18.0,2.435759,4.833803
7,0.705086,-0.117647,0.326359,-0.300884,-0.224886,0.403919,0.468427,1.137357,1.225806,1.098814,...,1.909090,0.0,0.000000,0.933852,1.213713,1.013889,1.708266,18.0,2.435759,0.657336
8,0.705862,-0.117647,0.326359,-0.303925,-0.225506,0.406636,0.469677,1.135393,1.225806,1.101655,...,1.909090,0.0,0.000000,0.933852,1.213713,1.013889,1.708266,18.0,2.435759,-0.255973
9,0.587296,-0.092437,-0.416527,0.201785,-0.038930,0.372529,0.394162,1.101933,1.419355,0.994320,...,1.909090,0.0,0.000000,0.933852,0.073968,1.013889,1.708266,21.0,2.435759,-0.255973


In [6]:
extra_features_file_name = 'EXTRA_FEATURES/JAT_MCV_VAR_VARIABLES_DIASATRASO.csv'
extra_features_file_key = "{}/{}".format(prefix, extra_features_file_name)
extra_features_obj = s3_bucket_resource.Object(extra_features_file_key).get()
extra_features_data = pd.read_csv(io.BytesIO(extra_features_obj['Body'].read()))

In [9]:
display(model_features.shape)
display(extra_features_data.shape)
display(model_features.head())
display(extra_features_data.head())

(425287, 400)

(425287, 4)

Unnamed: 0,AVG_R_BAL_BY_CRED_LIM_12M,DEUDA_TOT,MIN_R_BAL_BY_MAX_BAL_03M,MAX_D_CRED_LIM_TO_BAL_03M,AVG_D_CRED_LIM_TO_BAL_06M,AVG_PND_MENOS_R_BAL_BY_MAX_BAL_06M,AVG_R_BAL_BY_MAX_BAL_12M,AVG_PND_MENOS_BAL_12M,BK12_MAX_CREDIT_AMT,AVG_PND_MAS_BAL_12M,...,AVG_PND_MAS_MAX_HI_CALCAR_L_06M,MIN_SALINI_BRG_CS_OP_ACC_12M,MAX_AVG2_VP_PER_U06M_2,AVG_SALINI_BRG_OP_ACC,MAX_AVG2_VP_MES_U06M_2,MIN_AVG_HI_CALCAR_CEIL_12M,AVG_PND_MAS_MAX_HI_CALCAR_L_12M,MAX_MS_CL_BRG_R_CL_ACC,AVG_PND_MAS_MAX_HI_CALCAR_CEIL_12M,SUM_PAGOREALIZADO_PERIODO
0,0.01856,76.0,1.0,147216.0,147216.0,1.0,1.0,2784.0,12.0,2784.0,...,,,,150000.0,,,,43.0,,
1,0.023285,81.0,1.0,145798.41,146507.205,1.0,1.0,2986.513,13.0,3661.556,...,0.190476,,,150000.0,,1.5,0.207792,48.0,0.155844,
2,0.028149,81.0,1.0,145798.41,145777.65,1.0,1.0,3357.264,13.0,4492.689,...,0.190476,,,150000.0,,1.5,0.207792,48.0,0.155844,
3,0.024107,86.0,0.316294,148202.99,146106.646667,0.931629,0.829073,3668.54,11.0,3586.834,...,0.666667,,,150000.0,,1.5,0.597403,52.0,0.448052,
4,0.991896,62.0,0.991803,481183.33,54007.322857,0.996503,0.9951,2502814.0,72.0,2456664.0,...,5.0,,0.659972,1250000.0,29.20558,2.2,5.0,46.0,5.0,10645.0


Unnamed: 0,DIAS_ATRASO,DPCV_CAP,DPCV_INT,DPCV_MIN
0,0,61,61,61
1,0,61,61,61
2,0,61,61,61
3,0,61,61,61
4,0,61,61,61


In [10]:
print('Uploading raw features + extra data')
model_features_ext = pd.concat([model_features, extra_features_data], axis=1)
model_features_ext_file_name = "MODEL_DATASET/RAW_MODEL_FEATURES+EXTRAS.csv"
model_features_ext_obj_key =  "{}/{}".format(prefix, model_features_ext_file_name)    

f_str = io.StringIO()
model_features_ext.to_csv(f_str, index=False)
s3_bucket_resource.Object(model_features_ext_obj_key).put(Body=f_str.getvalue())
print('Model raw features+extras saved!')

Uploading feature importance
Model features saved!
