In [1]:
import pandas as pd
import numpy as np
from plotnine import *
%matplotlib inline

In [2]:
data_lag_n = pd.read_csv("../../data/processed/pagos_por_unidad.csv",
                  dtype = {
                                "unidad_id": "object",
                            },
                  parse_dates = ["expensa_mes_pago"])

In [3]:
data = pd.read_csv("../../data/processed/expensas_full_processed_vis_nona.csv",
                  dtype = {
                                "expensa_id": "object",
                                "unidad_id": "object",
                                "consorcio_id": "object",
                                "expensa_mes":"object",
                            },
                  parse_dates = ["expensa_fecha", "expensa_mes_pago", "expensa_mes_pago_anterior"])

In [4]:
final_data = pd.merge(
    data,
    data_lag_n,
    left_on = ["unidad_id", "expensa_mes_pago"],
    right_on = ["unidad_id", "expensa_mes_pago"],
    how = "inner"
)

## Final Data dimensions

In [38]:
final_data.shape

(153335, 25)

In [39]:
final_data.target.mean()

0.18753057031988782

In [43]:
final_data = final_data.dropna()

## Data preprocessing

In [44]:
final_data.columns = ['expensa_id', 'consorcio_id', 'consorcio_nombre',
       'consorcio_cantidad_expensas', 'consorcio_prop_impagos',
       'consorcio_prop_efectivo', 'unidad_id', 'unidad_denominacion',
       'unidad_tipo', 'expensa_proporcion',
       'expensa_interes_primer_vencimiento',
       'expensa_ineteres_segundo_vencimiento', 'expensa_mes', 'expensa_fecha',
       'expensa_mes_pago', 'pago_metodo', 'target_x',
       'expensa_mes_pago_anterior', 'pago_metodo_lag_6', 'pago_metodo_lag_5',
       'pago_metodo_lag_4', 'pago_metodo_lag_3', 'pago_metodo_lag_2',
       'pago_metodo_lag_1', 'target']

In [45]:
X_columns = ['consorcio_cantidad_expensas', 'consorcio_prop_impagos',
       'consorcio_prop_efectivo',
       'unidad_tipo', 'expensa_proporcion',
       'expensa_interes_primer_vencimiento',
       'expensa_ineteres_segundo_vencimiento', 'expensa_mes',
       'pago_metodo_lag_3', 'pago_metodo_lag_2',
       'pago_metodo_lag_1']

final_data = final_data.sort_values("expensa_mes_pago").reset_index().drop(columns="index")
min_cv_id = final_data.loc[final_data.expensa_mes_pago == "2018-06-01", "expensa_mes_pago"].idxmin()
min_test_id = final_data.loc[final_data.expensa_mes_pago == "2018-09-01", "expensa_mes_pago"].idxmin()

In [46]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
def get_train_test_meassures(model, X_train, Y_train, X_test, Y_test):
    train_preds = model.predict(X_train)
    print("accuracy:", accuracy_score(Y_train, train_preds))
    print("precision:", precision_score(Y_train, train_preds))
    print("recall:", recall_score(Y_train, train_preds))
    print("f1:", f1_score(Y_train, train_preds))
    print("--------------------------------------------------")
    test_preds = model.predict(X_test)
    print("accuracy:", accuracy_score(Y_test, test_preds))
    print("precision:", precision_score(Y_test, test_preds))
    print("recall:", recall_score(Y_test, test_preds))
    print("f1:", f1_score(Y_test, test_preds))

In [47]:
def get_train_test_split(X_columns):
    X_train = pd.get_dummies(final_data.loc[0:min_test_id, X_columns], drop_first=True)
    X_test = pd.get_dummies(final_data.loc[min_test_id:, X_columns], drop_first=True)
    Y_train = final_data.loc[0:min_test_id, "target"]
    Y_test = final_data.loc[min_test_id:, "target"]
      
    drop_test_columns = [column for column in X_test.columns if column not in X_train.columns]
    X_test = X_test.drop(columns=drop_test_columns)

    for column in [column for column in X_train.columns if column not in X_test.columns]:
        X_test[column] = 0
    
    X_test = X_test[X_train.columns]
    print(X_train.shape, Y_train.shape)
    print(X_test.shape, Y_test.shape)
    return X_train, Y_train, X_test, Y_test

In [48]:
 X_train, Y_train, X_test, Y_test = get_train_test_split(X_columns)

(119793, 37) (119793,)
(33447, 37) (33447,)


In [49]:
X_train.columns

Index(['consorcio_cantidad_expensas', 'consorcio_prop_impagos',
       'consorcio_prop_efectivo', 'expensa_proporcion',
       'expensa_interes_primer_vencimiento',
       'expensa_ineteres_segundo_vencimiento', 'unidad_tipo_Casa',
       'unidad_tipo_Cochera', 'unidad_tipo_Departamento', 'unidad_tipo_Duplex',
       'unidad_tipo_Local', 'unidad_tipo_Lote', 'unidad_tipo_Oficina',
       'expensa_mes_02', 'expensa_mes_03', 'expensa_mes_04', 'expensa_mes_05',
       'expensa_mes_06', 'expensa_mes_07', 'expensa_mes_08', 'expensa_mes_09',
       'expensa_mes_10', 'expensa_mes_11', 'expensa_mes_12',
       'pago_metodo_lag_3_EntePago', 'pago_metodo_lag_3_Impago',
       'pago_metodo_lag_3_Internet', 'pago_metodo_lag_3_NS/NC',
       'pago_metodo_lag_2_EntePago', 'pago_metodo_lag_2_Impago',
       'pago_metodo_lag_2_Internet', 'pago_metodo_lag_2_NS/NC',
       'pago_metodo_lag_1_EntePago', 'pago_metodo_lag_1_Impago',
       'pago_metodo_lag_1_Internet', 'pago_metodo_lag_1_NS/NC',
       'pag

## Naive Model

In [50]:
from sklearn.linear_model import LogisticRegression
logReg = LogisticRegression(
    C = 1000000,
    solver = "lbfgs",
    penalty = "l2",
    max_iter = 100,
)
logReg.fit(X_train[["pago_metodo_lag_1_Impago"]], Y_train) 
get_train_test_meassures(logReg, X_train[["pago_metodo_lag_1_Impago"]], Y_train, X_test[["pago_metodo_lag_1_Impago"]], Y_test)

accuracy: 0.8628300485003297
precision: 0.6457750640141816
recall: 0.6878278017874376
f1: 0.6661384046487058
--------------------------------------------------
accuracy: 0.860854486202051
precision: 0.5247076574877405
recall: 0.565907241659886
f1: 0.544529262086514


## Logistic Regression

In [51]:
from sklearn.linear_model import LogisticRegression
logReg = LogisticRegression(
    C = 1000000,
    solver = "lbfgs",
    penalty = "l2",
    max_iter = 100,
)
logReg.fit(X_train, Y_train) 
get_train_test_meassures(logReg, X_train , Y_train, X_test, Y_test)

accuracy: 0.8807359361565367
precision: 0.7432473753949649
recall: 0.6119246423026895
f1: 0.671223104360833
--------------------------------------------------
accuracy: 0.8807964839895954
precision: 0.6132098464538143
recall: 0.5117982099267697
f1: 0.557933252023506




## Random Forest

In [54]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

param_grid = {
    "n_estimators": [10, 100, 500],
    "max_depth": [2,4,6],
    "min_samples_split": [0.2, 0.5, 0.8],    
}

rf = GridSearchCV(
    estimator = RandomForestClassifier(max_features="sqrt"),
    param_grid = param_grid,
    scoring = 'f1',
    n_jobs = 1,
    cv = [(list(range(0,min_cv_id)), list(range(min_cv_id,min_test_id)))],
    return_train_score = True,
    error_score = 0,
    verbose = 5
)

In [56]:
rf.fit(X_train.values, Y_train.values) 

Fitting 1 folds for each of 27 candidates, totalling 27 fits
[CV] max_depth=2, min_samples_split=0.2, n_estimators=10 .............


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.4s remaining:    0.0s


[CV]  max_depth=2, min_samples_split=0.2, n_estimators=10, score=0.0, total=   0.3s
[CV] max_depth=2, min_samples_split=0.2, n_estimators=100 ............
[CV]  max_depth=2, min_samples_split=0.2, n_estimators=100, score=0.26108048511576626, total=   2.8s
[CV] max_depth=2, min_samples_split=0.2, n_estimators=500 ............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    3.9s remaining:    0.0s


[CV]  max_depth=2, min_samples_split=0.2, n_estimators=500, score=0.5665482834432606, total=  13.3s
[CV] max_depth=2, min_samples_split=0.5, n_estimators=10 .............


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   20.2s remaining:    0.0s
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   20.6s remaining:    0.0s


[CV]  max_depth=2, min_samples_split=0.5, n_estimators=10, score=0.0, total=   0.2s
[CV] max_depth=2, min_samples_split=0.5, n_estimators=100 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=2, min_samples_split=0.5, n_estimators=100, score=0.0, total=   2.1s
[CV] max_depth=2, min_samples_split=0.5, n_estimators=500 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=2, min_samples_split=0.5, n_estimators=500, score=0.0, total=  10.2s
[CV] max_depth=2, min_samples_split=0.8, n_estimators=10 .............


  'precision', 'predicted', average, warn_for)


[CV]  max_depth=2, min_samples_split=0.8, n_estimators=10, score=0.0, total=   0.0s
[CV] max_depth=2, min_samples_split=0.8, n_estimators=100 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=2, min_samples_split=0.8, n_estimators=100, score=0.0, total=   1.0s
[CV] max_depth=2, min_samples_split=0.8, n_estimators=500 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=2, min_samples_split=0.8, n_estimators=500, score=0.0, total=   5.0s
[CV] max_depth=4, min_samples_split=0.2, n_estimators=10 .............
[CV]  max_depth=4, min_samples_split=0.2, n_estimators=10, score=0.591849935316947, total=   0.4s
[CV] max_depth=4, min_samples_split=0.2, n_estimators=100 ............
[CV]  max_depth=4, min_samples_split=0.2, n_estimators=100, score=0.5995518565941101, total=   3.4s
[CV] max_depth=4, min_samples_split=0.2, n_estimators=500 ............
[CV]  max_depth=4, min_samples_split=0.2, n_estimators=500, score=0.5899444988573295, total=  17.3s
[CV] max_depth=4, min_samples_split=0.5, n_estimators=10 .............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=4, min_samples_split=0.5, n_estimators=10, score=0.0, total=   0.2s
[CV] max_depth=4, min_samples_split=0.5, n_estimators=100 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=4, min_samples_split=0.5, n_estimators=100, score=0.0, total=   2.1s
[CV] max_depth=4, min_samples_split=0.5, n_estimators=500 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=4, min_samples_split=0.5, n_estimators=500, score=0.0, total=  10.2s
[CV] max_depth=4, min_samples_split=0.8, n_estimators=10 .............


  'precision', 'predicted', average, warn_for)


[CV]  max_depth=4, min_samples_split=0.8, n_estimators=10, score=0.0, total=   0.1s
[CV] max_depth=4, min_samples_split=0.8, n_estimators=100 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=4, min_samples_split=0.8, n_estimators=100, score=0.0, total=   1.0s
[CV] max_depth=4, min_samples_split=0.8, n_estimators=500 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=4, min_samples_split=0.8, n_estimators=500, score=0.0, total=   5.4s
[CV] max_depth=6, min_samples_split=0.2, n_estimators=10 .............
[CV]  max_depth=6, min_samples_split=0.2, n_estimators=10, score=0.5971848805946544, total=   0.4s
[CV] max_depth=6, min_samples_split=0.2, n_estimators=100 ............
[CV]  max_depth=6, min_samples_split=0.2, n_estimators=100, score=0.5936890045543266, total=   3.7s
[CV] max_depth=6, min_samples_split=0.2, n_estimators=500 ............
[CV]  max_depth=6, min_samples_split=0.2, n_estimators=500, score=0.592085979482169, total=  21.3s
[CV] max_depth=6, min_samples_split=0.5, n_estimators=10 .............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=6, min_samples_split=0.5, n_estimators=10, score=0.0, total=   0.2s
[CV] max_depth=6, min_samples_split=0.5, n_estimators=100 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=6, min_samples_split=0.5, n_estimators=100, score=0.0, total=   2.1s
[CV] max_depth=6, min_samples_split=0.5, n_estimators=500 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=6, min_samples_split=0.5, n_estimators=500, score=0.0, total=  11.6s
[CV] max_depth=6, min_samples_split=0.8, n_estimators=10 .............


  'precision', 'predicted', average, warn_for)


[CV]  max_depth=6, min_samples_split=0.8, n_estimators=10, score=0.0, total=   0.1s
[CV] max_depth=6, min_samples_split=0.8, n_estimators=100 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


[CV]  max_depth=6, min_samples_split=0.8, n_estimators=100, score=0.0, total=   1.0s
[CV] max_depth=6, min_samples_split=0.8, n_estimators=500 ............


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  2.6min finished


[CV]  max_depth=6, min_samples_split=0.8, n_estimators=500, score=0.0, total=   5.5s


GridSearchCV(cv=[([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, ...9, 119780, 119781, 119782, 119783, 119784, 119785, 119786, 119787, 119788, 119789, 119790, 119791])],
       error_score=0,
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='sqrt', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=1,
       param_grid={'n_estimators': [10, 100, 500], 'max_depth': [2, 

In [57]:
rf.best_score_

0.5995518565941101

In [60]:
from xgboost import XGBClassifier

param_grid = {
    "n_estimators": [10, 100, 500],
    "max_depth": [2,4,6],
    "learning_rate": [1e-3, 1e-2, 1e-1],
    "reg_alpha": [0, .2, .5, .9]
}

xgb = GridSearchCV(
    estimator = XGBClassifier(max_features="sqrt"),
    param_grid = param_grid,
    scoring = 'f1',
    n_jobs = 1,
    cv = [(list(range(0,min_cv_id)), list(range(min_cv_id,min_test_id)))],
    return_train_score = True,
    error_score = 0,
    verbose = 5
)

In [61]:
xgb.fit(X_train.values, Y_train.values) 

Fitting 1 folds for each of 108 candidates, totalling 108 fits
[CV] learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0 ..


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV]  learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0, score=0.6118228121623708, total=   0.6s
[CV] learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0.2 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.7s remaining:    0.0s


[CV]  learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0.2, score=0.6118228121623708, total=   0.6s
[CV] learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0.5 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.4s remaining:    0.0s


[CV]  learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0.5, score=0.6118228121623708, total=   0.6s
[CV] learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0.9 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.2s remaining:    0.0s


[CV]  learning_rate=0.001, max_depth=2, n_estimators=10, reg_alpha=0.9, score=0.6118228121623708, total=   0.5s
[CV] learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0 .


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    2.9s remaining:    0.0s


[CV]  learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0, score=0.641645096347426, total=   4.8s
[CV] learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0.2 
[CV]  learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0.2, score=0.641645096347426, total=   4.9s
[CV] learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0.5 
[CV]  learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0.5, score=0.641645096347426, total=   5.1s
[CV] learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0.9 
[CV]  learning_rate=0.001, max_depth=2, n_estimators=100, reg_alpha=0.9, score=0.641645096347426, total=   4.9s
[CV] learning_rate=0.001, max_depth=2, n_estimators=500, reg_alpha=0 .
[CV]  learning_rate=0.001, max_depth=2, n_estimators=500, reg_alpha=0, score=0.641645096347426, total=  25.0s
[CV] learning_rate=0.001, max_depth=2, n_estimators=500, reg_alpha=0.2 
[CV]  learning_rate=0.001, max_depth=2, n_estimators=500, reg_alpha=0.2, score=0.6416

[CV]  learning_rate=0.01, max_depth=4, n_estimators=10, reg_alpha=0.2, score=0.6410588404546108, total=   0.9s
[CV] learning_rate=0.01, max_depth=4, n_estimators=10, reg_alpha=0.5 .
[CV]  learning_rate=0.01, max_depth=4, n_estimators=10, reg_alpha=0.5, score=0.6410588404546108, total=   1.0s
[CV] learning_rate=0.01, max_depth=4, n_estimators=10, reg_alpha=0.9 .
[CV]  learning_rate=0.01, max_depth=4, n_estimators=10, reg_alpha=0.9, score=0.6410588404546108, total=   0.9s
[CV] learning_rate=0.01, max_depth=4, n_estimators=100, reg_alpha=0 ..
[CV]  learning_rate=0.01, max_depth=4, n_estimators=100, reg_alpha=0, score=0.6345957011258957, total=   8.1s
[CV] learning_rate=0.01, max_depth=4, n_estimators=100, reg_alpha=0.2 
[CV]  learning_rate=0.01, max_depth=4, n_estimators=100, reg_alpha=0.2, score=0.6345957011258957, total=   8.0s
[CV] learning_rate=0.01, max_depth=4, n_estimators=100, reg_alpha=0.5 
[CV]  learning_rate=0.01, max_depth=4, n_estimators=100, reg_alpha=0.5, score=0.6345957011

[CV]  learning_rate=0.1, max_depth=4, n_estimators=500, reg_alpha=0.9, score=0.6408216403876754, total=  41.4s
[CV] learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0 ....
[CV]  learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0, score=0.6310737852429514, total=   1.7s
[CV] learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0.2 ..
[CV]  learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0.2, score=0.6273684210526316, total=   1.8s
[CV] learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0.5 ..
[CV]  learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0.5, score=0.6291221201626261, total=   1.6s
[CV] learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0.9 ..
[CV]  learning_rate=0.1, max_depth=6, n_estimators=10, reg_alpha=0.9, score=0.6296851574212895, total=   1.4s
[CV] learning_rate=0.1, max_depth=6, n_estimators=100, reg_alpha=0 ...
[CV]  learning_rate=0.1, max_depth=6, n_estimators=100, reg_alpha=0, score=0.6465504614032518, t

[Parallel(n_jobs=1)]: Done 108 out of 108 | elapsed: 32.1min finished


GridSearchCV(cv=[([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, ...9, 119780, 119781, 119782, 119783, 119784, 119785, 119786, 119787, 119788, 119789, 119790, 119791])],
       error_score=0,
       estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, max_features='sqrt', min_child_weight=1, missing=None,
       n_estimators=100, n_jobs=1, nthread=None,
       objective='binary:logistic', random_state=0, reg_alpha=0,
       reg_lambda=1, scale_pos_weight=1, seed=None, silent=True,
       subsample=1),
       fit_params=None, iid='warn', n_jobs=1,
       param_grid={'n_estimators': [10, 100, 500], 'max_depth': [2, 4, 6], 'lear

In [65]:
importances = pd.DataFrame({
    "importances": xgb.best_estimator_.feature_importances_,
    "variables": X_train.columns
})

In [67]:
importances.sort_values("importances", ascending=False)

Unnamed: 0,importances,variables
3,0.168337,expensa_proporcion
1,0.165122,consorcio_prop_impagos
2,0.140533,consorcio_prop_efectivo
0,0.114053,consorcio_cantidad_expensas
5,0.062417,expensa_ineteres_segundo_vencimiento
4,0.046151,expensa_interes_primer_vencimiento
25,0.038018,pago_metodo_lag_3_Impago
33,0.031587,pago_metodo_lag_1_Impago
29,0.029317,pago_metodo_lag_2_Impago
32,0.018536,pago_metodo_lag_1_EntePago


In [69]:
get_train_test_meassures(xgb, X_train.values, Y_train, X_test.values, Y_test)

accuracy: 0.8975315753007271
precision: 0.8050570101351351
recall: 0.639911047706961
f1: 0.7130467307197793
--------------------------------------------------
accuracy: 0.8866266032828056
precision: 0.6615871190339275
recall: 0.4680634662327095
f1: 0.5482487491065046
