# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import scipy
from math import sqrt
import matplotlib.pyplot as plt
import seaborn as sns

# pre-processing & feature selection
# from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_selection import RFE
from sklearn.decomposition import PCA

# estimators
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

# model metrics
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import cross_val_score

# cross validation
from sklearn.model_selection import train_test_split

# parameter tuning
from sklearn.model_selection import GridSearchCV

In [2]:
orig = pd.read_csv('credits_updated.csv', header =0)

In [3]:
# create a working copy of the data
rawData = orig.copy()

In [4]:
rawData.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default payment next month,Limit_Bins,Age_Bins
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,689,0,0,0,0,1,1,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3261,0,1000,1000,1000,0,2000,1,2,1
2,3,90000,2,2,2,34,0,0,0,0,...,15549,1518,1500,1000,1000,1000,5000,0,1,2
3,4,50000,2,2,1,37,0,0,0,0,...,29547,2000,2019,1200,1100,1069,1000,0,1,2
4,5,50000,1,2,1,57,-1,0,-1,0,...,19131,2000,36681,10000,9000,689,679,0,1,4


In [5]:
rawData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 27 columns):
ID                            30000 non-null int64
LIMIT_BAL                     30000 non-null int64
SEX                           30000 non-null int64
EDUCATION                     30000 non-null int64
MARRIAGE                      30000 non-null int64
AGE                           30000 non-null int64
PAY_0                         30000 non-null int64
PAY_2                         30000 non-null int64
PAY_3                         30000 non-null int64
PAY_4                         30000 non-null int64
PAY_5                         30000 non-null int64
PAY_6                         30000 non-null int64
BILL_AMT1                     30000 non-null int64
BILL_AMT2                     30000 non-null int64
BILL_AMT3                     30000 non-null int64
BILL_AMT4                     30000 non-null int64
BILL_AMT5                     30000 non-null int64
BILL_AMT6               

# Select Features & Dependent Variable

In [6]:
X = rawData.iloc[:,1:24]

In [7]:
X.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,20000,2,2,1,24,2,2,-1,-1,-2,...,689,0,0,0,0,689,0,0,0,0
1,120000,2,2,2,26,-1,2,0,0,0,...,2682,3272,3455,3261,0,1000,1000,1000,0,2000
2,90000,2,2,2,34,0,0,0,0,0,...,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000
3,50000,2,2,1,37,0,0,0,0,0,...,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000
4,50000,1,2,1,57,-1,0,-1,0,0,...,35835,20940,19146,19131,2000,36681,10000,9000,689,679


In [8]:
y = rawData['default payment next month']

## Feature Engineering

### OneHotEncode SEX, EDUCATION, MARRIAGE

In [9]:
cat_columns = ["SEX", "EDUCATION", "MARRIAGE"]

In [10]:
cat_columns_idx = [X.columns.get_loc(col) 
                   for col in cat_columns]

In [11]:
ohe = OneHotEncoder(categorical_features=cat_columns_idx, 
                    sparse=False, handle_unknown="ignore")
X_ohe = ohe.fit_transform(X)



## Discretize LIMIT_BAL & AGE

In [12]:
rawData_dis = rawData.copy()
rawData_dis['LIMIT_BAL'] = rawData['Limit_Bins']
rawData_dis['AGE'] = rawData['Age_Bins']
X_dis = rawData_dis.iloc[:,1:24]

In [13]:
X_dis.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,1,2,2,1,1,2,2,-1,-1,-2,...,689,0,0,0,0,689,0,0,0,0
1,2,2,2,2,1,-1,2,0,0,0,...,2682,3272,3455,3261,0,1000,1000,1000,0,2000
2,1,2,2,2,2,0,0,0,0,0,...,13559,14331,14948,15549,1518,1500,1000,1000,1000,5000
3,1,2,2,1,2,0,0,0,0,0,...,49291,28314,28959,29547,2000,2019,1200,1100,1069,1000
4,1,1,2,1,4,-1,0,-1,0,0,...,35835,20940,19146,19131,2000,36681,10000,9000,689,679


### OneHotEncode SEX, EDUCATION, MARRIAGE, LIMIT_BAL, AGE

In [14]:
cat_columns_dis = ["LIMIT_BAL", "SEX", "EDUCATION", "MARRIAGE", "AGE"]

In [15]:
cat_columns_dis_idx = [X_dis.columns.get_loc(col) 
                   for col in cat_columns_dis]

In [16]:
dis_ohe = OneHotEncoder(categorical_features=cat_columns_dis_idx, 
                    sparse=False, handle_unknown="ignore")
X_dis_ohe = dis_ohe.fit_transform(X_dis)



## Principle Component Analysis

In [17]:
pca = PCA(n_components=2)
X_pca = pca.fit(X).transform(X)

In [18]:
X_pca

array([[-166488.19108566,  -75538.15366417],
       [-114226.97681653,    9780.67125058],
       [ -98432.36218052,  -33471.38733069],
       ...,
       [-140206.12804809,  -77857.49653957],
       [ -41427.74077383,  -74908.30367179],
       [ -70860.31120885,  -95514.14430938]])

# Establish X / y train & test sets

In [19]:
# Original Dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = .25, random_state  =  123)

In [20]:
# One Hot Encoded Dataset
X_ohe_train, X_ohe_test, y_train, y_test = train_test_split(
    X_ohe, y, test_size = .25, random_state  =  123)

In [21]:
# Discretized & One Hot Encoded Dataset
X_dis_train, X_dis_test, y_train, y_test = train_test_split(
    X_dis_ohe, y, test_size = .25, random_state  =  123)

In [22]:
# PCA Dataset
X_pca_train, X_pca_test, y_train, y_test = train_test_split(
    X_pca, y, test_size = .25, random_state  =  123)

# Model

## k-Nearest Neighbor

In [23]:
# check kNN model base performance
knn=KNeighborsClassifier()
score_knn0 = cross_val_score(knn, X_train, y_train, cv=10)
print("kNN - original dataset: %0.8f" % score_knn0.mean())
score_knn1 = cross_val_score(knn, X_ohe_train, y_train, cv=10)
print("kNN - OHE dataset: %0.8f" % score_knn1.mean())
score_knn2 = cross_val_score(knn, X_dis_train, y_train, cv=10)
print("kNN - Discretized OHE dataset: %0.8f" % score_knn2.mean())
score_knn3 = cross_val_score(knn, X_pca_train, y_train, cv=10)
print("kNN - PCA dataset: %0.8f" % score_knn3.mean())

kNN - original dataset: 0.75271098
kNN - OHE dataset: 0.75288876
kNN - Discretized OHE dataset: 0.76097771
kNN - PCA dataset: 0.73551140


In [24]:
# Set Up Parameter Tuning
knn_param_grid = { 
    'n_neighbors': [50, 100, 150, 200, 250, 300],
    'leaf_size': [1, 50, 100, 150],
    'weights': ['uniform', 'distance']
}
CV_knn0 = GridSearchCV(estimator=knn, param_grid=knn_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_knn1 = GridSearchCV(estimator=knn, param_grid=knn_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_knn2 = GridSearchCV(estimator=knn, param_grid=knn_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_knn3 = GridSearchCV(estimator=knn, param_grid=knn_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)

In [25]:
# check best paramters on original dataset
CV_knn0.fit(X_train, y_train)
print("Best score - original dataset: %0.8f" % CV_knn0.best_score_)
print("Best parameters set - original dataset:")
print(CV_knn0.best_params_)

Best score - original dataset: 0.77857778
Best parameters set - original dataset:
{'leaf_size': 1, 'n_neighbors': 200, 'weights': 'distance'}


In [27]:
# RFE
rfe_knn = RFE(estimator=KNeighborsClassifier(**CV_knn0.best_params_),
              n_features_to_select=23, step=1)
cross_val = cross_val_score(rfe_knn, X_train, y_train, cv=5)
print(cross_val.mean())

0.7785777832581622


In [28]:
# check best paramters on OHE dataset
CV_knn1.fit(X_ohe_train, y_train)
print("Best score - OHE dataset: %0.8f" % CV_knn1.best_score_)
print("Best parameters set - OHE dataset:")
print(CV_knn1.best_params_)

Best score - OHE dataset: 0.77875556
Best parameters set - OHE dataset:
{'leaf_size': 1, 'n_neighbors': 200, 'weights': 'distance'}


In [29]:
# check best paramters on Discretized OHE dataset
CV_knn2.fit(X_dis_train, y_train)
print("Best score - Discretized & OHE dataset: %0.8f" % CV_knn2.best_score_)
print("Best parameters set - Discretized & OHE dataset:")
print(CV_knn2.best_params_)

Best score - Discretized & OHE dataset: 0.78075556
Best parameters set - Discretized & OHE dataset:
{'leaf_size': 50, 'n_neighbors': 50, 'weights': 'uniform'}


In [30]:
# check best paramters on PCA dataset
CV_knn3.fit(X_pca_train, y_train)
print("Best score - PCA dataset: %0.8f" % CV_knn3.best_score_)
print("Best parameters set - PCA dataset:")
print(CV_knn3.best_params_)

Best score - PCA dataset: 0.77737778
Best parameters set - PCA dataset:
{'leaf_size': 1, 'n_neighbors': 200, 'weights': 'uniform'}


## Random Forest

In [31]:
# check Random Forest base performance
rf = RandomForestClassifier(n_estimators=10)
score_rf0 = cross_val_score(rf, X_train, y_train, cv=10)
print("RF - original dataset: %0.8f" % score_rf0.mean())
score_rf1 = cross_val_score(rf, X_ohe_train, y_train, cv=10)
print("RF - OHE dataset: %0.8f" % score_rf1.mean())
score_rf2 = cross_val_score(rf, X_dis_train, y_train, cv=10)
print("RF - Discretized OHE dataset: %0.8f" % score_rf2.mean())
score_rf3 = cross_val_score(rf, X_pca_train, y_train, cv=10)
print("RF - PCA dataset: %0.8f" % score_rf3.mean())

RF - original dataset: 0.80364519
RF - OHE dataset: 0.80448924
RF - Discretized OHE dataset: 0.80417765
RF - PCA dataset: 0.72853305


In [32]:
# Set Up Parameter Tuning
rf_param_grid = { 
    'bootstrap': ['True', 'False'],
    'criterion': ['gini', 'entropy'],
    'n_estimators': [50, 100, 150, 200],
    'min_samples_leaf': [50, 100, 150, 200, 250, 300],
    'min_samples_split': [100, 200, 300, 400]
}
CV_rf0 = GridSearchCV(estimator=rf, param_grid=rf_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_rf1 = GridSearchCV(estimator=rf, param_grid=rf_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_rf2 = GridSearchCV(estimator=rf, param_grid=rf_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_rf3 = GridSearchCV(estimator=rf, param_grid=rf_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)

In [33]:
# check best paramters on original dataset
CV_rf0.fit(X_train, y_train)
print("Best score - original dataset: %0.8f" % CV_rf0.best_score_)
print("Best parameters set - original dataset:")
print(CV_rf0.best_params_)

Best score - original dataset: 0.82000000
Best parameters set - original dataset:
{'bootstrap': 'True', 'criterion': 'entropy', 'min_samples_leaf': 50, 'min_samples_split': 400, 'n_estimators': 50}


In [34]:
# RFE
rfe_rf = RFE(estimator=RandomForestClassifier(**CV_rf0.best_params_),
              n_features_to_select=23, step=1)
cross_val = cross_val_score(rfe_rf, X_train, y_train, cv=5)
print(cross_val.mean())

0.81666670257339


In [35]:
# check best paramters on OHE dataset
CV_rf1.fit(X_ohe_train, y_train)
print("Best score - OHE dataset: %0.8f" % CV_rf1.best_score_)
print("Best parameters set - OHE dataset:")
print(CV_rf1.best_params_)

Best score - OHE dataset: 0.81955556
Best parameters set - OHE dataset:
{'bootstrap': 'False', 'criterion': 'gini', 'min_samples_leaf': 50, 'min_samples_split': 100, 'n_estimators': 150}


In [36]:
# check best paramters on Discretized OHE dataset
CV_rf2.fit(X_dis_train, y_train)
print("Best score - Discretized & OHE dataset: %0.8f" % CV_rf2.best_score_)
print("Best parameters set - Discretized & OHE dataset:")
print(CV_rf2.best_params_)

Best score - Discretized & OHE dataset: 0.81960000
Best parameters set - Discretized & OHE dataset:
{'bootstrap': 'False', 'criterion': 'entropy', 'min_samples_leaf': 50, 'min_samples_split': 400, 'n_estimators': 50}


In [37]:
# check best paramters on PCA dataset
CV_rf3.fit(X_pca_train, y_train)
print("Best score - PCA dataset: %0.8f" % CV_rf3.best_score_)
print("Best parameters set - PCA dataset:")
print(CV_rf3.best_params_)

Best score - PCA dataset: 0.77737778
Best parameters set - PCA dataset:
{'bootstrap': 'True', 'criterion': 'gini', 'min_samples_leaf': 100, 'min_samples_split': 100, 'n_estimators': 50}


## Decision Tree

In [38]:
# check Decision Tree base performance
dt = tree.DecisionTreeClassifier()
score_dt0 = cross_val_score(dt, X_train, y_train, cv=10)
print("Decision Tree - original dataset: %0.8f" % score_dt0.mean())
score_dt1 = cross_val_score(dt, X_ohe_train, y_train, cv=10)
print("Decision Tree - OHE dataset: %0.8f" % score_dt1.mean())
score_dt2 = cross_val_score(dt, X_dis_train, y_train, cv=10)
print("Decision Tree - Discretized OHE dataset: %0.8f" % score_dt2.mean())
score_dt3 = cross_val_score(dt, X_pca_train, y_train, cv=10)
print("Decision Tree - PCA dataset: %0.8f" % score_dt3.mean())

Decision Tree - original dataset: 0.72133439
Decision Tree - OHE dataset: 0.72044512
Decision Tree - Discretized OHE dataset: 0.72457810
Decision Tree - PCA dataset: 0.67093360


In [39]:
# Set Up Parameter Tuning
dt_param_grid = { 
    'criterion': ['gini', 'entropy'],
    'min_samples_leaf': [100, 150, 200, 250, 300],
    'min_samples_split': [10, 50, 100, 150]
}
CV_dt0 = GridSearchCV(estimator=dt, param_grid=dt_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_dt1 = GridSearchCV(estimator=dt, param_grid=dt_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_dt2 = GridSearchCV(estimator=dt, param_grid=dt_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_dt3 = GridSearchCV(estimator=dt, param_grid=dt_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)

In [40]:
# check best paramters on original dataset
CV_dt0.fit(X_train, y_train)
print("Best score - original dataset: %0.8f" % CV_dt0.best_score_)
print("Best parameters set - original dataset:")
print(CV_dt0.best_params_)

Best score - original dataset: 0.82008889
Best parameters set - original dataset:
{'criterion': 'gini', 'min_samples_leaf': 250, 'min_samples_split': 10}


In [41]:
# RFE
rfe_dt = RFE(estimator=tree.DecisionTreeClassifier(**CV_dt0.best_params_),
              n_features_to_select=23, step=1)
cross_val = cross_val_score(rfe_dt, X_train, y_train, cv=5)
print(cross_val.mean())

0.8200891025821779


In [42]:
# check best paramters on OHE dataset
CV_dt1.fit(X_ohe_train, y_train)
print("Best score - OHE dataset: %0.8f" % CV_dt1.best_score_)
print("Best parameters set - OHE dataset:")
print(CV_dt1.best_params_)

Best score - OHE dataset: 0.82008889
Best parameters set - OHE dataset:
{'criterion': 'gini', 'min_samples_leaf': 250, 'min_samples_split': 10}


In [43]:
# check best paramters on Discretized OHE dataset
CV_dt2.fit(X_dis_train, y_train)
print("Best score - Discretized & OHE dataset: %0.8f" % CV_dt2.best_score_)
print("Best parameters set - Discretized & OHE dataset:")
print(CV_dt2.best_params_)

Best score - Discretized & OHE dataset: 0.82008889
Best parameters set - Discretized & OHE dataset:
{'criterion': 'gini', 'min_samples_leaf': 200, 'min_samples_split': 10}


In [46]:
# check best paramters on PCA dataset
CV_dt3.fit(X_pca_train, y_train)
print("Best score - PCA dataset: %0.8f" % CV_dt3.best_score_)
print("Best parameters set - PCA dataset:")
print(CV_dt3.best_params_)

Best score - PCA dataset: 0.77737778
Best parameters set - PCA dataset:
{'criterion': 'gini', 'min_samples_leaf': 150, 'min_samples_split': 10}


## SVC

In [47]:
# check SVC base performance
svc = SVC(gamma='auto')
score_svc0 = cross_val_score(svc, X_train, y_train, cv=10)
print("SVC - original dataset: %0.8f" % score_svc0.mean())
score_svc1 = cross_val_score(svc, X_ohe_train, y_train, cv=10)
print("SVC - OHE dataset: %0.8f" % score_svc1.mean())
score_svc2 = cross_val_score(svc, X_dis_train, y_train, cv=10)
print("SVC - Discretized OHE dataset: %0.8f" % score_svc2.mean())
score_svc3 = cross_val_score(svc, X_pca_train, y_train, cv=10)
print("SVC - PCA dataset: %0.8f" % score_svc3.mean())

SVC - original dataset: 0.77795559
SVC - OHE dataset: 0.77795561
SVC - Discretized OHE dataset: 0.78039997
SVC - PCA dataset: 0.77906676


In [48]:
# Set Up Parameter Tuning
svc_param_grid = { 
    'C': [0.1, 1, 10],
    'gamma': ['auto', 'scale', 0.001, 0.1, 1, 10]
}
CV_svc0 = GridSearchCV(estimator=svc, param_grid=svc_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_svc1 = GridSearchCV(estimator=svc, param_grid=svc_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_svc2 = GridSearchCV(estimator=svc, param_grid=svc_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)
CV_svc3 = GridSearchCV(estimator=svc, param_grid=svc_param_grid, 
                      scoring='accuracy', n_jobs=-1, cv=5)

In [49]:
# check best paramters on original dataset
CV_svc0.fit(X_train, y_train)
print("Best score - original dataset: %0.8f" % CV_svc0.best_score_)
print("Best parameters set - original dataset:")
print(CV_svc0.best_params_)

Best score - original dataset: 0.77911111
Best parameters set - original dataset:
{'C': 1, 'gamma': 0.001}


In [50]:
# RFE
rfe_svc = RFE(estimator=SVC(**CV_svc0.best_params_),
              n_features_to_select=23, step=1)
cross_val = cross_val_score(rfe_svc, X_train, y_train, cv=5)
print(cross_val.mean())

0.779111235140747


In [51]:
# check best paramters on OHE dataset
CV_svc1.fit(X_ohe_train, y_train)
print("Best score - OHE dataset: %0.8f" % CV_svc1.best_score_)
print("Best parameters set - OHE dataset:")
print(CV_svc1.best_params_)

Best score - OHE dataset: 0.77915556
Best parameters set - OHE dataset:
{'C': 1, 'gamma': 0.001}


In [52]:
# check best paramters on Discretized OHE dataset
CV_svc2.fit(X_dis_train, y_train)
print("Best score - Discretized & OHE dataset: %0.8f" % CV_svc2.best_score_)
print("Best parameters set - Discretized & OHE dataset:")
print(CV_svc2.best_params_)

Best score - Discretized & OHE dataset: 0.78057778
Best parameters set - Discretized & OHE dataset:
{'C': 1, 'gamma': 'auto'}


In [53]:
# check best paramters on PCA dataset
CV_svc3.fit(X_pca_train, y_train)
print("Best score - PCA dataset: %0.8f" % CV_svc3.best_score_)
print("Best parameters set - PCA dataset:")
print(CV_svc3.best_params_)

Best score - PCA dataset: 0.77902222
Best parameters set - PCA dataset:
{'C': 1, 'gamma': 'auto'}


# Making Predictions

In [78]:
predictions = CV_dt2.predict(X_dis_test)

## Evaluate Results

In [79]:
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.85      0.94      0.89      5873
           1       0.64      0.39      0.49      1627

   micro avg       0.82      0.82      0.82      7500
   macro avg       0.75      0.66      0.69      7500
weighted avg       0.80      0.82      0.80      7500



In [80]:
accuracy = accuracy_score(y_test, predictions)

In [81]:
bal_accuracy = balanced_accuracy_score(y_test, predictions)

In [82]:
print('Accuray: %.3f' % accuracy)
print('Balanced Accuracy: %.3f' % bal_accuracy)

Accuray: 0.821
Balanced Accuracy: 0.665


In [91]:
model = tree.DecisionTreeClassifier(**CV_dt0.best_params_)
model.fit(X_train, y_train)
print(model.feature_importances_)

[2.27193656e-02 0.00000000e+00 1.87101871e-03 1.88430854e-03
 4.86961238e-04 6.57845464e-01 1.35694182e-01 5.03277491e-03
 3.27570938e-02 0.00000000e+00 2.14280129e-02 2.64398136e-02
 9.20138302e-03 2.46978977e-03 0.00000000e+00 0.00000000e+00
 1.36605756e-03 7.65147428e-03 4.61385451e-02 4.29237343e-03
 1.73214478e-02 4.37909273e-03 1.02084122e-03]


In [89]:
X_train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22500 entries, 16095 to 19966
Data columns (total 23 columns):
LIMIT_BAL    22500 non-null int64
SEX          22500 non-null int64
EDUCATION    22500 non-null int64
MARRIAGE     22500 non-null int64
AGE          22500 non-null int64
PAY_0        22500 non-null int64
PAY_2        22500 non-null int64
PAY_3        22500 non-null int64
PAY_4        22500 non-null int64
PAY_5        22500 non-null int64
PAY_6        22500 non-null int64
BILL_AMT1    22500 non-null int64
BILL_AMT2    22500 non-null int64
BILL_AMT3    22500 non-null int64
BILL_AMT4    22500 non-null int64
BILL_AMT5    22500 non-null int64
BILL_AMT6    22500 non-null int64
PAY_AMT1     22500 non-null int64
PAY_AMT2     22500 non-null int64
PAY_AMT3     22500 non-null int64
PAY_AMT4     22500 non-null int64
PAY_AMT5     22500 non-null int64
PAY_AMT6     22500 non-null int64
dtypes: int64(23)
memory usage: 4.1 MB
