In [None]:
%matplotlib inline
matplotlib.rcParams['savefig.dpi'] = 144

import matplotlib
import seaborn as sns
import numpy as np
import pandas as pd

In [None]:
# Read the training CSV, then split the label column off the feature frame.
train_x = pd.read_csv('./UCI_Credit_Card_train.csv', index_col=False)
# `pop` removes the column from `train_x` in place and returns it, so after
# this line `train_x` no longer contains the label. It must be called on the
# frame loaded just above; no other frame exists at this point.
train_y = train_x.pop('default.payment.next.month')  # default = 1, no default = 0
# Read the test CSV (left as loaded).
test_x = pd.read_csv('./UCI_Credit_Card_test.csv', index_col=False)

In [None]:
# Demographic columns to strip; listed once so both frames stay in sync.
DEMOGRAPHIC_COLUMNS = ['SEX', 'EDUCATION', 'MARRIAGE', 'AGE']

# Derive the non-demographic views from the frames loaded earlier in the
# notebook (`train_x` / `test_x`). `drop` returns new frames, so the loaded
# frames themselves are not modified here.
billing_data = train_x.drop(DEMOGRAPHIC_COLUMNS, axis=1)
test_data = test_x.drop(DEMOGRAPHIC_COLUMNS, axis=1)

In [None]:
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, FeatureUnion

In [None]:
class feature_selector(TransformerMixin, BaseEstimator):
    """Stateless transformer that indexes its input with a fixed key.

    Parameters
    ----------
    features
        Key used to index the input in ``transform`` (the pipelines in this
        notebook pass a list of column names).
    """

    def __init__(self, features):
        self.features = features

    def fit(self, x, y=None):
        """Learn nothing; return ``self`` so the step can sit in a pipeline."""
        return self

    def transform(self, x, y=None):
        """Return ``x[self.features]``; ``y`` is accepted but not used."""
        selected = x[self.features]
        return selected

In [None]:
class EstTransformer(BaseEstimator, TransformerMixin):
    """Adapter that exposes an estimator's ``predict`` output via ``transform``.

    Parameters
    ----------
    estimator
        Object providing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place (the same instance passed to the constructor is trained).
    """

    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y):
        """Fit the wrapped estimator on ``(X, y)`` and return ``self``."""
        wrapped = self.estimator
        wrapped.fit(X, y)
        return self

    def transform(self, X):
        """Return the wrapped estimator's predictions for ``X`` as a 2-D array.

        The predictions are promoted to at least two dimensions and then
        transposed, so a 1-D prediction vector of length ``n`` comes back
        with shape ``(n, 1)``.
        """
        predictions = self.estimator.predict(X)
        return np.atleast_2d(predictions).T

In [None]:
# First-layer model 1: select the credit-limit, repayment-status, bill-amount
# and payment-amount columns, then emit a random forest's predictions.
billing_pipeline = Pipeline(steps=[
    (
        'selector',
        feature_selector([
            'LIMIT_BAL',
            'PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6',
            'BILL_AMT1', 'BILL_AMT2', 'BILL_AMT3',
            'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6',
            'PAY_AMT1', 'PAY_AMT2', 'PAY_AMT3',
            'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6',
        ]),
    ),
    (
        'forest',
        EstTransformer(
            RandomForestClassifier(random_state=0, n_estimators=100)
        ),
    ),
])

# First-layer model 2: select the demographic columns plus the credit limit,
# then emit the predictions of a second, differently seeded random forest.
demo_pipeline = Pipeline(steps=[
    (
        'selector',
        feature_selector(['SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'LIMIT_BAL']),
    ),
    (
        'forest1',
        EstTransformer(
            RandomForestClassifier(random_state=1, n_estimators=100)
        ),
    ),
])

In [None]:
# Run both first-layer pipelines on the same input and place their outputs
# side by side, one block of columns per pipeline.
billing_step = ('billing_data', billing_pipeline)
demo_step = ('demo_data', demo_pipeline)
union = FeatureUnion([billing_step, demo_step])

In [None]:
# Two-stage model: the union of first-layer predictions feeds a logistic
# regression that produces the final output.
ensemble_model = Pipeline(steps=[
    ('first_layer_stacking', union),
    ('blender', LogisticRegression()),
])

In [None]:
# Train every stage of the stacked pipeline on the training frame and labels.
# NOTE(review): the blender appears to be fitted on first-layer predictions
# made for the same rows those forests were trained on; out-of-fold
# predictions may be preferable. Confirm this is intended.
ensemble_model.fit(X=train_x, y=train_y)

# Probability output of the final stage for the test frame.
proba = ensemble_model.predict_proba(X=test_x)

In [None]:
# Predicted labels for the test frame from the fitted stacked pipeline.
prediction = ensemble_model.predict(X=test_x)