In [33]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn import linear_model, metrics
from sklearn import tree
import copy
import random

In [34]:
class onego():
    """
    This class is a template of stacking method for classification.
    It only provides fit and predict_proba functions, and works with binary [0, 1] labels.
    predict_proba function returns the probability of label 1.
    To learn how to use, see test/test_stackingclassifier.py
    This stacking technique creates prediction dataset in one go
    """
    def __init__(self, base_classifiers, combiner, n=3):
        self.base_classifiers = base_classifiers
        self.combiner = combiner
        self.n = n

    def fit(self, X, y):
        stacking_train = np.full(
            (np.shape(X)[0], len(self.base_classifiers)),
            np.nan
        )

        for model_no in range(len(self.base_classifiers)):
            cv = KFold(len(X), n_folds=self.n)
            for j, (traincv, testcv) in enumerate(cv):
                self.base_classifiers[model_no].fit(X[traincv, ], y[traincv])
                predicted_y_proba = self.base_classifiers[model_no].predict_proba(X[testcv,])[:, 1]
                stacking_train[testcv, model_no] = predicted_y_proba

            self.base_classifiers[model_no].fit(X, y)
        self.combiner.fit(stacking_train, y)

    def predict_proba(self, X):
        stacking_predict_data = np.full(
            (np.shape(X)[0], len(self.base_classifiers)),
            np.nan
        )
        for model_no in range(len(self.base_classifiers)):
            stacking_predict_data[:, model_no] = self.base_classifiers[model_no].predict_proba(X)[:, 1]
        return self.combiner.predict_proba(stacking_predict_data)[:, 1]

In [35]:
import numpy as np
from sklearn.model_selection import KFold

class outoffolds():
    """
    This class is a template of stacking method for classification.
    It only provides fit and predict_proba functions, and works with binary [0, 1] labels.
    predict_proba function returns the probability of label 1.
    To learn how to use, see test/test_stackingclassifier.py
    This stacking technique creates prediction dataset by taking the average of
    the out-of-fold predictors' predictions
    """
    def __init__(self, base_classifiers, combiner, n=3):
        self.base_classifiers = base_classifiers
        self.combiner = combiner
        self.n = n
        self.models = [[None for j in range(n)] for i in range(len(base_classifiers))]

    def fit(self, X, y):
        stacking_train = np.full(
            (np.shape(X)[0], len(self.base_classifiers)),
            np.nan
        )

        for model_no in range(len(self.base_classifiers)):
            cv = cross_validation.KFold(len(X), n_folds=self.n)
            for j, (traincv, testcv) in enumerate(cv):
                self.base_classifiers[model_no].fit(X[traincv, ], y[traincv])
                predicted_y_proba = self.base_classifiers[model_no].predict_proba(X[testcv,])[:, 1]
                stacking_train[testcv, model_no] = predicted_y_proba
                self.models[model_no][j] = copy.deepcopy(self.base_classifiers[model_no])
        self.combiner.fit(stacking_train, y)

    def predict_proba(self, X):
        stacking_predict_data = np.full(
            (np.shape(X)[0], len(self.base_classifiers)),
            np.nan
        )
        for model_no in range(len(self.base_classifiers)):
            dataset_blend = np.full(
                (np.shape(X)[0], len(self.models[model_no])),
                np.nan
            )
            for j in range(len(self.models[model_no])):
                dataset_blend[:, j] = self.models[model_no][j].predict_proba(X)[:, 1]
            stacking_predict_data[:, model_no] = dataset_blend.mean(1)
        return self.combiner.predict_proba(stacking_predict_data)[:, 1]

In [36]:
class StackingClassifier():

    ONEGO = 1
    OUTOFFOLDS = 2

    def __init__(self, base_classifiers, combiner, n=3, technique=ONEGO):
        if technique == self.ONEGO:
            self.stacking = onego.StackingClassifier(base_classifiers, combiner, n)
        elif technique == self.OUTOFFOLDS:
            self.stacking = outoffolds.StackingClassifier(base_classifiers, combiner, n)

    def fit(self, X, y):
        self.stacking.fit(X, y)

    def predict_proba(self, X):
        return self.stacking.predict_proba(X)

In [37]:
#reading Data
X = pd.read_csv('../data/raw/Data Cleaning.csv')
y = pd.read_excel('../data/raw/Training outputs.xlsx')

X.drop(columns=['Unnamed: 0'],axis=1,inplace=True)
y.drop(columns=['Unnamed: 0'],axis=1,inplace=True)


In [38]:
#use ONEGO technique to create stacking model
stacking_classifier = StackingClassifier(
    base_classifiers=[
        linear_model.SGDClassifier(loss='log', random_state=0),
        linear_model.LogisticRegression(random_state=0),
        tree.DecisionTreeClassifier(random_state=0)
    ],
    combiner=linear_model.LogisticRegression(),
    technique=StackingClassifier.ONEGO
)

stacking_classifier.fit(X, y)
predicted_y_proba = stacking_classifier.predict_proba(X)
print(metrics.roc_auc_score(y, predicted_y_proba))
# Since the dataset is meaningless, roc_auc_score will not produce meaningful result
# I am using it just to see, if there are any problems

#use OUTOFFOLDS technique to create stacking model
stacking_classifier = StackingClassifier(
    base_classifiers=[
        linear_model.SGDClassifier(loss='log', random_state=0),
        linear_model.LogisticRegression(random_state=0),
        tree.DecisionTreeClassifier(random_state=0)
    ],
    combiner=linear_model.LogisticRegression(),
    technique=StackingClassifier.OUTOFFOLDS
)

stacking_classifier.fit(X, y)
predicted_y_proba = stacking_classifier.predict_proba(X)
print(metrics.roc_auc_score(y, predicted_y_proba))
# Since the dataset is meaningless, roc_auc_score will not produce meaningful result
# I am using it just to see, if there are any problems

AttributeError: type object 'onego' has no attribute 'StackingClassifier'