In [20]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

import numpy as np

from sklearn.datasets import load_breast_cancer

In [22]:
data = load_breast_cancer()
X = data['data']
y = data['target']

## Custom sklearn estimator

In [24]:
class ProbExtractor(BaseEstimator, TransformerMixin):
    """
    Combines predicted label probabilities of all models
    """    
    def __init__(self, models):
        self.models = models  

    def transform(self, X, y=None):
        y_probas = []
        for model in self.models:
            y_prob = model.predict_proba(X)[:, 1]
            y_probas.append(y_prob)
            
        return np.array(y_probas).transpose()

    def fit(self, X, y=None):
        for model in self.models:
            model.fit(X, y)
        return self 

In [25]:
rf = RandomForestClassifier()
nb = GaussianNB()
lr = LogisticRegression()

probas = ProbExtractor([rf,
                        nb,
                        lr])

probas.fit(X, y)
print(probas.transform(X, y).shape)

(569, 3)


### Combining with a meta-learner

In [27]:
params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

model = Pipeline([           
            ('proba', ProbExtractor([RandomForestClassifier(n_estimators=300),
                                    ExtraTreesClassifier(n_estimators=300),
                                    LogisticRegression(),
                                    GaussianNB()])),
        
            ('polynomial', PolynomialFeatures(degree=2)),
        
            ('logistic_regression', GridSearchCV(
                        LogisticRegression(penalty='l2', 
                                           random_state=42), 
                                           param_grid=params))
        ])

score = cross_val_score(model, X, y, cv=5, scoring='roc_auc').mean()
print(score)

0.993170521839


## Voting classifier

In [28]:
eclf = VotingClassifier(estimators=[('rf', rf), ('gnb', nb), ('lr', lr)], voting='hard')
eclf.fit(X, y)
eclf.predict(X).shape

(569,)