In [1]:
# Import required libs
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
import pandas as pd

In [2]:
# Load data
iris = load_iris()

In [3]:
# Split features and target
X = iris.data
y = iris.target

In [4]:
# Split data as train, validation, test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

In [5]:
# Train base classifiers
base_clf = [RandomForestClassifier(), ExtraTreesClassifier(), SVC()]

for clf in base_clf:
    clf.fit(X_train, y_train)

In [6]:
df = pd.DataFrame(data = { 'RandomForest': base_clf[0].predict(X_val),
                           'ExtraTrees': base_clf[1].predict(X_val),
                           'SVM': base_clf[2].predict(X_val),
                           'y_true': y_val
                         }
                 )

df

Unnamed: 0,ExtraTrees,RandomForest,SVM,y_true
0,0,0,0,0
1,0,0,0,0
2,2,2,2,2
3,2,2,2,2
4,2,2,2,2
5,1,1,1,1
6,2,2,2,2
7,1,1,1,1
8,1,1,1,1
9,1,1,1,1


In [7]:
# Create a blender
blender = LogisticRegression()

In [8]:
# Train Blender
blender.fit(df[['RandomForest', 'ExtraTrees', 'SVM']],df['y_true'])

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [9]:
# Test Blender   
df_test = pd.DataFrame(data = { 'RandomForest': base_clf[0].predict(X_test),
                                'ExtraTrees': base_clf[1].predict(X_test),
                                'SVM': base_clf[2].predict(X_test),
                                'y_true': y_test
                              }
                      )

y_test_pred = blender.predict(df_test[['RandomForest', 'ExtraTrees', 'SVM']])

In [10]:
# Look accuracy
accuracy = accuracy_score(y_test, y_test_pred)
conf_mat = confusion_matrix(y_test, y_test_pred)
print(accuracy)
print(conf_mat)

0.9
[[ 6  0  0]
 [ 0  9  2]
 [ 0  1 12]]
