In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR


In [4]:
def get_dataset():
    X, y = make_regression(n_samples=1000, n_features=20, n_informative=15, random_state=1)
    return X, y

In [9]:
def get_models():
    models = {
        'knn' : KNeighborsRegressor(),
        'cart' : DecisionTreeRegressor(),
        'svm' : SVR(),
#         'linReg' : LinearRegression()
    }
    return models

In [10]:
def evaluate_model(model, X, y):
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    scores = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv = cv, error_score = 'raise')
    return scores

In [11]:
X, y = get_dataset()
models = get_models()

results, names = [], []

for name, model in models.items():
    scores = evaluate_model(model, X, y)
    names.append(name)
    print('> %s %.3f (%.3f)' % (name, np.mean(scores), np.std(scores)))

> knn -100.975 (7.631)
> cart -147.355 (10.456)
> svm -162.303 (13.719)


In [12]:
from sklearn.ensemble import StackingRegressor

In [13]:
def get_stacking():
    level0 = [
        ('knn', KNeighborsRegressor()),
        ('cart', DecisionTreeRegressor()),
        ('svm', SVR())
    ]
    level1 = LinearRegression()
    model = StackingRegressor(estimators=level0, final_estimator=level1, cv = 5)
    return model

In [14]:
def get_models():
    models = {
        'knn' : KNeighborsRegressor(),
        'cart' : DecisionTreeRegressor(),
        'svm' : SVR(),
        'stacking' : get_stacking()
    }
    return models

In [15]:
X, y = get_dataset()
models = get_models()

In [17]:
results, names = [], []

for name, model in models.items():
    scores = evaluate_model(model, X, y)
    results.append(scores)
    names.append(name)
    print('> %s %.3f (%.3f)' % (name, np.mean(scores), np.std(scores)))

> knn -100.975 (7.631)
> cart -145.449 (11.087)
> svm -162.303 (13.719)
> stacking -56.948 (4.000)


In [18]:
X, y = get_dataset()
model = get_stacking()
model.fit(X, y)
data = [np.random.randn(20)]
print(data)
print(model.predict(data))

[array([ 1.83220384,  0.30410582,  0.19446483, -2.83267053, -1.21557477,
        0.28036087, -0.13750112, -1.16920582, -1.07760939, -0.13876609,
       -1.4729998 ,  0.29150096,  0.91817733,  0.25947818,  0.76746053,
       -0.20400758, -1.48500999, -0.67795925, -0.86386545,  1.19473584])]
[-107.38454657]
