In [1]:
# Synthetic multioutput regression problem: every sample carries two targets.
from sklearn.datasets import make_regression

# 1000 samples, 10 input features (5 informative), 2 targets; seeded so the data is repeatable
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_targets=2,
    noise=0.5,
    random_state=1,
)
# report the dimensions of the inputs and of the targets
print(X.shape, y.shape)

(1000, 10) (1000, 2)


In [3]:
import pandas as pd

# Render the feature matrix as a table: one row per sample, one column per feature.
pd.DataFrame(data=X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.219477,0.329490,0.815600,0.440956,-0.060630,-0.292579,-0.282006,-0.002905,0.964023,0.049922
1,0.843369,0.564675,0.328040,-0.199352,-1.354650,-0.029456,-0.647895,0.055339,0.136536,0.630311
2,-1.044998,-0.883771,0.767113,1.616794,-1.105432,0.298835,-1.693349,0.482598,0.420755,1.638889
3,-0.091764,-0.769384,1.202059,-1.251391,-0.013095,-0.254311,-1.271874,1.083952,-0.594676,1.040153
4,-0.166486,-1.039182,-1.898812,0.838635,1.071252,0.733023,-1.117111,0.984952,-1.097154,-0.508972
...,...,...,...,...,...,...,...,...,...,...
995,1.376503,0.204160,-0.258589,-0.393081,-0.642044,0.534805,-0.694732,1.605953,-0.367468,0.040235
996,-0.196810,-1.127430,-1.221542,-0.494713,-1.049821,0.091312,0.685183,-1.543406,-0.849149,-0.824043
997,1.055336,0.693456,-0.040289,-0.278880,-1.254537,-0.131957,0.033678,1.775566,0.786216,1.406868
998,-1.213338,-0.379518,0.435986,-0.600225,0.725917,-1.014804,-0.687487,0.966221,3.321079,-2.698362


In [4]:
# Render the target matrix as a table: one row per sample, one column per target.
pd.DataFrame(data=y)

Unnamed: 0,0,1
0,49.931371,64.084850
1,-117.185283,16.600951
2,39.796220,-77.575799
3,-34.482660,-95.946399
4,203.824031,-90.500991
...,...,...
995,50.912037,-8.976950
996,-315.379294,-167.786199
997,45.760167,58.888402
998,135.211331,97.097782


In [5]:
from sklearn.linear_model import LinearRegression

# LinearRegression accepts a 2-column target directly; fit() hands back the estimator
model = LinearRegression().fit(X, y)
# one input sample, one value per feature
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]
# predict() works on a batch, so the single sample is wrapped in a list
yhat = model.predict([row])
# show both predicted targets for that sample
print(yhat[0])

[50.06781717 64.564973  ]


In [6]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest neighbours with default settings, trained on both targets at once
model = KNeighborsRegressor().fit(X, y)
# one input sample, one value per feature
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]
# predict() works on a batch, so the single sample is wrapped in a list
yhat = model.predict([row])
# show both predicted targets for that sample
print(yhat[0])

[-11.73511093  52.78406297]


In [7]:
from sklearn.tree import DecisionTreeRegressor

# Seed the tree: candidate features are permuted randomly at each split, so an
# unseeded tree is not guaranteed to be built identically on every run.
model = DecisionTreeRegressor(random_state=1)
# fit model on both targets at once
model.fit(X, y)
# make a prediction for one input sample (one value per feature)
# NOTE: these values match the first training row, so the tree is expected to
# return that row's stored targets rather than a genuinely new estimate.
row = [0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303, -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249]
# predict() works on a batch, so the single sample is wrapped in a list
yhat = model.predict([row])
# summarize prediction
print(yhat[0])

[49.93137149 64.08484989]


In [8]:
# evaluate multioutput regression model with k-fold cross-validation
from numpy import absolute
from numpy import mean
from numpy import std
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold

# define model; seeded like the CV splitter below so that the reported score is
# reproducible (an unseeded tree may break ties between splits differently per run)
model = DecisionTreeRegressor(random_state=1)
# define the evaluation procedure: 10 folds, repeated 3 times -> 30 scores
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate the model and collect the scores (n_jobs=-1 uses all available cores)
n_scores = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# the scorer returns negated MAE (higher is better); force the scores to be positive
n_scores = absolute(n_scores)
# summarize performance as mean (standard deviation) across all folds and repeats
print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

MAE: 52.283 (3.115)


In [9]:
# train the current `model` on the complete dataset
model.fit(X, y)
# one input sample, one value per feature
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]
# predict() works on a batch, so the single sample is wrapped in a list
yhat = model.predict([row])
# show both predicted targets for that sample
print(yhat[0])

[49.93137149 64.08484989]


Direct Multioutput: Develop a separate, independent model for each numerical value to be predicted (here, `MultiOutputRegressor` fits one `LinearSVR` per target column).


In [10]:
# example of evaluating direct multioutput regression with an SVM model
from numpy import mean
from numpy import std
from numpy import absolute
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import LinearSVR
# define dataset (same parameters and seed as the first cell, so this cell runs on its own)
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=2, random_state=1, noise=0.5)
# define base model; seeded because LinearSVR shuffles the data with a
# pseudo-random generator, which otherwise makes the score vary between runs
model = LinearSVR(random_state=1)
# define the direct multioutput wrapper model: one clone of the base model per target
wrapper = MultiOutputRegressor(model)
# define the evaluation procedure: 10 folds, repeated 3 times -> 30 scores
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate the model and collect the scores (n_jobs=-1 uses all available cores)
n_scores = cross_val_score(wrapper, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# the scorer returns negated MAE (higher is better); force the scores to be positive
n_scores = absolute(n_scores)
# summarize performance as mean (standard deviation) across all folds and repeats
print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

MAE: 0.419 (0.024)


In [11]:
# train the current `wrapper` on the complete dataset
wrapper.fit(X, y)
# one input sample, one value per feature
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]
# predict() works on a batch, so the single sample is wrapped in a list
yhat = wrapper.predict([row])
# show both predicted targets for that sample
print('Predicted: %s' % (yhat[0],))

Predicted: [50.03864356 64.51903748]


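Chained Multioutput: Develop a sequence of dependent models, one per numerical value to be predicted, in which each model after the first also receives the outputs of the earlier models in the chain as extra input features.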

In [12]:
# example of evaluating chained multioutput regression with an SVM model
from numpy import mean
from numpy import std
from numpy import absolute
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.multioutput import RegressorChain
from sklearn.svm import LinearSVR
# define dataset (same parameters and seed as the first cell, so this cell runs on its own)
X, y = make_regression(n_samples=1000, n_features=10, n_informative=5, n_targets=2, random_state=1, noise=0.5)
# define base model; seeded because LinearSVR shuffles the data with a
# pseudo-random generator, which otherwise makes the score vary between runs
model = LinearSVR(random_state=1)
# define the chained multioutput wrapper model; with no explicit order the
# targets are modelled in column order
wrapper = RegressorChain(model)
# define the evaluation procedure: 10 folds, repeated 3 times -> 30 scores
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# evaluate the model and collect the scores (n_jobs=-1 uses all available cores)
n_scores = cross_val_score(wrapper, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
# the scorer returns negated MAE (higher is better); force the scores to be positive
n_scores = absolute(n_scores)
# summarize performance as mean (standard deviation) across all folds and repeats
print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

MAE: 0.599 (0.217)


In [13]:
# train the current `wrapper` on the complete dataset
wrapper.fit(X, y)
# one input sample, one value per feature
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]
# predict() works on a batch, so the single sample is wrapped in a list
yhat = wrapper.predict([row])
# show both predicted targets for that sample
print('Predicted: %s' % (yhat[0],))

Predicted: [50.03792744 64.3426646 ]




: 

Reference: https://machinelearningmastery.com/multi-output-regression-models-with-python/