In [15]:
from sklearn.datasets import make_regression
import pandas as pd

In [16]:
# Synthetic multi-output regression problem: 1000 samples, 10 features
# (5 of them informative) and 2 targets per sample. Seeded so the data is
# identical on every run.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    n_informative=5,
    n_targets=2,
    noise=0.5,
    random_state=1,
)

In [17]:
# Lift pandas' column limit so wide frames are rendered without truncation.
pd.options.display.max_columns = None

# Dimensions of the feature matrix: (n_samples, n_features).
X.shape

(1000, 10)

In [18]:
# Dimensions of the target matrix: (n_samples, n_targets) -- one column per output.
y.shape

(1000, 2)

Some regression algorithms in scikit-learn support multiple outputs natively:
- LinearRegression 
- KNeighborsRegressor
- DecisionTreeRegressor
- RandomForestRegressor

In [19]:
# Linear regression for multioutput regression.
# The estimator accepts the 2-D target `y` as-is, so a single fit on the full
# dataset produces one model that predicts every target.
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X=X, y=y)

In [20]:
# One hand-written sample with 10 feature values, used to sanity-check the
# fitted model.
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]

# predict() takes a batch of samples, so the single row is wrapped in a list;
# the result holds one predicted value per target.
yhat = model.predict([row])
print(yhat)

[[50.06781717 64.564973  ]]


In [21]:
# K-nearest neighbors for multioutput regression.
# Uses the estimator's default settings and fits on the full dataset.
from sklearn.neighbors import KNeighborsRegressor

# fit() returns the estimator itself, so construction and fitting can be chained.
model = KNeighborsRegressor().fit(X, y)

# Same 10-feature sample as in the linear-regression example, repeated here so
# the cell can be run on its own once X, y exist.
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]

# Wrap the row in a list: predict() expects a batch of samples.
yhat = model.predict([row])
print(yhat)

[[-11.73511093  52.78406297]]


In [23]:
# Using DecisionTreeRegressor for multioutput regression.
# A single tree is fitted on the full dataset and predicts every target.
from sklearn.tree import DecisionTreeRegressor

# Seed the tree: scikit-learn permutes the features at every split, so tied
# splits can be resolved differently from run to run unless random_state is
# fixed. Pinning it makes this prediction -- and any later evaluation that
# reuses `model` -- reproducible.
model = DecisionTreeRegressor(random_state=1)
model.fit(X, y)

# Same 10-feature sample used for the other estimators, repeated here so the
# cell can be run on its own once X, y exist.
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]

# Wrap the row in a list: predict() expects a batch of samples.
yhat = model.predict([row])
print(yhat)

[[49.93137149 64.08484989]]


In [24]:
# Evaluation
# Estimate the mean absolute error of `model` with repeated k-fold
# cross-validation.
# NOTE(review): `model` is whatever estimator was assigned most recently; in
# top-to-bottom order that is the DecisionTreeRegressor from the cell above --
# confirm before comparing this number with other results.
from numpy import absolute, mean, std
from sklearn.model_selection import RepeatedKFold, cross_val_score

# 10 folds repeated 3 times; seeded so the splits are the same on every run.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# The 'neg_mean_absolute_error' scorer yields negated errors, so take the
# absolute value to get plain MAE. n_jobs=-1 runs the fits in parallel.
n_scores = absolute(
    cross_val_score(
        model,
        X,
        y,
        scoring='neg_mean_absolute_error',
        cv=cv,
        n_jobs=-1,
    )
)

# Report mean MAE with its standard deviation across all folds/repeats.
print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

MAE: 52.548 (3.390)


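There are also two general strategies that wrap any single-output regressor:
1. Direct multioutput: fit one independent model per target column of y, and predict each target with its own model.

2. Chained multioutput: fit the models in sequence, feeding earlier targets in as extra inputs. The first model is trained on X to predict y1, the second on X and y1 to predict y2, the third on X, y1 and y2 to predict y3, and so on for as many targets as there are.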

In [28]:
# Direct Multioutput
# MultiOutputRegressor wraps a single-output estimator and fits one separate
# copy of it per target column of y.
# Relies on cross_val_score, RepeatedKFold, absolute, mean and std imported in
# the Evaluation cell above.
from sklearn.multioutput import MultiOutputRegressor
from sklearn.svm import LinearSVR

# Seed the SVR: its solver shuffles the data using a random number generator,
# so without random_state the scores below change from run to run.
model = LinearSVR(random_state=1)
wrapper = MultiOutputRegressor(model)

# 10 folds repeated 3 times; seeded so the splits are the same on every run.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# n_jobs=-1 runs the cross-validation fits in parallel.
n_scores = cross_val_score(wrapper, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)

# The scorer yields negated errors; take the absolute value to get plain MAE.
n_scores = absolute(n_scores)

print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))

MAE: 0.419 (0.024)


In [29]:
# Chained Multioutput
# RegressorChain fits the wrapped estimator once per target, in sequence, with
# each later model also receiving the earlier targets as input features.
# Relies on LinearSVR, cross_val_score, RepeatedKFold, absolute, mean and std
# imported in earlier cells.
from sklearn.multioutput import RegressorChain

# Seed the SVR: its solver shuffles the data using a random number generator,
# so without random_state the scores below change from run to run.
model = LinearSVR(random_state=1)
wrapper = RegressorChain(model)

# 10 folds repeated 3 times; seeded so the splits are the same on every run.
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# n_jobs=-1 runs the cross-validation fits in parallel.
n_scores = cross_val_score(wrapper, X, y, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)

# The scorer yields negated errors; take the absolute value to get plain MAE.
n_scores = absolute(n_scores)

print('MAE: %.3f (%.3f)' % (mean(n_scores), std(n_scores)))



MAE: 0.609 (0.354)


