# Keywords
- preprocessing: OneHotEncoder, binarize
- feature extraction: DictVectorizer
- regressor: Ridge wrapped in MultiOutputRegressor
- metrics: roc_auc_score, accuracy_score

In [1]:
import numpy as np

from sklearn import datasets
from sklearn import preprocessing

from sklearn.model_selection import train_test_split

from sklearn.feature_extraction import DictVectorizer

from sklearn.linear_model import Ridge
from sklearn.multioutput import MultiOutputRegressor

from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

# Load the iris data set: X is the feature matrix, y the integer class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# The encoder expects a 2-D input, so the labels are presented as one column.
one_hot_encoder = preprocessing.OneHotEncoder(categories='auto')
one_hot_encoder.fit(y[:, np.newaxis])

# Show what each of the label values 0, 1 and 2 looks like once encoded.
label_example = one_hot_encoder.transform(np.arange(3)[:, np.newaxis]).toarray()
print("label example:\n%s" % label_example)

# Base regressor used for every one-hot label column.
# `normalize` is intentionally not passed: scikit-learn 1.2 removed that
# argument from Ridge (passing it raises TypeError), and omitting it matches
# the previous normalize=False behaviour. `tol` stays explicit because its
# default value is not the same in every scikit-learn release.
ridge_estimator = Ridge(alpha=1.0, tol=0.001)

# Wrap the base regressor so one copy is fitted per target column;
# n_jobs=-1 lets the per-column fits run in parallel.
multi_ridge = MultiOutputRegressor(ridge_estimator, n_jobs=-1)

# One-hot encode the labels so each class becomes its own regression target.
y_multi = one_hot_encoder.transform(y.reshape(-1, 1)).toarray()

# Stratify on the original integer labels; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_multi, stratify=y, random_state=7
)

multi_ridge.fit(X_train, y_train)

# Raw (continuous) regressor outputs, one column per class.
y_multi_pred_ori = multi_ridge.predict(X_test)
print("y_multi_pred_ori:\n%s" % y_multi_pred_ori[:5])

# Turn the continuous outputs into hard 0/1 labels by thresholding at 0.5.
y_multi_pred = preprocessing.binarize(y_multi_pred_ori, threshold=0.5)
print("y_multi_pred:\n%s" % y_multi_pred[:5])

# AUC over all label columns at once, computed from the raw outputs.
score = roc_auc_score(y_test, y_multi_pred_ori)
print("roc_auc_score:", score)

print()

print("Multi-Output Scores for the Iris Flowers: ")
for column_number in range(3):
    # Pull out the column for this class from the truth and both predictions.
    truth = y_test[:, column_number]
    hard_labels = y_multi_pred[:, column_number]
    raw_scores = y_multi_pred_ori[:, column_number]

    # Accuracy needs the thresholded labels; AUC uses the raw scores.
    column_accuracy = accuracy_score(truth, hard_labels)
    column_auc = roc_auc_score(truth, raw_scores)

    print("Accuracy score of flower - Col %d: %.4f" % (column_number, column_accuracy))
    print("AUC score of flower - Col %d: %.4f" % (column_number, column_auc))
    print()

dv = DictVectorizer()
species_dict = [{'species': iris.target_names[i]} for i in y]
label_example_2 = dv.fit_transform(species_dict).toarray()
print(label_example_2[:5])

label example:
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
y_multi_pred_ori:
[[ 0.81327059  0.37348962 -0.1867602 ]
 [ 0.9524035   0.17877376 -0.13117727]
 [-0.01657826  0.36538441  0.65119386]
 [ 0.1772412   0.47768186  0.34507695]
 [ 0.87693053  0.14905609 -0.02598662]]
y_multi_pred:
[[1. 0. 0.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 0. 0.]
 [1. 0. 0.]]
roc_auc_score: 0.9198717948717948

Multi-Output Scores for the Iris Flowers: 
Accuracy score of flower - Col 0: 1.0000
AUC score of flower - Col 0: 1.0000

Accuracy score of flower - Col 1: 0.7368
AUC score of flower - Col 1: 0.7692

Accuracy score of flower - Col 2: 0.9737
AUC score of flower - Col 2: 0.9904

[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]
