In [7]:
from hyperparameter_hunter import Environment, CrossValidationExperiment

import pandas as pd

from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression, Ridge, MultiTaskLasso, MultiTaskElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error



In [10]:
# Trivial linear multiple-regression problem with a little noise (0.1):
# 1000 samples, 4 informative features, 4 targets.
x, y = make_regression(
    n_samples=1000,
    n_features=4,
    n_informative=4,
    n_targets=4,
    noise=0.1,
    random_state=42,
)

# Column labels are derived from the generated arrays so they always agree with
# `n_features` / `n_targets` above instead of being retyped for every DataFrame.
# With the settings above this yields x1..x4 and y1..y4.
feature_columns = ["x{}".format(i) for i in range(1, x.shape[1] + 1)]
target_columns = ["y{}".format(i) for i in range(1, y.shape[1] + 1)]

#################### Train/Holdout Split ####################
# 10% of the rows are set aside as the holdout set.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.10, random_state=42)

#################### Scale Data ####################
# Each scaler is fitted on the training rows only and then applied unchanged to
# the holdout rows, so holdout statistics never influence the scaling.
x_scaler = StandardScaler()
x_train_scaled = x_scaler.fit_transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

y_scaler = StandardScaler()
y_train_scaled = y_scaler.fit_transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

#################### Reorganize Into Scaled DFs ####################
# Features and targets are placed side by side (features first) in one frame per split.
x_train_df = pd.DataFrame(x_train_scaled, columns=feature_columns)
y_train_df = pd.DataFrame(y_train_scaled, columns=target_columns)
train_df = pd.concat([x_train_df, y_train_df], axis=1)

x_holdout_df = pd.DataFrame(x_test_scaled, columns=feature_columns)
y_holdout_df = pd.DataFrame(y_test_scaled, columns=target_columns)
holdout_df = pd.concat([x_holdout_df, y_holdout_df], axis=1)

# Sanity check: the two splits together account for every generated sample.
assert len(train_df) + len(holdout_df) == len(x)

train_df.shape

(900, 8)

In [3]:
# Estimator classes to benchmark. The list is positional: entry i is paired with
# entry i of `regressor_params` below, so keep both lists in the same order.
regressors = [
    LinearRegression,
    KNeighborsRegressor,
    DecisionTreeRegressor,
    MultiTaskLasso,
    MultiTaskElasticNet,
    Ridge,
    MLPRegressor,
]

# Constructor keyword arguments for each estimator above (same order).
# An empty mapping means the estimator is built with its library defaults.
regressor_params = [
    {},  # LinearRegression
    {},  # KNeighborsRegressor
    {},  # DecisionTreeRegressor
    {"alpha": 0.01},  # MultiTaskLasso
    {"alpha": 0.01},  # MultiTaskElasticNet
    {"alpha": 0.05},  # Ridge
    {  # MLPRegressor
        "hidden_layer_sizes": (5,),
        "activation": 'relu',
        "solver": 'adam',
        # NOTE(review): scikit-learn documents `learning_rate` as only used when
        # solver='sgd' -- confirm whether 'adaptive' has any effect with 'adam'.
        "learning_rate": 'adaptive',
        "max_iter": 1000,
        "learning_rate_init": 0.01,
        "alpha": 0.01,
    },
]



In [12]:
#################### HyperparameterHunter ####################
# Shared settings for every experiment below: the scaled train/holdout frames,
# the four target columns, MSE as the metric, and shuffled 10-fold KFold.
# Results go under "multiple_regression_assets"; 'script_backup' is blacklisted.
env = Environment(
    train_dataset=train_df,
    holdout_dataset=holdout_df,
    target_column=["y1", "y2", "y3", "y4"],
    metrics_map=["mean_squared_error"],
    cross_validation_type="KFold",
    cross_validation_params={"n_splits": 10, "shuffle": True, "random_state": 32},
    root_results_path="multiple_regression_assets",
    file_blacklist=["script_backup"],
)

# One CrossValidationExperiment per (estimator class, constructor kwargs) pair.
# NOTE(review): the recorded run stopped with a ValueError during the first
# experiment (LinearRegression). The reported shapes involve the 4 target columns
# and the 100 holdout rows, so multi-column targets are presumably not handled by
# the installed hyperparameter_hunter version -- TODO confirm against the library.
for initializer, init_params in zip(regressors, regressor_params):
    # print() applies str() to its arguments, so the class repr is shown as-is.
    print("training", initializer)
    exp = CrossValidationExperiment(
        model_initializer=initializer, model_init_params=init_params
    )

<2018-11-21 07:32:10,525> Cross-Experiment Key: loC6GFDRB3cDEvCqtc6WHqxmA6JHagjM3SvWxaMEYsc=
<2018-11-21 07:32:10,541> Validated Environment with key: 'loC6GFDRB3cDEvCqtc6WHqxmA6JHagjM3SvWxaMEYsc='
<2018-11-21 07:32:10,546> 
<2018-11-21 07:32:10,548> Initialized new Experiment with ID: a252f87c-6d0f-499f-9a96-75e729be3c10
<2018-11-21 07:32:10,550> Skipped creating backup of file: 'C:\Users\rditlas8\Documents\projects\2019_1_vrsensors\<ipython-input-12-cdc66ca71ae4>'
<2018-11-21 07:32:10,590> Generated hyperparameter key: 2JN3Zs4EV7Oc2QP-vWnv59AI7qabIeW4jmYzaVYbhv4=
<2018-11-21 07:32:10,603> Initial preprocessing stage complete
<2018-11-21 07:32:10,606> 

<2018-11-21 07:32:10,612> Starting Repetition 0
<2018-11-21 07:32:10,613> 
<2018-11-21 07:32:10,615> 
<2018-11-21 07:32:10,621> Model has no random_state/seed parameter to update
<2018-11-21 07:32:10,623> F0/R0  |  Seed: 10967   Time: 07:32:10


training <class 'sklearn.linear_model.base.LinearRegression'>


ValueError: Shape of passed values is (100, 100), indices imply (4, 100)