In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

abalone = pd.read_csv("A6_datasets/abalone.csv")
concretecs = pd.read_csv("A6_datasets/concretecs.csv")
parkinsons = pd.read_csv("A6_datasets/parkinsons.csv")
skillcraft = pd.read_csv("A6_datasets/skillcraft.csv")
wine = pd.read_csv("A6_datasets/winequality-white.csv")

abalone_y = abalone.pop('rings')
concretecs_y = concretecs.pop('Concrete compressive strength')
parkinsons_y = parkinsons.pop('total_UPDRS')
skillcraft_y = skillcraft.pop('ComplexAbilitiesUsed')
wine_y = wine.pop('quality')

abalone_trainval, abalone_test, abalone_y_trainval, abalone_y_test = train_test_split(abalone, abalone_y, train_size = 500, random_state = 1016)
concretecs_trainval, concretecs_test, concretecs_y_trainval, concretecs_y_test = train_test_split(concretecs, concretecs_y, train_size = 500, random_state = 1016)
parkinsons_trainval, parkinsons_test, parkinsons_y_trainval, parkinsons_y_test = train_test_split(parkinsons, parkinsons_y, train_size = 500, random_state = 1016)
skillcraft_trainval, skillcraft_test, skillcraft_y_trainval, skillcraft_y_test = train_test_split(skillcraft, skillcraft_y, train_size = 500, random_state = 1016)
wine_trainval, wine_test, wine_y_trainval, wine_y_test = train_test_split(wine, wine_y, train_size = 500, random_state = 1016)

In [2]:
import warnings
warnings.filterwarnings(action='ignore')

In [3]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, GridSearchCV, KFold
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor

In [4]:
pipe = Pipeline([('preprocessing', None), ('regressor', LinearRegression())])
hyperparam_grid = [
    {'regressor' : [LinearRegression()], 'preprocessing' : [StandardScaler(), MinMaxScaler(), None]},
    {'regressor' : [Ridge()], 'preprocessing' : [StandardScaler(), MinMaxScaler(), None],
    'regressor__alpha' : [0, 0.01, 1, 10, 100]},
    {'regressor' : [Lasso()], 'preprocessing' : [StandardScaler(), MinMaxScaler(), None],
    'regressor__alpha' : [0.0001, 0.001, 0.01, 0.1, 1, 10], 'regressor__tol':[0.00001, 0.0001,0.001]},
    {'regressor' : [RandomForestRegressor()], 'preprocessing' : [None],
    'regressor__max_features' : ['auto', 'sqrt', 'log2']},
    {'regressor' : [SVR()], 'preprocessing' : [StandardScaler(), MinMaxScaler(), None],
    'regressor__epsilon' : [0.001, 0.01, 0.1], 'regressor__gamma' : [0.01, 0.1], 'regressor__C': [1, 100]},
    {'regressor' : [MLPRegressor()], 'preprocessing' : [StandardScaler(), MinMaxScaler(), None],
    'regressor__max_iter' : [5000,10000], 'regressor__activation' : ['tanh', 'relu'],
    'regressor__solver' : ['lbfgs', 'sgd', 'adam'], 'regressor__hidden_layer_sizes': [(10,),(20,),(50,),(100,)]}]

In [None]:
kfold = KFold(5, shuffle=True, random_state=1016)
grid = GridSearchCV(pipe, hyperparam_grid, scoring='neg_mean_squared_error', refit=True, cv = kfold)

data = [[abalone_trainval,abalone_y_trainval,abalone_test,abalone_y_test],
        [concretecs_trainval,concretecs_y_trainval,concretecs_test,concretecs_y_test],
        [parkinsons_trainval,parkinsons_y_trainval,parkinsons_test,parkinsons_y_test],
        [skillcraft_trainval,skillcraft_y_trainval,skillcraft_test,skillcraft_y_test],
        [wine_trainval,wine_y_trainval,wine_test,wine_y_test],]

for i in data:
    grid.fit(i[0], i[1])
    print("best hyperparam : \n{}".format(grid.best_params_))
    print("best cross-validation score : {:.2f}".format(-grid.best_score_))
    print("test-set score : {:.2f}".format(-grid.score(i[2], i[3])))

best hyperparam : 
{'preprocessing': None, 'regressor': MLPRegressor(max_iter=5000), 'regressor__activation': 'relu', 'regressor__hidden_layer_sizes': (100,), 'regressor__max_iter': 5000, 'regressor__solver': 'adam'}
best cross-validation score : 5.10
test-set score : 4.67
best hyperparam : 
{'preprocessing': StandardScaler(), 'regressor': MLPRegressor(max_iter=5000), 'regressor__activation': 'relu', 'regressor__hidden_layer_sizes': (100,), 'regressor__max_iter': 5000, 'regressor__solver': 'adam'}
best cross-validation score : 32.53
test-set score : 38.73
