# Model training

In [None]:
import pandas as pd
import os
import sklearn.metrics as metrics
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import json

from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.model_selection import LeaveOneOut, LeavePOut, RandomizedSearchCV, GridSearchCV, cross_val_score, KFold, cross_validate
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, RFE, f_regression
from collections.abc import MutableMapping

from scipy.stats import lognorm, uniform, randint

import warnings
from sklearn.exceptions import FitFailedWarning
# Silence scikit-learn's FitFailedWarning and every UserWarning for the rest of the session.
# NOTE(review): presumably needed because some searched hyper-parameter combinations
# cannot be fitted — confirm before relying on the absence of warnings.
warnings.filterwarnings("ignore", category=FitFailedWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Seed passed as `random_state` to the estimators and experiments below
RS = 42

# Utility functions

In [None]:
# Read the cached full DF if it is present; otherwise merge all other CSVs and cache the result
def load_csv():
    """Load the merged dataset from the current working directory.

    If ``full_df.csv`` is present it is read back and its ``Unnamed: 0``
    column (the row index written by ``to_csv``) is dropped. Otherwise every
    ``*.csv`` file in the directory is read with ``['pole_id', 'height_id']``
    as index, the frames are inner-joined column-wise on that index, and the
    result is written to ``full_df.csv`` so later calls take the fast path.

    Returns:
        pandas.DataFrame: The merged data with ``pole_id`` and ``height_id``
        as the first two regular columns.

    Raises:
        ValueError: If there is no cached ``full_df.csv`` and no other CSV
            file to build it from.
    """
    cache_name = 'full_df.csv'
    entries = os.listdir()

    if cache_name in entries:
        return pd.read_csv(cache_name).drop('Unnamed: 0', axis=1)

    # NOTE: os.listdir() returns names in arbitrary order, so the column order
    # of the merged frame follows whatever order the file system reports.
    df_paths = [path for path in entries if path.lower().endswith('.csv')]
    if not df_paths:
        raise ValueError(
            f"No CSV files found in {os.getcwd()!r}; cannot build {cache_name}"
        )

    df_list = [pd.read_csv(path, index_col=['pole_id', 'height_id']) for path in df_paths]

    full_df = pd.concat(df_list, axis=1, join='inner').reset_index()
    full_df.to_csv(cache_name)

    return full_df

def dict_product(dicts):
    """Return every combination obtained by picking one option per key.

    ``dicts`` maps each key to an iterable of options. The result is a list of
    dicts, one per element of the Cartesian product of those option
    iterables, each mapping every key to the option chosen for it.
    """
    keys = list(dicts)
    return [dict(zip(keys, combo)) for combo in itertools.product(*dicts.values())]

def flatten(dictionary, parent_key='', separator='_'):
    """Merge nested mappings into one flat dict, discarding the parent keys.

    Every value that is a ``MutableMapping`` is flattened recursively and its
    entries are lifted to the top level under their own keys; all other values
    are kept as they are. When the same key occurs more than once, the value
    seen last wins.

    ``parent_key`` and ``separator`` are accepted (and forwarded on recursion)
    but are never used to build the resulting keys.
    """
    flat = {}
    for key, value in dictionary.items():
        if not isinstance(value, MutableMapping):
            flat[key] = value
            continue
        flat.update(flatten(value, key, separator=separator))
    return flat

# Loading the data

In [None]:
# Merged dataset; predictors and targets are picked out of it by column position
full_df = load_csv()

# Predictors: every column from position 2 up to (not including) the last six
X = full_df.iloc[:, 2:-6]

# Targets, addressed from the end of the frame (position -3 is not used here)
y_surf = full_df.iloc[:, -6]
y_mid = full_df.iloc[:, -5]
y_int = full_df.iloc[:, -4]
y_diam = full_df.iloc[:, -2]
y_sdepth = full_df.iloc[:, -1]

# Experiments - pole surface

## Experiment #1: Scaling + linear model

The most naïve approach is to simply scale the dataset variables and then train a linear model on top of them.

In [None]:
def run_exp(exp_name, target, pipe, param_grid, randomize=False, rs_iters=250, random_state=42, mae_plot_padding = True):
    """Run one nested cross-validation experiment and save its scores and plots.

    The module-level feature matrix ``X`` is split with a shuffled 10-fold
    outer CV. On every outer training split a hyper-parameter search (shuffled
    5-fold inner CV, refit on ``neg_mean_absolute_error``) is fitted, and the
    refitted best estimator is scored (MAE and R2) on the held-out fold.
    Afterwards the same search is fitted on all the data, and the coefficients
    of the resulting model are plotted together with their spread across
    inner-CV refits of that model.

    Files written (missing directories are created):
        * ``results/{exp_name}.csv`` - per-fold MAE and R2
        * ``plots/{exp_name}/r2_values_splits.svg``
        * ``plots/{exp_name}/mae_values_splits.svg``
        * ``plots/{exp_name}/coeff_values.svg``
        * ``plots/{exp_name}/coeff_variability.svg``

    Args:
        exp_name: Name used for the output file and directory names.
        target: Target values; indexed with ``.iloc`` using the row positions
            produced by splitting ``X``, so it must be positionally aligned
            with ``X``.
        pipe: Estimator handed to the search. It must support slicing
            (``[:-1]`` / ``[-1]``), its leading steps must provide
            ``get_feature_names_out()`` and its last step must expose
            ``coef_`` once fitted.
        param_grid: Parameter grid (or distributions) for the search.
        randomize: Use ``RandomizedSearchCV`` instead of ``GridSearchCV``.
        rs_iters: ``n_iter`` of the randomized search; unused otherwise.
        random_state: Seed for both ``KFold`` splitters and the randomized
            search.
        mae_plot_padding: If true, the MAE plot's upper y-limit is the largest
            fold MAE plus 100; otherwise it is exactly the largest fold MAE.

    Returns:
        None.
    """
    # Output locations. makedirs also creates the missing parent 'plots'
    # directory and does not fail when the directories already exist.
    plots_dir = f'plots/{exp_name}'
    os.makedirs(plots_dir, exist_ok=True)
    os.makedirs('results', exist_ok=True)

    # Metrics recorded by every search; the refit metric picks the best model.
    scoring = ['explained_variance', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'neg_median_absolute_error', 'r2']
    refit_metric = 'neg_mean_absolute_error'

    # Outer CV gives honest performance estimates; inner CV tunes hyper-parameters.
    cv_outer = KFold(n_splits=10, shuffle=True, random_state=random_state)
    cv_inner = KFold(n_splits=5, shuffle=True, random_state=random_state)
    cv_outer_results_mae = []
    cv_outer_results_r2 = []

    def get_gs():
        # Build a fresh, unfitted search object with the experiment's settings.
        if not randomize:
            return GridSearchCV(pipe, param_grid, cv=cv_inner, scoring=scoring, refit=refit_metric, n_jobs=-1)
        return RandomizedSearchCV(pipe, param_grid, cv=cv_inner, scoring=scoring, refit=refit_metric, random_state=random_state, n_iter=rs_iters, n_jobs=-1)

    for train_ix, test_ix in cv_outer.split(X):
        # Split data
        X_train, X_test = X.iloc[train_ix, :], X.iloc[test_ix, :]
        y_train, y_test = target.iloc[train_ix], target.iloc[test_ix]

        result = get_gs().fit(X_train, y_train)

        # Best configuration, refitted on the whole outer training split
        best_model = result.best_estimator_
        # Evaluate it on the hold-out fold
        yhat = best_model.predict(X_test)
        mae = mean_absolute_error(y_test, yhat)
        r2 = r2_score(y_test, yhat)
        cv_outer_results_mae.append(mae)
        cv_outer_results_r2.append(r2)
        # best_score_ is the mean inner-CV value of the refit metric, i.e. the
        # NEGATIVE MAE; negate it so the printed estimate is an actual MAE.
        print('>mae=%.3f, r2=%.3f, est. mae=%.3f, cfg=%s' % (mae, r2, -result.best_score_, result.best_params_))
    print('MAE: %.3f (%.3f) // R2: %.3f (%.3f)' % (np.mean(cv_outer_results_mae), np.std(cv_outer_results_mae), np.mean(cv_outer_results_r2), np.std(cv_outer_results_r2)))

    # Save CV results to DataFrame
    df_results = pd.DataFrame.from_dict({'mae_splits': cv_outer_results_mae, 'r2_splits': cv_outer_results_r2})
    df_results.to_csv(f'results/{exp_name}.csv')

    # One x position / tick per outer fold
    fold_ids = list(range(len(cv_outer_results_mae)))

    # Line plot of the R2 obtained on each outer fold
    _, axes = plt.subplots(1,1,figsize=(7,7))
    axes.plot(fold_ids, cv_outer_results_r2)
    axes.set_title("$R^2$ on all outer CV folds", fontsize=18)
    axes.set_xlabel("#fold", fontsize=16)
    axes.set_ylabel("$R^2$", fontsize=16)
    axes.set_xticks(fold_ids)
    axes.tick_params(labelsize=14)
    axes.set_ylim([-16,16])
    # Reference lines at R2 = 1 and R2 = -1
    axes.axhline(1, linestyle="--", c='black')
    axes.axhline(-1, linestyle="--", c='black')
    plt.savefig(f'{plots_dir}/r2_values_splits.svg')
    plt.close()

    # Line plot of the MAE obtained on each outer fold
    _, axes = plt.subplots(1,1,figsize=(7,7))
    axes.plot(fold_ids, cv_outer_results_mae)
    axes.set_title("$MAE$ on all outer CV folds", fontsize=18)
    axes.set_xlabel("#fold", fontsize=16)
    axes.set_ylabel("$MAE$", fontsize=16)
    axes.set_xticks(fold_ids)
    axes.tick_params(labelsize=14)
    padding = 100 if mae_plot_padding else 0
    axes.set_ylim([0,np.max(cv_outer_results_mae) + padding])
    plt.savefig(f'{plots_dir}/mae_values_splits.svg')
    plt.close()

    # Final model: repeat the search on all the data and keep the refitted best estimator
    final_result = get_gs().fit(X, target)
    final_estimator = final_result.best_estimator_
    print(f"Model trained on all the dataset: {final_result.best_params_}")
    # Names of the features as seen by the last pipeline step
    feature_names = final_estimator[:-1].get_feature_names_out()
    # Refit the final configuration on each inner-CV training split to see how
    # much its coefficients move between splits
    cv_coef_var = cross_validate(
        final_estimator,
        X,
        target,
        cv=cv_inner,
        return_estimator=True,
        n_jobs=-1
    )
    final_estimator_coefs = pd.DataFrame(
        [est[-1].coef_ for est in cv_coef_var["estimator"]], columns=feature_names
    )

    # Plot largest coefficients
    coefs = pd.DataFrame(
        final_estimator[-1].coef_,
        columns=["Coefficients"],
        index=feature_names,
    )

    # Keep the 20 coefficients with the largest magnitude; the final reversal
    # puts the largest one last, which barh draws at the top
    coefs['abs_coef'] = coefs['Coefficients'].abs()
    coefs = coefs.sort_values(by='abs_coef', axis=0, ascending=False).head(20).drop('abs_coef', axis=1).iloc[::-1]

    coefs.plot.barh(figsize=(9, 7))
    plt.title("Best model")
    plt.axvline(x=0, color=".5")
    plt.xlabel("Raw coefficient values")
    plt.subplots_adjust(left=0.3)
    plt.savefig(f'{plots_dir}/coeff_values.svg')
    plt.close()

    # Plot coefficient variance for the same features
    top_features = coefs.index.to_list()
    plt.figure(figsize=(9, len(coefs) * 0.8))
    sns.stripplot(data=final_estimator_coefs[top_features], orient="h", palette="dark:k", alpha=0.5, order=top_features[::-1])
    sns.boxplot(data=final_estimator_coefs[top_features], orient="h", color="cyan", saturation=0.5, whis=10, order=top_features[::-1])
    plt.axvline(x=0, color=".5")
    plt.title("Coefficient variability (most important coefficients)")
    plt.subplots_adjust(left=0.3)
    plt.savefig(f'{plots_dir}/coeff_variability.svg')
    plt.close()

In [None]:
# Baseline pipeline: standardise the predictors, then fit a linear model
pipeline_exp1 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Plain least squares, plus one grid per penalised model over the same alphas
params_exp1 = [{'model': [LinearRegression()]}] + [
    {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="surface_scale_linear",
    target=y_surf,
    pipe=pipeline_exp1,
    param_grid=params_exp1,
    random_state=RS,
)

## Experiment #2: Scaling + dimensionality reduction + linear model

As the preliminary analysis has shown, this dataset has 89 predictor variables, but only 50 samples. This high dimensionality may lead to overfitting issues, and therefore it could be interesting to try different dimensionality reduction techniques.

In [None]:
# Standardise, project onto principal components, then fit a linear model
pipeline_exp2 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Every model is searched over 1..30 principal components; the penalised
# models are additionally searched over alpha
params_exp2 = [{'reduce_dims__n_components': list(range(1,31)), 'model': [LinearRegression()]}] + [
    {
        'reduce_dims__n_components': list(range(1,31)),
        'model': [penalised],
        'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0],
    }
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="surface_scale_reduce_linear",
    target=y_surf,
    pipe=pipeline_exp2,
    param_grid=params_exp2,
    randomize=True,
    random_state=RS,
)

## Experiment #3: Scaling + feature selection + linear model

In [None]:
# Standardise, keep a subset of the features, then fit a linear model
pipeline_exp3 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('model', LinearRegression()),
])

# Pair every feature-selection grid with every regressor grid and merge each
# pair into one flat parameter dict
params_exp3 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]

run_exp(
    exp_name="surface_scale_select_linear",
    target=y_surf,
    pipe=pipeline_exp3,
    param_grid=params_exp3,
    randomize=True,
    random_state=RS,
)

## Experiment #4: Scaling + feature selection + dimensionality reduction + linear model

In [None]:
# Standardise, select features, project onto principal components, then fit a linear model
pipeline_exp4 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Combine every feature-selection grid with the PCA grid and every regressor
# grid, merging each combination into one flat parameter dict
params_exp4 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'dims': [
            {'reduce_dims__n_components': list(range(1,31))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]
run_exp(
    exp_name="surface_scale_select_reduce_linear",
    target=y_surf,
    pipe=pipeline_exp4,
    param_grid=params_exp4,
    randomize=True,
    rs_iters=1000,
    random_state=RS,
)

# Experiments - pole middle

## Experiment #1: Scaling + linear model

In [None]:
# Baseline pipeline: standardise the predictors, then fit a linear model
pipeline_exp1 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Plain least squares, plus one grid per penalised model over the same alphas
params_exp1 = [{'model': [LinearRegression()]}] + [
    {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="middle_scale_linear",
    target=y_mid,
    pipe=pipeline_exp1,
    param_grid=params_exp1,
    random_state=RS,
)

## Experiment #2: Scaling + dimensionality reduction + linear model

In [None]:
# Standardise, project onto principal components, then fit a linear model
pipeline_exp2 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Every model is searched over 1..30 principal components; the penalised
# models are additionally searched over alpha
params_exp2 = [{'reduce_dims__n_components': list(range(1,31)), 'model': [LinearRegression()]}] + [
    {
        'reduce_dims__n_components': list(range(1,31)),
        'model': [penalised],
        'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0],
    }
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="middle_scale_reduce_linear",
    target=y_mid,
    pipe=pipeline_exp2,
    param_grid=params_exp2,
    randomize=True,
    random_state=RS,
)

## Experiment #3: Scaling + feature selection + linear model

In [None]:
# Standardise, keep a subset of the features, then fit a linear model
pipeline_exp3 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('model', LinearRegression()),
])

# Pair every feature-selection grid with every regressor grid and merge each
# pair into one flat parameter dict
params_exp3 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]

run_exp(
    exp_name="middle_scale_select_linear",
    target=y_mid,
    pipe=pipeline_exp3,
    param_grid=params_exp3,
    randomize=True,
    random_state=RS,
)

## Experiment #4: Scaling + feature selection + dimensionality reduction + linear model

In [None]:
# Standardise, select features, project onto principal components, then fit a linear model
pipeline_exp4 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Combine every feature-selection grid with the PCA grid and every regressor
# grid, merging each combination into one flat parameter dict
params_exp4 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'dims': [
            {'reduce_dims__n_components': list(range(1,31))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]
run_exp(
    exp_name="middle_scale_select_reduce_linear",
    target=y_mid,
    pipe=pipeline_exp4,
    param_grid=params_exp4,
    randomize=True,
    rs_iters=1000,
    random_state=RS,
)

# Experiments - pole interior

## Experiment #1: Scaling + linear model

In [None]:
# Baseline pipeline: standardise the predictors, then fit a linear model
pipeline_exp1 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Plain least squares, plus one grid per penalised model over the same alphas
params_exp1 = [{'model': [LinearRegression()]}] + [
    {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="interior_scale_linear",
    target=y_int,
    pipe=pipeline_exp1,
    param_grid=params_exp1,
    random_state=RS,
)

## Experiment #2: Scaling + dimensionality reduction + linear model

In [None]:
# Standardise, project onto principal components, then fit a linear model
pipeline_exp2 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Every model is searched over 1..30 principal components; the penalised
# models are additionally searched over alpha
params_exp2 = [{'reduce_dims__n_components': list(range(1,31)), 'model': [LinearRegression()]}] + [
    {
        'reduce_dims__n_components': list(range(1,31)),
        'model': [penalised],
        'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0],
    }
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="interior_scale_reduce_linear",
    target=y_int,
    pipe=pipeline_exp2,
    param_grid=params_exp2,
    randomize=True,
    random_state=RS,
)

In this case, the obtained model seems to perform slightly better than the previous one. The estimated $R^2$ score has improved from 0.29 to 0.35, and the MAE has improved from 1114 to 1092. It is a small improvement, but a sign that dimensionality reduction might be good for this problem, even if model interpretability becomes harder this way.

However, by looking at the coefficient variability plot, it can be seen that coefficients vary much more wildly than in the previous setting, so the coefficients obtained by the model trained upon the dimensionality-reduced dataset might not be as reliable.

## Experiment #3: Scaling + feature selection + linear model

In [None]:
# Standardise, keep a subset of the features, then fit a linear model
pipeline_exp3 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('model', LinearRegression()),
])

# Pair every feature-selection grid with every regressor grid and merge each
# pair into one flat parameter dict
params_exp3 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]

run_exp(
    exp_name="interior_scale_select_linear",
    target=y_int,
    pipe=pipeline_exp3,
    param_grid=params_exp3,
    randomize=True,
    random_state=RS,
)

## Experiment #4: Scaling + feature selection + dimensionality reduction + linear model

In [None]:
# Standardise, select features, project onto principal components, then fit a linear model
pipeline_exp4 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Combine every feature-selection grid with the PCA grid and every regressor
# grid, merging each combination into one flat parameter dict
params_exp4 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'dims': [
            {'reduce_dims__n_components': list(range(1,31))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]
run_exp(
    exp_name="interior_scale_select_reduce_linear",
    target=y_int,
    pipe=pipeline_exp4,
    param_grid=params_exp4,
    randomize=True,
    rs_iters=1000,
    random_state=RS,
)

# Experiments - surface depth

## Experiment #1: Scaling + linear model

In [None]:
# Baseline pipeline: standardise the predictors, then fit a linear model
pipeline_exp1 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Plain least squares, plus one grid per penalised model over the same alphas
params_exp1 = [{'model': [LinearRegression()]}] + [
    {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="depth_scale_linear",
    target=y_sdepth,
    pipe=pipeline_exp1,
    param_grid=params_exp1,
    random_state=RS,
    mae_plot_padding=False,
)

## Experiment #2: Scaling + dimensionality reduction + linear model

As the preliminary analysis has shown, this dataset has 89 predictor variables, but only 50 samples. This high dimensionality may lead to overfitting issues, and therefore it could be interesting to try different dimensionality reduction techniques.

In [None]:
# Standardise, project onto principal components, then fit a linear model
pipeline_exp2 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Every model is searched over 1..30 principal components; the penalised
# models are additionally searched over alpha
params_exp2 = [{'reduce_dims__n_components': list(range(1,31)), 'model': [LinearRegression()]}] + [
    {
        'reduce_dims__n_components': list(range(1,31)),
        'model': [penalised],
        'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0],
    }
    for penalised in (
        Ridge(),
        Lasso(max_iter=100000, random_state=RS),
        ElasticNet(max_iter=100000, random_state=RS),
    )
]

run_exp(
    exp_name="depth_scale_reduce_linear",
    target=y_sdepth,
    pipe=pipeline_exp2,
    param_grid=params_exp2,
    randomize=True,
    random_state=RS,
    mae_plot_padding=False,
)

## Experiment #3: Scaling + feature selection + linear model

In [None]:
# Standardise, keep a subset of the features, then fit a linear model
pipeline_exp3 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('model', LinearRegression()),
])

# Pair every feature-selection grid with every regressor grid and merge each
# pair into one flat parameter dict
params_exp3 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]

run_exp(
    exp_name="depth_scale_select_linear",
    target=y_sdepth,
    pipe=pipeline_exp3,
    param_grid=params_exp3,
    randomize=True,
    random_state=RS,
    mae_plot_padding=False,
)

## Experiment #4: Scaling + feature selection + dimensionality reduction + linear model

In [None]:
# Standardise, select features, project onto principal components, then fit a linear model
pipeline_exp4 = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('select_features', SelectKBest()),
    ('reduce_dims', PCA()),
    ('model', LinearRegression()),
])

# Combine every feature-selection grid with the PCA grid and every regressor
# grid, merging each combination into one flat parameter dict
params_exp4 = [
    flatten(combo, parent_key=None)
    for combo in dict_product({
        'feats': [
            {'select_features': [SelectKBest(score_func=f_regression)], 'select_features__k': list(range(1,81))},
            {'select_features': [RFE(LinearRegression())], 'select_features__n_features_to_select': list(range(1,81))},
        ],
        'dims': [
            {'reduce_dims__n_components': list(range(1,31))},
        ],
        'regressor': [{'model': [LinearRegression()]}] + [
            {'model': [penalised], 'model__alpha': [0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0]}
            for penalised in (
                Ridge(),
                Lasso(max_iter=100000, random_state=RS),
                ElasticNet(max_iter=100000, random_state=RS),
            )
        ],
    })
]
run_exp(
    exp_name="depth_scale_select_reduce_linear",
    target=y_sdepth,
    pipe=pipeline_exp4,
    param_grid=params_exp4,
    randomize=True,
    rs_iters=1000,
    random_state=RS,
    mae_plot_padding=False,
)