# Libraries 

In [None]:
import timeit

# trio!
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import seaborn as sns

# ML
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler

# Import Train/Test Set

This time we draw a random sample from each file (10,000 training rows and 3,000 test rows, with a fixed seed) to reduce training time.

In [None]:
# Load each split, then keep a seeded random subset of its rows so the
# subsequent hyper-parameter searches run quickly and reproducibly.
df_train = pd.read_csv('train.csv')
df_train = df_train.sample(n=10000, random_state=2021)
df_test = pd.read_csv('test.csv')
df_test = df_test.sample(n=3000, random_state=2021)

In [None]:
# Column to predict, and the columns fed to the models as inputs.
target = 'stator_yoke'
features_list = [
    'ambient',
    'coolant',
    'u_d',
    'u_q',
    'motor_speed',
    'i_d',
    'i_q',
]

In [None]:
# Pull the training target and inputs out of the frame as NumPy arrays.
y_train = df_train[target].to_numpy()
X_train = df_train.loc[:, features_list].to_numpy()

In [None]:
X_train.shape, y_train.shape

In [None]:
# Pull the test target and inputs out of the frame as NumPy arrays.
y_test = df_test[target].to_numpy()
X_test = df_test.loc[:, features_list].to_numpy()

In [None]:
X_test.shape, y_test.shape

# Data Preprocessing

In [None]:
# Feature scaler with default settings; it is fitted on the training inputs
# in the next cell and reused unchanged for the test inputs.
X_scaler = MinMaxScaler()

In [None]:
# Learn the per-feature min/max from the training split only, then apply
# that same mapping to both splits so no test information leaks into the fit.
X_scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

#  Predictive Models

In [None]:
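# Import grid search: GridSearchCV and RandomizedSearchCV are imported with the other sklearn imports in the Libraries cell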


In [None]:
def plot_test(reg, X, y):
    """Scatter-plot predictions against true targets and print the RMSE.

    Parameters
    ----------
    reg : object with a ``predict`` method
        Fitted estimator or fitted search object used to predict from ``X``.
    X : array-like
        Inputs passed to ``reg.predict``.
    y : array-like
        True target values; drawn on the x-axis, predictions on the y-axis.
    """
    y_pred = reg.predict(X)
    plt.scatter(y, y_pred)
    # Take the square root explicitly: the ``squared=False`` shortcut of
    # ``mean_squared_error`` was deprecated in scikit-learn 1.4 and removed
    # in 1.6, so relying on it raises TypeError on current versions.
    rmse = float(np.sqrt(mean_squared_error(y, y_pred)))
    print(f'RMSE: {rmse}')

## Ridge 

In [None]:
from sklearn.linear_model import Ridge

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html

In [None]:
# Make a dictionary of parameters to do a search
# Keys are Ridge constructor arguments. The result cells further down sort and
# plot by "param_alpha" and "param_solver", so both must be part of the search.
param_grid_ridge = {
    "alpha": [0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
    "solver": ["svd", "cholesky", "lsqr", "sparse_cg"],
}

### Grid Search

In [None]:
# Exhaustive 5-fold cross-validated search over every combination in
# param_grid_ridge, using all CPU cores (same settings as the Lasso and
# decision-tree searches in this notebook).
grid_search_ridge = GridSearchCV(Ridge(), param_grid=param_grid_ridge, cv=5, n_jobs=-1)

In [None]:
%timeit 

In [None]:
plot_test(grid_search_ridge, X_test_scaled, y_test)

### Randomized Search

In [None]:
# Randomized 5-fold search: evaluates only n_iter parameter settings drawn
# from param_grid_ridge instead of every combination. random_state fixes
# which settings are drawn so the run is reproducible.
rand_search_ridge = RandomizedSearchCV(
    Ridge(),
    param_distributions=param_grid_ridge,
    n_iter=10,
    cv=5,
    n_jobs=-1,
    random_state=2021,
)

In [None]:
%timeit 

In [None]:
rand_search_ridge.best_params_

In [None]:
rand_search_ridge.cv_results_

In [None]:
df_rand_search_ridge = pd.DataFrame(rand_search_ridge.cv_results_)
df_rand_search_ridge.head(5)

In [None]:
df_rand_search_ridge.sort_values(["param_solver", "param_alpha", "mean_test_score"], inplace=True)

In [None]:
sns.lineplot(x="param_alpha", y="mean_test_score", hue="param_solver", data=df_rand_search_ridge)

In [None]:
rand_search_ridge.score(X_test_scaled, y_test)

In [None]:
plot_test(rand_search_ridge, X_test_scaled, y_test)

## Lasso

In [None]:
from sklearn.linear_model import Lasso

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html

In [None]:
# Regularisation strengths to search. "alpha" must be present because the
# result cells below sort and plot by the "param_alpha" column.
param_grid_lasso = {
    "alpha": [0.0001, 0.001, 0.01, 0.1, 1.0],
}

In [None]:
grid_search_lasso = GridSearchCV(Lasso(), param_grid=param_grid_lasso, cv=5, n_jobs=-1)

In [None]:
grid_search_lasso.fit(X_train_scaled, y_train)

In [None]:
grid_search_lasso.best_params_

In [None]:
grid_search_lasso.cv_results_

In [None]:
df_grid_search_lasso = pd.DataFrame(grid_search_lasso.cv_results_)
df_grid_search_lasso.head(5)

In [None]:
df_grid_search_lasso.sort_values(["param_alpha", "mean_test_score"], inplace=True)

In [None]:
sns.lineplot(x="param_alpha", y="mean_test_score", data=df_grid_search_lasso)

In [None]:
grid_search_lasso.score(X_test_scaled, y_test)

In [None]:
plot_test(grid_search_lasso, X_test_scaled, y_test)

## Decision Tree Regressor 

In [None]:
from sklearn.tree import DecisionTreeRegressor

https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeRegressor.html

In [None]:
# Candidate values for the decision-tree search, keyed by the
# DecisionTreeRegressor constructor argument they are passed to.
param_grid_dt_reg = dict(
    max_depth=[5, 7, 10, 15, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 10, 20],
)

In [None]:
# 5-fold grid search over param_grid_dt_reg on all CPU cores. The tree is
# seeded (same seed as the data sampling) so that tie-breaking between equally
# good splits, and therefore the search result, is reproducible across runs.
grid_search_dt_reg = GridSearchCV(
    DecisionTreeRegressor(random_state=2021),
    param_grid_dt_reg,
    cv=5,
    n_jobs=-1,
)

In [None]:
grid_search_dt_reg.fit(X_train, y_train)

In [None]:
grid_search_dt_reg.best_params_

In [None]:
grid_search_dt_reg.cv_results_

In [None]:
df_grid_search_dt_reg = pd.DataFrame(grid_search_dt_reg.cv_results_)
df_grid_search_dt_reg.head(5)

In [None]:
df_grid_search_dt_reg.sort_values(["param_max_depth", "param_min_samples_leaf", "param_min_samples_split", "mean_test_score"], inplace=True)

In [None]:
# One panel per distinct "param_min_samples_split" value in the results frame.
fig_dt_reg = sns.FacetGrid(df_grid_search_dt_reg, col="param_min_samples_split")
# In each panel draw mean CV score against max_depth; the third column name is
# handed to lineplot as well (presumably mapped to hue, i.e. one line per
# min_samples_leaf value -- confirm against the seaborn version in use).
fig_dt_reg.map(sns.lineplot, "param_max_depth", "mean_test_score", "param_min_samples_leaf")
# Empty, marker-less plot whose only purpose is to add a "min_samples_leaf"
# text entry to the legend created on the next line.
plt.plot([], [], ' ', label="min_samples_leaf")
plt.legend()
plt.show()

In [None]:
grid_search_dt_reg.score(X_test, y_test)

In [None]:
plot_test(grid_search_dt_reg, X_test, y_test)

## Random Forest Regressor

## AdaBoost Regressor

## Gradient Boosting Regressor 

## XGBoost Regressor