## Hyper-parameter Tuning

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import os

In [3]:
os.chdir("D:/meridianthe4/PML/Datasets")

In [4]:
boston = pd.read_csv("Boston.csv")
X = boston.drop('medv', axis=1)
y = boston['medv']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [7]:
alphas = np.linspace(0.001, 15, 30)
l1 = np.linspace(0, 1, 100)
scores = []

for a in alphas:
    for r in l1:
        el = ElasticNet(alpha=a, l1_ratio=r)
        el.fit(X_train, y_train)
        y_pred = el.predict(X_test)
        scores.append([a, r, mean_squared_error(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['alpha', 'l1_ratio', 'score'])
df_scores.sort_values('score')

Linear regression models with a zero l1 penalization strength are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.
  model = cd_fast.enet_coordinate_descent(
Linear regression models with a zero l1 penalization strength are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.
  model = cd_fast.enet_coordinate_descent(
Linear regression models with a zero l1 penalization strength are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.
  model = cd_fast.enet_coordinate_descent(
Linear regression models with a zero l1 penalization strength are more efficiently fitted using one of the solvers implemented in sklearn.linear_model.Ridge/RidgeCV instead.
  model = cd_fast.enet_coordinate_descent(
Linear regression models with a zero l1 penalization strength are more efficiently fitted using one of the solvers implemented in sk

Unnamed: 0,alpha,l1_ratio,score
99,0.001,1.000000,21.531346
98,0.001,0.989899,21.533688
97,0.001,0.979798,21.536029
96,0.001,0.969697,21.538367
95,0.001,0.959596,21.540705
...,...,...,...
2995,15.000,0.959596,38.432974
2996,15.000,0.969697,38.468330
2997,15.000,0.979798,38.504200
2998,15.000,0.989899,38.540592


## Housing

In [5]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_selector

In [6]:
housing = pd.read_csv("Housing.csv")
X, y = housing.drop('price', axis=1), housing['price']

In [7]:
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform="pandas")
col_transformer = ColumnTransformer([("OHE", ohe, make_column_selector(dtype_include=object))], 
                                    remainder='passthrough', 
                                    verbose_feature_names_out=False)
col_transformer = col_transformer.set_output(transform="pandas")
X = col_transformer.fit_transform(X)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25)

In [9]:
alphas = [0.001, 0.01, 0.1, 1, 1.5, 2.5, 5, 10]
scores = []

for a in alphas:
    ridge = Lasso(alpha=a)
    ridge.fit(X_train, y_train)
    y_pred = ridge.predict(X_test)
    scores.append([a, mean_squared_error(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['alpha', 'score'])
df_scores.sort_values('score')

Unnamed: 0,alpha,score
7,10.0,272656900.0
6,5.0,272688600.0
5,2.5,272705900.0
4,1.5,272713400.0
3,1.0,272717000.0
2,0.1,272723700.0
1,0.01,272724400.0
0,0.001,272724500.0


In [10]:
np.arange(0.001, 15, 0.5)

array([1.0000e-03, 5.0100e-01, 1.0010e+00, 1.5010e+00, 2.0010e+00,
       2.5010e+00, 3.0010e+00, 3.5010e+00, 4.0010e+00, 4.5010e+00,
       5.0010e+00, 5.5010e+00, 6.0010e+00, 6.5010e+00, 7.0010e+00,
       7.5010e+00, 8.0010e+00, 8.5010e+00, 9.0010e+00, 9.5010e+00,
       1.0001e+01, 1.0501e+01, 1.1001e+01, 1.1501e+01, 1.2001e+01,
       1.2501e+01, 1.3001e+01, 1.3501e+01, 1.4001e+01, 1.4501e+01])

In [11]:
np.linspace(0.001, 15, 20)

array([1.00000000e-03, 7.90421053e-01, 1.57984211e+00, 2.36926316e+00,
       3.15868421e+00, 3.94810526e+00, 4.73752632e+00, 5.52694737e+00,
       6.31636842e+00, 7.10578947e+00, 7.89521053e+00, 8.68463158e+00,
       9.47405263e+00, 1.02634737e+01, 1.10528947e+01, 1.18423158e+01,
       1.26317368e+01, 1.34211579e+01, 1.42105789e+01, 1.50000000e+01])

In [12]:
alphas = np.linspace(0.001, 15, 30)
scores = []

for a in alphas:
    ridge = Lasso(alpha=a)
    ridge.fit(X_train, y_train)
    y_pred = ridge.predict(X_test)
    scores.append([a, mean_squared_error(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['alpha', 'score'])
df_scores.sort_values('score')

Unnamed: 0,alpha,score
29,15.0,272628900.0
28,14.482793,272631600.0
27,13.965586,272634400.0
26,13.448379,272637200.0
25,12.931172,272640000.0
24,12.413966,272642900.0
23,11.896759,272645800.0
22,11.379552,272648800.0
21,10.862345,272651800.0
20,10.345138,272654800.0


## Exp Salary

In [13]:
sals = pd.read_csv("Exp_Salaries.csv")
X, y = sals.drop('Salary', axis=1), sals['Salary']

In [14]:
ohe = OneHotEncoder(drop='first', sparse_output=False).set_output(transform="pandas")
col_transformer = ColumnTransformer([("OHE", ohe, make_column_selector(dtype_include=object))], 
                                    remainder='passthrough', 
                                    verbose_feature_names_out=False)
col_transformer = col_transformer.set_output(transform="pandas")
X = col_transformer.fit_transform(X)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25)

In [16]:
alphas = np.linspace(0.001, 15, 30)
scores = []

for a in alphas:
    ridge = Lasso(alpha=a)
    ridge.fit(X_train, y_train)
    y_pred = ridge.predict(X_test)
    scores.append([a, mean_squared_error(y_test, y_pred)])
df_scores = pd.DataFrame(scores, columns=['alpha', 'score'])
df_scores.sort_values('score')

Unnamed: 0,alpha,score
0,0.001,13382270.0
1,0.518207,13385370.0
2,1.035414,13388430.0
3,1.552621,13391710.0
4,2.069828,13394850.0
5,2.587034,13398220.0
6,3.104241,13401330.0
7,3.621448,13404770.0
8,4.138655,13408260.0
9,4.655862,13411780.0
