In [None]:
# Fetch and unpack the intro-hyperparameter-tuning archive, then make the
# extracted directory the working directory for the rest of the notebook.
# -nc skips the download when the archive is already present (otherwise wget
# saves a second copy as `.zip.1`), and -o lets unzip overwrite existing files
# instead of stopping to ask, so the cell can be re-run safely.
!wget -nc https://pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com/intro-hyperparameter-tuning/intro-hyperparameter-tuning.zip
!unzip -qq -o intro-hyperparameter-tuning.zip
%cd intro-hyperparameter-tuning

--2023-06-02 13:26:45--  https://pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com/intro-hyperparameter-tuning/intro-hyperparameter-tuning.zip
Resolving pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com (pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com)... 52.92.136.242, 3.5.82.126, 52.92.132.82, ...
Connecting to pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com (pyimagesearch-code-downloads.s3-us-west-2.amazonaws.com)|52.92.136.242|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 152304 (149K) [binary/octet-stream]
Saving to: ‘intro-hyperparameter-tuning.zip’


2023-06-02 13:26:45 (1.34 MB/s) - ‘intro-hyperparameter-tuning.zip’ saved [152304/152304]

/content/intro-hyperparameter-tuning


In [None]:
# import the necessary packages
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from scipy.stats import loguniform
import pandas as pd

In [None]:
class Config:
    """Static settings for the notebook: the input CSV and its column labels."""

    # CSV file the dataset is read from (relative to the working directory)
    CSV_PATH = 'abalone_train.csv'

    # labels assigned to the CSV columns, in file order
    COLS = [
        'Length',
        'Diameter',
        'Height',
        'Whole weight',
        "Shucked weight",
        'Viscera weight',
        'Shell weight',
        'Age',
    ]


config = Config()

In [None]:
# load the CSV; the column labels are supplied by config.COLS via `names=`
dataset = pd.read_csv(config.CSV_PATH, names=config.COLS)

# every column except the last is a feature, the last column is the target
dataX = dataset[config.COLS[:-1]]
dataY = dataset[config.COLS[-1]]

# hold out 15% of the rows for testing; the seed keeps the split repeatable
trainX, testX, trainY, testY = train_test_split(
    dataX, dataY, test_size=0.15, random_state=3
)

In [None]:
# fit the scaler on the training features only, then apply that same
# transform to both splits
scaler = StandardScaler().fit(trainX)
trainX = scaler.transform(trainX)
testX = scaler.transform(testX)

In [None]:
# baseline: an SVR with default hyperparameters, scored on the held-out split
# (fit() returns the estimator itself, so construction and fitting are chained)
model = SVR().fit(trainX, trainY)
model.score(testX, testY)

0.5537874025424437

In [None]:
# fresh estimator plus the discrete candidate values for each hyperparameter;
# `grid` maps SVR parameter names to the lists to be searched
model = SVR()
kernel = [
    "linear",
    "rbf",
    "sigmoid",
    "poly",
]
tolerance = [1e-3, 1e-4, 1e-5, 1e-6]
C = [1, 1.5, 2, 2.5, 3]
grid = {"kernel": kernel, "tol": tolerance, "C": C}

In [None]:
# 10-fold cross-validation repeated 3 times, seeded so the folds are repeatable
cvFold = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# try every combination in `grid` on all available cores, ranking candidates
# by negative mean squared error
gridSearch = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring="neg_mean_squared_error",
    cv=cvFold,
    n_jobs=-1,
)
searchResults = gridSearch.fit(trainX, trainY)

# score the best estimator found by the search on the held-out split
bestModel = searchResults.best_estimator_
print("R2: {:.2f}".format(bestModel.score(testX, testY)))

R2: 0.56


Tuning Hyperparameters with a Randomized Search

In [None]:
# search space for the randomized search: kernel and C keep their discrete
# candidate lists, while the tolerance is drawn from a log-uniform
# distribution between 1e-6 and 1e-3
model = SVR()
kernel = [
    "linear",
    "rbf",
    "sigmoid",
    "poly",
]
tolerance = loguniform(1e-6, 1e-3)
C = [1, 1.5, 2, 2.5, 3]
grid = {"kernel": kernel, "tol": tolerance, "C": C}

In [None]:
# run a randomized search over the distributions in `grid`, using 10-fold
# cross-validation repeated 3 times and ranking candidates by negative MSE
print("[INFO] grid searching over the hyperparameters...")
cvFold = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
# random_state fixes which candidates are sampled (including the log-uniform
# tolerance draws); without it every run evaluates a different set of
# candidates and the reported score is not reproducible
randomSearch = RandomizedSearchCV(estimator=model, n_jobs=-1,
	cv=cvFold, param_distributions=grid,
	scoring="neg_mean_squared_error", random_state=1)
searchResults = randomSearch.fit(trainX, trainY)

# extract the best model and evaluate it on the held-out split
print("[INFO] evaluating...")
bestModel = searchResults.best_estimator_
print("R2: {:.2f}".format(bestModel.score(testX, testY)))

[INFO] grid searching over the hyperparameters...
[INFO] evaluating...
R2: 0.56
