In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [None]:
# Load the Palmer penguins workbook into a DataFrame and preview its first five rows
penguins_path = "input/palmer_penguins.xlsx"
df = pd.read_excel(penguins_path)
df.head(5)

In [None]:
# Discard every row that has at least one missing value, printing the
# DataFrame's shape before and after so the number of dropped rows is visible
print(df.shape)
df.dropna(axis=0, how="any", inplace=True)  # axis=0 / how="any" are the defaults, spelled out
print(df.shape)

In [None]:
# Encode the target variable as integers: female -> 0, male -> 1.
# Series.map is used rather than Series.replace: swapping strings for ints via
# replace relies on pandas silently downcasting the object column, which newer
# pandas releases flag as deprecated. With map, any value that is not
# 'female' / 'male' becomes NaN instead of being passed through unchanged.
sex_codes = {'female': 0, 'male': 1}
df['sex_recoded'] = df['sex'].map(sex_codes)

# show the distinct codes as a quick sanity check
print(df.sex_recoded.unique())

In [None]:
# Separate the predictors (X) from the target (y), then hold out 40% of the rows for testing
predictor_columns = ['bill_length_mm', 'flipper_length_mm']
X = df[predictor_columns].values
y = df['sex_recoded'].values.reshape(-1, 1)  # column vector: one row per sample
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=123,  # fixed seed so the split is repeatable
)

In [None]:
# 5-fold cross-validation splitter; shuffling is enabled and seeded so the folds are repeatable
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=123,
)

In [None]:
# Tune Lasso's alpha hyper-parameter with an exhaustive grid search:
# 20 evenly spaced candidates between 0.00001 and 1, each evaluated with the `kf` splitter
param_grid = {'alpha': np.linspace(start=0.00001, stop=1, num=20)}
lasso_cv = GridSearchCV(estimator=Lasso(), param_grid=param_grid, cv=kf)

# run the search on the training split
lasso_cv.fit(X_train, y_train)

# report the selected parameters and the score they achieved
print('Tunned parameters: {}'.format(lasso_cv.best_params_))
print('Best score: {}'.format(lasso_cv.best_score_))

In [None]:
# tunning the hyper-parameter using Randomized Search CV
ridge_cv = RandomizedSearchCV(Ridge(), param_grid, cv=kf, n_iter=2)

# fit the train data to the model
ridge_cv.fit(X_train, y_train)

# print the best parameters and best score
print('Tunned parameters: {}'.format(ridge_cv.best_params_))
print('Tunned parameters: {}'.format(ridge_cv.best_score_))