Part 1: Regression Task – Diabetes Dataset
Task 1: Load and Split Dataset (80% train, 20% test)

In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Diabetes regression data, returned directly as a (features, target) pair.
X, y = load_diabetes(return_X_y=True)

# 80/20 train/test partition; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Sanity check: shapes of the two feature partitions.
print(X_train.shape, X_test.shape)


(353, 10) (89, 10)


Task 2: Regression Experiments
Step 1: Baseline Linear Regression (No Regularization)

In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Unregularised least-squares baseline, fitted on the training split only.
# NOTE(review): the saved output of this cell lists 30 coefficients while the
# diabetes split has 10 feature columns — presumably it was last executed after
# Part 2 rebound X_train/y_train. Restart the kernel and run all cells to refresh.
lr = LinearRegression().fit(X_train, y_train)

# Predictions for both splits so train and test error can be compared.
y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

# Mean squared error on each split.
train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
test_mse = mean_squared_error(y_true=y_test, y_pred=y_test_pred)

# Report the errors first, then the fitted weights (one per feature).
print("Training MSE:", train_mse)
print("Test MSE:", test_mse)
print("Coefficients:", lr.coef_)


Training MSE: 0.051601906634910176
Test MSE: 0.0641088624702943
Coefficients: [ 1.97130218e-01 -2.79472278e-03 -2.27758664e-02 -3.28622398e-04
  4.11490191e-01  5.00171192e+00 -1.00587030e+00 -4.91570446e+00
  3.38393701e-01 -5.81425644e+00 -4.32261922e-01  1.26325368e-02
  8.24736376e-03  1.24507529e-03 -1.80785086e+01  2.20798677e+00
  4.27375913e+00 -1.81589526e+01  1.19449435e+00  3.01203668e+00
 -2.14438989e-01 -9.61718848e-03  8.71176397e-03  9.61253395e-04
 -1.32384962e-01 -7.62670138e-01 -6.15742798e-01  1.32619828e+00
 -1.02113249e+00 -1.27363832e+00]


Step 2: Hyperparameter Tuning (Ridge & Lasso)
Ridge Regression (L2)

In [11]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

# Candidate regularisation strengths, one per decade from 0.01 to 100.
# This grid is module-level on purpose: the Lasso cell reuses it.
alpha_grid = {'alpha': [0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validated search over alpha, scored with 'neg_mean_squared_error'.
ridge = Ridge()
ridge_cv = GridSearchCV(
    estimator=ridge,
    param_grid=alpha_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
ridge_cv.fit(X_train, y_train)
print("Best alpha (Ridge):", ridge_cv.best_params_)

# Evaluate the selected estimator on the held-out test split.
ridge_best = ridge_cv.best_estimator_
ridge_test_pred = ridge_best.predict(X_test)
ridge_test_mse = mean_squared_error(y_true=y_test, y_pred=ridge_test_pred)
print("Ridge Test MSE:", ridge_test_mse)


Best alpha (Ridge): {'alpha': 0.01}
Ridge Test MSE: 0.057592556568050915


Lasso Regression (L1)

In [4]:
from sklearn.linear_model import Lasso

# L1-regularised model with the solver's iteration cap set to 10000.
lasso = Lasso(max_iter=10000)

# Same 5-fold search as for Ridge; `alpha_grid` is the dict defined in the Ridge cell.
lasso_cv = GridSearchCV(
    estimator=lasso,
    param_grid=alpha_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
lasso_cv.fit(X_train, y_train)
print("Best alpha (Lasso):", lasso_cv.best_params_)

# Evaluate the selected estimator on the held-out test split.
lasso_best = lasso_cv.best_estimator_
lasso_test_pred = lasso_best.predict(X_test)
lasso_test_mse = mean_squared_error(y_true=y_test, y_pred=lasso_test_pred)
print("Lasso Test MSE:", lasso_test_mse)


Best alpha (Lasso): {'alpha': 0.1}
Lasso Test MSE: 2798.193485169719


Step 3: L1 vs L2 Comparison

In [5]:
import numpy as np

# Show both coefficient vectors, each under its own heading line.
print("Ridge coefficients:", ridge_best.coef_, sep="\n")
print("\nLasso coefficients:", lasso_best.coef_, sep="\n")

# Count the weights that Lasso set exactly to zero.
print(
    "\nNumber of zero coefficients in Lasso:",
    np.count_nonzero(lasso_best.coef_ == 0),
)


Ridge coefficients:
[  42.85566976 -205.49431899  505.08903304  317.0932049  -108.50026183
  -86.23673333 -190.36318008  151.70708637  392.28931896   79.9081772 ]

Lasso coefficients:
[   0.         -152.66477923  552.69777529  303.36515791  -81.36500664
   -0.         -229.25577639    0.          447.91952518   29.64261704]

Number of zero coefficients in Lasso: 3


Part 2: Classification Task – Breast Cancer Dataset
Task 1: Load and Split Dataset

In [7]:
from sklearn.datasets import load_breast_cancer

# Breast-cancer classification data as a (features, labels) pair.
# Note: this rebinds X, y and the four split variables used in Part 1, so the
# regression split is no longer reachable once this cell has run.
X, y = load_breast_cancer(return_X_y=True)

# 80/20 train/test partition with the same fixed seed as Part 1.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


Task 2: Classification Experiments
Step 1: Baseline Logistic Regression

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Baseline classifier with default settings apart from the iteration cap.
# NOTE(review): the saved output of this cell (accuracy 0.028 / 0.0 and a
# many-row coefficient matrix) looks like a fit on the Part 1 regression split —
# presumably the cell ran before the breast-cancer data was loaded. Restart the
# kernel and run all cells to refresh.
log_reg = LogisticRegression(max_iter=10000).fit(X_train, y_train)

# Accuracy on the data the model was fitted on, then on the held-out split.
train_acc = accuracy_score(y_true=y_train, y_pred=log_reg.predict(X_train))
test_acc = accuracy_score(y_true=y_test, y_pred=log_reg.predict(X_test))

print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)

# Fitted weights of the model.
print("Coefficients:", log_reg.coef_)


Training Accuracy: 0.028328611898016998
Test Accuracy: 0.0
Coefficients: [[-0.01769262 -0.04448496 -0.01210918 ... -0.03947086 -0.02235968
  -0.03580723]
 [ 0.00042955 -0.04445727 -0.00987055 ... -0.03940051 -0.06532488
  -0.05631116]
 [ 0.00458072  0.00616612 -0.06627711 ... -0.11461907 -0.13376748
   0.00730129]
 ...
 [ 0.03260933  0.0500785   0.10862571 ... -0.00325037  0.02632264
   0.07086104]
 [ 0.03263234  0.04998052  0.12249787 ...  0.10670927 -0.00137172
   0.02549715]
 [-0.05031119 -0.04459137  0.1580244  ...  0.03351706  0.02642536
   0.00920701]]


Step 2: Hyperparameter Tuning (C and Penalty)

In [8]:
# Search space: five values of C crossed with both penalty types.
# The solver is pinned to 'liblinear' for every candidate.
param_grid = dict(
    C=[0.01, 0.1, 1, 10, 100],
    penalty=['l1', 'l2'],
    solver=['liblinear'],
)

# 5-fold cross-validated search, scored by accuracy.
log_cv = GridSearchCV(
    estimator=LogisticRegression(max_iter=10000),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
log_cv.fit(X_train, y_train)

print("Best Parameters:", log_cv.best_params_)


Best Parameters: {'C': 100, 'penalty': 'l1', 'solver': 'liblinear'}


Step 3: L1 vs L2 Logistic Regression

In [9]:
# Estimator selected by the grid search in the previous step.
best_model = log_cv.best_estimator_

# Accuracy on both splits; this overwrites train_acc/test_acc from the baseline cell.
train_acc = accuracy_score(y_true=y_train, y_pred=best_model.predict(X_train))
test_acc = accuracy_score(y_true=y_test, y_pred=best_model.predict(X_test))

print("Training Accuracy:", train_acc)
print("Test Accuracy:", test_acc)

# Fitted weights, followed by how many of them are exactly zero.
print("Coefficients:", best_model.coef_)
print("Zero coefficients:", (best_model.coef_ == 0).sum())


Training Accuracy: 0.989010989010989
Test Accuracy: 0.9824561403508771
Coefficients: [[ 7.36883269e-01 -1.08397824e-01  9.34966430e-02 -1.88947536e-03
   0.00000000e+00  4.75235753e+01 -1.11277326e+01 -1.36983783e+02
   1.99119747e+01  0.00000000e+00  0.00000000e+00  1.71863354e+00
   3.96350873e-03 -1.97694794e-01  0.00000000e+00  0.00000000e+00
   4.96434755e+01  0.00000000e+00  1.89657942e+01  0.00000000e+00
   2.59929764e-01 -4.37326757e-01  5.24146408e-02 -2.11724731e-02
  -2.15352713e+01  8.00767347e+00 -1.47169121e+01 -2.51287652e+01
  -2.54032859e+01  0.00000000e+00]]
Zero coefficients: 8
