<a href="https://colab.research.google.com/github/np03cs4a240386-prog/Concepts-and-Technologies-of-AI/blob/main/Worksheet7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_diabetes, load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [None]:
# Regression data: diabetes feature matrix and target vector.
X, y = load_diabetes(return_X_y=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

print(f"Training samples: {X_train.shape}")
print(f"Test samples: {X_test.shape}")

Training samples: (353, 10)
Test samples: (89, 10)


In [None]:
# Unregularised baseline: ordinary least squares on the raw training features.
lin_reg = LinearRegression().fit(X_train, y_train)

# Keep the predictions around so later cells can inspect them.
y_train_pred, y_test_pred = lin_reg.predict(X_train), lin_reg.predict(X_test)

train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print(f"Training MSE: {train_mse}")
print(f"Test MSE: {test_mse}")


Training MSE: 2868.549702835577
Test MSE: 2900.193628493482


In [None]:
# Display the fitted baseline weights, indexed by feature position.
pd.Series(data=lin_reg.coef_)


Unnamed: 0,0
0,37.904021
1,-241.964362
2,542.428759
3,347.703844
4,-931.488846
5,518.062277
6,163.419983
7,275.317902
8,736.198859
9,48.670657


In [None]:
# Standardise inside the pipeline so the scaler is re-fit on each CV training fold.
ridge_pipe = Pipeline(steps=[("scaler", StandardScaler()), ("ridge", Ridge())])

# Candidate L2 penalty strengths; the key targets the pipeline step named "ridge".
ridge_params = {"ridge__alpha": [0.01, 0.1, 1, 10, 100]}

# 5-fold search scored by negated MSE (GridSearchCV maximises its score).
ridge_gs = GridSearchCV(
    estimator=ridge_pipe,
    param_grid=ridge_params,
    scoring="neg_mean_squared_error",
    cv=5,
)
ridge_gs.fit(X_train, y_train)

print(f"Best Ridge alpha: {ridge_gs.best_params_}")

Best Ridge alpha: {'ridge__alpha': 10}


In [None]:
# Same scale-then-fit layout as the ridge search, with an L1-penalised regressor.
# max_iter is raised to 5000 to give the solver more iterations.
lasso_pipe = Pipeline(steps=[("scaler", StandardScaler()), ("lasso", Lasso(max_iter=5000))])

# Candidate L1 penalty strengths for the pipeline step named "lasso".
# NOTE(review): the recorded run selects alpha=1, the largest value offered here;
# consider widening the grid upward to confirm the optimum is not beyond it.
lasso_params = {"lasso__alpha": [0.001, 0.01, 0.1, 1]}

# 5-fold search scored by negated MSE (GridSearchCV maximises its score).
lasso_gs = GridSearchCV(
    estimator=lasso_pipe,
    param_grid=lasso_params,
    scoring="neg_mean_squared_error",
    cv=5,
)
lasso_gs.fit(X_train, y_train)

print(f"Best Lasso alpha: {lasso_gs.best_params_}")

Best Lasso alpha: {'lasso__alpha': 1}


In [None]:
# Pipeline with the winning alpha, as exposed by the finished grid search.
ridge_best = ridge_gs.best_estimator_

# Score the tuned pipeline on the training split first, then the held-out split.
ridge_train_mse, ridge_test_mse = (
    mean_squared_error(targets, ridge_best.predict(features))
    for features, targets in ((X_train, y_train), (X_test, y_test))
)

print(f"Ridge Train MSE: {ridge_train_mse}")
print(f"Ridge Test MSE: {ridge_test_mse}")

Ridge Train MSE: 2887.5156823843467
Ridge Test MSE: 2875.7787184218428


In [None]:
# Pipeline with the winning alpha, as exposed by the finished grid search.
lasso_best = lasso_gs.best_estimator_

# Score the tuned pipeline on the training split first, then the held-out split.
lasso_train_mse, lasso_test_mse = (
    mean_squared_error(targets, lasso_best.predict(features))
    for features, targets in ((X_train, y_train), (X_test, y_test))
)

print(f"Lasso Train MSE: {lasso_train_mse}")
print(f"Lasso Test MSE: {lasso_test_mse}")

Lasso Train MSE: 2901.216098848626
Lasso Test MSE: 2824.568094049959


In [None]:
# Pull the fitted weight vectors out of the final step of each tuned pipeline.
# These weights are in standardised-feature units because each pipeline scales first.
ridge_coefs = ridge_best.named_steps["ridge"].coef_
lasso_coefs = lasso_best.named_steps["lasso"].coef_

# Count the Lasso weights that are exactly zero.
print(f"Number of zero coefficients (Lasso): {(lasso_coefs == 0).sum()}")

Number of zero coefficients (Lasso): 1


In [None]:
# Classification data: breast-cancer feature matrix and labels.
# NOTE: this rebinds X, y and the four split variables used by the diabetes
# section above, so re-running those earlier cells after this one would use
# the wrong data.
X, y = load_breast_cancer(return_X_y=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

print(f"Training samples: {X_train.shape}")
print(f"Test samples: {X_test.shape}")

Training samples: (455, 30)
Test samples: (114, 30)


In [None]:
# Baseline classifier on the unscaled features, with max_iter raised to 5000.
log_reg = LogisticRegression(max_iter=5000).fit(X_train, y_train)

# Accuracy on the training split first, then the held-out split.
train_acc, test_acc = (
    accuracy_score(labels, log_reg.predict(features))
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

print(f"Training Accuracy: {train_acc}")
print(f"Test Accuracy: {test_acc}")

Training Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193


In [None]:
# Scale, then classify. The liblinear solver is used with both the l1 and l2
# penalties searched below.
logreg_step = LogisticRegression(solver="liblinear", max_iter=5000)
pipe = Pipeline(steps=[("scaler", StandardScaler()), ("logreg", logreg_step)])

# Search over the C values and the penalty type for the "logreg" step.
param_grid = {
    "logreg__C": [0.01, 0.1, 1, 10],
    "logreg__penalty": ["l1", "l2"],
}

# 5-fold search over the 4 x 2 grid, selecting by accuracy.
gs = GridSearchCV(estimator=pipe, param_grid=param_grid, scoring="accuracy", cv=5)
gs.fit(X_train, y_train)

print(f"Best parameters: {gs.best_params_}")


Best parameters: {'logreg__C': 0.1, 'logreg__penalty': 'l2'}


In [None]:
# Pipeline with the winning C/penalty combination from the grid search.
best_model = gs.best_estimator_

# Accuracy on the training split first, then the held-out split.
train_acc, test_acc = (
    accuracy_score(labels, best_model.predict(features))
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

print(f"Best Model Train Accuracy: {train_acc}")
print(f"Best Model Test Accuracy: {test_acc}")


Best Model Train Accuracy: 0.9824175824175824
Best Model Test Accuracy: 0.9912280701754386


In [None]:
# coef_ is indexed at row 0 to get a flat weight vector from the tuned classifier.
final_logreg = best_model.named_steps["logreg"]
coefs = final_logreg.coef_[0]

# Count the weights that are exactly zero.
print(f"Number of zero coefficients: {(coefs == 0).sum()}")


Number of zero coefficients: 0
