<a href="https://colab.research.google.com/github/rajtiwari04/MLAssigment01/blob/main/ridge%26lasso.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Regularization (Ridge and Lasso Regression)**

In [1]:
import pandas as pd

In [2]:
data=pd.read_csv("diabetes.csv")
data

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

df = pd.read_csv("diabetes.csv")

X = df[['Glucose', 'BMI', 'Age']].values
y = df['Outcome'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_b = np.hstack((np.ones((X_train_scaled.shape[0], 1)), X_train_scaled))
X_test_b = np.hstack((np.ones((X_test_scaled.shape[0], 1)), X_test_scaled))

alpha = 1.0

I = np.eye(X_train_b.shape[1])
I[0, 0] = 0  

beta = np.linalg.inv(X_train_b.T @ X_train_b + alpha * I) @ X_train_b.T @ y_train

# Predictions
y_pred_ridge = X_test_b @ beta

# Metrics
mse = np.mean((y_test - y_pred_ridge)**2)
rmse = np.sqrt(mse)

ss_total = np.sum((y_test - np.mean(y_test))**2)
ss_residual = np.sum((y_test - y_pred_ridge)**2)
r2 = 1 - (ss_residual / ss_total)

print("Intercept:", beta[0])
print("Coefficients:", beta[1:])
print("Ridge MSE:", mse)
print("Ridge RMSE:", rmse)
print("Ridge R2:", r2)

Intercept: 0.3469055374592833
Coefficients: [0.17073295 0.11021744 0.08673113]
Ridge MSE: 0.18575161276639798
Ridge RMSE: 0.4309891098002338
Ridge R2: 0.1909485310619108


In [10]:
import numpy as np

# Metrics (manual)
mse_ridge = np.mean((y_test - y_pred_ridge)**2)
rmse_ridge = np.sqrt(mse_ridge)

ss_total = np.sum((y_test - np.mean(y_test))**2)
ss_residual = np.sum((y_test - y_pred_ridge)**2)
r2_ridge = 1 - (ss_residual / ss_total)

print("Ridge Intercept:", beta[0])
print("Ridge Coefficients:", beta[1:])
print("Ridge MSE:", mse_ridge)
print("Ridge RMSE:", rmse_ridge)
print("Ridge R2:", r2_ridge)

Ridge Intercept: 0.3469055374592833
Ridge Coefficients: [0.17073295 0.11021744 0.08673113]
Ridge MSE: 0.18575161276639798
Ridge RMSE: 0.4309891098002338
Ridge R2: 0.1909485310619108


**Lasso**

In [11]:
import numpy as np

# Add intercept column
X_train_b = np.hstack((np.ones((X_train_scaled.shape[0], 1)), X_train_scaled))
X_test_b = np.hstack((np.ones((X_test_scaled.shape[0], 1)), X_test_scaled))

n_samples, n_features = X_train_b.shape

alpha = 0.1
learning_rate = 0.01
iterations = 1000

beta = np.zeros(n_features)

for _ in range(iterations):
    y_pred = X_train_b @ beta
    error = y_pred - y_train
    
    gradient = (2/n_samples) * (X_train_b.T @ error)
    
    l1_penalty = alpha * np.sign(beta)
    l1_penalty[0] = 0
    
    beta -= learning_rate * (gradient + l1_penalty)

y_pred_lasso = X_test_b @ beta

mse = np.mean((y_test - y_pred_lasso)**2)
rmse = np.sqrt(mse)

ss_total = np.sum((y_test - np.mean(y_test))**2)
ss_residual = np.sum((y_test - y_pred_lasso)**2)
r2 = 1 - (ss_residual / ss_total)

print("Lasso Intercept:", beta[0])
print("Lasso Coefficients:", beta[1:])
print("Lasso MSE:", mse)
print("Lasso RMSE:", rmse)
print("Lasso R2:", r2)

Lasso Intercept: 0.34690553687545234
Lasso Coefficients: [0.14123678 0.06663406 0.04505228]
Lasso MSE: 0.17769634532588782
Lasso RMSE: 0.4215404432861547
Lasso R2: 0.22603369591391065


**Ridge in polynomial linear regression**

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

df = pd.read_csv("diabetes.csv")

X = df[['Glucose']].values
y = df['Outcome'].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train_poly = np.hstack((X_train, X_train**2))
X_test_poly = np.hstack((X_test, X_test**2))

mean = X_train_poly.mean(axis=0)
std = X_train_poly.std(axis=0)

X_train_scaled = (X_train_poly - mean) / std
X_test_scaled = (X_test_poly - mean) / std

X_train_b = np.hstack((np.ones((X_train_scaled.shape[0], 1)), X_train_scaled))
X_test_b = np.hstack((np.ones((X_test_scaled.shape[0], 1)), X_test_scaled))

alpha = 1.0

I = np.eye(X_train_b.shape[1])
I[0, 0] = 0

beta = np.linalg.inv(X_train_b.T @ X_train_b + alpha * I) @ X_train_b.T @ y_train

# --- Step 5: Predictions ---
y_pred = X_test_b @ beta

# --- Step 6: Metrics (manual) ---
mse = np.mean((y_test - y_pred)**2)
rmse = np.sqrt(mse)

ss_total = np.sum((y_test - np.mean(y_test))**2)
ss_residual = np.sum((y_test - y_pred)**2)
r2 = 1 - (ss_residual / ss_total)

print("Polynomial Ridge Intercept:", beta[0])
print("Polynomial Ridge Coefficients:", beta[1:])
print("Polynomial Ridge MSE:", mse)
print("Polynomial Ridge RMSE:", rmse)
print("Polynomial Ridge R2:", r2)

Polynomial Ridge Intercept: 0.34690553745928343
Polynomial Ridge Coefficients: [ 0.         -0.09320553  0.31817661]
Polynomial Ridge MSE: 0.17225566691723296
Polynomial Ridge RMSE: 0.4150369464484252
Polynomial Ridge R2: 0.24973087298271845


**lasso in polynomial linear regression**

In [7]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


df = pd.read_csv("diabetes.csv")

X = df[['Glucose']]
y = df['Outcome']


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


poly = PolynomialFeatures(degree=2, include_bias=True)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)


scaler = StandardScaler()
X_train_poly_scaled = scaler.fit_transform(X_train_poly)
X_test_poly_scaled = scaler.transform(X_test_poly)


lasso_poly_model = Lasso(alpha=0.01)
lasso_poly_model.fit(X_train_poly_scaled, y_train)

y_pred_poly_lasso = lasso_poly_model.predict(X_test_poly_scaled)


mse = mean_squared_error(y_test, y_pred_poly_lasso)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_poly_lasso)

print("Polynomial Lasso Intercept:", lasso_poly_model.intercept_)
print("Polynomial Lasso Coefficients:", lasso_poly_model.coef_)
print("Polynomial Lasso MSE:", mse)
print("Polynomial Lasso RMSE:", rmse)
print("Polynomial Lasso R2:", r2)


Polynomial Lasso Intercept: 0.3469055374592834
Polynomial Lasso Coefficients: [0.         0.         0.21774905]
Polynomial Lasso MSE: 0.1717171976152149
Polynomial Lasso RMSE: 0.4143877382539388
Polynomial Lasso R2: 0.25207620594261937
