<a href="https://colab.research.google.com/github/nitrogoose/MACHINE_LEARNING/blob/main/ASS4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_regression

# --- Ridge regression fitted by batch gradient descent ----------------------
# Synthetic, deliberately multicollinear data: columns 1-6 are noisy copies of
# column 0, and the target depends on column 0 only.
np.random.seed(42)
n_samples = 500
X = np.random.rand(n_samples, 7)
for i in range(1, 7):
    X[:, i] = X[:, 0] + np.random.normal(0, 0.01, n_samples)
y = 5 * X[:, 0] + 3 + np.random.normal(0, 0.1, n_samples)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# The standardized features are zero-mean, so a model of the form
# `X_scaled @ weights` can only predict values centred on 0.  The target's
# mean is therefore carried by an explicit, unpenalized intercept and the
# weights are fitted against the centred target.
intercept = y.mean()
y_centered = y - intercept

learning_rates = [0.0001, 0.001, 0.01, 0.1, 1, 10]
regularization_params = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]
n_iterations = 1000

best_lr = None
best_reg = None
best_r2 = -np.inf
best_weights = None

# NOTE: candidates are ranked by R2 on the same data they were fitted on, so
# the ranking says nothing about generalisation.
for lr in learning_rates:
    for reg in regularization_params:
        weights = np.zeros(X_scaled.shape[1])

        # Step sizes that are too large make the iteration blow up to inf/NaN.
        # Silence the resulting overflow warnings and stop as soon as the
        # weights stop being finite.
        with np.errstate(over="ignore", invalid="ignore"):
            for _ in range(n_iterations):
                residual = y_centered - X_scaled @ weights
                gradient = (
                    -2 * (X_scaled.T @ residual) / n_samples
                    + 2 * reg * weights
                )
                weights -= lr * gradient
                if not np.all(np.isfinite(weights)):
                    break
            y_pred = X_scaled @ weights + intercept

        # r2_score rejects NaN/inf predictions, so a diverged run is skipped
        # instead of aborting the whole search.
        if not np.all(np.isfinite(y_pred)):
            continue

        r2 = r2_score(y, y_pred)

        if r2 > best_r2:
            best_r2 = r2
            best_lr = lr
            best_reg = reg
            best_weights = weights

print("Best Learning Rate:", best_lr)
print("Best Regularization Parameter:", best_reg)
print("Best R2 Score:", best_r2)

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso

# --- Hitters: plain vs. ridge vs. lasso linear regression -------------------
file_path = '/content/Hitters.csv'
data = pd.read_csv(file_path)
# Rows without a target cannot be used for supervised fitting.
data.dropna(subset=['Salary'], inplace=True)
# Remaining numeric gaps are filled with the column median.
data.fillna(data.median(numeric_only=True), inplace=True)
# One-hot encode categorical columns, dropping one level per category.
data = pd.get_dummies(data, drop_first=True)

X = data.drop(columns=['Salary'])
y = data['Salary']

# Split BEFORE scaling: fitting the scaler on the full dataset would let
# test-set means and variances leak into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full matrix standardized with the training statistics; keeps the
# `X_scaled` name defined for this section as before.
X_scaled = scaler.transform(X)

linear_model = LinearRegression().fit(X_train, y_train)
ridge_model = Ridge(alpha=0.5748).fit(X_train, y_train)
lasso_model = Lasso(alpha=0.5748).fit(X_train, y_train)

# `.score` returns R2 on the held-out 30 %.
linear_r2 = linear_model.score(X_test, y_test)
ridge_r2 = ridge_model.score(X_test, y_test)
lasso_r2 = lasso_model.score(X_test, y_test)

print("Linear Regression R2 Score:", linear_r2)
print("Ridge Regression R2 Score:", ridge_r2)
print("LASSO Regression R2 Score:", lasso_r2)

if ridge_r2 > lasso_r2 and ridge_r2 > linear_r2:
    print("Ridge performs the best due to regularization handling multicollinearity.")
elif lasso_r2 > ridge_r2 and lasso_r2 > linear_r2:
    print("LASSO performs the best as it also performs feature selection.")
else:
    print("Linear Regression performs best with no regularization penalty.")

# --- Boston housing: cross-validated ridge vs. lasso ------------------------
# `load_boston` was removed in scikit-learn 1.2; importing it unconditionally
# raises ImportError on current releases, so the import is guarded.
try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None
from sklearn.linear_model import RidgeCV, LassoCV

if load_boston is not None:
    boston = load_boston()
    X = boston.data
    y = boston.target
else:
    # Loading recipe given in scikit-learn's removal notice: the original
    # file stores each record on two physical lines after a 22-line header.
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
    y = raw_df.values[1::2, 2]

# NOTE: unlike the earlier sections, the features are not standardized here,
# so the penalty acts on coefficients of differently scaled columns.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Both estimators pick their alpha from the same grid by 5-fold CV on the
# training split, then are scored (R2) on the held-out 30 %.
ridge_cv = RidgeCV(alphas=[0.1, 1, 10, 20, 50, 100], cv=5).fit(X_train, y_train)
ridge_r2 = ridge_cv.score(X_test, y_test)

lasso_cv = LassoCV(alphas=[0.1, 1, 10, 20, 50, 100], cv=5).fit(X_train, y_train)
lasso_r2 = lasso_cv.score(X_test, y_test)

print("RidgeCV Best Alpha:", ridge_cv.alpha_)
print("RidgeCV R2 Score:", ridge_r2)
print("LassoCV Best Alpha:", lasso_cv.alpha_)
print("LassoCV R2 Score:", lasso_r2)

if ridge_r2 > lasso_r2:
    print("RidgeCV performs better due to stability in regularization.")
else:
    print("LassoCV performs better due to feature selection.")
