In [None]:
# Question 1: ridge regression by gradient descent on synthetic, highly collinear data
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score

# Synthetic regression data: x2..x7 are noisy multiples of x1 (so the
# predictors are strongly collinear) and the target is built from x1, x2 and x3.
np.random.seed(42)
num_samples = 1000
x1 = np.random.rand(num_samples)

# Draw the noisy multiples strictly in the order x2, x3, ..., x7 so the random
# stream is consumed in a fixed sequence.
noisy_multiples = []
for factor in range(2, 8):
    noise = np.random.normal(0, 0.1, num_samples)
    noisy_multiples.append(factor * x1 + noise)
x2, x3, x4, x5, x6, x7 = noisy_multiples

# The target noise is drawn last, after all predictor noise.
y = 10 * x1 + 2 * x2 + 3 * x3 + np.random.normal(0, 0.5, num_samples)

column_names = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'y']
column_values = [x1, x2, x3, x4, x5, x6, x7, y]
data = pd.DataFrame(dict(zip(column_names, column_values)))


def ridge_regression(X, y, learning_rate, regularization_param, num_iterations):
    """Fit ridge-regression weights (no intercept term) by batch gradient descent.

    Starting from all-zero weights, repeatedly steps against the gradient of

        cost(w) = (1 / (2 * m)) * (sum((X @ w - y) ** 2)
                                   + regularization_param * sum(w ** 2))

    where ``m`` is the number of rows of ``X``.

    Parameters
    ----------
    X : 2-D array
        Design matrix with ``m`` rows and ``n`` columns.
    y : array
        Targets; expected to line up with the rows of ``X``.
    learning_rate : float
        Step size applied to the gradient.
    regularization_param : float
        Weight of the squared-norm penalty.
    num_iterations : int
        Maximum number of gradient steps.

    Returns
    -------
    weights : numpy.ndarray
        Length-``n`` weight vector. If the run diverged, these are the last
        weights whose cost was still finite, never ``inf``/``nan`` values
        produced by the blown-up step.
    costs : list
        Cost recorded at the start of every iteration. When the run diverges
        the final entry is the non-finite cost that triggered the early stop,
        so callers can detect divergence with ``np.isfinite(costs[-1])``.
    """
    m, n = X.shape
    weights = np.zeros(n)
    # Weights from the most recent iteration whose cost was finite.
    last_finite_weights = weights
    costs = []

    for _ in range(num_iterations):
        error = X @ weights - y
        cost = (1 / (2 * m)) * (np.sum(error ** 2) + regularization_param * np.sum(weights ** 2))
        costs.append(cost)

        # Check for divergence BEFORE stepping: a non-finite cost means the
        # error (and hence the gradient) is already unusable, so taking the
        # step would only push inf/nan into the returned weights.
        if not np.isfinite(cost):
            weights = last_finite_weights
            break

        gradient = (1 / m) * (X.T @ error) + (regularization_param / m) * weights
        last_finite_weights = weights
        # Rebind rather than update in place so last_finite_weights keeps
        # pointing at the pre-step values.
        weights = weights - learning_rate * gradient

    return weights, costs


# Candidate hyper-parameters for the gradient-descent ridge fit.
learning_rates = [1e-5, 1e-4, 1e-3, 1e-2]
regularization_params = [10**-15, 10**-10, 10**-5, 10**-3, 0, 1, 10, 20]
num_iterations = 1000

# Design matrix and target as plain NumPy arrays.
X = data.drop('y', axis=1).values
y = data['y'].values

# Best combination seen so far (scored by R² on the same data used for fitting).
best_r2, best_weights, best_lr, best_reg = -np.inf, None, None, None

for lr in learning_rates:
    for reg in regularization_params:
        weights, _ = ridge_regression(X, y, lr, reg, num_iterations)
        y_pred = X @ weights
        r2 = r2_score(y, y_pred)

        # Strict comparison: on a tie the earlier combination is kept.
        if r2 > best_r2:
            best_r2, best_weights, best_lr, best_reg = r2, weights, lr, reg

print("\n".join([
    f"Best R² Score: {best_r2:.4f}",
    f"Best Learning Rate: {best_lr}",
    f"Best Regularization Parameter: {best_reg}",
    f"Best Weights: {best_weights}",
]))


Best R² Score: 0.9918
Best Learning Rate: 0.01
Best Regularization Parameter: 1e-15
Best Weights: [0.17040012 0.51783856 0.72342713 0.60211981 0.79585619 0.96123597
 1.07377076]


In [None]:
# Question 2: linear vs. ridge vs. lasso regression on the Hitters salary data

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Load the Hitters data, drop rows with missing values, and one-hot encode the
# non-numeric columns (first level of each dropped).
data = pd.read_csv('/content/Hitters.csv')

data = data.dropna()
data = pd.get_dummies(data, drop_first=True)

X = data.drop(columns=['Salary'])
y = data['Salary']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

linear_model = LinearRegression()
linear_model.fit(X_train_scaled, y_train)

ridge_model = Ridge(alpha=0.5748)
ridge_model.fit(X_train_scaled, y_train)

# The recorded run of this cell emitted a coordinate-descent warning from the
# Lasso fit, i.e. the solver hit its iteration cap before converging. Give it a
# much larger iteration budget so the reported MSE comes from a converged model.
lasso_model = Lasso(alpha=0.5748, max_iter=100000)
lasso_model.fit(X_train_scaled, y_train)

y_pred_linear = linear_model.predict(X_test_scaled)
y_pred_ridge = ridge_model.predict(X_test_scaled)
y_pred_lasso = lasso_model.predict(X_test_scaled)

mse_linear = mean_squared_error(y_test, y_pred_linear)
mse_ridge = mean_squared_error(y_test, y_pred_ridge)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)

print("Mean Squared Error for Linear Regression:", mse_linear)
print("Mean Squared Error for Ridge Regression:", mse_ridge)
print("Mean Squared Error for Lasso Regression:", mse_lasso)

# Pick the model with the smallest test MSE. Taking the minimum directly avoids
# a fall-through branch that would name Lasso whenever two models tie for the
# lowest error; on a tie the model listed first here is reported.
mse_by_model = {
    "Linear Regression": mse_linear,
    "Ridge Regression": mse_ridge,
    "Lasso Regression": mse_lasso,
}
best_model_name = min(mse_by_model, key=mse_by_model.get)
print(f"{best_model_name} performs the best.")


Mean Squared Error for Linear Regression: 128284.34549672354
Mean Squared Error for Ridge Regression: 126603.90264424692
Mean Squared Error for Lasso Regression: 126739.56899132291
Ridge Regression performs the best.


  model = cd_fast.enet_coordinate_descent(


In [None]:
# Question 3: cross-validated ridge (RidgeCV) vs. lasso (LassoCV) on the Boston housing data

from sklearn.datasets import fetch_openml
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Boston housing data from OpenML, requested as pandas objects (as_frame=True).
data = fetch_openml(name="boston", version=1, as_frame=True)
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling from the training split, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Both estimators choose alpha from the same three candidates with 5-fold CV.
ridge_cv = RidgeCV(alphas=[0.1, 1.0, 10.0], cv=5).fit(X_train_scaled, y_train)
lasso_cv = LassoCV(alphas=[0.1, 1.0, 10.0], cv=5).fit(X_train_scaled, y_train)

y_pred_ridge = ridge_cv.predict(X_test_scaled)
y_pred_lasso = lasso_cv.predict(X_test_scaled)

mse_ridge = mean_squared_error(y_test, y_pred_ridge)
mse_lasso = mean_squared_error(y_test, y_pred_lasso)

print("Mean Squared Error for RidgeCV:", mse_ridge)
print("Mean Squared Error for LassoCV:", mse_lasso)

# RidgeCV is reported only when its test MSE is strictly lower.
winner = "RidgeCV" if mse_ridge < mse_lasso else "LassoCV"
print(winner + " performs the best.")


Mean Squared Error for RidgeCV: 24.312903830491614
Mean Squared Error for LassoCV: 25.656739367167678
RidgeCV performs the best.
