Q.1

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Fix the global NumPy seed so every random draw below is reproducible.
np.random.seed(0)
num_points = 500
base_feature = np.random.randn(num_points)

# Build eight strongly correlated columns from the single latent feature:
# seven copies perturbed by small noise (scale 0.01), followed by one
# half-scaled copy with larger noise (scale 0.2). The draws happen in this
# exact order, which matters for reproducing the same random stream.
feature_columns = [
    base_feature + 0.01 * np.random.randn(num_points)
    for _ in range(7)
]
feature_columns.append(0.5 * base_feature + 0.2 * np.random.randn(num_points))
data_matrix = np.column_stack(feature_columns)

# Ground-truth coefficients; the target is the linear response plus noise
# (scale 0.5).
actual_weights = np.array([3, -2, 1, 0, 0.5, -1, 2, 4], dtype=float)
target_values = np.dot(data_matrix, actual_weights) + 0.5 * np.random.randn(num_points)

# Hold out 25% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    data_matrix,
    target_values,
    test_size=0.25,
    random_state=1,
)

# Standardise with statistics taken from the training rows only. Any
# standard deviation that is non-finite or not strictly positive is replaced
# by 1.0 so the division below is always well defined.
mean_vec = np.mean(X_train, axis=0)
std_vec = np.std(X_train, axis=0)
usable_std = np.isfinite(std_vec) & (std_vec > 0)
std_vec = np.where(usable_std, std_vec, 1.0)

X_train, X_test = [(part - mean_vec) / std_vec for part in (X_train, X_test)]

def ridge_regression(X, y, learning_rate, lamda, iterations=2000):
    """Fit ridge (L2-penalised) linear regression by batch gradient descent.

    The objective minimised is
    ``mean((X @ w + b - y) ** 2) + lamda * sum(w ** 2)``; the bias ``b`` is
    not penalised.

    Parameters
    ----------
    X : array-like of shape (samples, features)
        Design matrix. Anything ``np.asarray`` can convert is accepted.
    y : array-like of shape (samples,) or (samples, 1)
        Targets. Flattened to 1-D so a column vector is not broadcast
        against the 1-D predictions.
    learning_rate : float
        Step size of each gradient update.
    lamda : float
        L2 penalty strength applied to the weights.
    iterations : int, default 2000
        Number of gradient steps.

    Returns
    -------
    tuple (weights, bias) or None
        ``weights`` is a float64 array of shape (features,) and ``bias`` a
        float. ``None`` is returned as soon as any parameter becomes
        non-finite, i.e. the descent diverged for this configuration.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has no rows, or ``y`` does not contain exactly
        one value per row of ``X``.
    """
    X = np.asarray(X, dtype=np.float64)
    y = np.asarray(y, dtype=np.float64).reshape(-1)

    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (samples, features); got ndim={X.ndim}")
    samples, features = X.shape
    if samples == 0:
        raise ValueError("X must contain at least one sample")
    if y.shape[0] != samples:
        raise ValueError(
            f"y has {y.shape[0]} values but X has {samples} samples"
        )

    weights = np.zeros(features, dtype=np.float64)
    bias = 0.0

    # Divergence is an expected outcome for aggressive learning rates and is
    # reported through the None return value, so silence the overflow /
    # invalid-value warnings NumPy would otherwise emit on the way there.
    with np.errstate(over="ignore", invalid="ignore"):
        for _ in range(iterations):
            predictions = X.dot(weights) + bias
            errors = predictions - y
            # Gradient of the mean squared error plus the L2 penalty term.
            grad_w = (2 / samples) * (X.T.dot(errors)) + 2 * lamda * weights
            grad_b = (2 / samples) * np.sum(errors)
            weights -= learning_rate * grad_w
            bias -= learning_rate * grad_b

            if not (np.isfinite(weights).all() and np.isfinite(bias)):
                return None

    return weights, bias

# Hyper-parameter grid explored by the search below.
learning_rates = [0.0001, 0.001, 0.01, 0.1]
lambda_values = [0, 0.001, 0.01, 0.1, 1, 10]

# R² has no lower bound (a model can be arbitrarily worse than predicting the
# mean), so start from -inf. A sentinel of -1 would silently discard every
# configuration scoring at or below -1 and could leave best_config as None
# even though valid fits exist.
best_r2 = -np.inf
best_config = None

# NOTE(review): the configuration is chosen by its score on the test split,
# so the reported R² is an optimistic estimate. Selecting on a separate
# validation split (or via cross-validation) would give an unbiased number.
for lr in learning_rates:
    for lam in lambda_values:
        result = ridge_regression(X_train, y_train, lr, lam)
        if result is None:
            # Gradient descent diverged for this (lr, lam) pair.
            continue
        coeffs, intercept = result
        y_pred = X_test.dot(coeffs) + intercept
        if not np.isfinite(y_pred).all():
            continue
        score = r2_score(y_test, y_pred)
        if score > best_r2:
            best_r2 = score
            best_config = (lr, lam, score)

# best_config stays None only if every configuration diverged.
print("Optimal Learning Rate, Lambda, and R² =", best_config)


Optimal Learning Rate, Lambda, and R² = (0.1, 0, 0.9920703853817209)


Q.2

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.metrics import mean_squared_error,r2_score

# (a) Load and Preprocess Data
# Salary is the regression target, so rows without it cannot be used.
# .copy() yields an independent frame, so the column assignments below never
# write into a slice of the raw frame.
dataset = pd.read_csv('/content/Hitters.csv')
dataset = dataset.dropna(subset=["Salary"]).copy()
for col in ["League", "Division", "NewLeague"]:
    # Replace each categorical label with its integer category code.
    dataset[col] = dataset[col].astype("category").cat.codes

# (b) Split Data and Standardize
X = dataset.drop("Salary", axis=1)
y = dataset["Salary"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Impute missing feature values AFTER the split, using medians computed from
# the training rows only. Computing them on the full dataset would leak
# test-set information into the training features.
train_medians = X_train.median(numeric_only=True)
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# The scaler is likewise fitted on the training rows and reused for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# (c) Train Models
# Regularisation strength shared by the ridge and lasso models.
# NOTE(review): the provenance of this value is not shown in this notebook.
REG_ALPHA = 0.5748
model_lin = LinearRegression()
model_ridge = Ridge(alpha=REG_ALPHA)
model_lasso = Lasso(alpha=REG_ALPHA, max_iter=5000)
model_lin.fit(X_train_scaled, y_train)
model_ridge.fit(X_train_scaled, y_train)
model_lasso.fit(X_train_scaled, y_train)

# (d) Evaluate Models
# Report test-set mean squared error and R² for each fitted model.
models = [
    ("Linear Regression", model_lin),
    ("Ridge Regression", model_ridge),
    ("Lasso Regression", model_lasso),
]
for name, model in models:
    y_pred = model.predict(X_test_scaled)
    mse_val = mean_squared_error(y_test, y_pred)
    r2_val = r2_score(y_test, y_pred)
    print(f"{name}: MSE={mse_val:.2f}, R²={r2_val:.4f}")


Linear Regression: MSE=131898.53, R²=0.5532
Ridge Regression: MSE=128967.77, R²=0.5631
Lasso Regression: MSE=128572.34, R²=0.5644


Q.3

In [4]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV,LassoCV
from sklearn.metrics import r2_score
import numpy as np

# (a) Load California Housing dataset
data = fetch_california_housing()
X_data = data.data
y_data = data.target

# (b) Split Data
# A quarter of the rows is held out for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.25,
    random_state=1,
)

# (c) Scale Features
# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# (d) Define Alpha Values for CV
# Thirteen candidates, one per decade from 1e-6 to 1e6.
alpha_range = np.logspace(-6, 6, num=13)

# (e) Ridge and Lasso Regression with Cross-Validation
# Each estimator picks its own alpha from alpha_range using 5-fold CV.
ridge_model = RidgeCV(alphas=alpha_range, cv=5)
ridge_model.fit(X_train, y_train)
lasso_model = LassoCV(alphas=alpha_range, cv=5, max_iter=5000)
lasso_model.fit(X_train, y_train)

# (f) Evaluate Results
# Score both tuned models on the held-out rows and report the chosen alphas.
ridge_r2 = r2_score(y_test, ridge_model.predict(X_test))
lasso_r2 = r2_score(y_test, lasso_model.predict(X_test))
print("Ridge Best Alpha:", ridge_model.alpha_)
print("Ridge R²:", ridge_r2)
print("Lasso Best Alpha:", lasso_model.alpha_)
print("Lasso R²:", lasso_r2)


Ridge Best Alpha: 10.0
Ridge R²: 0.5930419134859937
Lasso Best Alpha: 0.001
Lasso R²: 0.5930027880694111


Q.4

In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score,classification_report

#(a) Load Dataset
data = load_iris()
features, labels = data.data, data.target

#(b) Split Dataset
# 30% of the samples are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=0,
)

#(c) Scale Features
# The scaler is fitted on the training rows and reused for the test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#(d) Train One-vs-Rest Logistic Regression
# Wrap a logistic-regression estimator in the one-vs-rest meta-classifier.
base_classifier = LogisticRegression(max_iter=2000)
ovr_model = OneVsRestClassifier(base_classifier)
ovr_model.fit(X_train, y_train)

#(e) Predictions and Evaluation
y_pred = ovr_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", report_text)


Accuracy: 0.9111111111111111

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       0.94      0.83      0.88        18
           2       0.77      0.91      0.83        11

    accuracy                           0.91        45
   macro avg       0.90      0.91      0.91        45
weighted avg       0.92      0.91      0.91        45

