<a href="https://colab.research.google.com/github/kenpreet/ML-assignment/blob/main/ML_Assignment_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Q.1
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

np.random.seed(42)
n_samples = 200
X_base = np.random.rand(n_samples, 1)
X = np.hstack([X_base + np.random.normal(0, 0.01, (n_samples, 1)) for _ in range(7)])
true_weights = np.array([5, 4.8, 5.2, 5, 4.9, 5.1, 5])
y = X.dot(true_weights) + np.random.normal(0, 0.5, n_samples)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

def ridge_regression_gradient_descent(X, y, alpha, lamda, epochs=1000):
    m, n = X.shape
    w = np.zeros(n)
    prev_cost = float('inf')

    for i in range(epochs):
        y_pred = X.dot(w)
        error = y_pred - y
        cost = (1/(2*m)) * (np.sum(error**2) + lamda * np.sum(w**2))

        grad = (1/m) * (X.T.dot(error) + lamda * w)
        w -= alpha * grad

        if np.isnan(cost) or np.isinf(cost) or cost > prev_cost * 1e5:
            print("stop")
            break

        prev_cost = cost

    return w, cost

alphas = [0.0001, 0.001, 0.01, 0.1]
lambdas = [1e-15, 1e-10, 1e-5, 1e-3, 0, 1, 10, 20]

results = []

for alpha in alphas:
    for lamda in lambdas:
        w, cost = ridge_regression_gradient_descent(X_scaled, y, alpha, lamda)
        y_pred = X_scaled.dot(w)
        if np.any(np.isnan(y_pred)) or np.any(np.isinf(y_pred)):
            continue  # skip invalid runs
        r2 = r2_score(y, y_pred)
        results.append((alpha, lamda, cost, r2))

results_df = pd.DataFrame(results, columns=['Learning Rate (α)', 'Lambda (λ)', 'Cost', 'R² Score'])
best_params = results_df.loc[results_df['R² Score'].idxmax()]

print("best Parameters:")
print(best_params)


best Parameters:
Learning Rate (α)    1.000000e-01
Lambda (λ)           1.000000e-15
Cost                 1.445737e+02
R² Score            -1.723585e+00
Name: 24, dtype: float64


In [None]:
# Q.2
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error

df = pd.read_csv("/content/drive/MyDrive/ML/Hitters.csv")

df = df.dropna(subset=['Salary'])
df = pd.get_dummies(df, drop_first=True)

X = df.drop('Salary', axis=1)
y = df['Salary']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

ridge = Ridge(alpha=0.5748)
lasso = Lasso(alpha=0.5748)
linear = LinearRegression()

models = {'Linear': linear, 'Ridge': ridge, 'Lasso': lasso}
results = {}

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    results[name] = {'R2': r2_score(y_test, y_pred), 'MSE': mean_squared_error(y_test, y_pred)}

results_df = pd.DataFrame(results).T
print(results_df)


              R2            MSE
Linear  0.290745  128284.345497
Ridge   0.300036  126603.902644
Lasso   0.299286  126739.568991


  model = cd_fast.enet_coordinate_descent(


In [None]:
# Q.3
import pandas as pd
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

df = pd.read_csv("/content/drive/MyDrive/ML/housing.csv").dropna()

df = pd.get_dummies(df, drop_first=True)

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

ridgecv = RidgeCV(alphas=[0.1, 1, 10, 100], cv=5)
lassocv = LassoCV(alphas=[0.1, 1, 10, 100], cv=5, max_iter=10000)

ridgecv.fit(X_train, y_train)
lassocv.fit(X_train, y_train)

ridge_pred = ridgecv.predict(X_test)
lasso_pred = lassocv.predict(X_test)

print("Ridge Best Alpha:", ridgecv.alpha_)
print("Ridge R2:", r2_score(y_test, ridge_pred))
print("Lasso Best Alpha:", lassocv.alpha_)
print("Lasso R2:", r2_score(y_test, lasso_pred))



  model = cd_fast.enet_coordinate_descent(


Ridge Best Alpha: 0.1
Ridge R2: 0.0
Lasso Best Alpha: 100.0
Lasso R2: 0.0


In [None]:
# Q.4
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model_ovr = LogisticRegression(multi_class='ovr', max_iter=1000)
model_ovr.fit(X_train, y_train)
y_pred_ovr = model_ovr.predict(X_test)

model_ovo = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
model_ovo.fit(X_train, y_train)
y_pred_ovo = model_ovo.predict(X_test)

print("OvR Accuracy:", accuracy_score(y_test, y_pred_ovr))
print("OvO Accuracy:", accuracy_score(y_test, y_pred_ovo))


OvR Accuracy: 0.9666666666666667
OvO Accuracy: 1.0


