In [13]:
import numpy as np
import pandas as pd
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.linear_model import RidgeCV, LassoCV
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

-------- Q1------------

In [3]:
np.random.seed(0)
X = np.random.rand(100,7)
y = 4*X[:,0] + 3*X[:,1] + 2*X[:,2] + np.random.randn(100)

X = (X - X.mean(axis=0))/X.std(axis=0)
X = np.c_[np.ones((X.shape[0],1)),X]

def ridge_grad_desc(X,y,lr,lam,epochs):
    m,n = X.shape
    theta = np.zeros(n)
    prev_cost = float('inf')

    for _ in range(epochs):
        preds = X.dot(theta)

        grad = (1/m)*(X.T.dot(preds - y))
        grad[1:] += (lam/m)*theta[1:]

        grad = np.clip(grad, -1e3, 1e3)

        theta -= lr*grad

        cost = (1/(2*m))*np.sum((preds - y)**2) + (lam/(2*m))*np.sum(theta[1:]**2)

        if abs(prev_cost - cost) < 1e-8:
            break
        prev_cost = cost

        if np.any(np.isnan(theta)) or np.any(np.isinf(theta)):
            return None,None

    return theta,cost

lrs=[0.0001,0.001,0.01,0.05,0.1,1]
lams=[1e-15,1e-10,1e-5,1e-3,0,1,10,20]
best=(None,float('inf'),-1,0,0)

for lr in lrs:
    for lam in lams:
        t,c=ridge_grad_desc(X,y,lr,lam,5000)
        if t is None:
            continue
        r2=r2_score(y,X.dot(t))
        if c<best[1] and r2>best[2]:
            best=(t,c,r2,lr,lam)

print("Best parameters -> learning_rate:",best[3],"lambda:",best[4])
print("Minimum Cost:",best[1])
print("R2 Score:",best[2])

Best parameters -> learning_rate: 1 lambda: 1e-15
Minimum Cost: 0.3962359335443123
R2 Score: 0.7710954736783029


------------Q2--------------

In [4]:
url = "https://raw.githubusercontent.com/selva86/datasets/master/Hitters.csv"
data = pd.read_csv(url)

(a) Preprocess

In [5]:
data = data.dropna()
for col in data.select_dtypes(include='object').columns:
    data[col] = LabelEncoder().fit_transform(data[col])

(b) Separate and scale

In [6]:
X = data.drop('Salary', axis=1)
y = data['Salary']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

(c) Fit models

In [8]:
lr = LinearRegression().fit(X_train, y_train)
ridge = Ridge(alpha=0.5748).fit(X_train, y_train)
lasso = Lasso(alpha=0.5748, max_iter=10000).fit(X_train, y_train)

 (d) Evaluate performance

In [9]:
models = {'Linear': lr, 'Ridge': ridge, 'Lasso': lasso}
results = {}
for name, model in models.items():
    preds = model.predict(X_test)
    results[name] = {'R2': r2_score(y_test, preds), 'MSE': mean_squared_error(y_test, preds)}
df = pd.DataFrame(results).T
print(df)
best = df['R2'].idxmax()
print("\nBest performing model:", best)




              R2            MSE
Linear  0.290745  128284.345497
Ridge   0.300036  126603.902644
Lasso   0.299626  126678.114633

Best performing model: Ridge


---------------Q3------------------

In [12]:



url = "http://lib.stat.cmu.edu/datasets/boston"
raw_df = pd.read_csv(url, sep=r"\s+", skiprows=22, header=None)
X = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
y = raw_df.values[1::2, 2]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
ridgecv = RidgeCV(alphas=[0.1, 1, 10]).fit(X_train, y_train)
lassocv = LassoCV(alphas=[0.1, 1, 10]).fit(X_train, y_train)
print("Best alpha (Ridge):", ridgecv.alpha_)
print("R2 (Ridge):", r2_score(y_test, ridgecv.predict(X_test)))
print("Best alpha (Lasso):", lassocv.alpha_)
print("R2 (Lasso):", r2_score(y_test, lassocv.predict(X_test)))


Best alpha (Ridge): 0.1
R2 (Ridge): 0.6686244180115211
Best alpha (Lasso): 0.1
R2 (Lasso): 0.6569712802223937


---------------Q4-----------------

In [14]:
iris=load_iris()
X,y=iris.data,iris.target
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)
X_train=StandardScaler().fit_transform(X_train)
X_test=StandardScaler().fit_transform(X_test)

def sigmoid(z): return 1/(1+np.exp(-z))
def train_one_vs_rest(X,y,cls,lr,epochs):
    y_bin=(y==cls).astype(int)
    w=np.zeros(X.shape[1])
    for _ in range(epochs):
        z=X.dot(w)
        grad=X.T.dot(sigmoid(z)-y_bin)/len(y)
        w-=lr*grad
    return w

weights=[train_one_vs_rest(X_train,y_train,i,0.1,1000) for i in np.unique(y_train)]
preds=[np.argmax([sigmoid(X_test.dot(w))[i] for w in weights]) for i in range(len(X_test))]
print(accuracy_score(y_test,preds))


0.9
