RIDGE AND LASSO REGRESSION

1. Ridge with strong multicollinearity (2 features)
Task: Fit RidgeCV with alphas = logspace(-3, 3, 50). Report best alpha, coefficients, intercept, R² (test), RMSE (test).

In [4]:
import numpy as np 
import pandas as pd 
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

# Load the dataset: every column except the last is a predictor and the
# last column is the target.
df = pd.read_csv('ridge_correlated_150.csv')
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Keep 20% of the rows aside for testing (80% for training); the fixed
# random_state makes the split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Candidate penalties: 50 values spaced evenly on a log scale, 1e-3 .. 1e3.
alphas = np.logspace(-3, 3, 50)

# RidgeCV scores each candidate alpha by cross-validation on the training
# data and keeps the one with the best score.
ridge = RidgeCV(alphas=alphas).fit(X_train, y_train)

# Evaluate on the held-out rows only.
y_pred = ridge.predict(X_test)

best_alpha = ridge.alpha_
coefficients = ridge.coef_
intercept = ridge.intercept_
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the test MSE

print(f'Best alpha: {best_alpha}')
print(f'Coefficients: {coefficients}')
print(f'Intercept: {intercept}')
print(f'R² (test): {r2}')
print(f'RMSE (test): {rmse}')

Best alpha: 0.0071968567300115215
Coefficients: [ 4.41273599 -3.41175106]
Intercept: 0.012383721735237843
R² (test): 0.6538727524919414
RMSE (test): 0.5705925764636008


2. Ridge with 10 moderately collinear features
Task: Fit RidgeCV with alphas = logspace(-3, 3, 50). Report best alpha, coefficients, intercept, R² (test), RMSE (test).

In [5]:
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

# Read the 10-feature dataset and separate predictors (all columns but the
# last) from the target (last column).
df = pd.read_csv('ridge_10feat_150.csv')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Reproducible 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Search 50 log-spaced penalties between 1e-3 and 1e3; RidgeCV picks the
# best-scoring one via cross-validation on the training data.
alphas = np.logspace(-3, 3, 50)
ridge = RidgeCV(alphas=alphas)
ridge.fit(X_train, y_train)

# Fitted parameters of the selected model.
best_alpha = ridge.alpha_
coefficients = ridge.coef_
intercept = ridge.intercept_

# Test-set metrics, computed from predictions on the held-out rows.
y_pred = ridge.predict(X_test)
r2 = r2_score(y_test, y_pred)
test_mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(test_mse)

print(f'Best alpha: {best_alpha}')
print(f'Coefficients: {coefficients}')
print(f'Intercept: {intercept}')
print(f'R² (test): {r2}')
print(f'RMSE (test): {rmse}')

Best alpha: 0.655128556859551
Coefficients: [ 2.30095156 -1.28258227  0.28615561  1.55443374 -0.18857766 -0.72907171
 -0.19564402  0.05193988  0.01304106  0.05181519]
Intercept: -0.07437205435600158
R² (test): 0.9881868768074262
RMSE (test): 0.6057776327741521


3. Lasso on high-dimensional sparse data (30 features)
Task: Fit LassoCV (5-fold). Report best alpha, list selected features (non-zero), R² (test), RMSE (test).

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

# Load the sparse dataset: all columns but the last are predictors, the
# last column is the target.
df = pd.read_csv('lasso_sparse_150.csv')

X = df.iloc[:, :-1]
Y = df.iloc[:, -1]

# Reproducible 80/20 train/test split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# 5-fold LassoCV over an automatically generated alpha grid.
# The grid is left at its default on purpose: passing alphas=None explicitly
# is deprecated in recent scikit-learn releases and raises a FutureWarning,
# while omitting the argument gives the same automatic grid on all versions.
lasso = LassoCV(cv=5, random_state=42)
lasso.fit(X_train, Y_train)

Y_pred = lasso.predict(X_test)

best_alpha = lasso.alpha_
coefficients = lasso.coef_
# Lasso drives some coefficients to exactly zero; the "selected" features
# are the columns whose coefficient is non-zero.
selected_features = [
    name for name, coef in zip(X.columns, coefficients) if coef != 0
]
r2 = r2_score(Y_test, Y_pred)
rmse = np.sqrt(mean_squared_error(Y_test, Y_pred))

print(f'Best alpha: {best_alpha}')
print(f'Selected features: {selected_features}')
print(f'R² (test): {r2}')
print(f'RMSE (test): {rmse}')

Best alpha: 0.06662744526920758
Selected features: ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x11', 'x12', 'x13', 'x16', 'x18', 'x21', 'x23', 'x24', 'x26', 'x28', 'x29']
R² (test): 0.9620885670252837
RMSE (test): 1.1860751609124554




4. Lasso with grouped/overlapping signals
Task: Fit LassoCV (5-fold). Report best alpha, coefficients, intercept, R² (test), RMSE (test).
Goal: see how Lasso keeps one feature from each correlated pair (xA/xB and xC/xD) and shrinks the other to zero.

In [6]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

# Load the grouped-signal dataset: all columns but the last are predictors,
# the last column is the target.
df = pd.read_csv('lasso_groups_150.csv')

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Reproducible 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 5-fold LassoCV over an automatically generated alpha grid.
# alphas is left at its default: passing alphas=None explicitly is
# deprecated in recent scikit-learn releases and raises a FutureWarning,
# while omitting it gives the same automatic grid on all versions.
lasso = LassoCV(cv=5, random_state=42)
lasso.fit(X_train, y_train)

y_pred = lasso.predict(X_test)

best_alpha = lasso.alpha_
coefficients = lasso.coef_
intercept = lasso.intercept_
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f'Best alpha: {best_alpha}')
print(f'Coefficients: {coefficients}')
print(f'Intercept: {intercept}')
print(f'R² (test): {r2}')
print(f'RMSE (test): {rmse}')

# For each correlated pair, report which member Lasso kept (non-zero
# coefficient) and which it dropped (coefficient exactly zero).
# The pair names are hard-coded, so the CSV must contain columns
# xA, xB, xC and xD; a missing column raises KeyError.
coef_dict = dict(zip(X.columns, coefficients))
pairs = [('xA', 'xB'), ('xC', 'xD')]
for pair in pairs:
    kept = [f for f in pair if coef_dict[f] != 0]
    dropped = [f for f in pair if coef_dict[f] == 0]
    print(f'Pair {pair}: kept -> {kept},dropped -> {dropped}')

Best alpha: 0.003608591334120754
Coefficients: [ 2.04028628 -0.          1.60923693  0.         -0.16289269]
Intercept: -0.06746553791233204
R² (test): 0.9139898313782668
RMSE (test): 0.9098913642117109
Pair ('xA', 'xB'): kept -> ['xA'],dropped -> ['xB']
Pair ('xC', 'xD'): kept -> ['xC'],dropped -> ['xD']




5. Quadratic model vs linear baseline
Task:
Fit linear model y ~ x, record test R² and RMSE.
Fit polynomial (degree=2) model y ~ x + x², record coefficients & test metrics.
Predict y at x = 1.5 using the polynomial model.

In [8]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import train_test_split

# Load the dataset: all columns but the last are predictors (a single
# feature x is expected here), the last column is the target.
df = pd.read_csv('poly_quadratic_150.csv')

X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Reproducible 80/20 train/test split, shared by both models so their
# test metrics are directly comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Linear baseline: y ~ x ---
model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print('Linear Model Results')
print(f'Intercept: {model.intercept_}')
print(f'Coefficients: {model.coef_[0]}')
print(f'R² (test): {r2}')
print(f'RMSE (test): {rmse}')

# --- Quadratic model: y ~ x + x² ---
# degree=2 expands x into [x, x²]; include_bias=False leaves out the
# constant column of 1s because LinearRegression fits its own intercept.
poly = PolynomialFeatures(degree=2, include_bias=False)
# Learn the expansion on the training data only, then reuse it for the
# test data.
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Ordinary linear regression on the expanded features:
# y = intercept + a*x + b*x².
poly_model = LinearRegression()
poly_model.fit(X_train_poly, y_train)

y_pred_poly = poly_model.predict(X_test_poly)
r2_poly = r2_score(y_test, y_pred_poly)
rmse_poly = np.sqrt(mean_squared_error(y_test, y_pred_poly))

print('\nQuadratic Model Results')
print(f'Intercept: {poly_model.intercept_}')
print(f'Coefficients: {poly_model.coef_}')
print(f'R² (test): {r2_poly}')
print(f'RMSE (test): {rmse_poly}')

# Predict at x = 1.5. `poly` was fitted on a DataFrame, so the new point is
# wrapped in a DataFrame with the same column name(s); passing a bare
# ndarray makes scikit-learn warn that X lacks valid feature names.
x_new = pd.DataFrame([[1.5]], columns=X.columns)
x_new_poly = poly.transform(x_new)
y_pred_new = poly_model.predict(x_new_poly)
print(f'\nPrediction at x=1.5: {y_pred_new[0]}')

Linear Model Results
Intercept: -0.06676345438486156
Coefficients: 2.7703542957458045
R² (test): 0.890481731073773
RMSE (test): 1.7497017806063881

Quadratic Model Results
Intercept: 1.9542801042051452
Coefficients: [ 3.04036622 -0.70463416]
R² (test): 0.975208521028767
RMSE (test): 0.8324764690249341

Prediction at x=1.5: 4.929402576921072


