In [1]:
# prompt: Regularized Linear Models
# Objective: Implement Ridge and Lasso regression.
# Tasks: Compare the performance of Ridge and Lasso regression on a dataset and
# analyze the effect of regularization.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

# Load the dataset from a CSV file. The rest of the script treats the last
# column as the target variable and every other column as a feature.
DATA_PATH = '/content/Salary_dataset.csv'
try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError:
    # Report the path that was actually tried so the message is actionable.
    print(f"Error: '{DATA_PATH}' not found. Please upload your dataset or provide the correct file name.")
    # exit() is the interactive helper injected by the `site` module and is not
    # guaranteed to exist; raising SystemExit stops execution everywhere.
    raise SystemExit(1) from None


# Separate features (X) and target variable (y): every column except the last
# is a feature, and the last column is the target.
# NOTE(review): if the CSV was saved with its index (e.g. an 'Unnamed: 0'
# column), that index is treated as a feature here — confirm this is intended.
X = data.iloc[:, :-1]
y = data.iloc[:, -1]

# Fill missing feature values with the column mean. The result is assigned to
# X instead of using inplace=True, so a slice of `data` is never mutated (which
# can raise pandas' SettingWithCopyWarning). numeric_only=True keeps mean()
# from failing if a non-numeric column is present.
X = X.fillna(X.mean(numeric_only=True))

# Split the data into training (80%) and testing (20%) sets; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Feature scaling (important for regularization, because the penalty acts on
# coefficient magnitudes). The scaler is fitted on the training set only and
# then reused for the test set so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit both regularized models on the scaled training data. alpha is the
# regularization strength; both models use alpha=1.0 here.
# Ridge applies an L2 penalty.
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)

# Score Ridge on the held-out test set.
ridge_predictions = ridge_model.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_predictions)
print(f"Ridge Regression MSE: {ridge_mse}")


# Lasso applies an L1 penalty.
lasso_model = Lasso(alpha=1.0)
lasso_model.fit(X_train, y_train)

# Score Lasso on the same test set so the two MSE values are comparable.
lasso_predictions = lasso_model.predict(X_test)
lasso_mse = mean_squared_error(y_test, lasso_predictions)
print(f"Lasso Regression MSE: {lasso_mse}")

# Analyze the fitted coefficients to see the effect of regularization.
print("\nRidge Coefficients:")
print(ridge_model.coef_)

# Print explicitly: a bare expression is only displayed when it is the last
# statement of a notebook cell and produces no output when run as a script.
print("\nLasso Coefficients:")
print(lasso_model.coef_)

# You can experiment with different alpha values to observe the change in performance and coefficients.
# Smaller alphas mean less regularization.

Ridge Regression MSE: 48162093.28449041
Lasso Regression MSE: 55421152.30983243

Ridge Coefficients:
[ 8231.35461842 18265.5781623 ]

Lasso Coefficients:


array([-7573.4372463, 34626.0228057])