Title: Regularization Techniques

Task 1: Ridge Regression on House Prices<br>
Apply Ridge (L2) regularization to a linear regression model and report the RMSE on a held-out test set.

In [1]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd

# Synthetic house-price data: price is linear in square footage and bedroom
# count, plus Gaussian noise. Seeding the global RNG makes the draws repeatable.
np.random.seed(42)
size = 1000
sqft = np.random.randint(500, 4000, size=size)
beds = np.random.randint(1, 6, size=size)
noise = np.random.normal(0, 30000, size=size)
price = sqft * 200 + beds * 10000 + noise
df = pd.DataFrame({'Sqft': sqft, 'Beds': beds, 'Price': price})

# Features and target
feature_cols = ['Sqft', 'Beds']
X = df[feature_cols]
y = df['Price']

# Hold out 30% of the rows for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit an L2-penalized linear model; alpha is the penalty strength
ridge_model = Ridge(alpha=1.0).fit(X_train, y_train)

# Predict on the held-out rows
y_pred = ridge_model.predict(X_test)

# Root mean squared error, in the same units as Price
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print(f"RMSE for Ridge Regression: {rmse:.2f}")


RMSE for Ridge Regression: 30526.24


Task 2: Lasso Regression for Feature Selection<br>
Use Lasso (L1) regularization to identify which housing features keep a non-zero coefficient.

In [2]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Synthetic housing data: price is linear in square footage, bedroom count
# and age, plus Gaussian noise. Seeding the global RNG makes it repeatable.
np.random.seed(42)
size = 1000
sqft = np.random.randint(500, 4000, size=size)
beds = np.random.randint(1, 6, size=size)
age = np.random.randint(0, 50, size=size)
price = sqft * 200 + beds * 10000 + age * 1500 + np.random.normal(0, 30000, size=size)
df = pd.DataFrame({'Sqft': sqft, 'Beds': beds, 'Age': age, 'Price': price})

X = df[['Sqft', 'Beds', 'Age']]
y = df['Price']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# The L1 penalty acts on raw coefficient magnitudes, so which features get
# zeroed out depends on each feature's units. Standardize first so the penalty
# treats Sqft, Beds and Age alike. The scaler is fitted on the training split
# only, so no test-set statistics leak into the model.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Initialize Lasso regression model (alpha is the L1 penalty strength)
lasso_model = Lasso(alpha=0.1)

# Train the model on the standardized features
lasso_model.fit(X_train_scaled, y_train)

# Coefficients are per standard deviation of each feature, in column order
coefficients = lasso_model.coef_

# Identify important features (non-zero coefficients)
important_features = X.columns[coefficients != 0]

print(f"Important features selected by Lasso: {important_features}")


Important features selected by Lasso: Index(['Sqft', 'Beds', 'Age'], dtype='object')


Task 3: ElasticNet for Customer Churn<br>
Apply ElasticNet regularization, which blends the Ridge (L2) and Lasso (L1) penalties, to the customer churn data.

In [3]:
from sklearn.linear_model import ElasticNet, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Synthetic customer churn dataset with features 'Tenure', 'Charges' and 'Total'.
# NOTE: the churn label below is drawn independently of the features, so no
# model fitted on this data can be expected to beat the ~70/30 base rate.
np.random.seed(42)
size = 1000
tenure = np.random.randint(1, 72, size=size)
charges = np.random.uniform(20, 120, size=size)
total = tenure * charges + np.random.normal(0, 50, size=size)
churn = np.random.choice([0, 1], size=size, p=[0.7, 0.3])

df = pd.DataFrame({'Tenure': tenure, 'Charges': charges, 'Total': total, 'Churn': churn})

X = df[['Tenure', 'Charges', 'Total']]
y = df['Churn']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features: the elastic-net penalty is scale-sensitive and the
# saga solver converges far more reliably on standardized inputs. The scaler is
# fitted on the training split only to avoid leaking test-set statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Churn is a binary label, so use a classifier rather than the ElasticNet
# regressor: logistic regression with an elastic-net penalty keeps the same
# L1/L2 blend (l1_ratio=0.5). 'saga' is the solver that supports this penalty.
# C is the inverse regularization strength (1.0 is scikit-learn's default).
elasticnet_model = LogisticRegression(
    penalty='elasticnet',
    solver='saga',
    l1_ratio=0.5,
    C=1.0,
    max_iter=1000,
    random_state=42,
)

# Train the model
elasticnet_model.fit(X_train_scaled, y_train)

# Per-customer churn probability (column 1 is the positive class, Churn == 1)
y_pred = elasticnet_model.predict_proba(X_test_scaled)[:, 1]

# Hard 0/1 class predictions for classification metrics
y_pred_label = elasticnet_model.predict(X_test_scaled)

# Evaluate: RMSE of the predicted probabilities (square root of the Brier
# score) plus plain classification accuracy
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
accuracy = accuracy_score(y_test, y_pred_label)

print(f"RMSE for ElasticNet: {rmse:.2f}")
print(f"Accuracy for ElasticNet: {accuracy:.2f}")


RMSE for ElasticNet: 0.45
