In [None]:
# Question 1: Optimal alpha value and changes with doubled alpha
# The optimal alphas are read from the GridSearchCV objects fitted earlier in the notebook.
from sklearn.base import clone

optimal_alpha_ridge = ridge_gs.best_params_['ridge__alpha']
optimal_alpha_lasso = lasso_gs.best_params_['lasso__alpha']

# Double the alpha values
double_alpha_ridge = 2 * optimal_alpha_ridge
double_alpha_lasso = 2 * optimal_alpha_lasso

# Retrain with the doubled alphas by cloning the tuned pipelines and changing ONLY alpha.
# Fitting a bare Ridge/Lasso on raw X_train would skip whatever preprocessing steps
# ridge_best / lasso_best contain, so the coefficient differences below would reflect
# the preprocessing mismatch rather than the effect of doubling alpha.
# Note: ridge_double_alpha / lasso_double_alpha are now Pipelines; .predict(X) still
# works on raw features, and the fitted coefficients live on the final step.
ridge_double_alpha = clone(ridge_best).set_params(ridge__alpha=double_alpha_ridge).fit(X_train, y_train)
lasso_double_alpha = clone(lasso_best).set_params(lasso__alpha=double_alpha_lasso).fit(X_train, y_train)

ridge_double_coef = ridge_double_alpha.named_steps['ridge'].coef_
lasso_double_coef = lasso_double_alpha.named_steps['lasso'].coef_

# Evaluate the changes in coefficients (doubled alpha minus optimal alpha)
ridge_coef_change = ridge_double_coef - ridge_best.named_steps['ridge'].coef_
lasso_coef_change = lasso_double_coef - lasso_best.named_steps['lasso'].coef_

# Identify the most important predictor variables after the change, ranked by |coefficient|.
# NOTE(review): indexing by X_train.columns assumes the pipeline keeps the number and
# order of features unchanged (e.g. scaling only) -- confirm against the pipeline definition.
# For Ridge
ridge_most_important = pd.Series(ridge_double_coef, index=X_train.columns).abs().nlargest(5)
# For Lasso
lasso_most_important = pd.Series(lasso_double_coef, index=X_train.columns).abs().nlargest(5)



In [None]:
# Report the Question 1 results in a fixed order: tuned alphas, coefficient deltas
# under doubled alpha, then the top predictors of the doubled-alpha models.
question1_report = [
    ("Optimal Alpha for Ridge:", optimal_alpha_ridge),
    ("Optimal Alpha for Lasso:", optimal_alpha_lasso),
    # A leading "\n" in a label starts a new visual section in the output.
    ("\nChanges in Ridge Coefficients with doubled alpha:", ridge_coef_change),
    ("Changes in Lasso Coefficients with doubled alpha:", lasso_coef_change),
    ("\nMost Important Predictors in Ridge after doubling alpha:", ridge_most_important),
    ("Most Important Predictors in Lasso after doubling alpha:", lasso_most_important),
]

for report_label, report_value in question1_report:
    print(report_label, report_value)


In [None]:
# Question 2: Choosing between Ridge and Lasso
# This is a conceptual decision based on model performance and the nature of the data.
# Typically you compare the cross-validated scores, the interpretability of each model,
# and the number of features you want to end up with.

# Question 3: Important predictor variables after excluding top 5 from Lasso
from sklearn.base import clone

# Exclude the top 5 predictors from the data.
# NOTE(review): lasso_most_important is taken as defined earlier in the notebook --
# confirm it refers to the Lasso model whose top predictors should be excluded.
top_5_predictors_lasso = list(lasso_most_important.index)
X_train_reduced = X_train.drop(columns=top_5_predictors_lasso)
X_test_reduced = X_test.drop(columns=top_5_predictors_lasso)

# Retrain the Lasso model on the reduced dataset.
# optimal_alpha_lasso was tuned inside the lasso pipeline, so the refit reuses a clone
# of that pipeline instead of a bare Lasso on raw features; otherwise the alpha would
# be applied to differently-preprocessed inputs and |coef| would not be comparable.
# NOTE(review): assumes the pipeline's preprocessing does not reference the dropped
# columns by name -- confirm against the pipeline definition.
lasso_reduced = clone(lasso_best).set_params(lasso__alpha=optimal_alpha_lasso).fit(X_train_reduced, y_train)

# Find the new top 5 predictors, ranked by absolute coefficient
new_top_5_lasso = pd.Series(
    lasso_reduced.named_steps['lasso'].coef_, index=X_train_reduced.columns
).abs().nlargest(5)

# Question 4: Ensuring robustness and generalizability
# This question is about approach and practice rather than specific code:
# use cross-validation, proper data preprocessing, feature selection and outlier handling,
# and possibly ensemble methods, to improve robustness and generalizability.

# Note: The actual results depend on running this code with your specific data and Python environment.
# Interpret the coefficients and model changes in the context and domain knowledge of the data you are working with.


In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Predict on the held-out test set with the doubled-alpha Ridge and Lasso models.
ridge_test_pred = ridge_double_alpha.predict(X_test)
lasso_test_pred = lasso_double_alpha.predict(X_test)

# Compute every metric first, then report them grouped by metric.
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_test_pred))
lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso_test_pred))
ridge_r2 = r2_score(y_test, ridge_test_pred)
lasso_r2 = r2_score(y_test, lasso_test_pred)
ridge_mae = mean_absolute_error(y_test, ridge_test_pred)
lasso_mae = mean_absolute_error(y_test, lasso_test_pred)

# Root mean squared error
print("Ridge Regression RMSE:", ridge_rmse)
print("Lasso Regression RMSE:", lasso_rmse)

# Coefficient of determination
print("Ridge Regression R²:", ridge_r2)
print("Lasso Regression R²:", lasso_r2)

# Mean absolute error
print("Ridge Regression MAE:", ridge_mae)
print("Lasso Regression MAE:", lasso_mae)


In [None]:
# Boxplot for cross-validation scores
# The plot previously showed the distribution of test-set *predictions* under a
# "Cross-Validation Scores" title. It now shows what the title says: the per-fold
# validation scores of the best candidate found by each grid search.
def _best_fold_scores(search):
    """Return the per-fold validation scores of the best candidate of a fitted search.

    Reads the ``split<k>_test_score`` entries of ``search.cv_results_`` at
    ``search.best_index_`` for each of the ``search.n_splits_`` folds.
    NOTE(review): these keys assume single-metric scoring -- confirm against how
    ridge_gs / lasso_gs were constructed.
    """
    return [
        search.cv_results_[f'split{fold}_test_score'][search.best_index_]
        for fold in range(search.n_splits_)
    ]


# Wrapping in Series lets the two searches have different numbers of folds.
cv_scores = pd.DataFrame({
    'Ridge': pd.Series(_best_fold_scores(ridge_gs)),
    'Lasso': pd.Series(_best_fold_scores(lasso_gs)),
})

fig, ax = plt.subplots(figsize=(7, 5))
sns.boxplot(data=cv_scores, ax=ax)  # column names become the x tick labels
ax.set_title('Cross-Validation Scores for Ridge and Lasso Regression')
ax.set_ylabel('Scores')
plt.show()

In [None]:
# Show the five strongest predictors (by absolute coefficient) of the Lasso model
# that was refit without the original top five.
new_top_5_label = "New Top 5 Important Predictor Variables after excluding the initial top 5 from Lasso:"
print(new_top_5_label, new_top_5_lasso)
