In [None]:
# Train a Gradient Boosting Regressor to predict continuous values by minimizing the error iteratively.

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor

# Build a reproducible synthetic regression problem (1000 samples, 5 features).
X, y = make_regression(
    n_samples=1000,
    n_features=5,
    noise=0.1,
    random_state=42,
)

# Hold out 20% of the samples for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the gradient boosting model, then fit it on the training split only.
gbr = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
gbr.fit(X_train, y_train)

# Report R^2 on both splits (training first, then testing) so the gap between
# them can be read off directly.
for split_label, split_X, split_y in (
    ("Training R^2 Score:", X_train, y_train),
    ("Testing R^2 Score:", X_test, y_test),
):
    print(split_label, gbr.score(split_X, split_y))

In [None]:
# Use Grid Search to optimize the hyperparameters of the Gradient Boosting Regressor.

from sklearn.model_selection import GridSearchCV

# Candidate values for each hyperparameter. The search below tries every
# combination of the three lists (3 x 3 x 3 = 27 candidates).
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [2, 3, 5]
}

# Exhaustive search with 3-fold cross-validation, scored by R^2, fitted on the
# training split only so the test split stays untouched during model selection.
# NOTE: `gbr` is only used as the template estimator here; cells that keep using
# `gbr` still see the originally fitted model, not the tuned one. The tuned
# model is available as `grid_search.best_estimator_`.
grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, cv=3, scoring='r2', verbose=1)
grid_search.fit(X_train, y_train)

# Best parameters and their mean cross-validated R^2 on the training split.
print("Best Parameters:", grid_search.best_params_)
print("Best R^2 Score:", grid_search.best_score_)

# The cross-validated score above was used to pick the winner, so it is an
# optimistic estimate. Also score the refitted best model on the held-out test
# split, mirroring the train/test reporting done for the baseline model.
print("Best Model Testing R^2 Score:", grid_search.score(X_test, y_test))

In [None]:
# Visualize the importance of features in predicting the target variable using Gradient Boosting.

import matplotlib.pyplot as plt
import numpy as np

# Pull the per-feature importance scores from the fitted model and build
# 1-based display labels ("Feature 1", "Feature 2", ...), one per score.
feature_importances = gbr.feature_importances_
features = [f"Feature {i+1}" for i, _ in enumerate(feature_importances)]

# Draw one bar per feature using the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(features, feature_importances, color='skyblue')
ax.set_title("Feature Importance in Gradient Boosting Regressor")
ax.set_xlabel("Features")
ax.set_ylabel("Importance Score")
plt.show()

In [None]:
# Use XGBoost for regression and compare its performance with Gradient Boosting Regressor.

from xgboost import XGBRegressor

# Build the XGBoost regressor with the same hyperparameter values that were
# given to the gradient boosting model, then fit it on the training split.
# NOTE(review): the variable shares its name with the `xgboost` package; it is
# kept unchanged because other cells may refer to it.
xgboost = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
xgboost.fit(X_train, y_train)

# Report the model's score on the training split first, then the testing split.
for split_label, split_X, split_y in (
    ("XGBoost Training R^2 Score:", X_train, y_train),
    ("XGBoost Testing R^2 Score:", X_test, y_test),
):
    print(split_label, xgboost.score(split_X, split_y))

In [None]:
# Visualize residuals to assess the model’s prediction errors and identify patterns.

import matplotlib.pyplot as plt

# Predict on the held-out test set with the fitted gradient boosting model.
y_pred = gbr.predict(X_test)

# Residual = observed - predicted, so a positive residual means the model
# under-predicted that sample.
residuals = y_test - y_pred

# Plot residuals against the *predicted* values rather than the true values.
# Because residuals are computed from y_test, plotting them against y_test
# shows a trend by construction even for a well-fit model; against the
# predictions, any visible structure points to a real modelling problem.
plt.figure(figsize=(10, 6))
plt.scatter(y_pred, residuals, color='blue', alpha=0.6)
# Zero line: points above it were under-predicted, points below over-predicted.
plt.axhline(0, color='red', linestyle='--', linewidth=1)
plt.title("Residuals of Gradient Boosting Regressor")
plt.xlabel("Predicted Values")
plt.ylabel("Residuals")
plt.show()