[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googlecolab/colab-samples/blob/main/notebooks/basic_notebook_features/text_cells.ipynb)




In [None]:
# Install PyCaret from GitHub master branch
!pip install git+https://github.com/pycaret/pycaret.git@master --upgrade -q

In [None]:
# Load Boston Housing dataset for regression
from sklearn.datasets import fetch_openml
import pandas as pd
import warnings

# Blanket suppression: hides every warning for the rest of the kernel session.
warnings.filterwarnings('ignore')

# Load Boston Housing dataset from OpenML
boston = fetch_openml(name='boston', version=1, as_frame=True, parser='auto')

# Rename the target column for clarity. rename() returns a new frame, so
# boston.frame is left untouched and re-running this cell gives the same result.
df = boston.frame.rename(columns={'MEDV': 'house_price'})

# rename() silently ignores missing columns, so fail loudly if the target is absent.
assert 'house_price' in df.columns, "expected target column 'MEDV' in the OpenML frame"

# Every column except the target, regardless of where the target sits in the frame.
feature_names = [col for col in df.columns if col != 'house_price']

print(f"✅ Boston Housing Dataset loaded: {df.shape}")
print("\nTarget Statistics (House Prices in $1000s):")
print(df['house_price'].describe())
print(f"\nFeatures: {feature_names}")
df.head()

In [None]:
from pycaret.regression import *

# Experiment configuration, kept in one place so the options are easy to scan/tweak.
setup_options = dict(
    target='house_price',        # column to predict
    session_id=456,
    normalize=True,
    transformation=True,
    remove_outliers=True,
    polynomial_features=True,
    fold=10,
    verbose=False,
)

# Initialize the regression environment on the loaded frame.
reg_exp = setup(data=df, **setup_options)

In [None]:
# Check if GPU is connected: query nvidia-smi for the GPU name only,
# formatted as CSV without a header row.
# NOTE(review): nothing visible in this notebook consumes the result (setup() is
# called without any GPU option) — confirm whether this check is still needed.
!nvidia-smi --query-gpu=name --format=csv,noheader

In [None]:
# Compare all regression models and keep the five best, ranked by R².
# With n_select=5 the result is indexed as a list downstream (top5[0]).
print("🔍 Comparing regression models...")
top5 = compare_models(n_select=5, sort='R2')
print("\n✅ Top 5 models selected based on R² score")

In [None]:
# Get the best model (first entry of the ranked comparison result)
best_model = top5[0]

print("📊 Evaluating the best regression model...")
print(f"Best Model: {best_model.__class__.__name__}")

# Generate multiple evaluation plots, one per plot type.
# NOTE(review): the 'feature' plot presumably needs an estimator exposing
# coefficients or feature importances — confirm for the selected model.
for plot_kind in ('residuals', 'error', 'feature', 'learning'):
    plot_model(best_model, plot=plot_kind)

In [None]:
# Interactive dashboard for model evaluation of the best (untuned) model.
# NOTE(review): presumably renders a notebook widget, so the output may not
# survive a static export of the notebook — confirm.
evaluate_model(best_model)

In [None]:
# Tune hyperparameters for optimal performance (20 search iterations, optimizing MAE).
# NOTE(review): models were ranked by R² during comparison but are tuned for MAE
# here — confirm this is intentional.
print("⚡ Optimizing hyperparameters...")
tuned_model = tune_model(best_model, n_iter=20, optimize='MAE')

# pull() returns the most recent score grid as a DataFrame; grab it right after
# tuning so the headers below are followed by actual results.
tuned_results = pull()

print("\n📈 Performance Metrics:")
print("Tuned Model Results:")
tuned_results

In [None]:
from sklearn.metrics import mean_absolute_error, r2_score

# Generate predictions on the hold-out (test) set with the tuned model.
# This must happen BEFORE finalize_model(): finalizing refits on the full
# dataset, hold-out included, so scoring the finalized model on the hold-out
# would report leaked, over-optimistic metrics.
predictions = predict_model(tuned_model)

# Calculate prediction metrics on the untouched hold-out rows
actual_prices = predictions['house_price']
predicted_prices = predictions['prediction_label']
mae = mean_absolute_error(actual_prices, predicted_prices)
r2 = r2_score(actual_prices, predicted_prices)

# Train on full dataset (train + hold-out) to produce the model that gets saved
final_model = finalize_model(tuned_model)

print("\n✅ Test Set Performance:")
print(f"   Mean Absolute Error: ${mae:.2f}k")
print(f"   R² Score: {r2:.4f}")
print("\n📊 Sample Predictions (Actual vs Predicted):")

predictions[['house_price', 'prediction_label']].head(10)

In [None]:
# Plot actual vs predicted prices
import matplotlib.pyplot as plt

actual_prices = predictions['house_price']
predicted_prices = predictions['prediction_label']

# End points of the reference diagonal (y = x) spanning the observed actual prices.
price_min, price_max = actual_prices.min(), actual_prices.max()

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(actual_prices, predicted_prices, alpha=0.6)
ax.plot([price_min, price_max], [price_min, price_max], 'r--', lw=2)
ax.set_xlabel('Actual House Price ($1000s)', fontsize=12)
ax.set_ylabel('Predicted House Price ($1000s)', fontsize=12)
ax.set_title('Actual vs Predicted House Prices', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

print("✅ Perfect predictions would fall on the red diagonal line")

In [None]:
# Save the final model; the success message below reports the file as '<model_name>.pkl'.
model_name = 'boston_housing_regression_model'
save_model(final_model, model_name)

print(f"✅ Model successfully saved as '{model_name}.pkl'")
print(f"📦 Model can be loaded using: loaded_model = load_model('{model_name}')")
print("\n🎯 Model Summary:")
# Report the actual row count of the loaded frame instead of a hard-coded number.
print(f"   • Dataset: Boston Housing ({len(df)} samples)")
print("   • Target: Median house prices")
print(f"   • Best Algorithm: {best_model.__class__.__name__}")
print(f"   • Test MAE: ${mae:.2f}k")
print(f"   • Test R²: {r2:.4f}")