# 4.1 Introduction to Gradient Boosting - Code Brief

Condensed reference for gradient boosting concepts and visualizations.

## Setup

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## AdaBoost Sample Reweighting

In [None]:
# AdaBoost reweighting demo: samples the previous learner got wrong are
# up-weighted so that they matter more in the next round.
# No random draws happen in this cell; the seed call only resets NumPy's
# global RNG state.
np.random.seed(42)
n_samples = 10
students = ["S" + str(idx) for idx in range(1, n_samples + 1)]

# Round 0: every sample carries the same weight, 1 / n_samples
initial_weights = np.full(n_samples, 1.0 / n_samples)

# Round 1: scale the misclassified samples (given by position) by a fixed
# factor of 2.5, leave the others untouched, then renormalize so the
# weights sum to 1 again
misclassified_r1 = [2, 6, 7]
was_misclassified = np.isin(np.arange(n_samples), misclassified_r1)
weights_r1 = np.where(was_misclassified, initial_weights * 2.5, initial_weights)
weights_r1 /= weights_r1.sum()

print(f"Initial weights: {initial_weights}")
print(f"After round 1: {weights_r1}")

## Gradient Boosting Residuals

In [None]:
# Gradient boosting in miniature: start from a constant prediction, then
# add a shrunken correction fitted to what that prediction got wrong.
np.random.seed(42)

# Noisy samples of sin(x) + 0.5x on [0, 10]
X = np.linspace(0, 10, 50)
y_true = np.sin(X) + 0.5 * X
y = y_true + np.random.normal(loc=0, scale=0.3, size=len(X))

# Iteration 0: the model is just the mean of y
pred_0 = np.full(y.shape, y.mean())
residuals_0 = y - pred_0

# Iteration 1: a single-split "tree" at X = 5 that predicts the mean
# residual on each side of the split
left_of_split = X < 5
left_value = residuals_0[left_of_split].mean()
right_value = residuals_0[~left_of_split].mean()
tree_1 = np.where(left_of_split, left_value, right_value)

# Only half of the correction is applied (learning_rate = 0.5)
pred_1 = pred_0 + 0.5 * tree_1
residuals_1 = y - pred_1

mse_0 = np.mean(residuals_0**2)
mse_1 = np.mean(residuals_1**2)
print(f"MSE after iteration 0: {mse_0:.3f}")
print(f"MSE after iteration 1: {mse_1:.3f}")

## Library Comparison

In [None]:
# Side-by-side summary of XGBoost, LightGBM and CatBoost.
# Each record is (feature, XGBoost, LightGBM, CatBoost); the records are
# transposed into the column-oriented dict that pandas consumes.
library_columns = ['Feature', 'XGBoost', 'LightGBM', 'CatBoost']
feature_rows = [
    ('Release Year', '2014', '2017', '2017'),
    ('Developer', 'DMLC', 'Microsoft', 'Yandex'),
    ('Tree Growth', 'Level-wise', 'Leaf-wise', 'Symmetric'),
    ('Categorical Handling', 'Requires encoding', 'Native (integer)', 'Native (strings OK)'),
    ('Speed', 'Fast', 'Very Fast', 'Fast'),
]
comparison_data = {
    column: list(values)
    for column, values in zip(library_columns, zip(*feature_rows))
}

# The bare last expression renders the table as the cell output
pd.DataFrame(comparison_data)

## Key Concepts

| Concept | Description |
|:--------|:------------|
| **Bagging** | Parallel training, reduces variance |
| **Boosting** | Sequential training, reduces bias |
| **AdaBoost** | Reweights samples so misclassified ones count more in the next round |
| **Gradient Boosting** | Fits each new tree to the residuals of the current ensemble |
| **XGBoost** | General-purpose, regularized |
| **LightGBM** | Fast, memory-efficient |
| **CatBoost** | Best for categorical features |