# 3.1 Code Brief: Introduction to Ensemble Learning and Random Forests

Quick reference for ensemble learning and random forest concepts.

## Setup

In [None]:
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Wisdom of Crowds Visualization

In [None]:
# Simulate a crowd of noisy guessers whose guesses are normally distributed
# around the true value, and track the running mean as guessers are added.
np.random.seed(42)
true_value = 500
n_guessers = 100
individual_guesses = np.random.normal(loc=true_value, scale=100, size=n_guessers)

# Shared x-axis 1..n_guessers; it also serves as the divisor that turns the
# cumulative sum into a running average over the first k guesses.
guesser_counts = np.arange(1, n_guessers + 1)
ensemble_averages = np.cumsum(individual_guesses) / guesser_counts

guess_trace = go.Scatter(
    x=guesser_counts,
    y=individual_guesses,
    mode='markers',
    name='Individual Guesses',
    marker={'color': 'lightblue', 'size': 6, 'opacity': 0.6},
)
average_trace = go.Scatter(
    x=guesser_counts,
    y=ensemble_averages,
    mode='lines',
    name='Ensemble Average',
    line={'color': 'darkblue', 'width': 3},
)

fig = go.Figure(data=[guess_trace, average_trace])
# Dashed reference line marking the value the guessers are trying to hit.
fig.add_hline(y=true_value, line_dash="dash", line_color="red", annotation_text="True Value")
fig.update_layout(
    title='Wisdom of Crowds: Ensemble Averaging',
    xaxis_title='Number of Models',
    yaxis_title='Prediction',
    height=450,
)
fig.show()

## Bootstrap Sampling Demo

In [None]:
# Draw three bootstrap resamples (sampling with replacement, same size as the
# original) from a ten-item dataset and show which items each resample missed.
np.random.seed(42)
original_data = np.array(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'])
n_samples = len(original_data)

# .tolist() converts NumPy string scalars to plain Python str, so the printed
# lists read ['A', 'B', ...] instead of [np.str_('A'), ...] on NumPy >= 2.
print("Original Dataset:", original_data.tolist())
print("\nBootstrap Samples:")
for i in range(3):
    bootstrap_indices = np.random.choice(n_samples, size=n_samples, replace=True)
    bootstrap_sample = original_data[bootstrap_indices]
    # Out-of-bag = every index that was never drawn for this resample.
    oob_indices = set(range(n_samples)) - set(bootstrap_indices)
    # Sort explicitly: set iteration order is an implementation detail, and the
    # OOB items should be listed in dataset order.
    oob_items = original_data[sorted(oob_indices)].tolist()
    print(f"Sample {i+1}: {bootstrap_sample.tolist()} | OOB: {oob_items}")

## Variance Reduction with Ensemble Size

In [None]:
# Monte Carlo illustration: for each ensemble size, repeat an experiment in
# which every "tree" emits a noisy prediction around true_prob, average the
# trees, and record how much that average varies across experiments.
np.random.seed(42)
n_experiments = 100
tree_counts = [1, 5, 10, 25, 50, 100, 200]
true_prob = 0.3

variance_by_count = []
for n_trees in tree_counts:
    # One averaged prediction per experiment; per-tree predictions are clipped
    # to [0, 1] before averaging.
    ensemble_predictions = [
        np.clip(np.random.normal(true_prob, 0.15, n_trees), 0, 1).mean()
        for _ in range(n_experiments)
    ]
    variance_by_count.append(np.var(ensemble_predictions))

variance_trace = go.Scatter(
    x=tree_counts,
    y=variance_by_count,
    mode='lines+markers',
    line={'color': 'darkblue', 'width': 3},
    marker={'size': 10},
)
fig = go.Figure(data=[variance_trace])
# Log-scaled x-axis because the ensemble sizes span 1 to 200.
fig.update_layout(
    title='Variance Reduction with Ensemble Size',
    xaxis_title='Number of Trees',
    yaxis_title='Variance of Predictions',
    xaxis_type='log',
    height=400,
)
fig.show()

## Key Concepts

| Concept | Description |
|:--------|:------------|
| Ensemble | Combine many weak learners into one strong learner |
| Bagging | Parallel training on bootstrap samples |
| Out-of-Bag (OOB) | Rows left out of a given bootstrap sample: about 37% (≈ 1/e) of the data for each sample |
| Feature Randomness | Consider only a random subset of features at each split |
| max_features | Typically sqrt(p) for classification, where p is the total number of features |