# Module 1 Exercises: Foundations of Bayesian Thinking

Complete these exercises to solidify your understanding of Bayesian fundamentals.

---

In [None]:
# Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

# Seed NumPy's global random state so that sampled results are repeatable
# from run to run.
np.random.seed(42)
# Apply a white-grid look to every figure created after this point.
# NOTE(review): the 'seaborn-v0_8-*' style names appear to require
# Matplotlib >= 3.6 - confirm against the course environment.
plt.style.use('seaborn-v0_8-whitegrid')

## Exercise 1: Bayes' Theorem Calculator (Easy)

Implement a function that applies Bayes' theorem to calculate posterior probabilities.

### Scenario
A technical indicator signals "buy" on 30% of days.
- When it signals buy, the market goes up 70% of the time
- When there is no signal, the market goes up 40% of the time

**Question**: If the market went up today, what's the probability there was a buy signal?

In [None]:
def bayes_theorem(prior: float, likelihood_given_hypothesis: float, likelihood_given_not_hypothesis: float) -> float:
    """
    Calculate posterior probability using Bayes' theorem.

    This is an exercise stub: the line marked YOUR CODE HERE holds a
    placeholder name that is not defined anywhere, so calling the function
    fails until the placeholder is replaced with a real expression.

    Parameters:
    -----------
    prior : float
        P(Hypothesis) - prior probability of hypothesis
    likelihood_given_hypothesis : float
        P(Evidence | Hypothesis) - probability of evidence given hypothesis is true
    likelihood_given_not_hypothesis : float
        P(Evidence | Not Hypothesis) - probability of evidence given hypothesis is false

    Returns:
    --------
    float
        P(Hypothesis | Evidence) - posterior probability
    """
    # TODO: Implement Bayes' theorem
    # Hint: P(H|E) = P(E|H) * P(H) / P(E)
    # Where P(E) = P(E|H)*P(H) + P(E|~H)*P(~H)
    # P(~H) is the complement of the prior, i.e. 1 - P(H).

    posterior = ___________  # YOUR CODE HERE

    return posterior

# Check the implementation against the scenario described above.
# Hypothesis = "there was a buy signal"; evidence = "the market went up".
prior_signal = 0.30          # P(Buy signal)
p_up_given_signal = 0.70     # P(Market up | Buy signal)
p_up_given_no_signal = 0.40  # P(Market up | No signal)

# Posterior of interest: P(Buy signal | Market up)
posterior = bayes_theorem(
    prior=prior_signal,
    likelihood_given_hypothesis=p_up_given_signal,
    likelihood_given_not_hypothesis=p_up_given_no_signal,
)
print(f"P(Buy signal | Market up) = {posterior:.1%}")

# The printed value should be close to 42.9%

## Exercise 2: Strategy Evaluation (Medium)

You've backtested a mean-reversion strategy on crude oil futures with these results:
- 45 winning trades out of 60 total trades (75% win rate)
- Average win: $1,200
- Average loss: $800

### Tasks:
1. Calculate the posterior distribution of the true win rate (use uniform prior)
2. Find the probability the true win rate exceeds 60%
3. Calculate expected P&L per trade with uncertainty
4. Determine how many more trades are needed to be 95% confident that the true win rate exceeds 50%

In [None]:
# Given data
wins = 45
total = 60
avg_win = 1200
# Stored as a signed (negative) amount, so it can be plugged straight into
# win_rate * avg_win + (1 - win_rate) * avg_loss in Task 3.
avg_loss = -800

# Task 1: Calculate posterior distribution
# Hint: Use Beta(alpha + wins, beta + losses) where alpha=beta=1 for uniform prior
# (losses is the number of trades that were not wins: total - wins)

posterior_alpha = _________  # YOUR CODE HERE
posterior_beta = _________   # YOUR CODE HERE
# Frozen Beta distribution; note this rebinds the name `posterior` that
# Exercise 1 used for a plain float.
posterior = stats.beta(posterior_alpha, posterior_beta)

print(f"Posterior mean: {posterior.mean():.1%}")
print(f"Posterior std: {posterior.std():.1%}")
# ppf is the inverse CDF: the 2.5% and 97.5% quantiles bound the central 95% interval.
print(f"95% Credible Interval: [{posterior.ppf(0.025):.1%}, {posterior.ppf(0.975):.1%}]")

In [None]:
# Task 2: P(win rate > 60%)
# This is the posterior mass lying above 0.60, computed from the frozen
# `posterior` distribution built in Task 1.

prob_above_60 = _________  # YOUR CODE HERE
print(f"P(true win rate > 60%) = {prob_above_60:.1%}")

In [None]:
# Task 3: Expected P&L per trade with uncertainty
# Sample win rates from posterior, calculate expected P&L for each

n_samples = 10000
win_rate_samples = _________  # YOUR CODE HERE: sample from posterior

# Expected P&L = win_rate * avg_win + (1 - win_rate) * avg_loss
# (avg_loss is already negative, so it is added, not subtracted)
expected_pnl_samples = _________  # YOUR CODE HERE

print(f"Expected P&L per trade: ${np.mean(expected_pnl_samples):.0f}")
print(f"Std of expected P&L: ${np.std(expected_pnl_samples):.0f}")
# The comparison `> 0` must be element-wise, so expected_pnl_samples has to be
# a NumPy array (not a list); the mean of the boolean mask is the fraction of
# posterior draws with a positive expected P&L.
print(f"P(Expected P&L > 0): {np.mean(expected_pnl_samples > 0):.1%}")

In [None]:
# Task 4: How many more trades to be 95% confident rate > 50%?
# Assume future trades have same 75% win rate

def confidence_above_threshold(total_wins, total_trades, threshold=0.5, target_confidence=0.95):
    """
    Posterior probability that the true win rate exceeds `threshold`.

    A uniform Beta(1, 1) prior is assumed, so after observing the trades the
    posterior is Beta(1 + wins, 1 + losses).

    Parameters:
    -----------
    total_wins : int
        Number of winning trades observed
    total_trades : int
        Total number of trades observed (wins + losses)
    threshold : float
        Win rate the true rate is compared against
    target_confidence : float
        Not used in the calculation; retained so existing calls that pass it
        keep working

    Returns:
    --------
    float
        P(true win rate > threshold | data)
    """
    total_losses = total_trades - total_wins
    posterior = stats.beta(1 + total_wins, 1 + total_losses)
    # sf() is the survival function (1 - cdf) evaluated directly, so very
    # small tail probabilities are not rounded away to exactly 0.
    return posterior.sf(threshold)

# Current confidence
current_conf = confidence_above_threshold(wins, total)
print(f"Current confidence (rate > 50%): {current_conf:.1%}")

# Find additional trades needed, assuming future trades keep winning at the
# observed rate.
observed_rate = wins / total
max_additional_trades = 1000  # search cap so the loop always terminates

if current_conf >= 0.95:
    # Nothing to search for: the existing sample already clears the bar.
    additional_trades = 0
    print("Already at 95% confidence!")
else:
    for additional_trades in range(1, max_additional_trades + 1):
        # int() truncates, so the projected win count is rounded down
        # (a conservative projection).
        future_wins = int(wins + additional_trades * observed_rate)
        future_total = total + additional_trades
        conf = confidence_above_threshold(future_wins, future_total)

        if conf >= 0.95:
            print(f"\nNeed {additional_trades} more trades to reach 95% confidence")
            print(f"That would be {future_wins} wins out of {future_total} trades")
            break
    else:
        # Runs only when the loop finished without hitting `break`.
        print(f"\nDid not reach 95% confidence within {max_additional_trades} additional trades")

## Exercise 3: Monte Carlo Risk Analysis (Hard)

Extend the Monte Carlo simulation to create a comprehensive risk analysis tool.

### Requirements:
1. Model uncertainty in both win rate AND win/loss sizes
2. Include transaction costs ($10 per trade)
3. Calculate maximum drawdown distribution
4. Compute probability of ruin (losing 50% of initial capital)

In [None]:
def comprehensive_risk_simulation(
    # Prior data
    observed_wins: int,
    observed_total: int,
    observed_avg_win: float,
    observed_avg_loss: float,
    observed_win_std: float,  # Standard deviation of winning trades
    observed_loss_std: float,  # Standard deviation of losing trades
    # Simulation parameters
    n_future_trades: int = 100,
    n_simulations: int = 5000,
    initial_capital: float = 100000,
    transaction_cost: float = 10,
) -> dict:
    """
    Comprehensive risk simulation with parameter uncertainty.

    This is an exercise stub: every line marked YOUR CODE HERE holds an
    undefined placeholder name, so the function fails when called until the
    placeholders are replaced.

    Each simulation draws its own win rate from the Beta posterior (uniform
    Beta(1, 1) prior updated with the observed wins and losses), then builds
    an equity curve one trade at a time starting from `initial_capital`.

    Parameters:
    -----------
    observed_wins, observed_total : int
        Winning trades and total trades in the backtest
    observed_avg_win, observed_avg_loss : float
        Mean P&L of winning / losing trades (the example call below passes
        the loss as a negative number)
    observed_win_std, observed_loss_std : float
        Standard deviation of winning / losing trade P&L
    n_future_trades : int
        Trades simulated per path
    n_simulations : int
        Number of independent paths
    initial_capital : float
        Starting equity of every path
    transaction_cost : float
        Cost per trade, to be subtracted in Step 5

    Returns dict with:
    - final_equity: array of final portfolio values, length n_simulations
    - max_drawdowns: array of maximum drawdowns for each simulation
      (fractions of the running peak, expected to be <= 0)
    - equity_paths: 2D array of equity curves, shape
      (n_simulations, n_future_trades + 1); column 0 is the initial capital
    """
    # Pre-allocate the outputs; each simulation fills one row/entry.
    results = {
        'final_equity': np.zeros(n_simulations),
        'max_drawdowns': np.zeros(n_simulations),
        'equity_paths': np.zeros((n_simulations, n_future_trades + 1))
    }

    # Posterior for win rate: Beta(1 + wins, 1 + losses)
    win_rate_posterior = stats.beta(
        1 + observed_wins,
        1 + observed_total - observed_wins
    )

    for sim in range(n_simulations):
        # TODO: Implement the simulation
        # Step 1: Draw win rate from posterior
        win_rate = _________  # YOUR CODE HERE

        # Step 2: Initialize equity curve
        # One extra slot so that index 0 holds the starting capital.
        equity = np.zeros(n_future_trades + 1)
        equity[0] = initial_capital

        for trade in range(n_future_trades):
            # Step 3: Determine if trade wins (Bernoulli with win_rate probability)
            is_win = _________  # YOUR CODE HERE

            # Step 4: Draw trade P&L from appropriate distribution
            if is_win:
                trade_pnl = _________  # YOUR CODE HERE: sample from Normal(avg_win, win_std)
            else:
                trade_pnl = _________  # YOUR CODE HERE: sample from Normal(avg_loss, loss_std)

            # Step 5: Update equity (subtract transaction cost)
            equity[trade + 1] = _________  # YOUR CODE HERE

        # Step 6: Calculate max drawdown for this simulation
        # running_max[i] is the highest equity seen up to trade i. It is never
        # below equity[0], so the division is safe for a positive
        # initial_capital, and every drawdown value is <= 0.
        running_max = np.maximum.accumulate(equity)
        drawdowns = (equity - running_max) / running_max
        max_dd = _________  # YOUR CODE HERE: minimum (most negative) drawdown

        # Store results
        results['final_equity'][sim] = equity[-1]
        results['max_drawdowns'][sim] = max_dd
        results['equity_paths'][sim] = equity

    return results

# Run the simulation on the strategy's backtest figures.
backtest_inputs = dict(
    observed_wins=45,
    observed_total=60,
    observed_avg_win=1200,
    observed_avg_loss=-800,
    observed_win_std=300,
    observed_loss_std=200,
)
simulation_settings = dict(
    n_future_trades=100,
    n_simulations=5000,
    initial_capital=100000,
    transaction_cost=10,
)
sim_results = comprehensive_risk_simulation(**backtest_inputs, **simulation_settings)

In [None]:
# Analyze and visualize results

# Capital levels used for the reference lines and the risk metrics. Keep
# initial_capital equal to the value passed to comprehensive_risk_simulation.
initial_capital = 100000
ruin_level = 0.5 * initial_capital
double_level = 2 * initial_capital

final_equity = sim_results['final_equity']
max_drawdowns = sim_results['max_drawdowns']
equity_paths = sim_results['equity_paths']

# Ruin is path-dependent: a path counts as ruined if its equity dropped below
# the ruin level at ANY trade, even if it recovered by the final trade.
# Looking only at final equity would miss those paths.
ruined = equity_paths.min(axis=1) < ruin_level

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# 1. Final equity distribution
ax = axes[0, 0]
ax.hist(final_equity, bins=50, density=True, alpha=0.7)
ax.axvline(initial_capital, color='red', linestyle='--', label='Initial Capital')
ax.axvline(ruin_level, color='orange', linestyle=':', label='Ruin Threshold (50%)')
ax.set_xlabel('Final Equity ($)')
ax.set_ylabel('Density')
ax.set_title('Final Equity Distribution')
ax.legend()

# 2. Max drawdown distribution
# Drawdowns are stored as negative fractions; negate and scale to show
# positive percentages.
ax = axes[0, 1]
ax.hist(-max_drawdowns * 100, bins=50, density=True, alpha=0.7, color='red')
ax.set_xlabel('Maximum Drawdown (%)')
ax.set_ylabel('Density')
ax.set_title('Maximum Drawdown Distribution')

# 3. Sample equity paths (at most the first 100, to keep the plot readable)
ax = axes[1, 0]
for path in equity_paths[:100]:
    ax.plot(path, alpha=0.1, color='blue')
ax.axhline(initial_capital, color='red', linestyle='--', label='Initial')
ax.axhline(ruin_level, color='orange', linestyle=':', label='Ruin')
ax.set_xlabel('Trade Number')
ax.set_ylabel('Equity ($)')
ax.set_title('Sample Equity Paths')
ax.legend()

# 4. Summary statistics
# The 5th percentile of the (negative) drawdowns is the 95th percentile of
# drawdown magnitude, hence percentile(..., 5) under the "95th" label.
ax = axes[1, 1]
ax.axis('off')
summary_text = f"""
RISK ANALYSIS SUMMARY
=====================

Final Equity:
  Mean: ${np.mean(final_equity):,.0f}
  Median: ${np.median(final_equity):,.0f}
  5th Percentile: ${np.percentile(final_equity, 5):,.0f}
  95th Percentile: ${np.percentile(final_equity, 95):,.0f}

Maximum Drawdown:
  Mean: {-np.mean(max_drawdowns)*100:.1f}%
  Median: {-np.median(max_drawdowns)*100:.1f}%
  95th Percentile: {-np.percentile(max_drawdowns, 5)*100:.1f}%

Risk Metrics:
  P(Loss): {np.mean(final_equity < initial_capital)*100:.1f}%
  P(Ruin - 50% loss): {np.mean(ruined)*100:.2f}%
  P(Double Money): {np.mean(final_equity > double_level)*100:.1f}%
"""
ax.text(0.1, 0.9, summary_text, transform=ax.transAxes, fontsize=11,
        verticalalignment='top', fontfamily='monospace')

plt.tight_layout()
plt.show()

---

## Bonus Exercise: Commodity Signal Analysis

Apply Bayesian updating to analyze a real trading signal for crude oil.

**Scenario**: You believe crude oil tends to rise in the week following a large inventory draw (reported by EIA).

Your prior (from historical research): 58% chance oil rises after inventory draw.

Recent observations (last 25 inventory draws):
- Oil rose in 18 of them

**Tasks**:
1. Update your belief using Bayesian inference
2. Should you trade this signal? (Assume you need >65% confidence for a trade)
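3. How many more observations would you need for the posterior mean to reach 65%? (Check whether it already does, and state your assumption about how future observations turn out.)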

In [None]:
# Your solution here
# Hint: Translate the 58% prior belief to Beta parameters
# A reasonable choice: Beta(11.6, 8.4) has mean 0.58

# YOUR CODE HERE

---

## Solutions

Solutions are available in `solutions.ipynb`. Try to complete the exercises before checking!