# Task 2: Bayesian Change Point Detection

## Objective
Detect structural breaks in the Brent oil price time series using a Bayesian Change Point model. We will identify the date of significant changes in the mean of log returns (or volatility) and associate them with historical events.


In [None]:
import sys
import os

# Add src to path
# The parent of the current working directory is appended to sys.path so that
# the `src` package imported below can be resolved. This must run before the
# `from src...` import. NOTE(review): assumes the notebook is launched from a
# directory one level below the project root — confirm.
sys.path.append(os.path.abspath(os.path.join('..')))

from src.data_loader import load_data, calculate_log_returns
import pymc as pm
import arviz as az
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "whitegrid" style for the plots drawn in later cells.
sns.set(style="whitegrid")
# Print the installed PyMC version so results can be tied to a library release.
print(f"PyMC Version: {pm.__version__}")


## 1. Data Preparation
Load the data and calculate log returns. Log returns are often preferred for financial time series modeling as they are more likely to be stationary.


In [None]:
# Read the raw price file and derive the log-return series from it.
# Both steps are delegated to the project's data_loader helpers.
file_path = '../data/BrentOilPrices.csv'
df = calculate_log_returns(load_data(file_path))

# Draw the log-return series against the DataFrame index on a wide figure.
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df.index, df['Log_Returns'], alpha=0.6)
ax.set_title('Brent Oil Price Log Returns')
ax.set_ylabel('Log Return')
plt.show()


## 2. Bayesian Change Point Model
We will model the log returns as coming from two different Gaussian distributions, separated by a change point $\tau$.

$$
\begin{aligned}
y_t &\sim \mathcal{N}(\mu_t, \sigma) \\
\mu_t &= \begin{cases} \mu_1 & \text{if } t < \tau \\ \mu_2 & \text{if } t \ge \tau \end{cases}
\end{aligned}
$$

Priors:
- $\tau \sim \text{DiscreteUniform}(0, T - 1)$, where $T$ is the number of observations
- $\mu_1, \mu_2 \sim \mathcal{N}(0, 0.1)$
- $\sigma \sim \text{HalfNormal}(0.1)$


In [None]:
# Prepare data for PyMC
y = df['Log_Returns'].values
n_samples = len(y)
# Integer position of every observation; compared against tau to pick a regime.
idx = np.arange(n_samples)

# Fixed seed so that re-running the notebook reproduces the same posterior draws.
RANDOM_SEED = 42

with pm.Model() as model:
    # Priors
    # tau: index of the first observation that belongs to the second regime.
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_samples - 1)
    # mu1 / mu2: mean log return before / from the change point.
    mu1 = pm.Normal("mu1", mu=0, sigma=0.1)
    mu2 = pm.Normal("mu2", mu=0, sigma=0.1)
    # sigma: standard deviation shared by both regimes.
    sigma = pm.HalfNormal("sigma", sigma=0.1)

    # Switch function
    # switch(condition, value_if_true, value_if_false): observations strictly
    # before tau take mu1, observations at or after tau take mu2, so that the
    # posterior labels match the model stated above (mu1 = "before",
    # mu2 = "after").
    mu = pm.math.switch(idx < tau, mu1, mu2)

    # Likelihood
    obs = pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

    # Sampling
    trace = pm.sample(
        1000,
        tune=1000,
        cores=1,
        random_seed=RANDOM_SEED,
        return_inferencedata=True,
    )


## 3. Model Interpretation
Check convergence and visualize the posterior distributions.


In [None]:
# Trace Plot
# Visual convergence check: ArviZ draws one row per sampled variable in `trace`.
az.plot_trace(trace)
plt.show()

# Summary Statistics
# Tabulate posterior summaries for the sampled variables; the table is kept in
# `summary` and printed below.
summary = az.summary(trace)
print(summary)


## 4. Identifying the Change Point
Extract the posterior distribution of $\tau$ and map it back to the original dates.


In [None]:
# Posterior of Tau
# Pool the draws from all chains into one flat array of candidate indices.
tau_posterior = trace.posterior['tau'].values.flatten()

# Truncated posterior mean, kept for reference and for any later cell using it.
tau_mean = int(tau_posterior.mean())

# tau is a discrete index whose posterior can be skewed or multimodal, in which
# case the mean may fall on an index that carries little posterior mass.
# The most frequently sampled index (posterior mode) is used as the point
# estimate of the change point instead.
tau_values, tau_counts = np.unique(tau_posterior, return_counts=True)
tau_mode = int(tau_values[tau_counts.argmax()])

# Map back to date
change_date = df.index[tau_mode]
print(f"Detected Change Point Date: {change_date.date()}")

# Plot
plt.figure(figsize=(15, 6))
plt.plot(df.index, df['Log_Returns'], alpha=0.5, label='Log Returns')
plt.axvline(change_date, color='red', linestyle='--', label=f'Change Point ({change_date.date()})')
plt.legend()
plt.title('Detected Change Point in Log Returns')
plt.show()


## 5. Event Association
Load the compiled events data and check for proximity to the detected change point.


In [None]:
# Read the compiled events table and convert its 'Date' column to datetimes.
events_df = pd.read_csv('../data/events_data.csv')
events_df['Date'] = pd.to_datetime(events_df['Date'])

# Absolute distance, in whole days, between each event and the change point.
offset_from_change = events_df['Date'] - change_date
events_df['Days_Diff'] = offset_from_change.dt.days.abs()

# Keep the three events with the smallest distance.
ranked_events = events_df.sort_values('Days_Diff')
closest_events = ranked_events.head(3)

report_columns = ['Date', 'Event', 'Description', 'Days_Diff']
print("Closest Historical Events to Detected Change Point:")
print(closest_events[report_columns])
