# Task 2: Bayesian Change Point Detection

## Objective
Detect a structural break in the Brent oil price time series using a Bayesian change point model. We identify the date of the most significant change in the mean of the log returns and associate it with historical events.

In [None]:
import sys
import os

# Add src to path
sys.path.append(os.path.abspath(os.path.join('..')))

from src.data_loader import load_data, calculate_log_returns
import pymc as pm
import arviz as az
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's white-grid theme to every figure drawn below.
# set_theme() is the preferred interface; sns.set() is only an alias for it
# and is documented as a candidate for removal.
sns.set_theme(style="whitegrid")
# Record the PyMC version in the notebook output so results can be traced
# back to the library version that produced them.
print(f"PyMC Version: {pm.__version__}")

## 1. Data Preparation
Load the data and calculate log returns.

In [None]:
# Read the Brent price file and pass it through the log-return helper.
# NOTE(review): calculate_log_returns presumably provides the 'Log_Returns'
# column read below -- confirm against src/data_loader.
file_path = '../data/BrentOilPrices.csv'
df = calculate_log_returns(load_data(file_path))

# Plot the log-return series against the frame's index.
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df.index, df['Log_Returns'], alpha=0.6)
ax.set_title('Brent Oil Price Log Returns')
ax.set_ylabel('Log Return')
plt.show()

## 2. Bayesian Change Point Model
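We model log returns as Gaussian with a shared standard deviation $\sigma$ and a mean that switches between two values at an unknown change point $\tau$. Writing $t$ for the integer position of an observation (`idx` in the code):

$$\mu_t = \text{switch}(\tau \ge t,\ \mu_2,\ \mu_1) = \begin{cases} \mu_2 & \text{if } t \le \tau \\ \mu_1 & \text{if } t > \tau \end{cases}$$

Note the ordering: $\mu_2$ is the mean up to and including the change point, and $\mu_1$ is the mean after it.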

In [None]:
# Prepare data for PyMC
y = df['Log_Returns'].values
n_samples = len(y)
idx = np.arange(n_samples)  # integer position of each observation: 0 .. n_samples - 1

# Fixed seed so that Restart & Run All reproduces the same posterior draws
# (and therefore the same reported change date and regime means).
RANDOM_SEED = 42

with pm.Model() as model:
    # Change point as an integer position; every position is equally likely a priori.
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_samples - 1)

    # One mean per regime, both with the same Normal(0, 0.1) prior.
    mu1 = pm.Normal("mu1", mu=0, sigma=0.1)
    mu2 = pm.Normal("mu2", mu=0, sigma=0.1)

    # A single standard deviation shared by both regimes.
    sigma = pm.HalfNormal("sigma", sigma=0.1)

    # Piecewise-constant mean. NOTE the ordering: positions with idx <= tau
    # get mu2 and positions with idx > tau get mu1, i.e. mu2 is the mean
    # *before* the change and mu1 the mean *after* it.
    mu = pm.math.switch(tau >= idx, mu2, mu1)

    # Likelihood of the observed log returns.
    obs = pm.Normal("obs", mu=mu, sigma=sigma, observed=y)

    # 500 tuning + 500 kept draws per chain, run on a single core.
    trace = pm.sample(
        500,
        tune=500,
        cores=1,
        random_seed=RANDOM_SEED,
        return_inferencedata=True,
        progressbar=False,
    )

## 3. Quantitative Impact Analysis

Now we quantify the statistical shift between the two identified regimes.

In [None]:
# Posterior-mean point estimates, pooled over all chains and draws.
tau_mean = int(trace.posterior['tau'].values.mean())
mu1_mean = float(trace.posterior['mu1'].values.mean())
mu2_mean = float(trace.posterior['mu2'].values.mean())
sigma_mean = float(trace.posterior['sigma'].values.mean())

# Translate the estimated integer position back into a date from the index.
change_date = df.index[tau_mean]

# The model defines the mean as switch(tau >= idx, mu2, mu1): observations at
# or before the change point use mu2 and observations after it use mu1.
# Chronologically, the first regime is therefore mu2 and the second is mu1;
# reporting mu1 as "Regime 1" would swap the two regimes.
regime1_mean = mu2_mean  # up to and including the change point
regime2_mean = mu1_mean  # after the change point

print(f"Structural Change Identified at: {change_date.date()}")
print(f"Mean Log-Return (Regime 1): {regime1_mean:.6f}")
print(f"Mean Log-Return (Regime 2): {regime2_mean:.6f}")
# The absolute shift is symmetric, so it does not depend on regime order.
print(f"Absolute Shift in Mean: {abs(regime2_mean - regime1_mean):.6f}")
print(f"Posterior Residual Volatility (Sigma): {sigma_mean:.6f}")

## 4. Causal Narratives & Event Association
We link our detected change point to the structured events dataset to understand the plausible drivers of this structural shift.

In [None]:
# Load the events table and convert its 'Date' column to datetimes.
events_df = pd.read_csv('../data/events_data.csv')
event_dates = pd.to_datetime(events_df['Date'])
events_df['Date'] = event_dates

# Whole-day gap between each event and the detected change point, made
# unsigned so that events before and after the change are treated alike.
signed_day_gap = (event_dates - change_date).dt.days
events_df['Days_Diff'] = signed_day_gap.abs()

# The event with the smallest gap is the first row after sorting by it.
events_by_gap = events_df.sort_values(by='Days_Diff')
key_event = events_by_gap.iloc[0]

print("\n".join([
    f"Detected Change Point: {change_date.date()}",
    f"Closest Event: {key_event['Event']} ({key_event['Date'].date()})",
    f"Description: {key_event['Description']}",
]))

print("\n--- Causal Hypothesis ---")
if "COVID" in key_event['Event'] or "Pandemic" in key_event['Event']:
    print("H1: The change point identifies the demand destruction peak where global lockdowns led to an unprecedented collapse in crude demand.")
    print("The model detects a shift in return behavior as markets moved from stability into extreme negative volatility and subsequent policy-driven recovery.")
elif "Gulf War" in key_event['Event']:
    print("H1: The change point aligns with the supply-shock expectation at the start of the conflict, as speculators priced in the risk of sustained Middle Eastern disruption.")