# Task 2: Change Point Modeling and Insight Generation

## 1. Data Preparation and EDA
We investigate the properties of Brent oil prices, specifically focusing on log returns and volatility clustering.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pymc as pm
import arviz as az
import os

%matplotlib inline
sns.set_theme(style="whitegrid")

# Read the cleaned Brent price file and index the frame by its parsed 'Date' column
data_path = "../data/processed/BrentOilPrices_cleaned.csv"
df = pd.read_csv(data_path, parse_dates=['Date']).set_index('Date')

# 1.1 Raw price series over the full sample
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(df.index, df['Price'], label='Raw Brent Price')
ax.set_title('Historical Brent Oil Prices (1987-2026)')
ax.set_xlabel('Date')
ax.set_ylabel('Price (USD)')
ax.legend()
plt.show()

### 1.2 Log Returns and Volatility Clustering
We analyze log returns: $r_t = \log(P_t) - \log(P_{t-1})$ to observe stationarity and volatility clustering.

In [None]:
# Log-transform prices, then difference to obtain daily log returns.
df['Log_Price'] = np.log(df['Price'])
# diff() leaves NaN in the first row. Chaining .dropna() here would have no
# effect: assigning back to a column realigns on the index and restores the
# NaN, so the call is omitted. matplotlib simply skips the NaN point.
df['Log_Returns'] = df['Log_Price'].diff()

plt.figure(figsize=(15, 6))
plt.plot(df.index, df['Log_Returns'], label='Log Returns', alpha=0.7, color='orange')
plt.title('Daily Log Returns of Brent Oil Prices')
plt.xlabel('Date')
plt.ylabel('Log Return')
plt.legend()
plt.show()

print("Volatility clustering is visible during major market shocks (1990, 2008, 2014, 2020).")

## 2. Bayesian Change Point Model (PyMC)
We build a simple model to detect a structural break in the mean log-price during the 2018-2022 period.

In [None]:
# Restrict the analysis to the 2018-2022 window (both end dates inclusive)
window_start, window_end = '2018-01-01', '2022-12-31'
in_window = (df.index >= window_start) & (df.index <= window_end)
df_model = df[in_window].copy()

# Observed series: log-prices inside the window, plus their positional index
data = df_model['Log_Price'].values
n_data = len(data)
idx = np.arange(n_data)

# Empirical moments of the observed series, reused as prior hyper-parameters
prior_center = data.mean()
prior_scale = data.std()

with pm.Model() as model:
    # 2.1 Switch point: discrete uniform prior over every observation index
    tau = pm.DiscreteUniform("tau", lower=0, upper=n_data - 1)

    # 2.2 Regime means before (mu_1) and after (mu_2) the switch point
    mu_1 = pm.Normal("mu_1", mu=prior_center, sigma=prior_scale)
    mu_2 = pm.Normal("mu_2", mu=prior_center, sigma=prior_scale)

    # A single noise scale shared by both regimes, for simplicity
    sigma = pm.HalfNormal("sigma", sigma=prior_scale)

    # 2.3 Observations at positions strictly below tau use mu_1, the rest mu_2
    before_break = tau > idx
    mu_ = pm.math.switch(before_break, mu_1, mu_2)

    # 2.4 Gaussian likelihood of the observed log-prices
    observation = pm.Normal("obs", mu=mu_, sigma=sigma, observed=data)

    # 2.5 Draw posterior samples
    print("Starting MCMC sampling...")
    trace = pm.sample(2000, tune=1000, target_accept=0.9, cores=1, random_seed=42)

## 3. Interpret Model Output

### 3.1 Check for Convergence
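We examine the r_hat values (values close to 1.0 indicate that the chains have converged) and the trace plots (which should look well mixed, with no visible trend).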

In [None]:
# Tabulate posterior summary statistics for the fitted trace, then draw its trace plots
convergence_table = az.summary(trace)
print(convergence_table)

az.plot_trace(trace)
plt.show()

### 3.2 Identify the Change Point
We plot the posterior distribution of tau to see the most likely date of the structural break.

In [None]:
# All posterior draws of the switch-point index as one flat array
tau_samples = trace.posterior["tau"].values.flatten()

plt.figure(figsize=(10, 4))
# Weight each draw by 1/N so bar heights are posterior probabilities that sum
# to 1. The unweighted histogram showed raw counts, which did not match a
# probability label on the y-axis.
draw_weights = np.full(tau_samples.shape, 1.0 / tau_samples.size)
plt.hist(tau_samples, bins=n_data, weights=draw_weights, color="blue", alpha=0.7)
plt.title("Posterior Distribution of Change Point (tau)")
plt.xlabel("Index")
plt.ylabel("Posterior Probability")
plt.show()

# Report the posterior mode, i.e. the single most probable switch index.
# The mean of a discrete index can fall on a low-probability day when the
# posterior is skewed or multimodal.
tau_values, tau_counts = np.unique(tau_samples, return_counts=True)
tau_mode = int(tau_values[tau_counts.argmax()])
# Truncated posterior mean, kept so any cell that still reads it keeps working
tau_mean = int(tau_samples.mean())

cp_date = df_model.index[tau_mode]
print(f"Detected Change Point Date: {cp_date.date()}")

### 3.3 Quantify the Impact
We compare the posterior mean price levels before and after the change point, back-transformed from log-prices to USD.

In [None]:
# Back-transform the posterior draws of both regime means from log-price to price
mu1_samples = np.exp(trace.posterior["mu_1"].values).ravel()
mu2_samples = np.exp(trace.posterior["mu_2"].values).ravel()

# Posterior-mean price level in each regime and the relative shift between them
price_before = mu1_samples.mean()
price_after = mu2_samples.mean()
pct_change = (price_after - price_before) / price_before * 100

print(f"Average price before break: ${price_before:.2f}")
print(f"Average price after break: ${price_after:.2f}")
print(f"Percentage change: {pct_change:.2f}%")

## 4. Advanced Extensions (Discussion)

### 4.1 Incorporating Other Factors
To build a more comprehensive model, we could include exogenous variables:
- **Global GDP Growth**: Influences overall demand levels.
- **US Dollar Index (DXY)**: Oil prices are often inversely correlated with USD strength.
- **OPEC Spare Capacity**: A measure of supply tightness.

### 4.2 Alternative Modeling Approaches
- **VAR (Vector Autoregression)**: Useful for capturing multi-directional dependencies between oil prices, exchange rates, and stock indices.
- **Markov-Switching Models**: These explicitly model shifts between 'bull' and 'bear' market states, allowing for recurrent regimes rather than permanent structural breaks.