In [2]:
# Cell 1: Imports and Data Loading
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
import arviz as az

# Inputs come from the data ingestion and event research scripts, which are
# assumed to have been run already: the cleaned Brent price series (indexed
# by its 'Date' column) and the table of geopolitical events.
brent_prices_df = pd.read_csv(
    '../data/processed/brent_prices_cleaned.csv',
    index_col='Date',
    parse_dates=True,
)
events_df = pd.read_csv(
    '../data/processed/geopolitical_events.csv',
    parse_dates=['event_date'],
)

# The simple model works directly on raw price levels; log returns would be
# a good alternative input for a more advanced model.
prices = brent_prices_df['Price'].values

# Number of observations in the price series.
N = len(prices)

# Cell 2: Define and Run the Change Point Model
# Conceptual single change point model: the mean of the prices is assumed to
# shift once, at an unknown position `tau` in the series, while the standard
# deviation is shared by both regimes.

with pm.Model() as oil_price_model:

    # 1. Define the switch point (tau)
    # A discrete uniform prior over the *index* of the change point.
    # DiscreteUniform bounds are inclusive, so the upper bound must be N - 1:
    # with upper=N the sampler could return tau == N, which is not a valid
    # row index and is indistinguishable from tau == N - 1 in the switch below.
    tau = pm.DiscreteUniform('tau', lower=0, upper=N - 1)

    # 2. Define the "Before" and "After" parameters (mean and standard deviation)
    # Mean price levels before and after the change point, both centred on
    # the overall sample mean.
    # NOTE(review): sd=5 is a fairly tight prior if prices are in USD/barrel
    # and range widely; confirm it is intentional.
    mu_before = pm.Normal('mu_before', mu=prices.mean(), sd=5)
    mu_after = pm.Normal('mu_after', mu=prices.mean(), sd=5)

    # Standard deviation (volatility) of the prices. An Exponential prior is
    # used because a standard deviation must be positive.
    sigma = pm.Exponential('sigma', lam=1)

    # 3. Use a switch function to select the correct mean based on tau
    # `pm.math.switch` picks mu_before for every index <= tau and mu_after
    # for every index > tau, i.e. tau is the last index of the "before" regime.
    day_index = np.arange(N)
    mu = pm.math.switch(tau >= day_index, mu_before, mu_after)

    # 4. Define the likelihood: connect the model to the data
    # The observed prices are assumed to be normally distributed around the
    # regime-dependent mean.
    observation = pm.Normal('observation', mu=mu, sd=sigma, observed=prices)

    # 5. Run the sampler (MCMC simulation)
    # `tune` is the number of warm-up iterations, `draws` the number of
    # posterior samples kept per chain. The result is returned as an ArviZ
    # InferenceData object.
    trace = pm.sample(draws=2000, tune=1000, cores=2, return_inferencedata=True)

# Cell 3: Interpreting the Model Output
# Check for convergence and inspect the model. The summary table is printed
# explicitly: a bare expression in the middle of a cell is evaluated and then
# discarded, so it would otherwise never be shown.
summary_df = az.summary(trace, round_to=2)
print(summary_df)
# `trace` is an ArviZ InferenceData object, so plot it with ArviZ directly.
az.plot_trace(trace)
plt.show()

# Identify the change point (tau): pool the samples from all chains and draws.
tau_samples = trace.posterior['tau'].values.flatten()
plt.figure(figsize=(10, 6))
plt.hist(tau_samples, bins=50)
plt.title('Posterior Distribution of the Change Point (tau)')
plt.xlabel('Day Index')
plt.ylabel('Frequency')
plt.show()

# Get the most probable change point date (posterior mode of tau).
# Clamp to the last valid row so a tau sampled at the very end of its support
# can never index past the data.
most_probable_tau = min(
    int(pd.Series(tau_samples).mode()[0]),
    len(brent_prices_df.index) - 1,
)
change_point_date = brent_prices_df.index[most_probable_tau]
print(f"The most probable change point occurred on: {change_point_date.strftime('%Y-%m-%d')}")

# Visualize the impact (before vs. after prices)
az.plot_posterior(trace, var_names=['mu_before', 'mu_after'])
plt.show()

# Associate with events
# Compare `change_point_date` with your `events_df` to find a match.
# This is a manual step based on your research from Task 1.

ModuleNotFoundError: No module named 'pymc3'