In [10]:
# Imports 
import sys
sys.path.append("..")

%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib

from src.data_loader import BrentDataLoader
from src.time_series_analysis import TimeSeriesAnalyzer
from src.event_compiler import EventCompiler
from src.change_point_model import SingleChangePointModel
from src.event_analyzer import EventImpactAnalyzer

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
# CELL 2: Load and prepare data
loader = BrentDataLoader("../data/raw/BrentOilPrices.csv")
df = loader.load()
print(df.head())

        Date  Price
0 1987-05-20  18.63
1 1987-05-21  18.45
2 1987-05-22  18.55
3 1987-05-25  18.60
4 1987-05-26  18.63


In [3]:
df = df.set_index('Date').sort_index()  # Now 'Date' column exists
log_returns = np.log(df['Price'] / df['Price'].shift(1)).dropna()
print(f"✓ Computed log returns for {len(log_returns)} days")
print(f"✓ Date range: {log_returns.index.min().date()} to {log_returns.index.max().date()}")

✓ Computed log returns for 9010 days
✓ Date range: 1987-05-21 to 2022-11-14


In [4]:
mask = (log_returns.index >= '2012-01-01') & (log_returns.index <= '2022-12-31')
log_returns_filtered = log_returns[mask]
print(f"✓ Filtered to {len(log_returns_filtered)} observations (2012-2022)")


✓ Filtered to 2760 observations (2012-2022)


In [5]:
# Build and run Bayesian model 
from src.change_point_model import SingleChangePointModel

model = SingleChangePointModel(
    log_returns_filtered.values,
    log_returns_filtered.index
)
model.build_model()
trace = model.sample(draws=10, tune=10, chains=2, target_accept=0.85)
print("✓ Sampling complete")

Only 10 samples per chain. Reliable r-hat and ESS diagnostics require longer chains for accurate estimate.
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [tau_raw, mu1, mu2, sigma1, sigma2]
  return 0.5 * np.dot(x, v_out)
Sampling 2 chains for 10 tune and 10 draw iterations (20 + 20 draws total) took 314 seconds.
The number of samples is too small to check convergence reliably.


✓ Sampling complete


In [6]:

# Diagnose convergence
diagnostics = model.diagnose_convergence()
print(f"Convergence check: R̂={diagnostics['r_hat_max']:.3f}, ESS={diagnostics['ess_min']:.0f}")
print(f"Model converged: {diagnostics['converged']}")

Convergence check: R̂=3.220, ESS=5
Model converged: False


In [7]:
# Extract and visualize change point
cp_results = model.extract_change_point_date()
print(f"\nDetected change point: {cp_results['mode_date'].date()}")
print(f"95% credible interval: {cp_results['credible_interval'][0].date()} to {cp_results['credible_interval'][1].date()}")

model.plot_posterior_tau(save_path="../docs/posterior_tau.png")
print("✓ Posterior distribution saved to docs/posterior_tau.png")


Detected change point: 2018-07-06
95% credible interval: 2018-04-12 to 2018-09-17
✓ Posterior distribution saved to docs/posterior_tau.png


In [8]:
# Quantify impact
impact = model.quantify_impact()
print(f"\nImpact quantification:")
print(f"  Mean shift: {impact['mean_shift_median']:.4f} "
      f"(95% CI: {impact['mean_shift_ci'][0]:.4f} to {impact['mean_shift_ci'][1]:.4f})")
print(f"  P(mean increase >1%): {impact['prob_mean_increase']:.1%}")


Impact quantification:
  Mean shift: 0.2036 (95% CI: -0.2704 to 0.4152)
  P(mean increase >1%): 90.0%


In [9]:
# Associate with events
events = EventCompiler("../data/raw/events.csv").load()
analyzer = EventImpactAnalyzer(events)

matches = analyzer.associate_change_points(cp_results['mode_date'], window_days=7)
if matches:
    print(f"\nAssociated events within ±7 days:")
    for i, event in enumerate(matches[:3], 1):  # Top 3 matches
        print(f"  {i}. {event['date'].date()}: {event['event_type']} - {event['description'][:60]}...")
    
    # Generate impact statement for top match
    impact_statement = analyzer.quantify_event_impact(
        event=matches[0],
        impact_results=impact,
        price_level_before=df.loc[df['Date'] < cp_results['mode_date'], 'Price'].mean(),
        price_level_after=df.loc[df['Date'] >= cp_results['mode_date'], 'Price'].mean()
    )
    print(f"\nImpact statement:\n{impact_statement}")
else:
    print(f"\nNo events found within ±7 days of change point ({cp_results['mode_date'].date()})")

✅ Loaded 15 events from 2012-06-23 to 2022-10-05

No events found within ±7 days of change point (2018-07-06)
