In [None]:
# 1. IMPORTS - KEEP NOTEBOOK CLEAN
# ====================================================
# Run this cell before any other: later cells use `pd` and `np` from here and
# fail with "NameError: name 'pd' is not defined" when it has not been executed.

import sys
# Extend the module search path with '../src'. The path is relative to the
# kernel's working directory, and the append has to happen before the three
# project-local imports below.
# NOTE(review): presumably bayesian_model, change_point_analyzer and
# visualization live in ../src -- confirm against the repository layout.
sys.path.append('../src')

from bayesian_model import BayesianChangePointModel
from change_point_analyzer import ChangePointAnalyzer
from visualization import Task2Visualizer

import pandas as pd
import numpy as np
import warnings
# Blanket filter: hides every warning category for the rest of the session,
# including deprecation warnings raised by pandas/numpy.
warnings.filterwarnings('ignore')

print("âœ… Modules loaded successfully")

In [1]:
# 2. LOAD DATA FROM TASK 1
# ====================================================
# Reads the price and event CSVs from ../data/raw, converts their date columns
# to datetimes, and prints a short summary of what was loaded.

print("ðŸ“Š Loading preprocessed data from Task 1...")

# Price series: parse 'Date' as part of the load chain.
price_df = pd.read_csv('../data/raw/BrentOilPrices.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Event list: same treatment for 'Start_Date'.
events_df = pd.read_csv('../data/raw/events_1987_2022.csv').assign(
    Start_Date=lambda frame: pd.to_datetime(frame['Start_Date'])
)

n_price_rows = len(price_df)
print(f"âœ… Price data: {n_price_rows:,} records")

n_event_rows = len(events_df)
print(f"âœ… Event data: {n_event_rows} events")

# First and last calendar year covered by the price series.
price_years = price_df['Date'].dt.year
print(f"ðŸ“… {price_years.min()}-{price_years.max()}")

ðŸ“Š Loading preprocessed data from Task 1...


NameError: name 'pd' is not defined

In [None]:
# 3. DATA PREPARATION FOR BAYESIAN MODELING
# ====================================================
# Adds two derived columns to price_df -- 'Log_Returns' and 'Returns' -- and
# prints their headline statistics in percent.

print("\nðŸ“ˆ Preparing data for change point analysis...")

prices = price_df['Price']

# Log returns: first difference of the log price.
price_df['Log_Returns'] = np.log(prices).diff()
# Simple returns: period-over-period fractional change.
price_df['Returns'] = prices.pct_change()

print("âœ… Calculated returns and log returns")

simple_returns = price_df['Returns']
log_returns = price_df['Log_Returns']

returns_mean = simple_returns.mean()
print(f"ðŸ“Š Returns mean: {returns_mean*100:.2f}%")

returns_vol = simple_returns.std()
print(f"ðŸ“Š Returns volatility: {returns_vol*100:.2f}%")

log_returns_mean = log_returns.mean()
print(f"ðŸ“Š Log returns mean: {log_returns_mean*100:.4f}%")

In [None]:
# Alternative 2: Use frequentist method (MUCH faster)
#
# Runs the PELT search from `ruptures` (RBF cost) on the raw price level, then
# reports, for every detected change point, the relative shift between the mean
# price of the rows just before it and the rows starting at it.
print("ðŸ”„ Using FREQUENTIST change point detection (instant)...")
print("="*60)

# NOTE(review): GraphicalLassoCV is not used in this cell; it is kept only in
# case a later cell relies on the name -- confirm and remove if unused.
from sklearn.covariance import GraphicalLassoCV
import ruptures as rpt  # pip install ruptures

PELT_PENALTY = 10  # penalty value handed to predict()
WINDOW_ROWS = 30   # rows (observations, not calendar days) averaged on each side

# Convert to numpy array
signal = price_df['Price'].values

# Use PELT algorithm
algo = rpt.Pelt(model="rbf").fit(signal)
change_points_idx = algo.predict(pen=PELT_PENALTY)  # sorted indices; final entry is the end of the signal

# ruptures closes the breakpoint list with the index of the end of the signal,
# which is not a change point. Exclude it from both the count and the report;
# change_points_idx itself is left exactly as predict() returned it.
interior_idx = [idx for idx in change_points_idx if idx < len(signal)]

print(f"ðŸŽ¯ Found {len(interior_idx)} change points instantly:")

for i, idx in enumerate(interior_idx, 1):
    date = price_df['Date'].iloc[idx]
    # Only the lower bound needs clamping: .iloc slices already stop at the
    # end of the frame when the upper bound runs past it.
    price_before = price_df['Price'].iloc[max(0, idx - WINDOW_ROWS):idx].mean()
    price_after = price_df['Price'].iloc[idx:idx + WINDOW_ROWS].mean()
    pct_change = ((price_after/price_before) - 1) * 100

    print(f"  {i}. {date.date()}: Î”={pct_change:.1f}%")