In [1]:
import pandas as pd
import numpy as np

# Load your data (update the path as needed)
df = pd.read_csv("../data/raw/BrentOilPrices.csv")

# Standardize column names if needed
df.columns = [col.strip().lower() for col in df.columns]

# Convert 'date' column to datetime
df['date'] = pd.to_datetime(df['date'], dayfirst=True)  # dayfirst=True because your date format is like '20-May-87'

# Sort by date
df = df.sort_values('date').reset_index(drop=True)

# Compute log returns
df['log_return'] = np.log(df['price']) - np.log(df['price'].shift(1))

# Drop the first NA log return
df.dropna(inplace=True)

# Now you can use:
log_returns = df['log_return'].values
n = len(log_returns)


  df['date'] = pd.to_datetime(df['date'], dayfirst=True)  # dayfirst=True because your date format is like '20-May-87'


In [9]:
import numpy.distutils.system_info
print("numpy.distutils.system_info imported successfully")


ModuleNotFoundError: No module named 'distutils.msvccompiler'

In [8]:
import pymc3 as pm
import theano.tensor as tt
import numpy as np
import matplotlib.pyplot as plt

# Assume log_returns and n are defined as above

with pm.Model() as model:
    # 1. Prior for change point (tau)
    tau = pm.DiscreteUniform('tau', lower=0, upper=n-1)

    # 2. Priors for means before and after change point
    mu1 = pm.Normal('mu1', mu=0, sigma=1)
    mu2 = pm.Normal('mu2', mu=0, sigma=1)

    # 3. Priors for standard deviations before and after change point
    sigma1 = pm.HalfNormal('sigma1', sigma=1)
    sigma2 = pm.HalfNormal('sigma2', sigma=1)

    # 4. Piecewise mean and sigma vectors based on tau
    idx = np.arange(n)
    mu = tt.switch(tau >= idx, mu1, mu2)
    sigma = tt.switch(tau >= idx, sigma1, sigma2)

    # 5. Likelihood
    obs = pm.Normal('obs', mu=mu, sigma=sigma, observed=log_returns)

    # 6. Sample posterior
    trace = pm.sample(2000, tune=1000, cores=2, random_seed=42)
    
# Plotting

pm.plot_trace(trace, var_names=['tau', 'mu1', 'mu2', 'sigma1', 'sigma2'])
plt.show()

# Plot posterior distribution of change point (tau)
plt.hist(trace['tau'], bins=50)
plt.title('Posterior distribution of change point (tau)')
plt.xlabel('Time index')
plt.ylabel('Frequency')
plt.show()

# Map tau index to date
tau_mean = int(np.mean(trace['tau']))
change_point_date = df['date'].iloc[tau_mean]
print(f"Estimated change point date: {change_point_date}")

# Compare mu1 and mu2
plt.hist(trace['mu1'], bins=30, alpha=0.6, label='Mean Before')
plt.hist(trace['mu2'], bins=30, alpha=0.6, label='Mean After')
plt.legend()
plt.title('Posterior distributions of mean log returns')
plt.show()


AttributeError: partially initialized module 'theano' has no attribute 'compile' (most likely due to a circular import)