## 02 – Daily log returns

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
# Locate the raw price file and fail early with a readable message if it
# is absent.
prices_path = Path("../data/prices_2018_2024.parquet")
assert prices_path.exists(), f"Missing input file: {prices_path}"

# Read the prices and order the rows by index in a single step.
prices = pd.read_parquet(prices_path).sort_index()

In [3]:
# Flag every row in which at least one column holds a zero or negative
# price; the logarithm is undefined for those values.
non_positive_mask = prices.le(0).any(axis=1)
prices_pos = prices[~non_positive_mask].copy()

# Ratio of each price to the previous *remaining* row. Because flagged rows
# were removed beforehand, the return that follows a removed row spans the gap.
price_ratio = prices_pos / prices_pos.shift()

# Rows containing any NaN (always including the first row, which has no
# predecessor) are discarded.
log_returns = np.log(price_ratio).dropna()

assert not log_returns.empty, "log_returns is empty after cleaning; check input price data."

In [4]:
# Sanity checks on the cleaned returns before they are written to disk.

# Index must be ordered and free of duplicates.
assert log_returns.index.is_monotonic_increasing, "Index is not sorted."
assert log_returns.index.is_unique, "Duplicate dates in log_returns index."

# Values must be present and finite (no NaN, +inf or -inf).
assert log_returns.notna().all().all(), "Missing values found in log_returns."
assert np.isfinite(log_returns.to_numpy()).all(), "Non-finite values found in log_returns."

# The required tickers must all be present; additional columns are allowed.
# The failure message lists exactly which tickers are absent.
required_columns = {"CL=F", "XLE", "ICLN"}
missing_columns = sorted(required_columns.difference(log_returns.columns))
assert not missing_columns, f"Missing expected columns in log_returns: {missing_columns}"

In [5]:
# Persist the log returns as parquet, creating the target directory first
# if it does not exist yet.
log_returns_path = Path("../data/log_returns_2018_2024.parquet")
output_dir = log_returns_path.parent
output_dir.mkdir(parents=True, exist_ok=True)
log_returns.to_parquet(log_returns_path)