# 01 - Data Cleaning
Download and inspect raw price data, compute log returns and basic diagnostics, and save the resulting samples to `data/processed/`.

In [None]:
import sys
from pathlib import Path

# Locate the repository root, i.e. the directory that contains src/. When the
# notebook is started from notebooks/, the working directory has no src/
# folder, so its parent directory is used instead.
ROOT = Path.cwd() if (Path.cwd() / 'src').exists() else Path.cwd().parent

# Register the root on sys.path only once, so re-running this cell does not
# pile up duplicate entries.
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))

print(f"Using project root: {ROOT}")

In [None]:
import pandas as pd
from src.data_loader import download_price_data
from src.returns import log_returns

# Download parameters: ticker symbol and date range. END = None is passed
# through to download_price_data unchanged.
TICKER = 'SPY'
START = '2018-01-01'
END = None

prices = download_price_data(TICKER, START, END)

# Keep the unfiltered return series so the missing-value diagnostic below
# reports on the computed returns rather than on the already-cleaned ones.
raw_returns = log_returns(prices['adj_close'])
returns = raw_returns.dropna()

display(prices.head())
display(returns.describe())

# Simple missing check. Returns are counted before dropna(): counting them on
# `returns` would always print 0, because its NaNs have just been removed.
# NOTE(review): presumably the first return is NaN (no previous price), so a
# count of 1 is expected here -- confirm against src.returns.log_returns.
print('Missing prices:', prices.isna().sum().sum())
print('Missing returns:', raw_returns.isna().sum())

# Persist the inspected sample under <ROOT>/data/processed, creating the
# directory (and any missing parents) on the first run.
processed_dir = ROOT.joinpath('data', 'processed')
processed_dir.mkdir(parents=True, exist_ok=True)

# One CSV per object: the price table and the cleaned return series.
prices.to_csv(processed_dir.joinpath('sample_prices.csv'))
returns.to_csv(processed_dir.joinpath('sample_returns.csv'))

print(f'Saved sample data to {processed_dir}')