# 01_Data_Exploration

This notebook performs data acquisition, cleaning, and exploratory analysis on the S&P 500 spot data.


In [None]:
# 1.1 Install & Import
%pip install yfinance pandas numpy matplotlib seaborn

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.utils.data_utils import download_market_data, calculate_rolling_metrics
from src.config.settings import get_config

%matplotlib inline


In [None]:
# 1.2 Load or Download Data
# Read the 'data' section of the project config, hand its symbol, date range
# and interval to download_market_data, then pass the result through
# calculate_rolling_metrics. The final expression previews the result
# (presumably a pandas DataFrame — confirm against the helper's return type).
cfg = get_config('data')
download_kwargs = {
    field: cfg[field]
    for field in ('symbol', 'start_date', 'end_date', 'interval')
}
df = calculate_rolling_metrics(download_market_data(**download_kwargs))
df.head()


## 1.3 Summary Statistics


In [None]:
# Descriptive statistics, transposed so that each described column becomes a row.
summary_stats = df.describe()
summary_stats.transpose()


## 1.4 Time Series Plots


In [None]:
# Two stacked panels: the 'close' column on top, 'realized_vol_10d' below,
# both plotted against the 'date' column.
# NOTE(review): the correlation cell in this notebook reads 'vol_10d', whereas
# this cell reads 'realized_vol_10d' — confirm calculate_rolling_metrics
# produces both column names.
fig, axes = plt.subplots(2,1, figsize=(12,8))
price_ax, vol_ax = axes

price_ax.plot(df['date'], df['close'])
price_ax.set_title("S&P 500 Close Price")
price_ax.set_ylabel("Close")

vol_ax.plot(df['date'], df['realized_vol_10d'])
vol_ax.set_title("10-Day Realized Volatility")
vol_ax.set_xlabel("Date")
vol_ax.set_ylabel("Realized volatility")

# Re-pack the layout so titles and the added axis labels do not overlap.
plt.tight_layout()


## 1.5 Correlation of Features


In [None]:
# Pairwise correlations between the return, volatility and momentum columns.
# NOTE(review): this cell reads 'vol_10d', while the time-series cell plots
# 'realized_vol_10d' — confirm both columns exist in df.
feature_cols = ['return','vol_10d','vol_20d','momentum_5d','momentum_20d']
corr = df[feature_cols].corr()

# Explicit figure/axes instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
# Correlations live in [-1, 1]; pinning the colour limits symmetrically keeps
# the neutral midpoint of the diverging 'coolwarm' map at zero correlation.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, ax=ax)
# Trailing semicolon suppresses the Text(...) repr in the cell output.
ax.set_title("Feature Correlation Matrix");
