# Data Exploration
This notebook loads the sample OHLCV data and then walks through basic statistics, price/volume charts, the returns distribution, autocorrelation, a stationarity check (ADF test), and a scan for missing data and outliers.

In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path("..").resolve()))
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from data.data_loader import load_ohlcv

In [None]:
# Read the sample OHLCV file, then make sure the "date" column holds
# datetimes so it can be used directly on plot axes.
df = load_ohlcv("../data/sample_data.csv")
df = df.assign(date=pd.to_datetime(df["date"]))
# Last expression of the cell: show the first ten rows.
df.head(10)

In [None]:
# Display pandas' default summary statistics for the loaded frame.
df.describe()

In [None]:
# Two stacked panels: closing price on top, traded volume underneath.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6))
price_ax, volume_ax = axes

price_ax.plot(df["date"], df["close"])
price_ax.set(title="Price over time", ylabel="Close")

# Bar width is expressed in x-axis data units.
volume_ax.bar(df["date"], df["volume"], width=1)
volume_ax.set(title="Volume", ylabel="Volume")

# Keep titles and tick labels of the two panels from overlapping.
fig.tight_layout()
plt.show()

In [None]:
# Simple close-to-close returns.
# fill_method=None stops pandas from forward-filling missing closes before
# differencing: the implicit fill default is deprecated (pandas >= 2.1) and
# would fabricate 0% returns across gaps in the data.
returns = df["close"].pct_change(fill_method=None)
# A zero previous close produces +/-inf; treat those as missing so they
# cannot break the histogram binning or the later analyses that reuse
# `returns`. The first row (no previous close) is dropped here as well.
returns = returns.replace([np.inf, -np.inf], np.nan).dropna()

plt.figure(figsize=(8, 4))
# Scale to percent to match the axis label.
plt.hist(returns * 100, bins=50, edgecolor="white")
plt.xlabel("Return (%)")
plt.title("Returns distribution")
plt.show()

In [None]:
from pandas.plotting import autocorrelation_plot

# Plot the autocorrelation of the return series and title the axes that
# the helper drew on.
acf_ax = autocorrelation_plot(returns)
acf_ax.set_title("Autocorrelation of returns")
plt.show()

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the return series. The first two entries
# of the result are the test statistic and its p-value.
adf_result = adfuller(returns.dropna())
adf_stat, adf_pvalue = adf_result[0], adf_result[1]

print("ADF statistic:", adf_stat)
print("p-value:", adf_pvalue)
# Reject the unit-root null (i.e. call the series stationary) at the 5% level.
print("Stationary:", adf_pvalue < 0.05)

In [None]:
# Total number of missing cells across every column of the frame.
print("Missing:", df.isna().sum().sum())

# IQR rule on the closing price: flag values lying more than 1.5 * IQR
# below the first quartile or above the third quartile.
close_px = df["close"]
Q1, Q3 = close_px.quantile(0.25), close_px.quantile(0.75)
IQR = Q3 - Q1
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR

outliers = (close_px < lower_fence) | (close_px > upper_fence)
print("Outlier count (IQR):", outliers.sum())