# Load ETH Pipeline Parquet Files

This notebook opens the parquet outputs from the ETH pipeline and loads each into a pandas DataFrame.
It also prints shape/columns and previews a few rows for quick sanity checks.


In [1]:
import os
from pathlib import Path

import pandas as pd
from IPython.display import display

# Best-effort: populate the environment from a local .env file when
# python-dotenv is available. A failure here is never fatal, but it is
# reported instead of being silently swallowed so that a missing
# PROJECT_ROOT is easy to diagnose.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    print("python-dotenv is not installed; using the existing process environment only.")
except Exception as exc:
    print(f"Could not load .env ({exc!r}); using the existing process environment only.")

# PROJECT_ROOT is read from the environment (possibly filled in by .env above).
PROJECT_ROOT = os.getenv("PROJECT_ROOT","")
if not PROJECT_ROOT:
    # Path("") resolves to the current working directory; make that fallback visible.
    print("PROJECT_ROOT is not set; falling back to the current working directory.")
project_root = Path(PROJECT_ROOT).expanduser().resolve()
data_path = project_root / "data"

# Logical dataset name -> file name of the corresponding pipeline parquet output.
FILENAMES = dict(
    chainlink_rounds="chainlink_rounds.parquet",
    ethusd_hourly="ethusd_hourly.parquet",
    deribit_eth_dvol_raw="deribit_eth_dvol_raw.parquet",
    eth_iv_index_hourly="eth_iv_index_hourly.parquet",
    deribit_eth_option_surface_raw="deribit_eth_option_surface_raw.parquet",
    eth_iv_surface_hourly="eth_iv_surface_hourly.parquet",
    eth_training_hourly="eth_training_hourly.parquet",
)

def resolve_parquet(name: str, filename: str, base_dir: "Path | None" = None) -> Path:
    """Build the expected on-disk location of one pipeline parquet file.

    Args:
        name: Logical dataset name (a key of ``FILENAMES``). Accepted for
            call-site symmetry; it does not influence the result.
        filename: File name of the parquet output, e.g. ``"ethusd_hourly.parquet"``.
        base_dir: Directory that holds the file. Defaults to the module-level
            ``data_path``.

    Returns:
        ``base_dir / filename``, whether or not the file exists.
    """
    root = data_path if base_dir is None else Path(base_dir)
    # Always hand back a Path. Returning None for an absent file would make
    # load_parquet fail on ``path.exists()`` instead of printing "Missing: ...".
    return root / filename

# Resolve every entry of FILENAMES to a location on disk, keyed by dataset name.
PARQUET_PATHS = dict(
    (dataset, resolve_parquet(dataset, parquet_file))
    for dataset, parquet_file in FILENAMES.items()
)
PARQUET_PATHS  # echo the mapping as the cell output


{'chainlink_rounds': PosixPath('/Users/carlos.ortega/investing/data/chainlink_rounds.parquet'),
 'ethusd_hourly': PosixPath('/Users/carlos.ortega/investing/data/ethusd_hourly.parquet'),
 'deribit_eth_dvol_raw': PosixPath('/Users/carlos.ortega/investing/data/deribit_eth_dvol_raw.parquet'),
 'eth_iv_index_hourly': PosixPath('/Users/carlos.ortega/investing/data/eth_iv_index_hourly.parquet'),
 'deribit_eth_option_surface_raw': PosixPath('/Users/carlos.ortega/investing/data/deribit_eth_option_surface_raw.parquet'),
 'eth_iv_surface_hourly': PosixPath('/Users/carlos.ortega/investing/data/eth_iv_surface_hourly.parquet'),
 'eth_training_hourly': PosixPath('/Users/carlos.ortega/investing/data/eth_training_hourly.parquet')}

In [2]:


# NOTE(review): this re-declares resolve_parquet from the first cell and
# shadows it from here on; keep the two in sync or delete this copy.
def resolve_parquet(name: str, filename: str, base_dir: "Path | None" = None) -> Path:
    """Build the expected on-disk location of one pipeline parquet file.

    Args:
        name: Logical dataset name (a key of ``FILENAMES``). Accepted for
            call-site symmetry; it does not influence the result.
        filename: File name of the parquet output, e.g. ``"ethusd_hourly.parquet"``.
        base_dir: Directory that holds the file. Defaults to the module-level
            ``data_path``.

    Returns:
        ``base_dir / filename``, whether or not the file exists.
    """
    root = data_path if base_dir is None else Path(base_dir)
    # Always hand back a Path. Returning None for an absent file would make
    # load_parquet fail on ``path.exists()`` instead of printing "Missing: ...".
    return root / filename
    
def load_parquet(path: "Path | None") -> "pd.DataFrame | None":
    """Read one parquet file into a DataFrame and print a one-line summary.

    Args:
        path: Location of the parquet file. ``None`` is tolerated so that an
            unresolved path is reported as missing rather than raising.

    Returns:
        The loaded DataFrame, or ``None`` (after printing ``Missing: ...``)
        when ``path`` is ``None`` or does not exist.
    """
    # Check for None first: calling .exists() on it would raise AttributeError
    # before the "Missing" message could be printed.
    if path is None or not path.exists():
        print(f"Missing: {path}")
        return None
    df = pd.read_parquet(path)
    print(f"Loaded {path.name}: rows={len(df):,}, cols={len(df.columns)} from {path}")
    return df

# Load each pipeline output into its own DataFrame variable; load_parquet
# returns None for a path that does not exist.
(
    chainlink_rounds_df,
    ethusd_hourly_df,
    deribit_eth_dvol_raw_df,
    eth_iv_index_hourly_df,
    deribit_eth_option_surface_raw_df,
    eth_iv_surface_hourly_df,
    eth_training_hourly_df,
) = [
    load_parquet(PARQUET_PATHS[dataset])
    for dataset in (
        "chainlink_rounds",
        "ethusd_hourly",
        "deribit_eth_dvol_raw",
        "eth_iv_index_hourly",
        "deribit_eth_option_surface_raw",
        "eth_iv_surface_hourly",
        "eth_training_hourly",
    )
]


Loaded chainlink_rounds.parquet: rows=2,975, cols=6 from /Users/carlos.ortega/investing/data/chainlink_rounds.parquet
Loaded ethusd_hourly.parquet: rows=1,328, cols=8 from /Users/carlos.ortega/investing/data/ethusd_hourly.parquet
Loaded deribit_eth_dvol_raw.parquet: rows=2,000, cols=8 from /Users/carlos.ortega/investing/data/deribit_eth_dvol_raw.parquet
Loaded eth_iv_index_hourly.parquet: rows=625, cols=9 from /Users/carlos.ortega/investing/data/eth_iv_index_hourly.parquet
Loaded deribit_eth_option_surface_raw.parquet: rows=1,512, cols=14 from /Users/carlos.ortega/investing/data/deribit_eth_option_surface_raw.parquet
Loaded eth_iv_surface_hourly.parquet: rows=3, cols=15 from /Users/carlos.ortega/investing/data/eth_iv_surface_hourly.parquet
Loaded eth_training_hourly.parquet: rows=1,328, cols=35 from /Users/carlos.ortega/investing/data/eth_training_hourly.parquet


In [6]:
# Show the frame loaded from eth_training_hourly.parquet as this cell's output.
eth_training_hourly_df

Unnamed: 0,ts_hour,open,high,low,close,n_updates,staleness_sec,is_gap,iv_index_open,iv_index_high,...,oi_weighted_iv_30d,source_ivsurf,ingested_at_ivsurf,api_endpoint_version_ivsurf,quality_flag_ivsurf,ret_1h,ret_6h,ret_24h,rv_6h,rv_24h
0,2025-12-31 23:00:00+00:00,2970.640000,2970.640000,2970.640000,2970.640000,1,3109.0,False,,,...,,,NaT,,,0.001029,0.001407,0.007203,0.003086,0.008853
1,2026-01-01 00:00:00+00:00,2973.696815,2973.696815,2973.696815,2973.696815,1,3085.0,False,,,...,,,NaT,,,0.001781,0.000107,0.007383,0.002922,0.008876
2,2026-01-01 01:00:00+00:00,2978.994407,2978.994407,2978.994407,2978.994407,1,3049.0,False,,,...,,,NaT,,,0.000563,-0.001375,0.007316,0.002336,0.008862
3,2026-01-01 02:00:00+00:00,2980.672490,2980.672490,2980.672490,2980.672490,1,3025.0,False,,,...,,,NaT,,,-0.000849,-0.002544,0.004595,0.002347,0.009100
4,2026-01-01 03:00:00+00:00,2978.140521,2978.140521,2978.140521,2978.140521,1,3013.0,False,,,...,,,NaT,,,-0.001904,-0.000110,0.014303,0.002703,0.012607
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1323,2026-02-25 02:00:00+00:00,1942.420000,1942.420000,1920.632909,1920.632909,3,85.0,False,0.6818,0.6840,...,,,NaT,,,-0.005473,,,,
1324,2026-02-25 03:00:00+00:00,1910.120630,1910.120630,1910.120630,1910.120630,1,2389.0,False,0.6833,0.6997,...,,,NaT,,,-0.002033,,,,
1325,2026-02-25 04:00:00+00:00,1896.563569,1906.237252,1896.563569,1906.237252,2,2845.0,False,0.6986,0.7001,...,,,NaT,,,-0.011305,,,,
1326,2026-02-25 05:00:00+00:00,1904.564913,1904.564913,1884.687732,1884.687732,3,1465.0,False,0.6970,0.7040,...,0.894637,deribit_option_surface,2026-02-25 07:30:22.832040+00:00,deribit_api_v2,ok,0.000373,,,,
