In [3]:
! conda install pyarrow joblib pandas numpy matplotlib seaborn scikit-learn
! pip install yfinance
! pip install polars-lts-cpu
! pip install py_vollib_vectorized
! pip install git+https://github.com/vollib/py_lets_be_rational.git # overwrite py_vollib_vectorized

Channels:
 - defaults
Platform: win-64
Collecting package metadata (repodata.json): done
Solving environment: done

# All requested packages already installed.

Collecting yfinance
  Using cached yfinance-0.2.65-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Using cached multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Using cached peewee-3.18.2-py3-none-any.whl
Collecting curl_cffi>=0.7 (from yfinance)
  Using cached curl_cffi-0.11.4-cp39-abi3-win_amd64.whl.metadata (14 kB)
Collecting websockets>=13.0 (from yfinance)
  Using cached websockets-15.0.1-cp313-cp313-win_amd64.whl.metadata (7.0 kB)
Using cached yfinance-0.2.65-py2.py3-none-any.whl (119 kB)
Using cached curl_cffi-0.11.4-cp39-abi3-win_amd64.whl (1.6 MB)
Using cached multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Using cached websockets-15.0.1-cp313-cp313-win_amd64.whl (176 kB)
Installing collected packages: peewee, multitasking, websocke

ERROR: Invalid requirement: '#': Expected package name at the start of dependency specifier
    #
    ^


In [None]:
from pathlib import Path

# Glob pattern selecting the raw SPXW option CSV files.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
RAW_CSV_PATH = r"C:\Users\kevin\OneDrive\Documents\Draco\spxw_filtered\*.csv"

# Despite its name, `raw_dir` is the "data" folder that sits next to the CSV
# folder (two levels above the glob pattern, then into "data").
raw_dir = Path(RAW_CSV_PATH).parent.parent.joinpath("data")

# One sub-folder of `raw_dir` per pipeline stage.
PRE_FILTERED_DIR, SAMPLED_DIR, CALIBRATED_DIR, SURFACE_PNG_DIR = (
    raw_dir / folder_name
    for folder_name in (
        "spxw_pre_filtered_parquet",
        "spxw_sampled_hybrid_parquet",
        "spxw_calibrated_parquet",
        "spxw_surface_png",
    )
)

from pre_filter import pre_filter_options
from partition_writer import parallel_sample
import os

# Stage 1 — pre-filter the raw options data.
# Passes the CSV glob pattern as input and PRE_FILTERED_DIR as the output folder.
# NOTE(review): the meaning and units of the thresholds below are defined in
# pre_filter.py (presumably days to expiry, option price and traded volume) — confirm there.
pre_filter_options(
    underlying="SPXW",
    input_pattern=RAW_CSV_PATH,
    output_dir=PRE_FILTERED_DIR,
    min_window_start=1,
    min_T_days=7,
    max_T_days=180,
    min_price=0.1,
    min_volume=5,
)

# Stage 2 — sample the pre-filtered data and write it out partitioned.
# Input is the stage-1 output folder; results go to SAMPLED_DIR.
# One job is requested per CPU reported by the OS; the fixed random_state is
# presumably there to make the sampling repeatable — confirm in partition_writer.py.
parallel_sample(
    input_dir=Path(PRE_FILTERED_DIR),
    output_dir=Path(SAMPLED_DIR),
    n_jobs=os.cpu_count(),
    n_clusters=200,
    n_pca_extremes=0,
    atm_width=0.05,
    atm_frac=0.20,
    num_m_bins=10,
    random_state=42,
)


✍️  wrote pre-filtered surface to C:\Users\kevin\OneDrive\Documents\Draco\data\spxw_pre_filtered_parquet


In [None]:
from calibration import parallel_calibrate
from pathlib import Path
import os
from datetime import datetime

# 3. Calibrate Heston model for each date (run independently, can be resumed)
# Date window handed to the calibration step; edit as needed.
# NOTE(review): whether the end date is inclusive is decided inside calibration.py — confirm.
start_date = datetime(year=2024, month=1, day=2).date()
end_date = datetime(year=2024, month=2, day=1).date()

# Run the calibration over the sampled data for the chosen date window,
# requesting one job per CPU reported by the OS. Results go to CALIBRATED_DIR.
parallel_calibrate(
    input_dir=Path(SAMPLED_DIR),
    output_dir=Path(CALIBRATED_DIR),
    n_jobs=os.cpu_count(),
    start_date=start_date,
    end_date=end_date,
    # Further calibration keyword arguments can be appended here.
)


In [None]:
from surface_plot import plot_surface_for_date
from pathlib import Path
from joblib import Parallel, delayed
import os

# 4. Plot IV surfaces for each calibrated date (run independently after calibration)
calib_dir, surface_dir = Path(CALIBRATED_DIR), Path(SURFACE_PNG_DIR)

# Calibration output is discovered through its "date=<value>" sub-folders;
# keep only the <value> part of each folder name, in sorted order.
all_dates = sorted(
    partition.name.split("=")[1]
    for partition in calib_dir.glob("date=*")
    if partition.is_dir()
)

# One plotting task per date, fanned out over one worker per reported CPU.
plot_tasks = (
    delayed(plot_surface_for_date)(date, calib_dir, surface_dir)
    for date in all_dates
)
results = Parallel(n_jobs=os.cpu_count(), verbose=5)(plot_tasks)

# Summing the results counts successes — assumes plot_surface_for_date returns
# a bool/0-1 flag per date; confirm in surface_plot.py.
print(f"[Surface] done: {sum(results)}/{len(all_dates)} succeeded")
