In [None]:
    # Phase 3: Foundational Equity Data Pipeline
    
    # **Objective:** Build a robust pipeline for fetching and processing equity data, replicating the *concepts* from the original notebook with live data.
    
    # **Actions:**
    # 1.  **Fix Bugs from `01_YFinance_Exploration.ipynb`:** We will start by fixing the `KeyError` in the news feed and the negative "Time to Expiry" calculation.
    # 2.  **Refine the Equity Data Pipeline:** We will then create a clean, robust data handler class that fetches historical equity data and engineers a rich set of technical analysis features."

import yfinance as yf
import pandas as pd
import numpy as np
import logging
from datetime import datetime
from scipy.stats import norm
    
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class EquityDataPipeline:
    def __init__(self, ticker: str = "SPY"):
        self.ticker = ticker
        self.tk = yf.Ticker(self.ticker)
        self.data = None

    def fetch_history(self, period: str = "5y", interval: str = "1d") -> pd.DataFrame:
        logging.info(f"Fetching {period} of {interval} data for {self.ticker}...")
        self.data = self.tk.history(period=period, interval=interval, auto_adjust=False)
        if self.data.empty:
            logging.error(f"No data fetched for ticker {self.ticker}.")
            return pd.DataFrame()
        self.data.columns = [str(col).lower().replace(' ', '_') for col in self.data.columns]
        logging.info(f"Equity data fetched. Shape: {self.data.shape}")
        return self.data
    
    def create_features(self) -> pd.DataFrame:
        if self.data is None or self.data.empty:
            logging.error("Equity data not fetched. Call fetch_history() first.")
            return pd.DataFrame()

        logging.info("Starting equity feature engineering...")
        # Using pandas_ta to add a wide range of technical indicators
        self.data.ta.rsi(length=14, append=True)
        self.data.ta.macd(fast=12, slow=26, signal=9, append=True)
        self.data.ta.bbands(length=20, std=2, append=True)
        self.data.ta.atr(length=14, append=True)
        self.data.ta.ema(length=50, append=True)
        self.data.ta.ema(length=200, append=True)
        self.data.ta.adx(length=14, append=True)
        self.data.ta.obv(append=True)
        self.data.ta.stoch(k=14, d=3, append=True)
        self.data.ta.cmf(length=20, append=True)
        self.data.dropna(inplace=True)
        logging.info(f"Equity feature engineering complete. Final shape: {self.data.shape}")
        return self.data
    



In [4]:

# Demonstrate the refined pipeline
pipeline = EquityDataPipeline(ticker="SPY")
equity_data = pipeline.fetch_history(period="1y")
featured_equity = pipeline.create_features()
print("--- Featured Equity Data (Tail) ---")
print(featured_equity.tail())

2025-10-03 16:21:35,266 - INFO - Fetching 1y of 1d data for SPY...
2025-10-03 16:21:35,533 - INFO - Equity data fetched. Shape: (250, 9)
2025-10-03 16:21:35,534 - INFO - Starting equity feature engineering...


AttributeError: 'DataFrame' object has no attribute 'ta'

In [5]:

## Bug Fixes from `01_YFinance_Exploration.ipynb`"

# 1. Fixing the News Feed KeyError
spy = yf.Ticker('SPY')
print("--- News Feed (with error handling) ---")
try:
    for news_item in spy.news[:5]:
        # FIX: Check if the 'title' key exists before accessing it
        title = news_item.get('title', 'No Title Available')
        print(f"- {title}")
except Exception as e:
    print(f"An error occurred while fetching news: {e}")

# 2. Fixing the Negative Time to Expiry Bug
def get_option_greeks_fixed(ticker_symbol, expiration_date):
    stock = yf.Ticker(ticker_symbol)
    S = stock.history(period="1d")['Close'].iloc[-1]
    r = 0.0415 # Placeholder for risk-free rate
    opt_chain = stock.option_chain(expiration_date)
    options = pd.concat([opt_chain.calls, opt_chain.puts], ignore_index=True)
    
    # FIX: Ensure correct date handling
    expiration_datetime = pd.to_datetime(expiration_date).tz_localize('US/Eastern')
    current_datetime = pd.Timestamp.now(tz='US/Eastern')
    T = (expiration_datetime - current_datetime).days / 365.25
    
    if T < 0:
        logging.warning(f"Expiration date {expiration_date} is in the past. Time to expiry (T) is {T:.4f} years. Greeks may be invalid.")
        T = 1e-9 # Use a very small positive number to avoid math errors
        
    return T
# Demonstrate the fix
nvda_ticker = yf.Ticker('NVDA')
first_expiration = nvda_ticker.options[0]
time_to_expiry_fixed = get_option_greeks_fixed('NVDA', first_expiration)
print(f"--- Corrected Time to Expiry Calculation ---")
print(f"Expiration: {first_expiration}")
print(f"Corrected Time to Expiry: {time_to_expiry_fixed:.4f} years")

--- News Feed (with error handling) ---
- No Title Available
- No Title Available
- No Title Available
- No Title Available
- No Title Available




--- Corrected Time to Expiry Calculation ---
Expiration: 2025-10-03
Corrected Time to Expiry: 0.0000 years
