In [12]:
import os
import time
from datetime import datetime, timedelta

import pandas as pd
import requests

# ------------------------------
# Configuration
# ------------------------------
# The API token is read from the environment so the credential is never stored
# in (or shared with) the notebook itself.  Export EODHD_API_TOKEN before
# starting the kernel.
# NOTE(review): any token that was previously committed in this notebook
# should be treated as exposed and rotated.
API_TOKEN = os.environ.get('EODHD_API_TOKEN', '')
if not API_TOKEN:
    print("Warning: EODHD_API_TOKEN is not set; API requests will be rejected.")

BASE_URL = 'https://eodhd.com/api/mp/unicornbay/options/eod'
UNDERLYING_SYMBOL = 'QQQ'
EXPIRATION_DATE = '2025-04-17'

# Trade-date window to query, as YYYY-MM-DD strings (roughly the eleven months
# leading up to the expiry above).
END_TRADE_DATE = '2025-04-11'
START_TRADE_DATE = '2024-05-13'

# Records requested per page; also the step by which the page offset advances.
LIMIT = 1000      # Maximize limit per page as per docs (Page 1 & 9)
# No page cap is configured: pagination continues until the API returns an
# empty page or a page shorter than LIMIT.

def fetch_eod_data_range(option_type, timeout=30):
    """
    Fetch all EOD options records for one contract type over the configured range.

    Pages through BASE_URL with offset pagination (``page[offset]`` advances by
    LIMIT on every page) for UNDERLYING_SYMBOL contracts expiring on
    EXPIRATION_DATE, with no strike filter, and trade dates between
    START_TRADE_DATE and END_TRADE_DATE.  Paging stops at the first empty
    page, the first page shorter than LIMIT, or the first error.

    Args:
        option_type: Contract type sent as ``filter[type]``: 'call' or 'put'.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of dicts, one per record returned by the API: a copy of the
        record's ``attributes`` plus ``record_id``, ``record_type`` and
        ``trade_date`` keys.  Errors are printed, not raised; on an HTTP or
        JSON error the records collected so far are returned (best effort).
    """
    def _redact(text):
        # requests puts the request URL (query string included) into its
        # error messages; mask the token so it never lands in saved output.
        return text.replace(API_TOKEN, '***') if API_TOKEN else text

    all_records_attributes = []
    page = 1
    total_fetched_this_type = 0
    total_expected = None  # Taken from the first response's meta.total, when present

    while True:
        current_offset = (page - 1) * LIMIT
        print(f"Fetching page {page} (offset {current_offset}) for {option_type.upper()} expiring {EXPIRATION_DATE} from {START_TRADE_DATE} to {END_TRADE_DATE}...")

        params = {
            'api_token': API_TOKEN,
            'filter[underlying_symbol]': UNDERLYING_SYMBOL,
            'filter[exp_date_eq]': EXPIRATION_DATE,
            'filter[type]': option_type,    # 'call' or 'put'
            # No strike filters: all strikes for the expiry are requested
            'filter[tradetime_from]': START_TRADE_DATE,
            'filter[tradetime_to]': END_TRADE_DATE,
            'page[limit]': LIMIT,
            'page[offset]': current_offset,  # Offset-based paging, not a page number
            # Every row shares one expiry because of the exp_date_eq filter, so
            # this sort key does not order rows within the result set.
            # NOTE(review): confirm the API's row order is stable across pages.
            'sort': '-exp_date',
            'compact': '0'
        }

        try:
            response = requests.get(BASE_URL, params=params, timeout=timeout)
            response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
        except requests.exceptions.RequestException as e:
            print(f"HTTP error for {option_type.upper()} on page {page}: {_redact(str(e))}")
            error_response = getattr(e, 'response', None)
            if error_response is not None:
                print(f"Status Code: {error_response.status_code}")
                try:
                    print(f"Response Body: {error_response.json()}")
                except ValueError:
                    # JSON decode failures are ValueError subclasses in every
                    # requests version; fall back to the raw body.
                    print(f"Response Body: {error_response.text}")
            break  # Stop trying for this type on error

        try:
            data = response.json()
        except ValueError as e:
            print(f"JSON Decode Error for {option_type.upper()} on page {page}: {e}")
            print(f"Response Text: {response.text}")
            break  # Stop trying

        if not isinstance(data, dict):
            # The code below needs a JSON object with 'meta' / 'data' keys.
            print(f"Unexpected response shape for {option_type.upper()} on page {page}: {type(data).__name__}")
            break

        meta = data.get('meta') or {}
        if total_expected is None and 'total' in meta:
            total_expected = meta['total']
            print(f"API reports total expected {option_type.upper()} records: {total_expected}")

        page_data = data.get('data')

        if not page_data:
            print(f"No more data returned on page {page} for {option_type.upper()}. Fetching complete for this type.")
            break  # Exit loop if no data is returned

        # Flatten each record: its attributes plus a few top-level fields
        for record in page_data:
            # Copy so the parsed response is not mutated; tolerate a null 'attributes'
            attributes = dict(record.get('attributes') or {})
            attributes['record_id'] = record.get('id', '')
            attributes['record_type'] = record.get('type', '')
            # Prefer 'tradetime'; 'previous_date' is used only when the
            # 'tradetime' key is absent from the attributes.
            attributes['trade_date'] = attributes.get('tradetime', attributes.get('previous_date'))
            all_records_attributes.append(attributes)

        num_retrieved_this_page = len(page_data)
        total_fetched_this_type += num_retrieved_this_page
        print(f"Page {page} complete: Retrieved {num_retrieved_this_page} records (Total fetched for {option_type.upper()}: {total_fetched_this_type})")

        # A short page is the standard signal that the result set is exhausted
        if num_retrieved_this_page < LIMIT:
            print(f"Retrieved less than limit ({num_retrieved_this_page} < {LIMIT}), assuming end of data for {option_type.upper()}.")
            break

        page += 1

        time.sleep(0.6)  # Be polite to the API, avoid hitting rate limits (1000 req/min documented)

    # Surface partial downloads (e.g. after an error mid-pagination)
    if isinstance(total_expected, int) and len(all_records_attributes) != total_expected:
        print(f"Warning: fetched {len(all_records_attributes)} {option_type.upper()} records but API reported {total_expected}.")

    print(f"Completed fetching {option_type.upper()} records: {len(all_records_attributes)} records total.\n")
    return all_records_attributes

# ----------------------------
# Download both sides of the chain (calls, then puts) via the range query.
# ----------------------------
print(f"Fetching CALL options for {UNDERLYING_SYMBOL} expiring {EXPIRATION_DATE} from {START_TRADE_DATE} to {END_TRADE_DATE}...")
all_call_data = fetch_eod_data_range('call')

print(f"Fetching PUT options for {UNDERLYING_SYMBOL} expiring {EXPIRATION_DATE} from {START_TRADE_DATE} to {END_TRADE_DATE}...")
all_put_data = fetch_eod_data_range('put')

# Calls first, puts second, in a single list of record dicts
combined_data = [*all_call_data, *all_put_data]

if combined_data:
    df = pd.DataFrame(combined_data)

    # --- Type coercion ---
    print("\n--- Processing DataFrame ---")
    print(f"Initial DataFrame shape: {df.shape}")
    print("DataFrame columns:", df.columns.tolist())

    # Parse 'trade_date' into datetimes; unparseable values become NaT
    if 'trade_date' not in df.columns:
        print("Warning: 'trade_date' column not found or created.")
    else:
        df['trade_date'] = pd.to_datetime(df['trade_date'], errors='coerce')

    # Columns to coerce to numbers; unparseable values become NaN
    numeric_cols = [
        'strike', 'open', 'high', 'low', 'last',
        'change', 'pctchange', 'previous',
        'bid', 'bid_size', 'ask', 'ask_size', 'moneyness',
        'volume', 'volume_change', 'volume_pctchange',
        'open_interest', 'open_interest_change', 'open_interest_pctchange',
        'volatility', 'volatility_change', 'volatility_pctchange',
        'theoretical', 'delta', 'gamma', 'theta', 'vega', 'rho', 'dte',
    ]

    for col in numeric_cols:
        if col not in df.columns:
            print(f"Column '{col}' not found for numeric conversion.")
            continue
        df[col] = pd.to_numeric(df[col], errors='coerce')

    print("\n--- DataFrame Info after Type Conversion ---")
    df.info()

    print("\n--- DataFrame Head ---")
    print(df.head())

    # Persist the combined, typed frame next to the notebook
    output_filename = f"{UNDERLYING_SYMBOL}_{EXPIRATION_DATE}_EOD_Calls_Puts_{START_TRADE_DATE}_to_{END_TRADE_DATE}.csv"
    df.to_csv(output_filename, index=False)
    print(f"\nTotal records retrieved: {len(df)}")
    print(f"Data saved to {output_filename}")
else:
    print("No data retrieved for calls or puts. Exiting.")

Fetching CALL options for QQQ expiring 2025-04-17 from 2024-05-13 to 2025-04-11...
Fetching page 1 (offset 0) for CALL expiring 2025-04-17 from 2024-05-13 to 2025-04-11...
API reports total expected CALL records: 7861
Page 1 complete: Retrieved 1000 records (Total fetched for CALL: 1000)
Fetching page 2 (offset 1000) for CALL expiring 2025-04-17 from 2024-05-13 to 2025-04-11...
Page 2 complete: Retrieved 1000 records (Total fetched for CALL: 2000)
Fetching page 3 (offset 2000) for CALL expiring 2025-04-17 from 2024-05-13 to 2025-04-11...
Page 3 complete: Retrieved 1000 records (Total fetched for CALL: 3000)
Fetching page 4 (offset 3000) for CALL expiring 2025-04-17 from 2024-05-13 to 2025-04-11...
Page 4 complete: Retrieved 1000 records (Total fetched for CALL: 4000)
Fetching page 5 (offset 4000) for CALL expiring 2025-04-17 from 2024-05-13 to 2025-04-11...
Page 5 complete: Retrieved 1000 records (Total fetched for CALL: 5000)
Fetching page 6 (offset 5000) for CALL expiring 2025-04-17 

In [13]:
# Summarize the combined calls + puts frame built in the previous cell:
# index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15524 entries, 0 to 15523
Data columns (total 46 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   contract                 15524 non-null  object        
 1   underlying_symbol        15524 non-null  object        
 2   exp_date                 15524 non-null  object        
 3   expiration_type          15524 non-null  object        
 4   type                     15524 non-null  object        
 5   strike                   15524 non-null  int64         
 6   exchange                 15524 non-null  object        
 7   currency                 15524 non-null  object        
 8   open                     15524 non-null  float64       
 9   high                     15524 non-null  float64       
 10  low                      15524 non-null  float64       
 11  last                     15524 non-null  float64       
 12  last_size                15524 n

In [14]:
# Drop identifier / metadata columns to create a leaner dataframe.
# 'tradetime' is dropped as well; the parsed 'trade_date' column is kept.
columns_to_drop = [
    'contract',
    'underlying_symbol',
    'exp_date',
    'expiration_type',
    'exchange',
    'currency',
    'bid_date',
    'ask_date',
    'tradetime',
    'record_id',
    'record_type'
]

# Build a new frame rather than mutating df; errors='ignore' skips any listed
# column that is not present.
df_clean = df.drop(columns=columns_to_drop, errors='ignore')

# Display information about the cleaned dataframe
print("Cleaned DataFrame Info:")
df_clean.info()

# Save the cleaned dataframe to a new CSV file.  The original cell announced
# a save but never wrote anything; the write is now actually performed.
clean_output_filename = f"{UNDERLYING_SYMBOL}_{EXPIRATION_DATE}_EOD_Calls_Puts_{START_TRADE_DATE}_to_{END_TRADE_DATE}_clean.csv"
df_clean.to_csv(clean_output_filename, index=False)
print(f"Cleaned dataframe saved with {len(df_clean)} rows and {len(df_clean.columns)} columns")
print(f"Data saved to {clean_output_filename}")


Cleaned DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15524 entries, 0 to 15523
Data columns (total 35 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   type                     15524 non-null  object        
 1   strike                   15524 non-null  int64         
 2   open                     15524 non-null  float64       
 3   high                     15524 non-null  float64       
 4   low                      15524 non-null  float64       
 5   last                     15524 non-null  float64       
 6   last_size                15524 non-null  int64         
 7   change                   15524 non-null  float64       
 8   pctchange                15524 non-null  float64       
 9   previous                 15524 non-null  float64       
 10  previous_date            9587 non-null   object        
 11  bid                      15524 non-null  float64       
 12  bid_size