In [1]:
import requests
import pandas as pd
from dotenv import load_dotenv
import os

def fetch_adjusted_prices(ticker, multiplier, timespan, start_date, end_date):
    """
    Fetch historical price data and adjust for reverse splits.

    Args:
        ticker (str): The ticker symbol.
        multiplier (int): The multiplier for the timespan.
        timespan (str): Aggregation period (e.g., 'day', 'hour').
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.

    Returns:
        pd.DataFrame: Adjusted historical price data.
    """
    load_dotenv()

    # Get API key from environment variables
    api_key = os.getenv("POLYGON_API_KEY")
    if not api_key:
        raise ValueError("API key not found. Ensure POLYGON_API_KEY is set in your .env file.")

    # Fetch historical price data
    url_prices = f"https://api.polygon.io/v2/aggs/ticker/{ticker}/range/{multiplier}/{timespan}/{start_date}/{end_date}"
    response_prices = requests.get(url_prices, params={"apiKey": api_key})

    if response_prices.status_code == 200:
        data_prices = response_prices.json()
        if "results" in data_prices:
            df = pd.DataFrame(data_prices["results"]).rename(columns={
                "t": "timestamp",
                "o": "open",
                "h": "high",
                "l": "low",
                "c": "close",
                "v": "volume",
            })
            df["date"] = pd.to_datetime(df["timestamp"], unit="ms").dt.date
            df = df[["date", "open", "high", "low", "close", "volume"]]
            df.set_index(pd.to_datetime(df["date"]), inplace=True)
            df.drop(columns=["date"], inplace=True)
        else:
            raise ValueError("No results found in the price response.")
    else:
        raise ConnectionError(f"Error fetching price data: {response_prices.status_code}, {response_prices.text}")

    # Fetch split data to adjust for reverse splits
    url_splits = "https://api.polygon.io/v3/reference/splits"
    response_splits = requests.get(url_splits, params={"ticker": ticker, "apiKey": api_key, "reverse_split": True})

    if response_splits.status_code == 200:
        data_splits = response_splits.json()
        if "results" in data_splits:
            splits = pd.DataFrame(data_splits["results"])
            splits["execution_date"] = pd.to_datetime(splits["execution_date"])
            splits.sort_values("execution_date", inplace=True)

            # Apply reverse split adjustments
            for _, split in splits.iterrows():
                split_date = split["execution_date"]
                split_ratio = split["split_from"] / split["split_to"]
                mask = df.index < split_date
                df.loc[mask, ["open", "high", "low", "close"]] *= split_ratio
                df.loc[mask, "volume"] /= split_ratio
        else:
            print("No split data found in the response.")
    else:
        raise ConnectionError(f"Error fetching split data: {response_splits.status_code}, {response_splits.text}")

    return df

# Example usage
if __name__ == "__main__":
    ticker = "VXX"
    multiplier = 1
    timespan = "day"
    start_date = "2017-01-01"
    end_date = "2024-12-23"

    try:
        vxx_history = fetch_adjusted_prices(ticker, multiplier, timespan, start_date, end_date)
        print(vxx_history)
    except Exception as e:
        print(f"An error occurred: {e}")

               open     high       low    close        volume
date                                                         
2023-01-09  3409.92  3484.16  3379.200  3448.32  2.404977e+04
2023-01-10  3467.52  3468.80  3294.720  3304.96  2.320002e+04
2023-01-11  3281.92  3338.24  3279.360  3320.32  2.547651e+04
2023-01-12  3256.32  3302.40  3151.360  3156.48  4.177954e+04
2023-01-13  3179.52  3179.52  3048.960  3077.12  3.743063e+04
...             ...      ...       ...      ...           ...
2024-12-17    43.75    44.50    43.240    44.29  4.126272e+06
2024-12-18    44.00    51.89    43.380    51.72  8.144114e+06
2024-12-19    49.59    56.85    48.890    56.14  1.142816e+07
2024-12-20    55.82    55.98    47.625    48.08  1.097014e+07
2024-12-23    46.92    48.82    44.380    44.38  4.045915e+06

[493 rows x 5 columns]


In [38]:
import pandas as pd
import requests
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()

# Retrieve the API key from the .env file
api_key = os.getenv("IVOLATILITY_API_KEY")

symbol = "aA"
trade_date = "2024-12-23"
dteFrom = 7
dteTo = 30
deltaFrom = 0
deltaTo = 1
cp = "C"

# Construct the URL
url = "https://restapi.ivolatility.com/equities/eod/stock-opts-by-param"
params = {
    "apiKey": api_key,
    "symbol": symbol,
    "tradeDate": trade_date,
    "dteFrom": dteFrom,
    "dteTo": dteTo,
    "deltaFrom": deltaFrom,
    "deltaTo": deltaTo,
    "cp": cp
}

# Make the request
response = requests.get(url, params=params)

# Check the response
if response.status_code == 200:
    json_data = response.json()  # Parse JSON response
    data = json_data.get("data")  # Extract the 'data' key
    if data:  # Check if data exists
        df = pd.DataFrame(data)  # Convert to pandas DataFrame
        print(df)  # Display the DataFrame
    else:
        print("No data available:", json_data)
else:
    print(f"Error: {response.status_code}")
    print(response.text)

No data available: {'status': {'executionTime': 12, 'recordsFound': 1000000, 'code': 'PENDING', 'urlForDetails': 'https://restapi.ivolatility.com/data/info/86a8ce92-ac16-4a63-9ba5-e9ce2e7aea7a'}, 'query': {'requestUUID': '86a8ce92-ac16-4a63-9ba5-e9ce2e7aea7a'}, 'data': []}


In [1]:
import requests
import os
import pandas as pd
from dotenv import load_dotenv
from datetime import datetime, timedelta
import time
import gzip
import shutil

# Load environment variables from .env file
load_dotenv()

# Retrieve the API key from the .env file
api_key = os.getenv("IVOLATILITY_API_KEY")

if not api_key:
    raise ValueError("API key is not set in the .env file or environment variables.")

# Define the API endpoint
url = "https://restapi.ivolatility.com/equities/eod/stock-opts-by-param"

def fetch_option_data(symbol, trade_date, dteFrom, dteTo, deltaFrom, deltaTo, cp):
    """Fetches option data (calls or puts) and returns a dataframe."""
    params = {
        "apiKey": api_key,
        "symbol": symbol,
        "tradeDate": trade_date,
        "dteFrom": dteFrom,
        "dteTo": dteTo,
        "deltaFrom": deltaFrom,
        "deltaTo": deltaTo,
        "cp": cp
    }

    try:
        # Initial API request
        response = requests.get(url, params=params)
        response.raise_for_status()

        # Parse the response
        data = response.json()

        # Handle direct data response
        if "data" in data and data["data"]:
            return pd.DataFrame(data["data"])
        
        # Handle pending data
        elif data.get("status", {}).get("code") == "PENDING":
            status_url = data["status"]["urlForDetails"]

            while True:
                # Poll for data availability
                status_response = requests.get(status_url)
                status_response.raise_for_status()
                status_data = status_response.json()

                # Check data availability
                if isinstance(status_data, list) and len(status_data) > 0:
                    meta = status_data[0].get("meta", {})
                    if meta.get("status") == "COMPLETE":
                        file_info = status_data[0].get("data", [])[0]
                        download_url = file_info.get("urlForDownload")

                        if download_url:
                            # Download and decompress the file
                            download_response = requests.get(download_url)
                            gz_filename = f"{symbol}_{cp}_{trade_date}.csv.gz"
                            with open(gz_filename, "wb") as f:
                                f.write(download_response.content)

                            # Check if the file is empty
                            with gzip.open(gz_filename, "rb") as gz_file:
                                content = gz_file.read()
                                if not content.strip():  # Empty file
                                    print(f"No data available in the downloaded file for {cp} on {trade_date}.")
                                    os.remove(gz_filename)
                                    return pd.DataFrame()

                            # Decompress and read into a dataframe
                            with gzip.open(gz_filename, "rb") as gz_file:
                                df = pd.read_csv(gz_file)
                            os.remove(gz_filename)  # Clean up temporary file
                            return df
                        else:
                            print(f"Download URL for {cp} on {trade_date} not found.")
                            break
                    elif meta.get("status") == "FAILED":
                        print(f"Data retrieval for {cp} on {trade_date} failed.")
                        break
                else:
                    print(f"Data for {cp} on {trade_date} still processing. Retrying in 10 seconds...")
                    time.sleep(10)

        else:
            raise ValueError(f"No data or status URL found for {cp} on {trade_date}.")

    except requests.exceptions.RequestException as e:
        print(f"An error occurred while fetching {cp} data on {trade_date}: {e}")

    return pd.DataFrame()  # Return an empty dataframe if data is unavailable

# Define query parameters
symbol = "UVXY"
start_date = "2011-01-01"
# end_date = datetime.now().strftime("%Y-%m-%d")
end_date = "2013-02-15"
dteFrom = 0
dteTo = 600

# Output directory
output_dir = f"../historical/{symbol}/"
os.makedirs(output_dir, exist_ok=True)  # Create directory if it doesn't exist

# Iterate over date range in reverse
start_date_obj = datetime.strptime(start_date, "%Y-%m-%d")
end_date_obj = datetime.strptime(end_date, "%Y-%m-%d")
current_date = end_date_obj

while current_date >= start_date_obj:
    # Skip weekends
    if current_date.weekday() in [5, 6]:  # 5 = Saturday, 6 = Sunday
        current_date -= timedelta(days=1)
        continue

    trade_date = current_date.strftime("%Y-%m-%d")
    output_path = f"{output_dir}/{symbol}_daily_{trade_date}.csv"

    # Check if the file already exists
    if os.path.exists(output_path):
        print(f"File already exists for {trade_date}. Skipping...")
        current_date -= timedelta(days=1)
        continue

    # Fetch calls and puts data
    calls_df = fetch_option_data(symbol, trade_date, dteFrom, dteTo, deltaFrom=0, deltaTo=1, cp="C")
    puts_df = fetch_option_data(symbol, trade_date, dteFrom, dteTo, deltaFrom=-1, deltaTo=0, cp="P")

    # Concatenate the dataframes
    if not calls_df.empty or not puts_df.empty:
        combined_df = pd.concat([calls_df, puts_df], ignore_index=True)
        combined_df.to_csv(output_path, index=False)
        print(f"Combined data saved to '{output_path}'.")
    else:
        print(f"No data retrieved for {trade_date}.")

    # Move to the previous day
    current_date -= timedelta(days=1)

    time.sleep(.3)

print("Data fetching completed.")

File already exists for 2019-02-15. Skipping...
File already exists for 2019-02-14. Skipping...
File already exists for 2019-02-13. Skipping...
File already exists for 2019-02-12. Skipping...
File already exists for 2019-02-11. Skipping...
File already exists for 2019-02-08. Skipping...
File already exists for 2019-02-07. Skipping...
File already exists for 2019-02-06. Skipping...
File already exists for 2019-02-05. Skipping...
File already exists for 2019-02-04. Skipping...
File already exists for 2019-02-01. Skipping...
File already exists for 2019-01-31. Skipping...
File already exists for 2019-01-30. Skipping...
File already exists for 2019-01-29. Skipping...
File already exists for 2019-01-28. Skipping...
File already exists for 2019-01-25. Skipping...
File already exists for 2019-01-24. Skipping...
File already exists for 2019-01-23. Skipping...
File already exists for 2019-01-22. Skipping...
No data available in the downloaded file for C on 2019-01-21.
No data available in the d