# Small script to download real option data from Yahoo Finance

In [None]:
import os
import random

import numpy as np
import pandas as pd
import yfinance as yf

Download Options for S&P 500 constituents

In [None]:
# Get the list of all S&P 500 tickers from the first table on the Wikipedia page
sp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
# Wikipedia writes share classes with a dot (e.g. BRK.B) whereas Yahoo Finance
# uses a dash (BRK-B). Normalise the symbols so that these tickers are not
# lost in the price and option downloads that follow.
tickers = sp500["Symbol"].str.replace(".", "-", regex=False).tolist()


Download Closing Prices of Stocks

In [None]:
# Fetch price data for every ticker with a single yf.download call.
stock_prices = yf.download(
    # Symbols to fetch; either a list or a string is accepted.
    tickers=tickers,
    # Look-back window, used instead of start/end. Valid values:
    # 1d, 5d, 1mo, 3mo, 6mo, 1y, 2y, 5y, 10y, ytd, max (default: "1mo").
    period="1d",
    # Bar size; intraday bars are available when period < 60 days. Valid values:
    # 1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo (default: "1d").
    interval="1d",
    # Group the result by ticker so that data["SPY"] works (default: "column").
    group_by="ticker",
    # Adjust all OHLC values automatically (default: False).
    auto_adjust=True,
    # Also download pre-/post-market data (default: False).
    prepost=True,
    # Use threads for mass downloading: True, False or an integer (default: True).
    threads=True,
    # Proxy URL to use when downloading (default: None).
    proxy=None,
)

In [None]:
# Option expiry to download. This is a fixed date (it is NOT computed from
# today's date); any ticker for which the lookup below raises is reported
# and skipped.
maturity_date = "2023-05-19"

# Number of most-traded call contracts kept per ticker.
n_calls = 10

list_of_prices = []

# Loop through all tickers and build one feature vector per ticker:
# [1, 1, strikes / spot, bids / spot, asks / spot]
# NOTE(review): the meaning of the two leading 1s is not visible here --
# confirm against whatever consumes the CSV.
for ticker in tickers:
    try:
        option_chain = yf.Ticker(ticker).option_chain(maturity_date)
        # Keep the highest-volume calls, then order them by strike.
        call_options = (
            option_chain.calls
            .sort_values(by="volume", ascending=False)
            .head(n_calls)
            .sort_values(by="strike", ascending=True)
        )
        if len(call_options) != n_calls:
            # Not enough contracts for a full-length vector: skip silently.
            continue

        # Use .iloc for positional access: plain [0] on a date-indexed Series
        # relies on a deprecated integer-as-position fallback.
        price_S = stock_prices[ticker]["Close"].iloc[0]
        if not np.isfinite(price_S) or price_S <= 0:
            # Dividing by a missing/invalid spot price would yield an all-NaN
            # vector that later gets zero-filled into meaningless rows.
            print(f"No valid closing price for {ticker}")
            continue

        list_of_prices.append(np.concatenate([[1], [1],
                                              call_options["strike"] / price_S,
                                              call_options["bid"] / price_S,
                                              call_options["ask"] / price_S]))
    except Exception as exc:
        # Best effort: one failing ticker must not abort the whole download.
        # Catch Exception (not a bare except) so Ctrl-C still interrupts the
        # loop, and show the actual error instead of hiding it.
        print(f"No options data found for {ticker} ({exc!r})")

Combine 5 stocks and create dataset for training

In [None]:
# Number of per-ticker feature vectors collected in list_of_prices
no_prices = len(list_of_prices)
no_prices  # bare expression: displayed as the notebook cell's output

In [None]:
# Build the training set: every row is the concatenation of the feature
# vectors of 5 distinct, randomly chosen tickers.
df_list = []
size_of_dataset = 50000
stocks_per_sample = 5

# Fail loudly instead of looping forever when there are too few vectors to
# pick 5 distinct ones.
if no_prices < stocks_per_sample:
    raise ValueError(
        f"Need at least {stocks_per_sample} price vectors, got {no_prices}"
    )

for _ in range(size_of_dataset):
    # random.sample draws distinct indices (sampling without replacement).
    indices = random.sample(range(no_prices), stocks_per_sample)
    df_list.append(np.concatenate([list_of_prices[i] for i in indices]))

df = pd.DataFrame(df_list)
# Replace missing values (NaN) with 0.
df = df.fillna(0)

Save the training dataset

In [None]:
df  # bare expression: displays the training DataFrame as the cell's output

In [None]:
# Write the training set to disk. The output folder is created first because
# DataFrame.to_csv does not create missing directories.
os.makedirs("csv", exist_ok=True)
df.to_csv("csv/real_data.csv")

Create the Test Data

In [None]:
# Build the test set: every row is the concatenation of the feature vectors
# of 5 distinct, randomly chosen tickers.
df_list_test = []
size_of_dataset = 5000
stocks_per_sample = 5

# Fail loudly instead of looping forever when there are too few vectors to
# pick 5 distinct ones.
if no_prices < stocks_per_sample:
    raise ValueError(
        f"Need at least {stocks_per_sample} price vectors, got {no_prices}"
    )

for _ in range(size_of_dataset):
    # random.sample draws distinct indices (sampling without replacement).
    indices = random.sample(range(no_prices), stocks_per_sample)
    df_list_test.append(np.concatenate([list_of_prices[i] for i in indices]))

df_test = pd.DataFrame(df_list_test)
# Replace missing values (NaN) with 0.
df_test = df_test.fillna(0)

In [None]:
# Write the test set to disk. The output folder is created first because
# DataFrame.to_csv does not create missing directories.
os.makedirs("csv", exist_ok=True)
df_test.to_csv("csv/real_data_test.csv")