### BigQuery Table Definitions for Stock Market Project


In [1]:
# Dependencies and notebook-wide constants.
from google.cloud import bigquery

# Google Cloud project ID used when building BigQuery clients and dataset IDs.
PROJECT_ID = 'expiry-week'


In [6]:
def create_dataset(dataset_name):
    """Create a BigQuery dataset in the project identified by PROJECT_ID.

    Args:
        dataset_name: Name of the dataset to create, without the project
            prefix; the fully qualified ID is built as
            "<PROJECT_ID>.<dataset_name>".

    The dataset is created in the 'US' location and the API call uses a
    30 second timeout. A confirmation message is printed on success.
    """
    # Build the client locally (as create_option_quotes_table does) instead of
    # relying on a notebook-global `bq_client`, which no visible cell defines
    # and which made this function fail with NameError on a fresh kernel.
    client = bigquery.Client(project=PROJECT_ID)

    dataset_id = "{}.{}".format(PROJECT_ID, dataset_name)
    dataset = bigquery.Dataset(dataset_id)
    dataset.location = 'US'
    client.create_dataset(dataset, timeout=30)  # API request
    print("Dataset {} has been created".format(dataset_id))
    

In [5]:
def create_option_quotes_table(stock_symbol):
    """Create a BigQuery table in the option_quotes dataset for the supplied symbol.

    The table is named after the upper-cased symbol, is day-partitioned on
    ``partition_date`` and clustered on ``expiry_date``. Each row carries the
    quote-level fields plus a repeated ``strike_prices`` record holding the
    per-strike call and put figures.

    Args:
        stock_symbol: Ticker symbol; upper-cased to form the table name.

    Prints the full table ID once the table has been created.
    """
    client = bigquery.Client(project=PROJECT_ID)
    # Client.dataset() is deprecated in google-cloud-bigquery; build the
    # reference explicitly from the same project the client is bound to.
    dataset_ref = bigquery.DatasetReference(PROJECT_ID, 'option_quotes')

    # Per-strike figures stored inside the repeated `strike_prices` record.
    strike_price_fields = [
        bigquery.SchemaField("strike_price", "NUMERIC", mode="REQUIRED", description="price at which an option can be excercised"),
        bigquery.SchemaField("call_bid", "NUMERIC", mode="REQUIRED", description="closing bid price for the call option"),
        bigquery.SchemaField("call_ask", "NUMERIC", mode="REQUIRED", description="closing ask price for the call option"),
        bigquery.SchemaField("call_volume", "NUMERIC", mode="REQUIRED", description="number of call contracts traded"),
        bigquery.SchemaField("call_open_interest", "NUMERIC", mode="REQUIRED", description="number of open call contracts"),
        bigquery.SchemaField("call_moneyness", "NUMERIC", mode="REQUIRED", description="probability of call option closing in the money on expiry date (based on atm_iv)"),
        bigquery.SchemaField("call_iv", "NUMERIC", mode="REQUIRED", description="implied volatiity of the call option using the midpoint between the call bid and call ask price"),
        bigquery.SchemaField("put_bid", "NUMERIC", mode="REQUIRED", description="closing bid price for the put option"),
        bigquery.SchemaField("put_ask", "NUMERIC", mode="REQUIRED", description="closing ask price for the put option"),
        bigquery.SchemaField("put_volume", "NUMERIC", mode="REQUIRED", description="number of put contracts traded"),
        # Fixed copy-paste error: this description previously said "call contracts".
        bigquery.SchemaField("put_open_interest", "NUMERIC", mode="REQUIRED", description="number of open put contracts"),
        bigquery.SchemaField("put_moneyness", "NUMERIC", mode="REQUIRED", description="probability of put option closing in the money on expiry date (based on atm_iv)"),
        bigquery.SchemaField("put_iv", "NUMERIC", mode="REQUIRED", description="implied volatiity of the put option using the midpoint between the put bid and put ask price"),
    ]

    schema = [
        bigquery.SchemaField("partition_date", "DATE", mode="REQUIRED", description="constant date used to send all records to the same partition (1970-01-01)"),
        bigquery.SchemaField("quote_date", "DATE", mode="REQUIRED", description="market closing date for which the data was captured"),
        bigquery.SchemaField("expiry_date", "DATE", mode="REQUIRED", description="date on which the options expiry"),
        bigquery.SchemaField("days_to_expiry", "INT64", mode="REQUIRED", description="calendar days between quote_date and expiry_date not including the quote_date"),
        bigquery.SchemaField("underlying_price", "NUMERIC", mode="REQUIRED", description="the closing price of the underlying asset"),
        bigquery.SchemaField("atm_price", "NUMERIC", mode="REQUIRED", description="adjusted underlying price such that the implied volatility of calls an puts are the same"),
        bigquery.SchemaField("atm_iv", "NUMERIC", mode="REQUIRED", description="the implied volatility for both calls and puts using the atm_price as the current price"),
        bigquery.SchemaField(
            "strike_prices",
            "RECORD",
            mode="REPEATED",
            description="strike prices within 2 standard deviations of the at the money price (atm_price)",
            fields=strike_price_fields,
        ),
        bigquery.SchemaField("sampling_key", "NUMERIC", mode="REQUIRED", description="a random number between 0 and 1. Facilitates repeatable data sampling without the need for a hash key ")
    ]

    table_ref = dataset_ref.table(stock_symbol.upper())
    table = bigquery.Table(table_ref, schema=schema)

    # Day partitioning keyed on partition_date; per the field description that
    # column holds a constant date, so all rows land in the same partition.
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="partition_date")

    table.clustering_fields = ['expiry_date']
    table = client.create_table(table)  # API request

    print("Created table {}".format(table.full_table_id))

In [6]:
# Create the option quotes table for the SPY symbol (issues a BigQuery API request).
create_option_quotes_table('SPY')

Created table expiry-week:option_quotes.SPY
