## Transform an input file and load it into BigQuery
- Calculate the equilibrium stock price where call and put implied volatility are equal
- Calculate moneyness for all strikes within +- 2 standard deviations of equilibrium price
- Calculate implied volatility of all strikes filtered above
- Generate JSON-formatted output
- Write the output file to BigQuery

## Codebase

In [8]:
#declare dependencies and constants
import pandas as pd
import datetime
import math
import mibian

In [26]:
def get_interest_rate(quote_date):
    """
    return the fed funds rate that was in effect on the supplied quote date
    - reads the FEDFUNDS csv from cloud storage on every call
    - the rate in effect is the FEDFUNDS value of the row with the latest DATE
      that is on or before quote_date
    - raises ValueError if no row is dated on or before quote_date
    """
    df_fedfunds = pd.read_csv('gs://expiry-week-data/options/FEDFUNDS.csv', parse_dates=['DATE'])

    #keep only the rates published on or before the quote date
    df_in_effect = df_fedfunds[df_fedfunds['DATE'] <= quote_date]
    if df_in_effect.empty:
        #idxmax on an empty selection fails with an unhelpful message, so fail explicitly
        raise ValueError("no fed funds rate available on or before {}".format(quote_date))

    #the most recent of the remaining rows is the rate in effect
    target_index = df_in_effect['DATE'].idxmax()
    return df_in_effect.loc[target_index, 'FEDFUNDS']


In [10]:
def calc_call_iv(stock_price, strike_price, interest_rate, days_to_expiry, call_price):
    """
    implied volatility of a call option, derived from its market price
    - the inputs are handed to mibian.BS together with the observed call price
    - return annualized implied volatility as a decimal value
    """
    model_inputs = [stock_price, strike_price, interest_rate, days_to_expiry]
    call_model = mibian.BS(model_inputs, callPrice=call_price)

    #scale the model's implied volatility down by 100 to express it as a decimal
    implied_volatility = call_model.impliedVolatility / 100
    return implied_volatility

In [11]:
def calc_put_iv(stock_price, strike_price, interest_rate, days_to_expiry, put_price):
    """
    implied volatility of a put option, derived from its market price
    - the inputs are handed to mibian.BS together with the observed put price
    - return annualized implied volatility as a decimal value
    """
    model_inputs = [stock_price, strike_price, interest_rate, days_to_expiry]
    put_model = mibian.BS(model_inputs, putPrice=put_price)

    #scale the model's implied volatility down by 100 to express it as a decimal
    implied_volatility = put_model.impliedVolatility / 100
    return implied_volatility

In [12]:
def center_underlying_price(stock_price, strike_price, interest_rate, days_to_expiry, call_price, put_price,
                            tolerance=.0001, max_iterations=100):
    """
    calculate equilibrium implied volatility and the adjusted underlying price at which it is achieved
    this is the underlying price at which both call and put implied volatility are equal
    note: interest_rate needs be passed as a percent (e.g 5 = 5%)

    the adjusted price is found by bisection, starting from stock_price, inside the bracket
    stock_price * exp(-d) .. stock_price * exp(+d), where d is the initial call/put implied
    volatility gap scaled by sqrt(days_to_expiry / 365)

    - tolerance: largest call/put implied volatility gap (decimal) that counts as converged
    - max_iterations: number of candidate prices evaluated before giving up
    - return a tuple (adjusted underlying price, implied volatility at that price)
    - raises RuntimeError (an Exception subclass) if the gap is still above tolerance
      after max_iterations evaluations
    """
    #get starting call and put implied volatility at the quoted stock price
    call_iv = calc_call_iv(stock_price, strike_price, interest_rate, days_to_expiry, call_price)
    put_iv = calc_put_iv(stock_price, strike_price, interest_rate, days_to_expiry, put_price)

    #calculate implied volatility difference adjusted for time to expiry
    iv_diff = abs(put_iv - call_iv) * math.sqrt(days_to_expiry / 365)

    #search bracket around the quoted stock price
    adj_lower = stock_price * math.exp(-iv_diff)
    adj_upper = stock_price * math.exp(iv_diff)

    #the first candidate is the quoted stock price itself
    adj_stock_price = stock_price
    for _ in range(max_iterations):
        call_iv = calc_call_iv(adj_stock_price, strike_price, interest_rate, days_to_expiry, call_price)
        put_iv = calc_put_iv(adj_stock_price, strike_price, interest_rate, days_to_expiry, put_price)

        #at point of convergence call and put iv will be the same, so return either one
        if abs(put_iv - call_iv) <= tolerance:
            return adj_stock_price, call_iv

        if put_iv > call_iv:
            #adjusted stock price is too high
            adj_upper = adj_stock_price
        else:
            #adjusted stock price is too low
            adj_lower = adj_stock_price

        #next candidate is the midpoint of the narrowed bracket
        adj_stock_price = (adj_lower + adj_upper) / 2

    #throw error if convergence was not achieved
    raise RuntimeError("Put and Call implied volatilities did not converge")

In [62]:
def merge_options(df_expiry):
    """
    pair up the calls and puts of one expiry date / quote date by strike price
    - the call side keeps every input column except option_type, with bid, ask,
      volume and open_interest prefixed by call_
    - the put side contributes only bid, ask, volume and open_interest, prefixed by put_
    - the inner join keeps only strike prices quoted on both the call and the put side,
      giving one straddle row per strike
    """
    call_rows = df_expiry['option_type'] == 'call'
    put_rows = df_expiry['option_type'] == 'put'

    #call side: all columns of the call rows, quote fields renamed, option_type dropped
    calls = (
        df_expiry[call_rows]
        .reset_index(drop=True)
        .rename(columns={
            'bid': 'call_bid',
            'ask': 'call_ask',
            'volume': 'call_volume',
            'open_interest': 'call_open_interest',
        })
        .drop(columns='option_type')
    )

    #put side: only the quote fields plus the join key
    puts = (
        df_expiry.loc[put_rows, ['bid', 'ask', 'volume', 'open_interest', 'strike_price']]
        .reset_index(drop=True)
        .rename(columns={
            'bid': 'put_bid',
            'ask': 'put_ask',
            'volume': 'put_volume',
            'open_interest': 'put_open_interest',
        })
    )

    return pd.merge(calls, puts, on='strike_price', how='inner')


In [67]:
#process data for an expiry date
def process_expiry_date(df_expiry, interest_rate):
    """
    process the options of a single expiry date
    - df_expiry: call and put rows for one expiry date and quote date
    - interest_rate: rate handed on unchanged to center_underlying_price
    - prints the quoted underlying price, the adjusted (equilibrium) underlying price
      and the implied volatility at that price; nothing is returned
    """
    #one row per strike with call and put quotes side by side
    straddles = merge_options(df_expiry)

    #pick the strike that sits closest to the underlying price
    distance_to_underlying = abs(straddles['strike_price'] - straddles['underlying_price'])
    nearest = straddles.loc[distance_to_underlying.idxmin()]

    #option prices are taken as the bid/ask midpoint at that strike
    call_mid = (nearest['call_bid'] + nearest['call_ask']) / 2
    put_mid = (nearest['put_bid'] + nearest['put_ask']) / 2
    time_left = nearest['expiry_date'] - nearest['quote_date']

    #adjusted underlying price and implied volatility where call and put implied volatility match
    atm_price, atm_iv = center_underlying_price(
        nearest['underlying_price'],
        nearest['strike_price'],
        interest_rate,
        time_left.days,
        call_mid,
        put_mid,
    )

    print(nearest['underlying_price'], atm_price, atm_iv)
    
    
    
    
    

In [68]:
#trial run on the sample data: interest rate is given in percent (1.55 = 1.55%),
#the unit center_underlying_price expects
interest_rate = 1.55 
#df_expiry is the single-expiry sample frame built in the Code Exploration Section
process_expiry_date(df_expiry, interest_rate)

322.43 322.6670918296397 0.1116943359375


In [18]:
def process_quote_date(df_daily, quote_date):
    """
    process options for a given quote date
    - df_daily: option rows for the quote date; must contain an expiry_date column
    - quote_date: date used to look up the interest rate in effect
    - each distinct expiry date is handed to process_expiry_date in turn; nothing is returned
    """
    #get fed funds rate that was effective on the supplied quote date
    interest_rate = get_interest_rate(quote_date)

    #get list of expiry dates
    expiry_dates = df_daily['expiry_date'].unique()

    #process each expiry date
    for expiry_date in expiry_dates:
        #select this expiry's rows from df_daily and renumber them from zero
        df_expiry = df_daily[df_daily['expiry_date'] == expiry_date].reset_index(drop=True)
        process_expiry_date(df_expiry, interest_rate)

    

In [17]:
def main(file_path):
    """
    load an options input file and process it one quote date at a time
    file path is path to options input file
    example: gs://expiry-week-data/options/options/SPY_2020.csv
    """
    #load file into a dataframe, keeping only the listed column positions
    input_columns = [0,1,5,6,7,8,10,11,12,13]

    #names given to the selected columns; header=0 replaces the file's own header row
    column_names = ['underlying_symbol', 'underlying_price', 'option_type', 'expiry_date', 'quote_date',
        'strike_price', 'bid', 'ask', 'volume', 'open_interest']

    df_options = pd.read_csv(file_path, usecols=input_columns, names=column_names, header=0, parse_dates=[3,4])

    #get unique list of quote dates
    quote_dates = df_options['quote_date'].unique()

    #process each quote date
    for quote_date in quote_dates:
        #all option rows quoted on this date, across every expiry
        df_daily = df_options[df_options['quote_date'] == quote_date]
        process_quote_date(df_daily, quote_date)
    
    
    

## Code Exploration Section

In [19]:
#load sample options file from cloud storage
#load file into a dataframe, keeping only the listed column positions
file_path  = 'gs://expiry-week-data/options/sample_20200103.csv'
input_columns = [0,1,5,6,7,8,10,11,12,13]

#names given to the selected columns; header=0 below replaces the file's own header row
column_names = ['underlying_symbol', 'underlying_price', 'option_type', 'expiry_date', 'quote_date',
    'strike_price', 'bid', 'ask', 'volume', 'open_interest']

#NOTE(review): parse_dates=[3,4] is presumably meant to parse expiry_date and quote_date
#(positions 3 and 4 of column_names) - confirm how it interacts with usecols
df_options = pd.read_csv(file_path, usecols=input_columns, names=column_names, header=0, parse_dates=[3,4])

#get set of options for one expiry date for the sample quote date

df_expiry = df_options[df_options['expiry_date'] == '2020-01-10'].reset_index(drop=True)
#preview the first rows of the selection as the cell output
df_expiry.head() 




Unnamed: 0,underlying_symbol,underlying_price,option_type,expiry_date,quote_date,strike_price,bid,ask,volume,open_interest
0,SPY,322.43,call,2020-01-10,2020-01-03,265.0,57.64,57.88,0,0
1,SPY,322.43,put,2020-01-10,2020-01-03,265.0,0.01,0.02,7,7282
2,SPY,322.43,call,2020-01-10,2020-01-03,270.0,52.64,52.88,0,0
3,SPY,322.43,put,2020-01-10,2020-01-03,270.0,0.01,0.02,3000,13675
4,SPY,322.43,call,2020-01-10,2020-01-03,275.0,47.66,47.87,0,0
