In [157]:
import pandas as pd
import numpy as np
import datetime
from datetime import timedelta
from pandas.io.json import json_normalize
import statistics as stats

In [158]:
url = 'https://query2.finance.yahoo.com/v7/finance/options/'

In [175]:
def get_dates():
    
    global timeframe
    global sym_url
    
    sym = '^SPX'
    sym_url = url + sym
    
    df = pd.read_json(sym_url)
    
    expirations = json_normalize(data = df['optionChain']['result'], record_path = 'expirationDates')
    expirations = pd.DataFrame(expirations)
    expirations = expirations.rename(index=str, columns={0: 'unix_time'})
    
    utc_time_difference = 7
    
    expirations['date'] = [datetime.datetime.fromtimestamp(_ + (utc_time_difference * 60 * 60)).strftime('%Y-%m-%d') for _ in expirations['unix_time']]
    expirations['dte'] = [((datetime.datetime.strptime(_, '%Y-%m-%d') - datetime.datetime.now()).days + 1) for _ in expirations['date']]
    expirations['delta_30d'] = abs(expirations['dte'] - 30)
    
    timeframe = expirations.loc[(expirations['dte'] > 23) & (expirations['dte'] < 37)]
    timeframe = timeframe.sort_values('delta_30d')[0:2]
    
    print('Expirations:', timeframe, '\n')

In [176]:
def make_df(timeframe, sym_url):
    
    global df
    
    optionschain = []
    
    for date in timeframe['unix_time']:
        
        newurl = sym_url + '?date=' + str(date)
        
        temp_df = pd.read_json(newurl)
        temp_df = json_normalize(data = temp_df['optionChain']['result'], record_path = 'options')
        
        for _ in temp_df.calls[0]:

            _.update({'type': 'call',
                      'date': timeframe.loc[timeframe['unix_time'] == date, 'date'].values[0],
                      'dte': timeframe.loc[timeframe['unix_time'] == date, 'dte'].values[0]})
            
            optionschain.append(_)

        for _ in temp_df.puts[0]:

            _.update({'type': 'put',
                     'date': timeframe.loc[timeframe['unix_time'] == date, 'date'].values[0],
                      'dte': timeframe.loc[timeframe['unix_time'] == date, 'dte'].values[0]})
            
            optionschain.append(_)
            
    df = pd.DataFrame(optionschain)
    df = df.drop(columns=['contractSize', 'currency', 'change', 'percentChange', 'impliedVolatility', 'lastPrice', 'lastTradeDate', 'openInterest', 'volume'])
            
    #print(df)

In [177]:
def is_third_fri(date):
    
    d = datetime.datetime.strptime(date, '%Y-%m-%d')
    return d.weekday() == 4 and 15 <= d.day <= 21

In [178]:
def get_minutes(timeframe):
    
    # Returns t1 and t2 in minutes.
    
    global n30
    global n365
    
    n30 = 30 * 24 * 60
    n365 = 365 * 24 * 60
    
    now = datetime.datetime.now()
    midnight = datetime.datetime(now.year, now.month, now.day) + timedelta(days=1)

    m1 = (midnight - now)
    m1 = m1.seconds / 60
    m2 = 510
    m3 = 900
    
    for date in timeframe['date']:
    
        m4 = timeframe.loc[timeframe['date'] == date, 'dte'].values[0] * 24 * 60

        if is_third_fri(date) == True:

            t = (m1 + m2 + m4) / n365

        else:

            t = (m1 + m3 + m4) / n365
            
        df.loc[df['date'] == date, 'minutes'] = t
        
        #print(date, 't:', round(t, 4))

In [179]:
def get_rf():
    
    # Currently hardcoded. Risk-free rate should come from daily yield curve (at the points of expiration).
    
    global rf
    
    rf = 0.00019
    
    return rf

In [180]:
def get_fward(rf):
    
    df['mid'] = (df['bid'] + df['ask']) / 2
    
    for date in df['date'].unique():
        
        for k in df.loc[df['date'] == date, 'strike']:
            
            if len(df.loc[(df['date'] == date) & (df['strike'] == k)]) == 2:
                
                #print(df.loc[(df['date'] == date) & (df['strike'] == k)])
            
                c_mid = df.loc[(df['date'] == date) & (df['strike'] == k) & (df['type'] == 'call'), 'mid'].values[0]
                p_mid = df.loc[(df['date'] == date) & (df['strike'] == k) & (df['type'] == 'put'), 'mid'].values[0]

                pc_spread = abs(c_mid-p_mid)
                
                df.loc[(df['date'] == date) & (df['strike'] == k), 'put_call_spread'] = pc_spread
                
            else:
                
                #print('no k for puts and calls inclusive')

                pass
            
        put_call_min = put_call_min = np.nanmin(df.loc[df['date'] == date, 'put_call_spread'])
        f = df.loc[(df['date'] == date) & (df['put_call_spread'] == put_call_min), 'strike'].values[0]
        
        t = df.loc[df['date'] == date, 'minutes'].values[0]
        
        f = f + np.e ** (rf * t) * put_call_min
        
        df.loc[df['date'] == date, 'f'] = f
        df.loc[df['date'] == date, 'f-k'] = f - df.loc[df['date'] == date, 'strike']
        min_fk_delta = min(df.loc[(df['date'] == date) & (df['f-k'] > 0), 'f-k'])
        k0 = df.loc[(df['date'] == date) & (df['f-k'] == min_fk_delta), 'strike'].values[0]
        df.loc[df['date'] == date, 'k0'] = k0
        
        #print(date, 'f:', f, 'put_call_min:', put_call_min, 'k0:', k0)

In [181]:
def k_checker(data):

    for _ in range(len(data)):
        
        try:
            
            if data.loc[_+1, 'bid'] == 0 and data.loc[_+2, 'bid'] == 0:
                data.loc[_, 'include'] == 'yes'
                data.loc[_, '2x0'] = 'yes'
                
            if data.loc[_, 'bid'] == 0:
                data.loc[_, 'include'] = 'no'
            
            else:
                data.loc[_, 'include'] = 'yes'
                
        except KeyError:
            
            data.loc[_, 'keyerror'] = 'yes'
            
            if data.loc[_, 'bid'] == 0:
                data.loc[_, 'include'] = 'no'
            
            else:
                data.loc[_, 'include'] = 'yes'
    
    if '2x0' in [i for i in data]:
        
        if 'yes' in data['2x0']:
            
            in_range_k = data[0:min(data[data['2x0'] == 'yes'].index) + 1]
            k_select = in_range_k[in_range_k['include'] == 'yes']
            
        else:
            
            k_select = data[data['include'] == 'yes']
            
    else:
        
        k_select = data[data['include'] == 'yes']
        
    return k_select


In [182]:
def get_ks():
    
    # Go thru put strikes < k0. Exclude strikes with bid 0. 
    
    global kl
    
    kl = pd.DataFrame()
    
    for date in df['date'].unique():
        
        k_list = pd.DataFrame()
        
        for opt in df.loc[df['date'] == date, 'type'].unique():
            
            temp_df = df[(df['date'] == date) & (df['type'] == opt)]
            
            # This goes thru the puts and calls, and checks them with k_checker(), then concats them to a new list.
            
            if opt == 'put':
                
                temp_df = temp_df[temp_df['strike'] < temp_df['k0']].sort_values('strike', ascending = False)
                temp_df = temp_df.reset_index()
                
                puts = k_checker(temp_df)
                
                if k_list.empty == True:
                    
                    k_list = pd.DataFrame(puts)

                elif k_list.empty == False:
                    
                    k_list = pd.concat([k_list, puts], sort=True)
                                      
            if opt == 'call':
            
                temp_df = temp_df[temp_df['strike'] > temp_df['k0']].sort_values('strike')
                temp_df = temp_df.reset_index()
                
                k_checker(temp_df)
                
                calls = k_checker(temp_df)
                
                if k_list.empty == True:

                    k_list = pd.DataFrame(calls)
                    
                elif k_list.empty == False:
                    
                    k_list = pd.concat([k_list, calls], sort=True)
           
        k0 = stats.mode(df.loc[df['date'] == date, 'k0'])
        k0_avg = stats.mean(df.loc[(df['date'] == date) & (df['strike'] == k0), 'mid'])
        k0 = pd.DataFrame([[k0, k0_avg, date]], columns = ['strike', 'mid', 'date'])
        
        k_list = pd.concat([k_list, k0], sort=True)
        
        k_list = k_list[['strike', 'mid', 'date', 'put_call_spread']].sort_values(by=['strike'])
        
        # Note: concatted three df's, now have an index with three 0's. This gets reset in the next function.
        
        kl = pd.concat([kl, k_list])

In [183]:
def get_vol():
    
    global vix
    
    vdf = pd.DataFrame()
    
    for date in kl['date'].unique():
        
        temp_df = kl[kl['date'] == date].reset_index(drop=True)
        
        # This gets strike increments.
        
        for i in range(len(temp_df)):

            if i == 0:

                kdelta = (max(temp_df.iloc[0:2]['strike'].values) - min(temp_df.iloc[0:2]['strike'].values)) / (len(temp_df.iloc[0:2]['strike'].values) - 1)
                temp_df.loc[i, 'kdel'] = kdelta

            elif i == len(temp_df):

                kdelta = (max(temp_df.iloc[i-1:len(temp_df)]['strike'].values) - min(temp_df.iloc[i-1:len(temp_df)]['strike'].values)) / (len(temp_df.iloc[i-1:len(temp_df)]['strike'].values) - 1)
                temp_df.loc[i, 'kdel'] = kdelta

            else:

                kdelta = (max(temp_df.iloc[i-1:i+2]['strike'].values) - min(temp_df.iloc[i-1:i+2]['strike'].values)) / (len(temp_df.iloc[i-1:i+2]['strike'].values) - 1)
                temp_df.loc[i, 'kdel'] = kdelta
                
            # This gets contribution factors
            
            ct = (kdelta / (temp_df.loc[i, 'strike'] ** 2)) * (np.e ** (rf * stats.mode(df.loc[df['date'] == date, 'minutes']))) * (temp_df.loc[i, 'mid'])
            
            temp_df.loc[i, 'ct'] = ct
                     
        m = stats.mode(df.loc[df['date'] == date, 'minutes'])
        f = stats.mode(df.loc[df['date'] == date, 'f'])
        k0 = stats.mode(df.loc[df['date'] == date, 'k0'])
        
        #print('m, f, k0:', m, f, k0)
        
        adj = (1/m) * (((f/k0)-1)**2)
        
        vol = (2/m) * sum(temp_df.loc[temp_df['date'] == date, 'ct'])
        
        #print(date, 'vol (unadj):', vol)
            
        vol = vol - adj
        
        #print(date, 'adj factor:', adj)
        #print(date, 'adj sigma/vol:', vol, '\n')
        
        temp_df.loc[temp_df['date'] == date, 'vol'] = vol
        
        vdf = pd.concat([vdf, temp_df])
    
    time_sum = []
    
    for date in vdf['date'].unique():
        
        t = stats.mode(df.loc[df['date'] == date, 'minutes'])    
        sig = stats.mode(vdf.loc[vdf['date'] == date, 'vol'])
        
        # This is a temporary fix for only having one expiration... not accurate. 
        
#         if len(vdf['date'].unique()) == 1:
#             #print('only one date')
#             #print(((t*sig) * (n365/n30)) ** 0.5)            
#             vix = ((t*sig) * (n365/n30)) ** 0.5
#             vix = vix * 100            
#             print('vix:', vix)
#             return vix
        
        # For two expirations.
            
#         else:

        if max(df['minutes'].unique()) == t:

            weight = (n30 - (min(df['minutes']) * n365)) / ((max(df['minutes']) * n365) - (min(df['minutes']) * n365))
            weight_t2 = t * sig * weight
            time_sum.append(weight_t2)

            #print('t*sigma*time weight ({}) / {} * {} * {} /'.format(date, round(t,2), round(sig, 5), round(weight, 2)), round(weight_t2, 7), '\n')

        elif min(df['minutes'].unique()) == t:

            weight = ((max(df['minutes']) * n365) - n30) / ((max(df['minutes']) * n365) - (min(df['minutes']) * n365))
            weight_t1 = t * sig * weight
            time_sum.append(weight_t1)
                
                #print('t*sigma*time weight ({}) / {} * {} * {} /'.format(date, round(t,2), round(sig, 5), round(weight, 2)), round(weight_t1, 7), '\n')

    #print('Sum weighted vol terms: \n', sum(time_sum), '\n')
    #print('Sum weighted vol * n365/n30: \n', sum(time_sum), '*', n365/n30, '\n')
    #print('Sqrt sum weighted vol * n365/n30: \n', (sum(time_sum) * (n365/n30)) ** 0.5, '\n')

    vix = (sum(time_sum) * (n365/n30)) ** 0.5
    vix = vix * 100

    print('VIX:', round(vix, 2))
    
    cur_vix = pd.read_json('https://query2.finance.yahoo.com/v7/finance/quote?symbols=^vix')['quoteResponse']['result'][0]
    cur_vix = cur_vix['regularMarketPrice']
    print('Current VIX:', cur_vix)

    # Returns our error from the most recent VIX quote.

    print('% error from VIX', str(round(abs((vix/cur_vix) - 1)*100, 2))+ '%')

    return vix

def vol_calc():
    
    get_dates()
    make_df(timeframe, sym_url)
    get_minutes(timeframe)
    get_rf()
    get_fward(rf)
    get_ks()
    get_vol()
    
vol_calc()

Expirations:     unix_time        date  dte  delta_30d
7  1536278400  2018-09-07   28          2
8  1536883200  2018-09-14   35          5 

VIX: 13.14
Current VIX: 13.49
% error from VIX 2.61%
