## Option Chain Analysis

In [1]:
import time, os, json, logging
import pandas as pd
import numpy as np
import datetime as dt
from helpFunc import *
import matplotlib.pyplot as plt
# Apply matplotlib's 'ggplot' style sheet so every figure drawn below shares one look.
plt.style.use('ggplot')

In [2]:
# inputs
# Symbol whose option-chain snapshots are analysed; it is also used to strip
# the leading "<ticker> " prefix from the snapshot file names.
ticker = 'SPY'

# Both names refer to the same snapshot directory.
folder_path = dir_path = 'D:/Database/Option data/Json/2021/'

# Calendar days to load.
date_list = [dt.date(year=2021, month=4, day=1)]

In [3]:
# List every entry in the snapshot folder and print the first one as a quick
# sanity check of the file-name format that the next cell parses.
# NOTE(review): os.listdir does not guarantee any ordering, so "first" may
# differ between platforms; indexing [0] also raises IndexError on an empty folder.
dir_list = os.listdir(folder_path)
x = dir_list[0]
print(x)

SPY Apr 1 06_25_01 2021 Thu.txt


In [4]:
def _snapshot_date(file_name, symbol):
    """Return the date encoded in a snapshot file name, or None if it does not parse.

    File names are expected to look like ``'SPY Apr 1 06_25_01 2021 Thu.txt'``:
    the symbol and a space, then ``'%b %d %H_%M_%S %Y'``, then an 8-character
    tail (space, three-letter weekday, ``.txt``).
    """
    stamp = file_name[len(symbol) + 1:-8]
    try:
        return dt.datetime.strptime(stamp, '%b %d %H_%M_%S %Y').date()
    except ValueError:
        # Not a snapshot file (or a differently formatted one): skip it
        # instead of aborting the whole listing.
        return None


# Keep only this ticker's snapshots taken on one of the requested dates.
# The explicit prefix check stops another symbol of the same length from
# being parsed as if it belonged to `ticker`.
file_list = [
    name for name in dir_list
    if name.startswith(ticker + ' ') and _snapshot_date(name, ticker) in date_list
]

In [5]:
%%time
df = day_chain(ticker,folder_path,date_list)
call_raw_df = df[df['putCall']=='CALL']
put_raw_df = df[df['putCall']=='PUT']

Wall time: 38.7 s


In [None]:
# %%time
# read data
# how can this process be sped up?
# idea 1: a faster JSON loader for reading through multiple files
# idea 2: a faster method / algorithm for walking through the nested dictionaries
# current speed: 40 seconds per day (82 files, 750 MB)


# tup = readChain(ticker, folder_path, date_list)

# call_raw_df = tup[0]
# call_raw_df.set_index('Date', inplace = True)
# put_raw_df = tup[1]
# put_raw_df.set_index('Date', inplace = True)

# call_raw_df.head()

In [None]:
# Cast every column except the last one to float64.
# Building a new frame with `astype` (rather than assigning through
# `.iloc[:, :-1] = ...`) guarantees the column dtypes actually change on every
# pandas version, avoids writing into a frame derived from `df`, and makes the
# cell safe to re-run.
# NOTE(review): assumes column labels are unique and that the last column is
# the only non-numeric one — confirm against day_chain's output.
call_raw_df = call_raw_df.astype({col: 'float64' for col in call_raw_df.columns[:-1]})
put_raw_df = put_raw_df.astype({col: 'float64' for col in put_raw_df.columns[:-1]})

### Plot volume (both difference and cumulative)

#### Plot aggregate volume and put-call ratio over time

In [None]:
# cumulative volume
# Sum call / put volume into fixed-width time buckets over all rows up to `timestamp`.
# NOTE(review): `timestamp` is not assigned in any visible cell; presumably it
# comes from the helpFunc star import or from earlier kernel state — confirm,
# otherwise this cell fails on a fresh kernel.
frequency = '10min'

# Select the volume column *before* aggregating: only that column is summed,
# instead of summing every column (including non-numeric ones) and discarding
# all but one.
volume_sum_call = (
    call_raw_df.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_c']
    .sum()
)
volume_sum_put = (
    put_raw_df.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_p']
    .sum()
)

In [None]:
# Draw the bucketed call and put volume series on one 8x5 figure.
plt.figure(figsize=(8, 5))
plt.plot(volume_sum_call.index, volume_sum_call, label='Call volume')
plt.plot(volume_sum_put.index, volume_sum_put, label='Put volume')
plt.legend()
plt.title('Put Call cumulative volume')
plt.show()

In [None]:
# put-call ratio
# Bucket-by-bucket put volume divided by call volume (aligned on the time index).
ratio = volume_sum_put / volume_sum_call
plt.figure(figsize=(8, 5))
plt.plot(ratio)
# Title typo fixed: 'raio' -> 'ratio'.
plt.title('Put Call ratio')
plt.show()

#### Near the money volume, integer strike volume, far from the money volume surprises

In [None]:
# Near the money
# Keep contracts whose |delta| lies in [0.40, 0.60] (both ends inclusive), then
# sum their volume per time bucket for rows up to `timestamp`.
# NOTE(review): `timestamp` is not assigned in any visible cell — confirm where it comes from.
frequency = '10min'

call_ntm = call_raw_df.loc[call_raw_df['delta_c'].between(0.40, 0.60)]
put_ntm = put_raw_df.loc[put_raw_df['delta_p'].between(-0.60, -0.40)]

# Select the volume column before aggregating so that only it is summed.
volume_ntm_call = (
    call_ntm.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_c']
    .sum()
)
volume_ntm_put = (
    put_ntm.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_p']
    .sum()
)

In [None]:
# ntm put call volume
# One line per side; the legend labels are attached to the lines themselves.
plt.figure(figsize=(8, 5))
plt.plot(volume_ntm_call.index, volume_ntm_call, label='NTM call volume')
plt.plot(volume_ntm_put.index, volume_ntm_put, label='NTM put volume')
plt.legend()
plt.title('Near-The-Money Put Call volume')
plt.show()

In [None]:
# ntm put-call ratio
# Near-the-money put volume over call volume, bucket by bucket.
ratio_ntm = volume_ntm_put.div(volume_ntm_call)
plt.figure(figsize=(8, 5))
plt.plot(ratio_ntm.index, ratio_ntm)
plt.title('Near-The-Money Put Call ratio')
plt.show()

In [None]:
# Integer strike
# Restrict both sides to a hand-picked list of strikes and sum their volume per
# time bucket. `frequency` and `timestamp` are taken from earlier cells.
# NOTE(review): `timestamp` is not assigned in any visible cell — confirm where it comes from.
# strike_list = np.arange(370, 385, 5)
# NOTE(review): 390 is absent from the list below — confirm this is intentional.
strike_list = [370, 375, 380, 385, 395]

call_int = call_raw_df.loc[call_raw_df['strike'].isin(strike_list)]
put_int = put_raw_df.loc[put_raw_df['strike'].isin(strike_list)]

# Select the volume column before aggregating so that only it is summed.
volume_int_call = (
    call_int.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_c']
    .sum()
)
volume_int_put = (
    put_int.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_p']
    .sum()
)

In [None]:
# Integer put call volume
# Call and put volume for the selected strikes, one line each.
plt.figure(figsize=(8, 5))
plt.plot(volume_int_call.index, volume_int_call, label='Call volume')
plt.plot(volume_int_put.index, volume_int_put, label='Put volume')
plt.legend()
plt.title('Put Call volume for NTM Integer Strikes')
plt.show()

In [None]:
# Integer put-call ratio
# Put volume over call volume for the selected strikes, bucket by bucket.
ratio_int = volume_int_put / volume_int_call
plt.figure(figsize=(8, 5))
plt.plot(ratio_int)
# Title typo fixed: 'raio' -> 'ratio'.
plt.title('Put Call ratio for NTM Integer Strikes')
plt.show()

In [None]:
# Far from the money put call ratio
# Complement of the near-the-money band: |delta| strictly outside [0.40, 0.60].
# The comparisons are strict so that rows sitting exactly on 0.40 / 0.60, which
# the near-the-money selection already includes, are not counted in both groups.
# `frequency` and `timestamp` are taken from earlier cells.
# NOTE(review): `timestamp` is not assigned in any visible cell — confirm where it comes from.
call_fotm = call_raw_df.loc[(call_raw_df['delta_c'] < 0.40) | (call_raw_df['delta_c'] > 0.60)]
put_fotm = put_raw_df.loc[(put_raw_df['delta_p'] > -0.40) | (put_raw_df['delta_p'] < -0.60)]

# Select the volume column before aggregating so that only it is summed.
volume_fotm_call = (
    call_fotm.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_c']
    .sum()
)
volume_fotm_put = (
    put_fotm.loc[:timestamp]
    .groupby(pd.Grouper(freq=frequency, dropna=True))['volume_p']
    .sum()
)

In [None]:
# Far-from-the-money call and put volume, one line each.
plt.figure(figsize=(8, 5))
plt.plot(volume_fotm_call.index, volume_fotm_call, volume_fotm_put.index, volume_fotm_put)
plt.legend(['FOTM call volume', 'FOTM put volume'])
# The selection feeding this plot keeps both low-|delta| and high-|delta|
# contracts, so the title says "far from the money" (matching the section
# heading) rather than "out of the money".
plt.title('Far-From-The-Money Put Call volume')
plt.show()

In [None]:
# Far-from-the-money put volume over call volume, bucket by bucket.
ratio_fotm = volume_fotm_put / volume_fotm_call
plt.figure(figsize=(8, 5))
plt.plot(ratio_fotm)
# The underlying selection includes high-|delta| contracts as well, so the
# title says "far from the money" rather than "out of the money".
plt.title('Far-From-The-Money Put Call ratio')
plt.show()

### Plot price

### Volume surge monitor (original version)

In [None]:
# Preview the first five rows of the call-side frame.
call_raw_df.head(n=5)

In [None]:
# Narrow both sides to a single day via label-based selection on the index.
# NOTE(review): the inputs cell loads 2021-04-01 only, but this selects
# 2021-05-11; presumably this section was last run against a different
# date_list — confirm, since the lookup fails if that day is not loaded.
call_df, put_df = call_raw_df.loc['2021-05-11'], put_raw_df.loc['2021-05-11']

In [None]:
# Ask the project helpers (star-imported from helpFunc) for the top-ranked
# expiry and strike of the call side; element 0 of each returned value is reported.
# NOTE(review): the ranking criterion behind method='max' is defined in helpFunc — confirm.
call_exp_tup = rankExpiry(call_df, method = 'max')
call_expiry = call_exp_tup[0]
# f-strings with !s apply str() to both values, so the expiry line no longer
# raises TypeError when the helper returns a non-string (e.g. a date object).
print(f'highest rank expiry date is: {call_expiry!s}')
call_strike_tup = rankStrike(call_df, method = 'max')
call_strike = call_strike_tup[0]
print(f'highest rank strike is: {call_strike!s}')

In [None]:
# call_df.Expiry.tail(100)

In [None]:
# Strikes used by the single-contract filters below. Note that this overrides
# the `call_strike` computed from rankStrike in the earlier cell.
call_strike, put_strike = 425.0, 410.0

In [None]:
# Daily put/call volume ratio: total put volume per calendar day divided by
# total call volume per calendar day. The volume column is selected before
# aggregating so that only it is summed.
(
    put_df.groupby(pd.Grouper(freq='D'))['volume_p'].sum()
    / call_df.groupby(pd.Grouper(freq='D'))['volume_c'].sum()
)

In [None]:
# Expiry (ISO date string) compared against the 'Expiry' column in the filters below.
exp_date = "2021-05-21"

In [None]:
# Isolate one contract per side: rows matching both the chosen expiry and the
# chosen strike.
call_fil_df = call_df[call_df['Expiry'].eq(exp_date) & call_df['strike'].eq(call_strike)]
put_fil_df = put_df[put_df['Expiry'].eq(exp_date) & put_df['strike'].eq(put_strike)]

In [None]:
# For the chosen expiry, add up every column across all rows that share the
# same 'Date' index value.
call_exp_rows = call_df['Expiry'] == exp_date
put_exp_rows = put_df['Expiry'] == exp_date
call_group_df = call_df[call_exp_rows].groupby(level='Date').sum()
put_group_df = put_df[put_exp_rows].groupby(level='Date').sum()

In [None]:
# Put/call volume ratio at the last 'Date' group of the chosen expiry.
# .iloc[-1] is explicit positional access; plain [-1] on a label-indexed Series
# relies on deprecated positional fallback.
put_group_df['volume_p'].iloc[-1] / call_group_df['volume_c'].iloc[-1]

In [None]:
# Lift the row-display limit (a global pandas setting), then show the
# row-to-row change in volume for the selected call contract.
pd.options.display.max_rows = None
call_fil_df['volume_c'].diff()

In [None]:
# Lift the row-display limit (a global pandas setting), then show the
# row-to-row change in volume for the selected put contract.
pd.options.display.max_rows = None
put_fil_df['volume_p'].diff()

In [None]:
# Plot the row-to-row volume change of the selected call contract.
plt.plot(call_fil_df['volume_c'].diff())

In [None]:
# Plot the row-to-row volume change of the selected put contract.
plt.plot(put_fil_df['volume_p'].diff())