# Volumes and OI Data Exploration

### Import libraries and Scripts for data

In [1]:
import os
import datetime
from datetime import timedelta
import quandl
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import plotnine as p9
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import zscore

import warnings
import functools
import wrds
# Ignore all warnings
warnings.filterwarnings("ignore")

## Data Fetching for S&P Index and Options 

In [3]:
db = wrds.Connection()

WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [6]:
spx_options = {} 
for year in range(2010, 2024):  # Loop from 2001 to 2023
    table_name = f"optionm.opprcd{year}"  # Generate table name dynamically
    query = f"""
    SELECT
        date, symbol, cp_flag, volume, strike_price, exdate, open_interest, impl_volatility, best_bid, best_offer, delta, gamma, vega, theta
    FROM 
        {table_name} a 
    WHERE
        a.secid = '108105' AND
        a.exdate - a.date <= 100 AND
        a.exdate - a.date >= 10 AND 
        a.volume > 0 
    """
    spx_options[year] = db.raw_sql(query, date_cols=['date'])

In [7]:
table_name = f"crsp.dsp500"  # Generate table name dynamically
query = f"""
SELECT
    *
FROM 
    {table_name} a 
WHERE
    a.caldt >= '2000-01-01' AND
    a.caldt <= '2023-12-31'
"""
sp500_underlying = db.raw_sql(query, date_cols=['date'])

In [6]:
specific_date = '2023-02-27'
table_name = f"optionm.opprcd{2023}"  # Generate table name dynamically
query = f"""
SELECT
    DISTINCT SPLIT_PART(symbol,' ',1) as ticker_symbol, issue_type
FROM 
    {table_name} a 
WHERE
    a.date = '{specific_date}'
"""
df1 = db.raw_sql(query, date_cols=['date'])

df1

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "issue_type" does not exist
LINE 3: ...STINCT SPLIT_PART(symbol,' ',1) as ticker_symbol, issue_type
                                                             ^

[SQL: 
SELECT
    DISTINCT SPLIT_PART(symbol,' ',1) as ticker_symbol, issue_type
FROM 
    optionm.opprcd2023 a 
WHERE
    a.date = '2023-02-27' AND
    a.issue_type = 0
]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [9]:
df1['Split'] = df1['symbol'].str.split(' ').str[0]
df1['Split'].unique()

array(['DGRW', 'IWF', 'SP', ..., 'TUEM1', 'ONCS1', 'UTY'], dtype=object)

In [10]:
len(df1['Split'].unique())

6031

In [12]:
pd.DataFrame(df1['Split'].unique()).to_csv('Tickers_optionm.csv')

In [20]:
specific_date = '2023-02-27'
table_name = f"optionm.opprcd{2023}"  # Generate table name dynamically
query = f"""
SELECT
    *
FROM 
    {table_name} a 
WHERE
    a.date = '{specific_date}'
"""
df2 = db.raw_sql(query, date_cols=['date'])

df2

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "optionm_all" does not exist
LINE 5:     optionm_all a 
            ^

[SQL: 
SELECT
    *
FROM 
    optionm_all a 
WHERE
    a.date = '2023-02-27'
]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [8]:
df2.columns

Index(['secid', 'date', 'symbol', 'symbol_flag', 'exdate', 'last_date',
       'cp_flag', 'strike_price', 'best_bid', 'best_offer', 'volume',
       'open_interest', 'impl_volatility', 'delta', 'gamma', 'vega', 'theta',
       'optionid', 'cfadj', 'am_settlement', 'contract_size', 'ss_flag',
       'forward_price', 'expiry_indicator', 'root', 'suffix'],
      dtype='object')

In [9]:
df2['ss_flag']

0         0
1         0
2         0
3         0
4         0
         ..
350567    0
350568    0
350569    0
350570    0
350571    0
Name: ss_flag, Length: 1350572, dtype: object

In [10]:
df2['Split'] = df2['symbol'].str.split(' ').str[0]
df2

Unnamed: 0,secid,date,symbol,symbol_flag,exdate,last_date,cp_flag,strike_price,best_bid,best_offer,...,optionid,cfadj,am_settlement,contract_size,ss_flag,forward_price,expiry_indicator,root,suffix,Split
0,5139.0,2023-02-27,CAE 230317C12500,1,2023-03-17,2023-01-26,C,12500.0,8.50,12.4,...,148295219.0,1.0,0.0,100.0,0,,,,,CAE
1,5139.0,2023-02-27,CAE 230317C15000,1,2023-03-17,2022-09-26,C,15000.0,6.00,9.9,...,148295220.0,1.0,0.0,100.0,0,,,,,CAE
2,5139.0,2023-02-27,CAE 230317C17500,1,2023-03-17,2023-02-24,C,17500.0,3.50,7.5,...,148295221.0,1.0,0.0,100.0,0,,,,,CAE
3,5139.0,2023-02-27,CAE 230317C20000,1,2023-03-17,2023-02-14,C,20000.0,1.75,5.0,...,148295222.0,1.0,0.0,100.0,0,,,,,CAE
4,5139.0,2023-02-27,CAE 230317C22500,1,2023-03-17,2023-02-16,C,22500.0,0.55,0.9,...,148295223.0,1.0,0.0,100.0,0,,,,,CAE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350567,218226.0,2023-02-27,CVLC 230915P52000,1,2023-09-15,,P,52000.0,2.35,4.9,...,152871790.0,1.0,0.0,100.0,0,,,,,CVLC
350568,218226.0,2023-02-27,CVLC 230915P53000,1,2023-09-15,,P,53000.0,2.85,5.5,...,152871791.0,1.0,0.0,100.0,0,,,,,CVLC
350569,218226.0,2023-02-27,CVLC 230915P54000,1,2023-09-15,,P,54000.0,3.50,6.1,...,152871792.0,1.0,0.0,100.0,0,,,,,CVLC
350570,218226.0,2023-02-27,CVLC 230915P55000,1,2023-09-15,,P,55000.0,4.20,6.8,...,152871793.0,1.0,0.0,100.0,0,,,,,CVLC


In [15]:
df2[df2['Split']=='SPX']['symbol_flag']

323420    1
323421    1
323422    1
323423    1
323424    1
         ..
329154    1
329155    1
329156    1
329157    1
329158    1
Name: symbol_flag, Length: 5738, dtype: object

In [17]:
df2['ss_flag'].unique()

array(['0', '1'], dtype=object)

In [33]:
specific_date = '2023-02-27'
table_name = f"optionm.securd"  # Generate table name dynamically
query = f"""
SELECT
    *
FROM 
    {table_name} a
WHERE
    a.issue_type = '0'
"""
df3 = db.raw_sql(query, date_cols=['date'])

df3

Unnamed: 0,secid,cusip,ticker,sic,index_flag,exchange_d,class,issue_type,industry_group
0,5001.0,00078110,ZZZZ,3462,0,0.0,,0,
1,5003.0,00103810,AFAP,7380,0,16.0,,0,
2,5006.0,00244810,AWCSE,,0,16.0,,0,
3,5007.0,61757710,MRCBF,6531,0,16.0,,0,
4,5010.0,00504110,AACZE,,0,16.0,,0,
...,...,...,...,...,...,...,...,...,...
21892,218321.0,99999999,CLCO,,0,1.0,,0,
21893,218322.0,99999999,,1520,0,4.0,,0,
21894,218324.0,65448410,NB,,0,4.0,,0,
21895,218325.0,G9845F10,YS,6770,0,4.0,,0,


In [30]:
pd.DataFrame(df3['issue_type'].unique())

Unnamed: 0,0
0,0
1,
2,F
3,A
4,7
5,U
6,%
7,S


In [35]:
df3[df3['ticker']=='MSFT']

Unnamed: 0,secid,cusip,ticker,sic,index_flag,exchange_d,class,issue_type,industry_group
6002,107525.0,59491810,MSFT,7372,0,6.0,,0,
