In [38]:
import os
import datetime
from datetime import timedelta
import quandl
import matplotlib.pyplot as plt

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import plotnine as p9
from sklearn.preprocessing import MinMaxScaler
from scipy.stats import zscore

import warnings
import functools
import wrds
import sys
# Ignore all warnings
warnings.filterwarnings("ignore")

In [2]:
# Open a WRDS session; `db` is the connection the option-download cell runs its SQL through.
db = wrds.Connection()


WRDS recommends setting up a .pgpass file.
Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


In [3]:
# Load the earnings-event table from the working directory.
# Later cells read its 'trade_entry', 'trade_exit' and 'Short_ticker' columns.
earnings_dates_for_tickers = pd.read_csv("earnings_dates_for_tickers.csv")

In [5]:
# Path (relative to the working directory) of the comma-separated security-id list.
csv_file_path = "secids.csv"

# Reading the CSV file into a NumPy array.
# No dtype is given, so genfromtxt returns floats (the ids display as e.g. 101149.).
secids = np.genfromtxt(csv_file_path, delimiter=',')
secids

array([101149., 205456., 210354., ..., 211262., 207854., 105959.])

In [6]:
# Download, one year at a time, the option rows for the securities listed in `secids`.
#
# The id list is rendered once, before the loop, because it does not change between
# years. Ids are cast to int first: `secids` is a float array, so str() on its
# elements would put "101149.0"-style literals into the SQL text instead of plain
# integer ids. The cast also guarantees only digits are interpolated into the query.
secid_sql_list = ','.join(str(int(secid)) for secid in np.atleast_1d(secids))

options_dict = {}  # year -> DataFrame returned by that year's query
for year in range(2017, 2024):
    print(year)  # progress marker, one line per yearly query
    table_name = f"optionm.opprcd{year}"  # Generate table name dynamically
    # Keep only contracts that traded (volume > 0) and expire within 7 days of the quote date.
    query = f"""
    SELECT
        date, secid, symbol, cp_flag, expiry_indicator, volume, strike_price, exdate, open_interest, impl_volatility, best_bid, best_offer, delta, gamma, vega, theta
    FROM 
        {table_name} a 
    WHERE
        a.secid IN ({secid_sql_list}) AND
        a.volume > 0 AND
        a.exdate - a.date <= 7
    """
    # NOTE(review): only 'date' is parsed as a date here; 'exdate' keeps whatever type the driver returns.
    options_dict[year] = db.raw_sql(query, date_cols=['date'])

2017
2018
2019
2020
2021
2022
2023


In [9]:
# Load the daily stock-price table from a parquet file in the working directory and display it.
# The displayed columns are date, permno, prc, bid, ask, ticker and comnam.
stock_prices = pd.read_parquet("stock_price_ED_20182023.parquet")
stock_prices

Unnamed: 0,date,permno,prc,bid,ask,ticker,comnam
45,2018-02-07,87432,68.06000,68.06000,68.07000,A,AGILENT TECHNOLOGIES INC
46,2018-02-08,87432,65.05000,65.03000,65.05000,A,AGILENT TECHNOLOGIES INC
47,2018-02-09,87432,66.98000,66.97000,66.98000,A,AGILENT TECHNOLOGIES INC
48,2018-02-12,87432,68.43000,68.40000,68.41000,A,AGILENT TECHNOLOGIES INC
49,2018-02-13,87432,68.34000,68.33000,68.34000,A,AGILENT TECHNOLOGIES INC
...,...,...,...,...,...,...,...
965738,2023-02-15,13788,175.02000,175.10001,175.11000,ZTS,ZOETIS INC
965739,2023-02-16,13788,173.92999,173.97000,174.02000,ZTS,ZOETIS INC
965740,2023-02-17,13788,172.03000,172.09000,172.10001,ZTS,ZOETIS INC
965741,2023-02-21,13788,169.84000,169.87000,169.89999,ZTS,ZOETIS INC


In [17]:
# Inspect the 'date' column (already datetime64[ns]).
# NOTE(review): this only works while 'date' is still a column, i.e. before the
# set_index(['date', 'ticker']) cell has run.
stock_prices['date']

45       2018-02-07
46       2018-02-08
47       2018-02-09
48       2018-02-12
49       2018-02-13
            ...    
965738   2023-02-15
965739   2023-02-16
965740   2023-02-17
965741   2023-02-21
965742   2023-02-22
Name: date, Length: 125725, dtype: datetime64[ns]

In [14]:
# Parse 'trade_entry' and keep only its calendar-date part (YYYY-MM-DD).
# The column ends up holding Python date objects (object dtype).
earnings_dates_for_tickers['trade_entry'] = (
    pd.to_datetime(earnings_dates_for_tickers['trade_entry'])
    .dt.date
)
earnings_dates_for_tickers['trade_entry']

0        2018-02-13
1        2018-05-11
2        2018-08-13
3        2018-11-16
4        2019-02-19
            ...    
19471    2023-02-13
19472    2023-05-03
19473    2023-08-07
19474    2023-11-01
19475    2024-02-12
Name: trade_entry, Length: 19476, dtype: object

In [15]:
# Convert the date-only 'trade_entry' values back to datetime64 so the .dt accessor
# and timestamp-keyed lookups work on them again.
earnings_dates_for_tickers['trade_entry'] = (
    earnings_dates_for_tickers['trade_entry'].pipe(pd.to_datetime)
)
earnings_dates_for_tickers['trade_entry']


0       2018-02-13
1       2018-05-11
2       2018-08-13
3       2018-11-16
4       2019-02-19
           ...    
19471   2023-02-13
19472   2023-05-03
19473   2023-08-07
19474   2023-11-01
19475   2024-02-12
Name: trade_entry, Length: 19476, dtype: datetime64[ns]

In [16]:
# Parse 'trade_exit', keep only the calendar date (YYYY-MM-DD), then convert the
# result back to datetime64 so it ends up as a midnight timestamp.
earnings_dates_for_tickers['trade_exit'] = pd.to_datetime(
    pd.to_datetime(earnings_dates_for_tickers['trade_exit']).dt.date
)
earnings_dates_for_tickers['trade_exit']



0       2018-02-14
1       2018-05-14
2       2018-08-14
3       2018-11-19
4       2019-02-20
           ...    
19471   2023-02-14
19472   2023-05-04
19473   2023-08-08
19474   2023-11-02
19475   2024-02-13
Name: trade_exit, Length: 19476, dtype: datetime64[ns]

In [18]:
# Index prices by (date, ticker) so one ticker's quote on one day can be fetched with .loc.
# The call is guarded and not in-place so the cell can be re-run safely: once 'date'
# and 'ticker' have moved into the index, a second set_index would raise KeyError.
if {'date', 'ticker'}.issubset(stock_prices.columns):
    stock_prices = stock_prices.set_index(['date', 'ticker'])
stock_prices

Unnamed: 0_level_0,Unnamed: 1_level_0,permno,prc,bid,ask,comnam
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-02-07,A,87432,68.06000,68.06000,68.07000,AGILENT TECHNOLOGIES INC
2018-02-08,A,87432,65.05000,65.03000,65.05000,AGILENT TECHNOLOGIES INC
2018-02-09,A,87432,66.98000,66.97000,66.98000,AGILENT TECHNOLOGIES INC
2018-02-12,A,87432,68.43000,68.40000,68.41000,AGILENT TECHNOLOGIES INC
2018-02-13,A,87432,68.34000,68.33000,68.34000,AGILENT TECHNOLOGIES INC
...,...,...,...,...,...,...
2023-02-15,ZTS,13788,175.02000,175.10001,175.11000,ZOETIS INC
2023-02-16,ZTS,13788,173.92999,173.97000,174.02000,ZOETIS INC
2023-02-17,ZTS,13788,172.03000,172.09000,172.10001,ZOETIS INC
2023-02-21,ZTS,13788,169.84000,169.87000,169.89999,ZOETIS INC


In [31]:
def entry_stock_price(row):
    """Return the mean 'prc' in `stock_prices` for the row's entry date and ticker.

    Parameters
    ----------
    row : mapping-like
        Must provide 'trade_entry' and 'Short_ticker'; together they form the
        (date, ticker) key looked up in the module-level `stock_prices` frame.

    Returns
    -------
    float
        Mean of the matching 'prc' value(s), or NaN when any lookup raises KeyError
        (for example, no price row exists for that date/ticker pair).
    """
    try:
        lookup_key = (row['trade_entry'], row['Short_ticker'])
        matches = stock_prices.loc[lookup_key]
        return matches['prc'].mean()
    except KeyError:
        return np.nan
# Row-wise lookup of the entry-day stock price for every earnings event.
earnings_dates_for_tickers['stock_price_entry'] = earnings_dates_for_tickers.apply(
    entry_stock_price,
    axis='columns',
)

In [32]:
def exit_stock_price(row):
    """Return the mean 'prc' in `stock_prices` for the row's exit date and ticker.

    Parameters
    ----------
    row : mapping-like
        Must provide 'trade_exit' and 'Short_ticker'; together they form the
        (date, ticker) key looked up in the module-level `stock_prices` frame.

    Returns
    -------
    float
        Mean of the matching 'prc' value(s), or NaN when any lookup raises KeyError
        (for example, no price row exists for that date/ticker pair).
    """
    try:
        lookup_key = (row['trade_exit'], row['Short_ticker'])
        matches = stock_prices.loc[lookup_key]
        return matches['prc'].mean()
    except KeyError:
        return np.nan
# Row-wise lookup of the exit-day stock price for every earnings event.
earnings_dates_for_tickers['stock_price_exit'] = earnings_dates_for_tickers.apply(
    exit_stock_price,
    axis='columns',
)

In [56]:
# Calendar year of each trade leg; the straddle functions use these to pick the
# matching per-year frame in options_dict.
earnings_dates_for_tickers['year_entry'] = (
    earnings_dates_for_tickers['trade_entry'].dt.year
)
earnings_dates_for_tickers['year_exit'] = (
    earnings_dates_for_tickers['trade_exit'].dt.year
)
earnings_dates_for_tickers['year_entry']

0        2018
1        2018
2        2018
3        2018
4        2019
         ... 
19471    2023
19472    2023
19473    2023
19474    2023
19475    2024
Name: year_entry, Length: 19476, dtype: int32

In [68]:
def straddle_entry_mid(row):
    """Mid quote of the nearest-to-the-money call/put pair on the trade-entry date.

    Looks up the options quoted on ``row['trade_entry']`` for ``row['Short_ticker']``
    in ``options_dict[row['year_entry']]``, finds the contracts whose scaled strike is
    closest to ``row['stock_price_entry']``, and, if those are exactly one call and
    one put, returns the average of their mean bid and mean offer (i.e. the average
    of the two option mids).

    Parameters
    ----------
    row : mapping-like
        Needs 'year_entry', 'year_exit', 'trade_entry', 'Short_ticker' and
        'stock_price_entry'.

    Returns
    -------
    float
        The mid described above, or NaN when either year is outside 2017-2023, no
        options are found for the key, or the closest-strike contracts are not
        exactly one call plus one put.

    Notes
    -----
    Assumes every frame in ``options_dict`` is indexed by (date, ticker); the cell
    that builds that index is not part of the visible download step, so confirm it
    has run before calling this.
    """
    # Both legs must fall inside the years for which option data was downloaded.
    if row['year_entry'] < 2017 or row['year_entry'] > 2023 or row['year_exit'] < 2017 or row['year_exit'] > 2023:
        return np.nan
    try:
        options = options_dict[row['year_entry']].loc[(row['trade_entry'], row['Short_ticker'])]
    except (KeyError, TypeError):
        # Missing year/date/ticker, or a key (e.g. NaT/None) the index cannot match.
        # Deliberately not a bare `except:` so KeyboardInterrupt still stops a long apply.
        return np.nan
    if isinstance(options, pd.Series):
        # .loc returned a single row: one contract can never be a call/put pair.
        return np.nan

    # Work on derived Series instead of writing columns onto the .loc result: that
    # result may share data with the frame stored in options_dict, and rescaling it
    # in place could shrink strikes again on every call.
    # The 0.001 factor presumably converts strikes stored as dollars x 1000; confirm against the data source.
    strikes = options['strike_price'] * 0.001
    distance = (strikes - row['stock_price_entry']).abs()

    # Keep only the contracts at the strike closest to the entry stock price.
    nearest = options[(distance == distance.min()).to_numpy()]
    flags = nearest['cp_flag'].values
    if len(nearest) == 2 and ('C' in flags) and ('P' in flags):
        return (nearest['best_bid'].mean() + nearest['best_offer'].mean()) / 2.0
    return np.nan
# Row-wise computation of the entry-day straddle mid for every earnings event.
earnings_dates_for_tickers['straddle_entry_mid'] = earnings_dates_for_tickers.apply(
    straddle_entry_mid,
    axis='columns',
)

In [76]:
def straddle_exit_mid(row):
    """Mid quote, on the trade-exit date, of the call/put pair nearest the entry price.

    Looks up the options quoted on ``row['trade_exit']`` for ``row['Short_ticker']``
    in ``options_dict[row['year_exit']]``, finds the contracts whose scaled strike is
    closest to ``row['stock_price_entry']`` (the *entry* stock price, not the exit
    price), and, if those are exactly one call and one put, returns the average of
    their mean bid and mean offer (i.e. the average of the two option mids).

    Parameters
    ----------
    row : mapping-like
        Needs 'year_entry', 'year_exit', 'trade_exit', 'Short_ticker' and
        'stock_price_entry'.

    Returns
    -------
    float
        The mid described above, or NaN when either year is outside 2017-2023, no
        options are found for the key, or the closest-strike contracts are not
        exactly one call plus one put.

    Notes
    -----
    Assumes every frame in ``options_dict`` is indexed by (date, ticker); the cell
    that builds that index is not part of the visible download step, so confirm it
    has run before calling this.
    NOTE(review): nothing here checks that the strike or expiry matches the pair
    picked on the entry date; presumably the entry price is used to land on the same
    strike. Confirm that is sufficient.
    """
    # Both legs must fall inside the years for which option data was downloaded.
    if row['year_entry'] < 2017 or row['year_entry'] > 2023 or row['year_exit'] < 2017 or row['year_exit'] > 2023:
        return np.nan
    try:
        options = options_dict[row['year_exit']].loc[(row['trade_exit'], row['Short_ticker'])]
    except (KeyError, TypeError):
        # Missing year/date/ticker, or a key (e.g. NaT/None) the index cannot match.
        # Deliberately not a bare `except:` so KeyboardInterrupt still stops a long apply.
        return np.nan
    if isinstance(options, pd.Series):
        # .loc returned a single row: one contract can never be a call/put pair.
        return np.nan

    # Work on derived Series instead of writing columns onto the .loc result: that
    # result may share data with the frame stored in options_dict, and rescaling it
    # in place could shrink strikes again on every call.
    # The 0.001 factor presumably converts strikes stored as dollars x 1000; confirm against the data source.
    strikes = options['strike_price'] * 0.001
    distance = (strikes - row['stock_price_entry']).abs()

    # Keep only the contracts at the strike closest to the entry stock price.
    nearest = options[(distance == distance.min()).to_numpy()]
    flags = nearest['cp_flag'].values
    if len(nearest) == 2 and ('C' in flags) and ('P' in flags):
        return (nearest['best_bid'].mean() + nearest['best_offer'].mean()) / 2.0
    return np.nan
# Row-wise computation of the exit-day straddle mid for every earnings event.
earnings_dates_for_tickers['straddle_exit_mid'] = earnings_dates_for_tickers.apply(
    straddle_exit_mid,
    axis='columns',
)

In [77]:
# Number of events with a non-missing exit mid (count() ignores NaN).
earnings_dates_for_tickers['straddle_exit_mid'].count()

5288

In [79]:
# (entry mid - exit mid) * 1000 / entry mid, per event.
# Positive when the exit mid is below the entry mid; NaN wherever either mid is missing.
earnings_dates_for_tickers['straddle_pnl'] = (
    earnings_dates_for_tickers['straddle_entry_mid']
    .sub(earnings_dates_for_tickers['straddle_exit_mid'])
    .mul(1000.0)
    .div(earnings_dates_for_tickers['straddle_entry_mid'])
)

In [80]:
# Average straddle_pnl across events; Series.mean() skips NaN rows by default.
earnings_dates_for_tickers['straddle_pnl'].mean()

-26.64774755013799