In [1]:
# import raw data
import numpy as np
import pandas as pd

In [2]:
# import data
data = pd.read_csv("NDXP.csv")
data

Unnamed: 0,quote_date,expiration,strike,option_type,bid_1545,ask_1545
0,2022-12-30,2022-12-30,7900.0,C,2965.3,2987.90
1,2022-12-30,2022-12-30,7900.0,P,0.0,0.05
2,2022-12-30,2022-12-30,8000.0,C,2865.3,2887.90
3,2022-12-30,2022-12-30,8000.0,P,0.0,0.05
4,2022-12-30,2022-12-30,8100.0,C,2765.2,2787.90
...,...,...,...,...,...,...
7226456,2022-12-29,2023-09-29,15400.0,P,3969.2,3994.70
7226457,2022-12-29,2023-09-29,15500.0,C,40.0,47.40
7226458,2022-12-29,2023-09-29,15500.0,P,4061.6,4087.20
7226459,2022-12-29,2023-09-29,15600.0,C,36.3,43.70


In [3]:
# second fridays between 2018 and 2022

all_fridays = pd.date_range(start="2018-01-01", end="2022-12-31", freq='W-FRI') ## all fridays in the period

second_fridays = [] # initialize second fridays list

for friday in all_fridays: # loop through every friday and determine if it is second friday
    month_start = pd.offsets.MonthBegin(1) # create a date offset to move friday to start of the month
    if len(pd.date_range(start = friday - month_start, end = friday, freq='W-FRI')) == 2: # if number of fridays since start of month is 2:
        second_fridays.append(friday)

second_fridays = pd.Series(second_fridays)
second_fridays

0    2018-01-12
1    2018-02-09
2    2018-03-09
3    2018-04-13
4    2018-05-11
5    2018-06-08
6    2018-07-13
7    2018-08-10
8    2018-09-14
9    2018-10-12
10   2018-11-09
11   2018-12-14
12   2019-01-11
13   2019-02-08
14   2019-03-08
15   2019-04-12
16   2019-05-10
17   2019-06-14
18   2019-07-12
19   2019-08-09
20   2019-09-13
21   2019-10-11
22   2019-11-08
23   2019-12-13
24   2020-01-10
25   2020-02-14
26   2020-03-13
27   2020-04-10
28   2020-05-08
29   2020-06-12
30   2020-07-10
31   2020-08-14
32   2020-09-11
33   2020-10-09
34   2020-11-13
35   2020-12-11
36   2021-01-08
37   2021-02-12
38   2021-03-12
39   2021-04-09
40   2021-05-14
41   2021-06-11
42   2021-07-09
43   2021-08-13
44   2021-09-10
45   2021-10-08
46   2021-11-12
47   2021-12-10
48   2022-01-14
49   2022-02-11
50   2022-03-11
51   2022-04-08
52   2022-05-13
53   2022-06-10
54   2022-07-08
55   2022-08-12
56   2022-09-09
57   2022-10-14
58   2022-11-11
59   2022-12-09
dtype: datetime64[ns]

In [4]:
# make sure quote_date is datetime format
data['quote_date'] = pd.to_datetime(data['quote_date'])

# perform a left join 
second_fridays_series = pd.Series(second_fridays)
second_friday_data = data[data['quote_date'].isin(second_fridays_series)]
second_friday_data

Unnamed: 0,quote_date,expiration,strike,option_type,bid_1545,ask_1545
12404,2018-01-12,2018-02-02,5300.0,C,1452.8,1469.4
12405,2018-01-12,2018-02-02,5300.0,P,0.0,1.5
12406,2018-01-12,2018-02-02,5400.0,C,1353.1,1369.7
12407,2018-01-12,2018-02-02,5400.0,P,0.0,1.9
12408,2018-01-12,2018-02-02,5500.0,C,1253.4,1270.0
...,...,...,...,...,...,...
7068048,2022-12-09,2023-09-29,15400.0,P,3395.5,3426.0
7068049,2022-12-09,2023-09-29,15500.0,C,118.3,133.2
7068050,2022-12-09,2023-09-29,15500.0,P,3481.7,3512.3
7068051,2022-12-09,2023-09-29,15600.0,C,109.4,124.0


In [5]:
# Make sure bid < ask ( buy < sell)
second_friday_data = second_friday_data[second_friday_data['bid_1545'] < second_friday_data['ask_1545']].sort_values(by = 'quote_date')
second_friday_data

Unnamed: 0,quote_date,expiration,strike,option_type,bid_1545,ask_1545
12404,2018-01-12,2018-02-02,5300.0,C,1452.8,1469.4
12950,2018-01-12,2018-02-23,6625.0,C,202.6,211.0
12951,2018-01-12,2018-02-23,6625.0,P,70.7,76.4
12952,2018-01-12,2018-02-23,6650.0,C,184.7,192.8
12953,2018-01-12,2018-02-23,6650.0,P,77.4,83.6
...,...,...,...,...,...,...
7058948,2022-12-09,2022-12-20,11920.0,P,414.7,434.6
7058949,2022-12-09,2022-12-20,11925.0,C,103.1,107.8
7058950,2022-12-09,2022-12-20,11925.0,P,418.0,438.9
7058952,2022-12-09,2022-12-20,11930.0,P,421.3,442.4


In [6]:
# export data
second_friday_data.to_csv("NDXP_2F.csv")