In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load the option quotes, then order them by quote date and strike.
data = pd.read_csv("NDXP.csv")
data = data.sort_values(by=['quote_date', 'strike'])
data

Unnamed: 0,quote_date,expiration,strike,option_type,bid_1545,ask_1545
10542,2018-01-04,2018-02-02,6200.0,C,401.9,423.0
10543,2018-01-04,2018-02-02,6200.0,P,11.8,16.0
10580,2018-01-04,2018-02-09,6200.0,C,406.2,427.0
10581,2018-01-04,2018-02-09,6200.0,P,18.7,23.0
10618,2018-01-04,2018-02-23,6200.0,C,417.5,439.0
...,...,...,...,...,...,...
3261,2022-12-30,2023-01-06,16000.0,P,5106.0,5122.1
5600,2022-12-30,2023-01-13,16000.0,C,0.0,0.9
5601,2022-12-30,2023-01-13,16000.0,P,5089.8,5106.2
7656,2022-12-30,2023-01-27,16000.0,C,0.0,1.2


In [4]:
# Rows where both the bid and the ask are exactly zero.
data[(data['bid_1545'] == 0) & (data['ask_1545'] == 0)]

Unnamed: 0,quote_date,expiration,strike,option_type,bid_1545,ask_1545
1319422,2020-03-18,2020-04-03,7525.0,C,0.0,0.0
1319423,2020-03-18,2020-04-03,7525.0,P,0.0,0.0
1319424,2020-03-18,2020-04-03,7550.0,C,0.0,0.0
1319425,2020-03-18,2020-04-03,7550.0,P,0.0,0.0
1319426,2020-03-18,2020-04-03,7575.0,C,0.0,0.0
1319427,2020-03-18,2020-04-03,7575.0,P,0.0,0.0
1319430,2020-03-18,2020-04-03,7625.0,C,0.0,0.0
1319431,2020-03-18,2020-04-03,7625.0,P,0.0,0.0
1343356,2020-03-25,2020-05-01,6060.0,C,0.0,0.0
1343357,2020-03-25,2020-05-01,6060.0,P,0.0,0.0


In [5]:
# import interest rate data
# '.' is parsed as a missing value at read time, so the forward fill really
# covers those rows (previously the first ffill ran while '.' was still a string).
rates = pd.read_csv("DGS1MO.csv", na_values=['.']).ffill()
rates.columns = ['date', 'rate']
rates['date'] = pd.to_datetime(rates['date'])
# Scale by 1/100 (the file values are presumably quoted in percent).
rates['rate'] = pd.to_numeric(rates['rate']) / 100
rates

Unnamed: 0,date,rate
0,2018-01-02,0.0129
1,2018-01-03,0.0129
2,2018-01-04,0.0128
3,2018-01-05,0.0127
4,2018-01-08,0.0130
...,...,...
1299,2022-12-26,0.0380
1300,2022-12-27,0.0387
1301,2022-12-28,0.0386
1302,2022-12-29,0.0404


In [6]:
# second fridays between 2018 and 2022

all_fridays = pd.date_range(start="2018-01-01", end="2022-12-31", freq='W-FRI')  # all fridays in the period

# The first Friday of a month falls on day 1..7, so the second Friday is the
# one whose day-of-month is 8..14. This replaces a per-Friday date_range count.
is_second_friday = (all_fridays.day >= 8) & (all_fridays.day <= 14)

second_fridays = pd.Series(all_fridays[is_second_friday])
second_fridays

0    2018-01-12
1    2018-02-09
2    2018-03-09
3    2018-04-13
4    2018-05-11
5    2018-06-08
6    2018-07-13
7    2018-08-10
8    2018-09-14
9    2018-10-12
10   2018-11-09
11   2018-12-14
12   2019-01-11
13   2019-02-08
14   2019-03-08
15   2019-04-12
16   2019-05-10
17   2019-06-14
18   2019-07-12
19   2019-08-09
20   2019-09-13
21   2019-10-11
22   2019-11-08
23   2019-12-13
24   2020-01-10
25   2020-02-14
26   2020-03-13
27   2020-04-10
28   2020-05-08
29   2020-06-12
30   2020-07-10
31   2020-08-14
32   2020-09-11
33   2020-10-09
34   2020-11-13
35   2020-12-11
36   2021-01-08
37   2021-02-12
38   2021-03-12
39   2021-04-09
40   2021-05-14
41   2021-06-11
42   2021-07-09
43   2021-08-13
44   2021-09-10
45   2021-10-08
46   2021-11-12
47   2021-12-10
48   2022-01-14
49   2022-02-11
50   2022-03-11
51   2022-04-08
52   2022-05-13
53   2022-06-10
54   2022-07-08
55   2022-08-12
56   2022-09-09
57   2022-10-14
58   2022-11-11
59   2022-12-09
dtype: datetime64[ns]

In [7]:
# Parse both date columns so they can be compared and subtracted as timestamps.
data['quote_date'] = pd.to_datetime(data['quote_date'])
data['expiration'] = pd.to_datetime(data['expiration'])

# Keep only contracts that expire on a second Friday (a membership filter, not a join).
second_fridays_series = pd.Series(second_fridays)
expires_on_second_friday = data['expiration'].isin(second_fridays_series)
second_friday_data = data.loc[expires_on_second_friday]

# Of those, keep the quotes taken exactly 28 calendar days before expiration.
days_to_expiry = (second_friday_data['expiration'] - second_friday_data['quote_date']).dt.days
second_friday_data = second_friday_data.loc[days_to_expiry == 28]
second_friday_data

Unnamed: 0,quote_date,expiration,strike,option_type,bid_1545,ask_1545
12652,2018-01-12,2018-02-09,5400.0,C,1347.00,1368.30
12653,2018-01-12,2018-02-09,5400.0,P,0.05,3.10
12654,2018-01-12,2018-02-09,5500.0,C,1248.00,1269.00
12655,2018-01-12,2018-02-09,5500.0,P,0.25,3.70
12656,2018-01-12,2018-02-09,5600.0,C,1151.60,1165.90
...,...,...,...,...,...,...
6823188,2022-11-11,2022-12-09,14800.0,P,2939.00,2963.80
6823189,2022-11-11,2022-12-09,14900.0,C,0.00,2.10
6823190,2022-11-11,2022-12-09,14900.0,P,3038.60,3063.30
6823191,2022-11-11,2022-12-09,15000.0,C,0.00,1.95


In [8]:
# Keep only quotes whose bid is strictly below the ask (buy < sell).
second_friday_data = second_friday_data.query("bid_1545 < ask_1545")
second_friday_data

Unnamed: 0,quote_date,expiration,strike,option_type,bid_1545,ask_1545
12652,2018-01-12,2018-02-09,5400.0,C,1347.00,1368.30
12653,2018-01-12,2018-02-09,5400.0,P,0.05,3.10
12654,2018-01-12,2018-02-09,5500.0,C,1248.00,1269.00
12655,2018-01-12,2018-02-09,5500.0,P,0.25,3.70
12656,2018-01-12,2018-02-09,5600.0,C,1151.60,1165.90
...,...,...,...,...,...,...
6823188,2022-11-11,2022-12-09,14800.0,P,2939.00,2963.80
6823189,2022-11-11,2022-12-09,14900.0,C,0.00,2.10
6823190,2022-11-11,2022-12-09,14900.0,P,3038.60,3063.30
6823191,2022-11-11,2022-12-09,15000.0,C,0.00,1.95


In [9]:
# Split the quotes by option type, then pair each call with the put quoted
# on the same date at the same strike.
calls = second_friday_data.loc[second_friday_data['option_type'] == "C"]
puts = second_friday_data.loc[second_friday_data['option_type'] == "P"]
merged_data = calls.merge(puts, on=['quote_date', 'strike'], suffixes=('_C', '_P'), how='inner')

# Attach the rate observed on each quote date (left join: every option row is kept).
merged_data = merged_data.merge(rates, left_on='quote_date', right_on='date', how='left')

# Put-call parity on mid prices: S = C_mid + K / (1 + r)^(1/12) - P_mid
merged_data['implied_S'] = (
    (merged_data['bid_1545_C'] + merged_data['ask_1545_C']) / 2
    + merged_data['strike'] / ((1 + merged_data['rate']) ** (1 / 12))
    - (merged_data['bid_1545_P'] + merged_data['ask_1545_P']) / 2
)
# Distance of the implied underlying from the strike (positive when S > K).
merged_data['moneyness'] = merged_data['implied_S'] - merged_data['strike']
merged_data

Unnamed: 0,quote_date,expiration_C,strike,option_type_C,bid_1545_C,ask_1545_C,expiration_P,option_type_P,bid_1545_P,ask_1545_P,date,rate,implied_S,moneyness
0,2018-01-12,2018-02-09,5400.0,C,1347.00,1368.30,2018-02-09,P,0.05,3.1,2018-01-12,0.0131,6750.221453,1350.221453
1,2018-01-12,2018-02-09,5500.0,C,1248.00,1269.00,2018-02-09,P,0.25,3.7,2018-01-12,0.0131,6750.563054,1250.563054
2,2018-01-12,2018-02-09,5600.0,C,1151.60,1165.90,2018-02-09,P,0.65,4.2,2018-01-12,0.0131,6750.254655,1150.254655
3,2018-01-12,2018-02-09,5700.0,C,1052.40,1066.70,2018-02-09,P,1.70,4.3,2018-01-12,0.0131,6750.371256,1050.371256
4,2018-01-12,2018-02-09,5800.0,C,953.10,967.40,2018-02-09,P,2.40,5.1,2018-01-12,0.0131,6750.212857,950.212857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10893,2022-11-11,2022-12-09,14600.0,C,0.20,2.60,2022-12-09,P,2740.00,2764.7,2022-11-11,0.0371,11804.796038,-2795.203962
10894,2022-11-11,2022-12-09,14700.0,C,0.10,2.40,2022-12-09,P,2839.40,2864.2,2022-11-11,0.0371,11804.892928,-2895.107072
10895,2022-11-11,2022-12-09,14800.0,C,0.05,2.25,2022-12-09,P,2939.00,2963.8,2022-11-11,0.0371,11804.889819,-2995.110181
10896,2022-11-11,2022-12-09,14900.0,C,0.00,2.10,2022-12-09,P,3038.60,3063.3,2022-11-11,0.0371,11804.936710,-3095.063290


In [10]:
# might need to consider picking a single implied S (maybe one closest to at the money?)
def get_at_the_money(group):
    """Return the row of ``group`` whose ``moneyness`` is closest to zero.

    The lookup is positional; on ties the first matching row is returned.
    """
    distance_to_atm = group['moneyness'].abs()
    nearest_position = distance_to_atm.argmin()
    return group.iloc[nearest_position]
# For every quote date, take the implied S of the strike closest to at-the-money.
# Only the columns the helper needs are handed to apply, and quote_date is
# recovered from the group index, so this no longer depends on groupby.apply
# passing the grouping column to the function (deprecated in pandas 2.2).
at_the_money_df = (
    merged_data.groupby('quote_date')[['implied_S', 'moneyness']]
    .apply(get_at_the_money)[['implied_S']]
    .reset_index()
)
# Both frames carry an `implied_S` column, so the merge applies the default
# suffixes: `implied_S_x` is the per-strike value, `implied_S_y` the per-date
# at-the-money value.
# NOTE: re-running this cell fails because `implied_S` no longer exists afterwards.
merged_data = pd.merge(merged_data, at_the_money_df, on='quote_date', how='left')
merged_data

Unnamed: 0,quote_date,expiration_C,strike,option_type_C,bid_1545_C,ask_1545_C,expiration_P,option_type_P,bid_1545_P,ask_1545_P,date,rate,implied_S_x,moneyness,implied_S_y
0,2018-01-12,2018-02-09,5400.0,C,1347.00,1368.30,2018-02-09,P,0.05,3.1,2018-01-12,0.0131,6750.221453,1350.221453,6751.233067
1,2018-01-12,2018-02-09,5500.0,C,1248.00,1269.00,2018-02-09,P,0.25,3.7,2018-01-12,0.0131,6750.563054,1250.563054,6751.233067
2,2018-01-12,2018-02-09,5600.0,C,1151.60,1165.90,2018-02-09,P,0.65,4.2,2018-01-12,0.0131,6750.254655,1150.254655,6751.233067
3,2018-01-12,2018-02-09,5700.0,C,1052.40,1066.70,2018-02-09,P,1.70,4.3,2018-01-12,0.0131,6750.371256,1050.371256,6751.233067
4,2018-01-12,2018-02-09,5800.0,C,953.10,967.40,2018-02-09,P,2.40,5.1,2018-01-12,0.0131,6750.212857,950.212857,6751.233067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10893,2022-11-11,2022-12-09,14600.0,C,0.20,2.60,2022-12-09,P,2740.00,2764.7,2022-11-11,0.0371,11804.796038,-2795.203962,11805.333099
10894,2022-11-11,2022-12-09,14700.0,C,0.10,2.40,2022-12-09,P,2839.40,2864.2,2022-11-11,0.0371,11804.892928,-2895.107072,11805.333099
10895,2022-11-11,2022-12-09,14800.0,C,0.05,2.25,2022-12-09,P,2939.00,2963.8,2022-11-11,0.0371,11804.889819,-2995.110181,11805.333099
10896,2022-11-11,2022-12-09,14900.0,C,0.00,2.10,2022-12-09,P,3038.60,3063.3,2022-11-11,0.0371,11804.936710,-3095.063290,11805.333099


In [11]:

# Load the index close series. '.' is parsed as a missing value at read time,
# so the forward fill really covers those rows (previously the first ffill
# ran while '.' was still a string).
NDX100 = pd.read_csv("NDX100_Data.csv", na_values=['.']).ffill()
NDX100.columns = ['date', 'close']
NDX100['date'] = pd.to_datetime(NDX100['date'])
NDX100['close'] = pd.to_numeric(NDX100['close'])
NDX100_close = NDX100['close']

#merged_data = pd.merge(merged_data, NDX100['close'], on='date')

In [12]:
# Short aliases for the columns used below.
strike = merged_data['strike']

bid_call, ask_call = merged_data['bid_1545_C'], merged_data['ask_1545_C']
bid_put, ask_put = merged_data['bid_1545_P'], merged_data['ask_1545_P']

close = NDX100['close']

#option = merged_data['option_type_C']
# option = merged_data['option_type_P']


def pml_formulae(strike, bid, ask, close, option):
    """Return intrinsic value minus the bid/ask mid price for one option.

    Parameters
    ----------
    strike : strike price of the option.
    bid, ask : quoted bid and ask; their average is used as the option price.
    close : close price of the underlying.
    option : "C" for a call, "P" for a put.

    Returns
    -------
    ``close - strike - mid`` for a call, ``strike - close - mid`` for a put,
    and ``None`` for any other ``option`` value.
    """
    # Mid price of the quote. The earlier version computed (bid - close) / 2
    # and never used `ask`, which disagrees with the pml_call / pml_put
    # expressions used later in this notebook.
    mid_price = (bid + ask) / 2

    if option == "C":
        return close - strike - mid_price
    if option == "P":
        return strike - close - mid_price
    return None

In [13]:
# Quote date of the row labelled 0.
ga = merged_data['quote_date'].loc[0]

In [14]:
# Display the quote date stored in `ga`.
ga

Timestamp('2018-01-12 00:00:00')

In [15]:
# Display the merged call/put table.
merged_data

Unnamed: 0,quote_date,expiration_C,strike,option_type_C,bid_1545_C,ask_1545_C,expiration_P,option_type_P,bid_1545_P,ask_1545_P,date,rate,implied_S_x,moneyness,implied_S_y
0,2018-01-12,2018-02-09,5400.0,C,1347.00,1368.30,2018-02-09,P,0.05,3.1,2018-01-12,0.0131,6750.221453,1350.221453,6751.233067
1,2018-01-12,2018-02-09,5500.0,C,1248.00,1269.00,2018-02-09,P,0.25,3.7,2018-01-12,0.0131,6750.563054,1250.563054,6751.233067
2,2018-01-12,2018-02-09,5600.0,C,1151.60,1165.90,2018-02-09,P,0.65,4.2,2018-01-12,0.0131,6750.254655,1150.254655,6751.233067
3,2018-01-12,2018-02-09,5700.0,C,1052.40,1066.70,2018-02-09,P,1.70,4.3,2018-01-12,0.0131,6750.371256,1050.371256,6751.233067
4,2018-01-12,2018-02-09,5800.0,C,953.10,967.40,2018-02-09,P,2.40,5.1,2018-01-12,0.0131,6750.212857,950.212857,6751.233067
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10893,2022-11-11,2022-12-09,14600.0,C,0.20,2.60,2022-12-09,P,2740.00,2764.7,2022-11-11,0.0371,11804.796038,-2795.203962,11805.333099
10894,2022-11-11,2022-12-09,14700.0,C,0.10,2.40,2022-12-09,P,2839.40,2864.2,2022-11-11,0.0371,11804.892928,-2895.107072,11805.333099
10895,2022-11-11,2022-12-09,14800.0,C,0.05,2.25,2022-12-09,P,2939.00,2963.8,2022-11-11,0.0371,11804.889819,-2995.110181,11805.333099
10896,2022-11-11,2022-12-09,14900.0,C,0.00,2.10,2022-12-09,P,3038.60,3063.3,2022-11-11,0.0371,11804.936710,-3095.063290,11805.333099


In [16]:
# Look up the index close for every quote date with one vectorised map
# instead of running a DataFrame.query per row.
# drop_duplicates keeps the first row per date, matching the earlier `.values[0]`.
_close_by_date = NDX100.drop_duplicates(subset='date').set_index('date')['close']

# Fail loudly, as the per-row lookup did, when a quote date has no close,
# but say which dates are missing.
_missing_dates = merged_data.loc[
    ~merged_data['quote_date'].isin(_close_by_date.index), 'quote_date'
].unique()
if len(_missing_dates) > 0:
    raise IndexError(f"no NDX100 close for quote dates: {list(_missing_dates)}")

merged_data["underlying_close_price"] = merged_data['quote_date'].map(_close_by_date)

In [17]:
# Display the table including the new `underlying_close_price` column.
merged_data

Unnamed: 0,quote_date,expiration_C,strike,option_type_C,bid_1545_C,ask_1545_C,expiration_P,option_type_P,bid_1545_P,ask_1545_P,date,rate,implied_S_x,moneyness,implied_S_y,underlying_close_price
0,2018-01-12,2018-02-09,5400.0,C,1347.00,1368.30,2018-02-09,P,0.05,3.1,2018-01-12,0.0131,6750.221453,1350.221453,6751.233067,6758.54
1,2018-01-12,2018-02-09,5500.0,C,1248.00,1269.00,2018-02-09,P,0.25,3.7,2018-01-12,0.0131,6750.563054,1250.563054,6751.233067,6758.54
2,2018-01-12,2018-02-09,5600.0,C,1151.60,1165.90,2018-02-09,P,0.65,4.2,2018-01-12,0.0131,6750.254655,1150.254655,6751.233067,6758.54
3,2018-01-12,2018-02-09,5700.0,C,1052.40,1066.70,2018-02-09,P,1.70,4.3,2018-01-12,0.0131,6750.371256,1050.371256,6751.233067,6758.54
4,2018-01-12,2018-02-09,5800.0,C,953.10,967.40,2018-02-09,P,2.40,5.1,2018-01-12,0.0131,6750.212857,950.212857,6751.233067,6758.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10893,2022-11-11,2022-12-09,14600.0,C,0.20,2.60,2022-12-09,P,2740.00,2764.7,2022-11-11,0.0371,11804.796038,-2795.203962,11805.333099,11817.01
10894,2022-11-11,2022-12-09,14700.0,C,0.10,2.40,2022-12-09,P,2839.40,2864.2,2022-11-11,0.0371,11804.892928,-2895.107072,11805.333099,11817.01
10895,2022-11-11,2022-12-09,14800.0,C,0.05,2.25,2022-12-09,P,2939.00,2963.8,2022-11-11,0.0371,11804.889819,-2995.110181,11805.333099,11817.01
10896,2022-11-11,2022-12-09,14900.0,C,0.00,2.10,2022-12-09,P,3038.60,3063.3,2022-11-11,0.0371,11804.936710,-3095.063290,11805.333099,11817.01


In [22]:
def pml_call(underlying_close_price, strike, bid_1545_C, ask_1545_C):
    """Underlying close minus strike, minus the call's bid/ask mid price."""
    return underlying_close_price - strike - (bid_1545_C + ask_1545_C) / 2


def pml_put(underlying_close_price, strike, bid_1545_P, ask_1545_P):
    """Strike minus underlying close, minus the put's bid/ask mid price."""
    return strike - underlying_close_price - (bid_1545_P + ask_1545_P) / 2


def ratio(implied_S_y, strike):
    """Strike divided by the per-date at-the-money implied underlying."""
    return strike / implied_S_y


pmlcall = pml_call(
    merged_data['underlying_close_price'],
    merged_data['strike'],
    merged_data['bid_1545_C'],
    merged_data['ask_1545_C'],
)
pmlput = pml_put(
    merged_data['underlying_close_price'],
    merged_data['strike'],
    merged_data['bid_1545_P'],
    merged_data['ask_1545_P'],
)
# Strike / implied-S ratio, rounded to two decimals.
sp_ratio = ratio(merged_data['implied_S_y'], merged_data['strike']).round(2)

In [23]:
# Attach the computed series as columns, in the same order as before.
for _column, _values in {"pmlput": pmlput, "pmlcall": pmlcall, "sp_ratio": sp_ratio}.items():
    merged_data[_column] = _values

merged_data

Unnamed: 0,quote_date,expiration_C,strike,option_type_C,bid_1545_C,ask_1545_C,expiration_P,option_type_P,bid_1545_P,ask_1545_P,date,rate,implied_S_x,moneyness,implied_S_y,underlying_close_price,pmlput,pmlcall,sp_ratio
0,2018-01-12,2018-02-09,5400.0,C,1347.00,1368.30,2018-02-09,P,0.05,3.1,2018-01-12,0.0131,6750.221453,1350.221453,6751.233067,6758.54,-1360.115,0.890,0.80
1,2018-01-12,2018-02-09,5500.0,C,1248.00,1269.00,2018-02-09,P,0.25,3.7,2018-01-12,0.0131,6750.563054,1250.563054,6751.233067,6758.54,-1260.515,0.040,0.81
2,2018-01-12,2018-02-09,5600.0,C,1151.60,1165.90,2018-02-09,P,0.65,4.2,2018-01-12,0.0131,6750.254655,1150.254655,6751.233067,6758.54,-1160.965,-0.210,0.83
3,2018-01-12,2018-02-09,5700.0,C,1052.40,1066.70,2018-02-09,P,1.70,4.3,2018-01-12,0.0131,6750.371256,1050.371256,6751.233067,6758.54,-1061.540,-1.010,0.84
4,2018-01-12,2018-02-09,5800.0,C,953.10,967.40,2018-02-09,P,2.40,5.1,2018-01-12,0.0131,6750.212857,950.212857,6751.233067,6758.54,-962.290,-1.710,0.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10893,2022-11-11,2022-12-09,14600.0,C,0.20,2.60,2022-12-09,P,2740.00,2764.7,2022-11-11,0.0371,11804.796038,-2795.203962,11805.333099,11817.01,30.640,-2784.390,1.24
10894,2022-11-11,2022-12-09,14700.0,C,0.10,2.40,2022-12-09,P,2839.40,2864.2,2022-11-11,0.0371,11804.892928,-2895.107072,11805.333099,11817.01,31.190,-2884.240,1.25
10895,2022-11-11,2022-12-09,14800.0,C,0.05,2.25,2022-12-09,P,2939.00,2963.8,2022-11-11,0.0371,11804.889819,-2995.110181,11805.333099,11817.01,31.590,-2984.140,1.25
10896,2022-11-11,2022-12-09,14900.0,C,0.00,2.10,2022-12-09,P,3038.60,3063.3,2022-11-11,0.0371,11804.936710,-3095.063290,11805.333099,11817.01,32.040,-3084.040,1.26


In [24]:
# Set the upper and lower boundaries of the strike/implied-S ratio band:
# the smallest per-date maximum and the largest per-date minimum.
ratio_max = merged_data.groupby("quote_date")["sp_ratio"].max()
ratio_min = merged_data.groupby("quote_date")["sp_ratio"].min()
min_max = ratio_max.min()
max_min = ratio_min.max()
print(min_max)
print(max_min)

1.09
0.8


In [25]:
# Find the smallest and largest number of distinct strikes per quote date
# inside the [max_min, min_max] ratio band.
# (The band query used to be evaluated twice, with the first result discarded.)
merged_data2 = merged_data.query("@max_min <= sp_ratio <= @min_max")
strike_num = merged_data2.groupby("quote_date")["strike"].nunique()
print(np.max(strike_num))
print(np.min(strike_num))

195
42


In [69]:
# Draw 10 floats uniformly from [0, 118). No seed is set here, so the values change on every run.
np.random.uniform(low= 0, high=118, size=10)

array([110.71050065, 105.6127434 ,   9.38358829,   1.5169458 ,
         0.55679497,  60.86301094, 117.13754575,  38.10941402,
        26.85103694,  90.04417451])

In [60]:
# Hand-picked row positions used to try out positional selection.
ga2 = np.asarray([1, 7, 9, 10])
ga2

array([ 1,  7,  9, 10])

In [61]:
# Select the rows straight from merged_data: `temporal_data` is only defined
# in a later cell, so referencing it here fails on a fresh kernel.
merged_data.query("quote_date=='2018-01-12'").iloc[ga2]

Unnamed: 0,quote_date,expiration_C,strike,option_type_C,bid_1545_C,ask_1545_C,expiration_P,option_type_P,bid_1545_P,ask_1545_P,date,rate,implied_S_x,moneyness,implied_S_y,underlying_close_price,pmlput,pmlcall,sp_ratio
1,2018-01-12,2018-02-09,5500.0,C,1248.0,1269.0,2018-02-09,P,0.25,3.7,2018-01-12,0.0131,6750.563054,1250.563054,6751.233067,6758.54,-1260.515,0.04,0.81
7,2018-01-12,2018-02-09,6025.0,C,731.5,745.7,2018-02-09,P,5.1,7.9,2018-01-12,0.0131,6750.568959,725.568959,6751.233067,6758.54,-740.04,-5.06,0.89
9,2018-01-12,2018-02-09,6075.0,C,682.2,696.3,2018-02-09,P,5.9,8.6,2018-01-12,0.0131,6750.41476,675.41476,6751.233067,6758.54,-690.79,-5.71,0.9
10,2018-01-12,2018-02-09,6100.0,C,658.0,672.1,2018-02-09,P,6.4,9.0,2018-01-12,0.0131,6750.73766,650.73766,6751.233067,6758.54,-666.24,-6.51,0.9


In [65]:
# All strikes quoted on 2018-01-12.
temporal_data = merged_data.loc[merged_data['quote_date'] == '2018-01-12']
temporal_data  # .strike.nunique()



Unnamed: 0,quote_date,expiration_C,strike,option_type_C,bid_1545_C,ask_1545_C,expiration_P,option_type_P,bid_1545_P,ask_1545_P,date,rate,implied_S_x,moneyness,implied_S_y,underlying_close_price,pmlput,pmlcall,sp_ratio
0,2018-01-12,2018-02-09,5400.0,C,1347.0,1368.30,2018-02-09,P,0.05,3.1,2018-01-12,0.0131,6750.221453,1350.221453,6751.233067,6758.54,-1360.115,0.890,0.80
1,2018-01-12,2018-02-09,5500.0,C,1248.0,1269.00,2018-02-09,P,0.25,3.7,2018-01-12,0.0131,6750.563054,1250.563054,6751.233067,6758.54,-1260.515,0.040,0.81
2,2018-01-12,2018-02-09,5600.0,C,1151.6,1165.90,2018-02-09,P,0.65,4.2,2018-01-12,0.0131,6750.254655,1150.254655,6751.233067,6758.54,-1160.965,-0.210,0.83
3,2018-01-12,2018-02-09,5700.0,C,1052.4,1066.70,2018-02-09,P,1.70,4.3,2018-01-12,0.0131,6750.371256,1050.371256,6751.233067,6758.54,-1061.540,-1.010,0.84
4,2018-01-12,2018-02-09,5800.0,C,953.1,967.40,2018-02-09,P,2.40,5.1,2018-01-12,0.0131,6750.212857,950.212857,6751.233067,6758.54,-962.290,-1.710,0.86
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113,2018-01-12,2018-02-09,7600.0,C,0.0,1.70,2018-02-09,P,830.00,851.0,2018-01-12,0.0131,6752.111675,-847.888325,6751.233067,6758.54,0.960,-842.310,1.13
114,2018-01-12,2018-02-09,7700.0,C,0.0,1.45,2018-02-09,P,929.90,951.0,2018-01-12,0.0131,6751.928276,-948.071724,6751.233067,6758.54,1.010,-942.185,1.14
115,2018-01-12,2018-02-09,7800.0,C,0.0,1.25,2018-02-09,P,1029.60,1051.0,2018-01-12,0.0131,6751.869877,-1048.130123,6751.233067,6758.54,1.160,-1042.085,1.16
116,2018-01-12,2018-02-09,7900.0,C,0.0,1.10,2018-02-09,P,1129.40,1150.8,2018-01-12,0.0131,6751.886478,-1148.113522,6751.233067,6758.54,1.360,-1142.010,1.17


In [83]:
import random

In [129]:
temporal_data = merged_data.query("quote_date=='2018-01-12'")
indices = temporal_data.index
min_index = indices[0]
max_index = indices[-1]
# Sample 42 distinct row labels. Drawing from the labels themselves, rather
# than range(min_index, max_index), keeps the last row eligible and can only
# yield labels that exist in temporal_data.
test = random.sample(list(indices), 42)

#print(test)

def random_selection(data, n_samples=42, rng=None):
    """Draw random rows and return their call and put PML values, each sorted ascending.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``pmlcall`` and ``pmlput`` columns. Index labels are
        assumed to be unique (a duplicated label would select several rows).
    n_samples : int, default 42
        Number of distinct rows to draw.
    rng : random.Random, optional
        Source of randomness; pass a seeded instance for reproducible draws.
        Defaults to the module-level ``random`` generator.

    Returns
    -------
    tuple of numpy.ndarray
        ``(sorted call values, sorted put values)`` for the same sampled rows.

    Raises
    ------
    ValueError
        If ``data`` has fewer than ``n_samples`` rows (raised by ``sample``).
    """
    # part 1: random selection
    # Sample from the actual index labels. The earlier
    # range(min_index, max_index) could never pick the last row and assumed
    # contiguous integer labels, so it broke on filtered frames.
    sampler = random if rng is None else rng
    rand = sampler.sample(list(data.index), n_samples)
    random_pml_call = data['pmlcall'].loc[rand]
    random_pml_put = data['pmlput'].loc[rand]

    # part 2: sorting process (each series is sorted on its own)
    sort_pml_random_call = random_pml_call.sort_values()
    sort_pml_random_put = random_pml_put.sort_values()

    # return values >>> RANDOM CALL, RANDOM PUT
    return sort_pml_random_call.values, sort_pml_random_put.values

In [142]:
# Run the random draw independently for every quote date; each group yields a
# (sorted call values, sorted put values) tuple.
# NOTE(review): this samples from the full merged_data, not the
# sp_ratio-filtered merged_data2 — confirm that is intended.
dataframe_result = merged_data.groupby("quote_date").apply(random_selection)
dataframe_result

quote_date
2018-01-12    ([-1142.01, -742.51, -642.7850000000001, -592....
2018-02-09    ([-1689.8199999999997, -1214.8199999999997, -1...
2018-03-16    ([-607.0000000000002, -582.2000000000002, -557...
2018-04-13    ([-576.7599999999999, -391.30999999999983, -32...
2018-05-11    ([-948.3899999999996, -748.6399999999996, -648...
2018-06-15    ([-1344.8649999999998, -1244.9149999999997, -1...
2018-07-13    ([-1224.7050000000004, -231.5800000000003, -22...
2018-08-17    ([-1924.96, -1824.96, -1624.96, -1524.96, -142...
2018-09-14    ([-1755.05, -1655.05, -1555.05, -1255.075, -10...
2018-10-12    ([-1144.1399999999999, -1044.19, -944.29, -770...
2018-11-16    ([-1333.6049999999996, -1233.8049999999996, -9...
2018-12-14    ([-1705.865, -1505.915, -1405.94, -1206.065, -...
2019-01-11    ([-1798.8750000000005, -1698.8750000000005, -1...
2019-02-08    ([-1387.195, -1287.2199999999998, -1187.245, -...
2019-03-15    ([-893.4600000000003, -693.9600000000003, -569...
2019-04-12    ([-1272.0000000

In [153]:
# Binds the DataFrame class itself (it is not called, so no frame is created).
# NOTE(review): if an empty frame was intended, this should be pd.DataFrame() — confirm.
dataset_team = pd.DataFrame
# The display line referenced the misspelt name `dataset_teamm`, which only
# resolved through leftover kernel state.
dataset_team

pandas.core.frame.DataFrame

In [183]:
# Turn the per-date tuples into a two-column frame: one column of sorted call
# values and one of sorted put values per quote date.
temp_team_df = dataframe_result.to_frame("tuple_info").reset_index()
temp_team_df.columns = ["quote_date", "tuple_info"]
new_team_df = pd.DataFrame(
    list(temp_team_df["tuple_info"]),
    index=temp_team_df["quote_date"],
    columns=["call_ranked_pml", "put_ranked_pml"],
).reset_index()
new_team_df.head()

Unnamed: 0,quote_date,call_ranked_pml,put_ranked_pml
0,2018-01-12,"[-1142.01, -742.51, -642.7850000000001, -592.9...","[-1360.115, -1260.5149999999999, -764.64, -740..."
1,2018-02-09,"[-1689.8199999999997, -1214.8199999999997, -10...","[-2415.1800000000003, -1818.8800000000003, -15..."
2,2018-03-16,"[-607.0000000000002, -582.2000000000002, -557....","[-1423.9249999999997, -1126.3999999999999, -83..."
3,2018-04-13,"[-576.7599999999999, -391.30999999999983, -329...","[-1829.6650000000002, -1729.7900000000002, -16..."
4,2018-05-11,"[-948.3899999999996, -748.6399999999996, -648....","[-2153.5850000000005, -1953.6600000000003, -17..."


In [187]:
# Expand each array of ranked values into one column per rank, indexed by quote date.
call_pml_df, put_pml_df = (
    pd.DataFrame(new_team_df[ranked_column].tolist(), index=new_team_df["quote_date"])
    for ranked_column in ("call_ranked_pml", "put_ranked_pml")
)

In [188]:
# Display the ranked call table (one row per quote date, one column per rank).
call_pml_df

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,32,33,34,35,36,37,38,39,40,41
quote_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-12,-1142.01,-742.51,-642.785,-592.985,-518.56,-420.235,-371.91,-324.46,-278.91,-215.66,...,-18.81,-18.56,-15.46,-12.06,-9.91,-5.71,-5.06,-4.66,0.04,0.89
2018-02-09,-1689.82,-1214.82,-1089.82,-1064.82,-1039.82,-989.82,-914.82,-839.82,-809.82,-769.82,...,-82.67,-69.77,-65.92,-38.67,-34.17,-18.62,-17.32,-0.52,4.43,9.58
2018-03-16,-607.0,-582.2,-557.525,-532.85,-508.3,-483.825,-297.9,-289.4,-277.1,-237.95,...,-46.9,-43.65,-43.55,-41.65,-38.75,-28.45,-22.4,-19.7,-14.7,-11.25
2018-04-13,-576.76,-391.31,-329.16,-309.81,-273.41,-234.31,-225.06,-216.31,-210.91,-205.36,...,-23.01,-11.81,-5.71,-3.61,-1.96,-0.66,1.49,1.94,2.74,2.94
2018-05-11,-948.39,-748.64,-648.915,-549.115,-264.94,-187.89,-155.64,-141.34,-128.09,-116.09,...,1.71,1.76,2.61,2.91,3.11,3.21,3.31,3.71,4.01,4.51
2018-06-15,-1344.865,-1244.915,-1144.965,-845.14,-745.89,-472.59,-306.79,-263.29,-242.74,-223.29,...,-26.09,-23.84,-22.64,-18.44,-13.09,-11.84,-8.99,-8.19,-6.44,-4.79
2018-07-13,-1224.705,-231.58,-228.03,-220.93,-207.53,-197.98,-183.03,-177.43,-169.18,-166.68,...,-22.23,-21.73,-19.28,-18.38,-15.63,-11.58,-9.08,-7.53,-6.98,-5.88
2018-08-17,-1924.96,-1824.96,-1624.96,-1524.96,-1424.96,-1224.96,-1099.96,-1024.96,-999.96,-974.96,...,-48.16,-44.21,-42.36,-33.71,-29.56,-28.71,-25.56,-20.81,-19.61,-14.01
2018-09-14,-1755.05,-1655.05,-1555.05,-1255.075,-1055.075,-955.1,-855.125,-755.175,-555.95,-482.025,...,-27.6,-21.25,-19.25,-16.65,-14.1,-11.95,-11.1,-9.7,-9.25,-8.75
2018-10-12,-1144.14,-1044.19,-944.29,-770.19,-671.74,-647.64,-623.24,-506.94,-441.94,-401.04,...,-19.04,-16.39,-15.34,-14.54,-12.34,-11.44,-8.69,-6.89,-2.74,-0.69


In [189]:
# Write the ranked call table to call_pml_df.csv in the working directory (the index is written too).
call_pml_df.to_csv("call_pml_df.csv")

In [190]:
# Write the ranked put table to put_pml_df.csv in the working directory (the index is written too).
put_pml_df.to_csv("put_pml_df.csv")