In [1]:
import pandas as pd

In [2]:
# Load the TradesJan22 extract into the working frame used by every later cell.
df = pd.read_csv(
    filepath_or_buffer='~/Dropbox/Element/Phase2/TradesJan22.csv',
)

In [3]:
# Keep only rows with a strictly positive fill quantity (zero-fill rows are dropped).
df = df.loc[df['fillQuantity'].gt(0)]

In [4]:
# Snap price-like columns to whole pennies to remove apparent float precision noise.
# Only float64 columns from the fourth column onwards are considered, and any column
# whose name contains 'Vol', 'Prob' or 'Mark' is left at full precision.
round_cols = [
    col for col in df.columns[3:]
    if df[col].dtype == 'float64'
    and 'Vol' not in col and 'Prob' not in col and 'Mark' not in col
]
for col in round_cols:
    # Built-in round() per element, so rounding behaviour matches plain Python floats.
    df[col] = df[col].map(lambda px: round(px, 2))

In [5]:
# Convert Date Cols and add in microsecond cols
def col_to_time(col):
    """Convert ``df[col]`` in place to a tz-aware New York timestamp with microseconds.

    The column is parsed as naive datetimes, localized as America/Chicago, converted
    to America/New_York, and finally offset by the companion ``df[col + '_us']``
    column interpreted as microseconds.

    Parameters
    ----------
    col : str
        Name of the timestamp column in the notebook-level ``df``. A column named
        ``col + '_us'`` must also exist.

    Notes
    -----
    Mutates the notebook-level ``df`` and returns nothing. The column is assigned
    once at the end, so a failure part-way through leaves ``df[col]`` untouched.
    Calling this twice on the same column raises, because ``tz_localize`` rejects
    values that are already tz-aware.
    """
    # Vectorized parse instead of a per-row .apply(pd.to_datetime).
    stamps = pd.to_datetime(df[col])
    stamps = stamps.dt.tz_localize('America/Chicago').dt.tz_convert('America/New_York')
    # Vectorized microsecond offsets instead of building one pd.Timedelta per row.
    micros = pd.to_timedelta(df[col + '_us'], unit='us')
    df[col] = stamps + micros
    
# Child and fill timestamps each get their '<col>_us' microsecond column added in.
for col in ['childDttm', 'fillTransactDttm']:
    col_to_time(col)

# parentDttm gets the same Chicago -> New York conversion, but no microsecond
# offset is applied to it. Parsed in one vectorized call rather than row by row.
df['parentDttm'] = (
    pd.to_datetime(df['parentDttm'])
    .dt.tz_localize('America/Chicago')
    .dt.tz_convert('America/New_York')
)

In [6]:
# Distinct parent order ids, in order of first appearance.
# NOTE(review): these display as float64 values around 1e18, which is above 2**53,
# so integer ids cannot be represented exactly and distinct parents could be
# collapsing into one value. Consider reading 'parentNumber' as a string or
# integer type - TODO confirm against the raw CSV.
parents = pd.unique(df['parentNumber'])
parents

array([1.05556e+18, 1.13663e+18, 7.63531e+17, 1.05558e+18])

In [7]:
# One frame per parent order, selected by position in `parents`.
order1 = df.loc[df['parentNumber'].eq(parents[0])]  # buying $15m of SPY to hedge overnight delta (40,000 shrs)
order2 = df.loc[df['parentNumber'].eq(parents[1])]  # sell SPX Mar 3850 C autohedge AlphaVwap2pct (152 ctr)
order3 = df.loc[df['parentNumber'].eq(parents[2])]  # autohedges (500,782 shrs)
order4 = df.loc[df['parentNumber'].eq(parents[3])]  # sell SPX Mar 3850 C autohedge SpdrAuto(848 ctr)

In [8]:
# Show the distinct parent timestamps that occur within each order.
for o in (order1, order2, order3, order4):
    print(pd.unique(o['parentDttm']))

<DatetimeArray>
['2021-01-22 10:10:33-05:00']
Length: 1, dtype: datetime64[ns, America/New_York]
<DatetimeArray>
['2021-01-22 12:29:06-05:00']
Length: 1, dtype: datetime64[ns, America/New_York]
<DatetimeArray>
['2021-01-22 12:29:22-05:00', '2021-01-22 12:29:36-05:00',
 '2021-01-22 12:35:56-05:00', '2021-01-22 12:44:40-05:00',
 '2021-01-22 12:56:48-05:00', '2021-01-22 13:04:26-05:00',
 '2021-01-22 13:10:52-05:00', '2021-01-22 13:21:48-05:00',
 '2021-01-22 13:42:43-05:00', '2021-01-22 13:42:54-05:00',
 '2021-01-22 13:43:04-05:00', '2021-01-22 13:43:18-05:00',
 '2021-01-22 13:43:24-05:00', '2021-01-22 13:43:52-05:00',
 '2021-01-22 13:43:53-05:00', '2021-01-22 13:44:20-05:00',
 '2021-01-22 13:44:22-05:00', '2021-01-22 13:44:28-05:00',
 '2021-01-22 13:44:41-05:00', '2021-01-22 13:44:43-05:00',
 '2021-01-22 13:45:05-05:00', '2021-01-22 13:45:06-05:00',
 '2021-01-22 13:45:08-05:00', '2021-01-22 13:45:52-05:00']
Length: 24, dtype: datetime64[ns, America/New_York]
<DatetimeArray>
['2021-01-22 1

In [9]:
def calc_fil_pctSpd(df):
    """Return the quantity-weighted average fill position within the quoted spread.

    Each fill is scored as ``(fillPrice - fillBid) / (fillAsk - fillBid)`` using the
    bid/ask recorded on the fill row, so 0 = Bid, 0.5 = Mid, 1 = Ask. Scores are
    averaged with ``fillQuantity`` as the weight.

    Rows where the score is undefined are excluded from both the numerator and the
    weight total: a locked or crossed quote (``fillAsk <= fillBid``), or a missing
    price, quote or quantity. Without this, a zero spread would produce inf/NaN,
    and a NaN score would be dropped from the numerator while its quantity still
    counted in the denominator.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'fillPrice', 'fillBid', 'fillAsk' and 'fillQuantity'.

    Returns
    -------
    float
        The weighted average score, or NaN when no row is usable or the usable
        quantity sums to zero.
    """
    spread = df['fillAsk'] - df['fillBid']
    # Non-positive spreads become NaN so they fall out of the average below.
    pct_of_spread = (df['fillPrice'] - df['fillBid']) / spread.where(spread > 0)

    usable = pct_of_spread.notna() & df['fillQuantity'].notna()
    weights = df.loc[usable, 'fillQuantity']
    total_weight = weights.sum()
    if total_weight == 0:
        return float('nan')
    return (pct_of_spread[usable] * weights).sum() / total_weight

# Print each order's average fill position within the spread, as a percentage.
for o in (order1, order2, order3, order4):
    pct_of_spread = calc_fil_pctSpd(o)
    print(f'{pct_of_spread:.2%}')

# Ok.  'Active taker algos' only ever hit the bid or offer - they just think they're smart about when
# The 'patient' algos seem to do very little making

99.67%
0.00%
99.93%
0.00%


In [10]:
# Compare delta adjusted fill vs arrival mid and mark
# NOTE SHOULD BE USING PARENT VALUES REALLY HERE
# Arrival levels are proxied by the quotes on the first child row of each order.
arrival_ul = (order3['childAsk'].iloc[0] + order3['childBid'].iloc[0]) / 2      # mid of order3's first child quote
arrival_idx = (order2['childUAsk'].iloc[0] + order2['childUBid'].iloc[0]) / 2   # presumably the underlying index mid - TODO confirm
arrival_mid = (order2['childAsk'].iloc[0] + order2['childBid'].iloc[0]) / 2     # mid of order2's first child quote
arrival_mark = order2['childMark'].iloc[0]

print(arrival_ul, arrival_idx, arrival_mid, arrival_mark, sep='\n')

383.695
3848.99
105.94999999999999
105.8869934


In [11]:
# Fill-weighted average price of the order3 fills.
avg_ul = order3['fillPrice'].mul(order3['fillQuantity']).sum() / order3['fillQuantity'].sum()

# Pool the two option orders: s = summed price * quantity, t = summed quantity.
s = (
    order2['fillPrice'].mul(order2['fillQuantity']).sum()
    + order4['fillPrice'].mul(order4['fillQuantity']).sum()
)
t = order2['fillQuantity'].sum() + order4['fillQuantity'].sum()
avg_opt = s / t

# order3 quantity per pooled option quantity, scaled down by 100 * 10.
# NOTE(review): presumably 100 is the contract multiplier and 10 the SPX/SPY ratio - TODO confirm.
delta = order3['fillQuantity'].sum() / (t * 100 * 10)

# Shift the average option fill by the underlying's move between arrival and its average fill.
arrival_opt_adj = avg_opt + delta * (arrival_ul - avg_ul)

print(
    avg_opt,
    arrival_opt_adj,
    arrival_opt_adj - arrival_mid,
    arrival_opt_adj - arrival_mark,
    sep='\n',
)

105.38909999999998
105.41468088999996
-0.5353191100000316
-0.4723125100000374


In [12]:
# Compare vs contract vega
vega = 6.009  # hard-coded here; where this value comes from is not shown in the notebook
print(
    (arrival_opt_adj - arrival_mid) / vega,
    (arrival_opt_adj - arrival_mark) / vega,
    sep='\n',
)
# so less than .1 vol discount vs. mark

-0.08908622233317218
-0.07860085039108626


In [13]:
# Compare vs (*not* fill-weighted) average spread
# Every fill row of order2 and order4 counts equally, whatever its size.
spd_sum = (
    order2['fillAsk'].sub(order2['fillBid']).sum()
    + order4['fillAsk'].sub(order4['fillBid']).sum()
)
half_spd = 0.5 * spd_sum / (len(order2) + len(order4))
print(
    half_spd,
    (arrival_opt_adj - arrival_mid) / half_spd,
    (arrival_opt_adj - arrival_mark) / half_spd,
    sep='\n',
)

# This doesn't look good.  It means we crossed spread on both options and delta
# so ended up selling below the bid

# Although - we only capture the bid-offer when it trades.  That might be when it's unusually tight

# Also, this analysis doesn't take into account whether vol went down over the period.
# We could use the ATM vols provided to investigate that

0.15489130434782516
-3.4560953066668914
-3.0493158540353473


In [14]:
# For the stock trades, how many were making vs taking?

def calc_post_pct(df):
    """Return the share of filled quantity whose childOrderHandling is 'PostLimit'.

    Computed as the summed 'fillQuantity' of the 'PostLimit' rows divided by the
    summed 'fillQuantity' of all rows in ``df``.
    """
    is_post = df['childOrderHandling'].eq('PostLimit')
    posted_qty = df['fillQuantity'][is_post].sum()
    total_qty = df['fillQuantity'].sum()
    return posted_qty / total_qty

# Posted share of filled quantity for the two stock orders:
#   order1 - TwapAlpha
#   order3 - Mix of AlphaVwap2pct and SpdrAuto
print(
    f'{calc_post_pct(order1):.1%}',
    f'{calc_post_pct(order3):.1%}',
    sep='\n',
)

4.4%
0.7%
