# Calculate Limits

## Simulate First

You can just run the whole notebook

### Imports / Helper Functions

In [1]:
import sys
import math
import warnings

import psycopg2
import wrds
import gzip

import seaborn as sns
import os
import quandl
import json
import zipfile
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import functools
import requests
import io

import urllib.request
from urllib.error import HTTPError
# from html_table_parser.parser import HTMLTableParser
import re

import plotnine as p9
from plotnine import ggplot, scale_x_date, guides, guide_legend, geom_bar, scale_y_continuous, \
    scale_color_identity, geom_line, geom_point, labs, theme_minimal, theme, element_blank, element_text, \
        geom_ribbon, geom_hline, aes, scale_size_manual, scale_color_manual, ggtitle

from datetime import datetime
import datetime

import pandas as pd
# import pandas_market_calendars as mcal
from pandas.plotting import autocorrelation_plot
import numpy as np
from numpy import cumsum, log, polyfit, sqrt, std, subtract
import scipy as sp
from scipy.stats import norm
import scipy.stats as stats

from statsmodels.tsa.stattools import coint
from statsmodels.graphics.tsaplots import plot_acf
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

from collections import deque
from bisect import insort, bisect_left
from itertools import islice

pd.set_option('display.max_columns', None)

In [2]:
def max_drawdown(returns):
        local_max = [n for n in range(len(returns)-1) if ((n==0) and (returns[0] > returns[1])) or 
           ((n > 0) and  (returns[n-1]<returns[n]) and (returns[n+1]<returns[n]))] 
        
        local_min = [n for n in range(1,len(returns)) if ((n == len(returns)-1) and (returns[-1] < returns[-2])) or
                (returns[n-1]>returns[n]) and (returns[n+1]>returns[n])]
        
        def next_local_min(n):
            if [m for m in local_min if m > n]:
                return [m for m in local_min if m > n][0]
            else: return None
        
        drawdowns = [(n,next_local_min(n)) for n in local_max]
        drawdown_values = [returns[n] - returns[m] for (n,m) in drawdowns if m != None]
        if drawdown_values:
            return  np.max(drawdown_values)
        else: return 0.0

### Dividends

In [3]:
start_date = '2018-01-01'
end_date = '2023-02-28'
spy_divdata = pd.read_csv('spy_tickerdata.csv')[['date','dividend']].sort_values(by='date').reset_index(drop=True)
spy_divdata = spy_divdata.loc[(spy_divdata['date'] >= start_date) & (spy_divdata['date'] <= end_date)].copy().reset_index(drop=True)
spy_divdata['date'] = pd.to_datetime(spy_divdata['date'])
trading_days = spy_divdata['date']
spy_divdata = spy_divdata.loc[spy_divdata['dividend'] != 0]

# Step 1: Calculate the last day of the following month
spy_divdata['date'] = pd.to_datetime(spy_divdata['date'])  # Ensure 'date' is in datetime format
spy_divdata['end_of_next_month'] = spy_divdata['date'] + pd.offsets.MonthEnd(2)

# Step 2: Adjust to the nearest trading day

# This function finds the last trading day of the month or the nearest previous trading day
def find_pay_date(end_of_month, trading_days):
    if end_of_month in trading_days:
        return end_of_month
    else:
        # Find the nearest previous trading day
        eligible_days = trading_days[trading_days <= end_of_month]
        return eligible_days.max()  # The last trading day before or on the end_of_month

# Apply the function to each row to determine the 'pay_date'
spy_divdata['pay_date'] = spy_divdata['end_of_next_month'].apply(lambda date: find_pay_date(date, trading_days))

# Optionally, you can drop the intermediate 'end_of_next_month' column if it's no longer needed
spy_divdata.drop(columns=['end_of_next_month'], inplace=True)

spy_divdata

Unnamed: 0,date,dividend,pay_date
51,2018-03-16,1.09678,2018-04-30
114,2018-06-15,1.246,2018-07-31
182,2018-09-21,1.323,2018-10-31
245,2018-12-21,1.4354,2019-01-31
301,2019-03-15,1.2331,2019-04-30
369,2019-06-21,1.4316,2019-07-31
432,2019-09-20,1.38362,2019-10-31
496,2019-12-20,1.57,2020-01-31
557,2020-03-20,1.40556,2020-04-30
620,2020-06-19,1.3662,2020-07-31


### Simulations

In [4]:
data = pd.read_csv('combinedata.csv')
options = pd.read_csv('option_df.csv')

data['exdate'] = pd.to_datetime(data['exdate'])
options['exdate'] = pd.to_datetime(options['exdate'])


data['exdate_str'] = data['exdate'].dt.strftime('%Y%m%d')  
data['strikeID'] = data['exdate_str'] + '_' + data['strike_price'].astype(str)  
data.drop(columns=['exdate_str'], inplace=True)  

options['exdate_str'] = options['exdate'].dt.strftime('%Y%m%d')  
options['strikeID'] = options['exdate_str'] + '_' + options['strike_price'].astype(str)  
options.drop(columns=['exdate_str'], inplace=True)  

options['date'] = pd.to_datetime(options['date'])
data['date'] = pd.to_datetime(data['date'])

In [5]:
def create_simulations(options_subset, data, dropna_greeks=False):
    simulations = {}

    for index, row in options_subset.iterrows():
        strikeID = row['exdate'].strftime('%Y%m%d') + '_' + str(row['strike_price'])
        mask = (data['strikeID'] == strikeID) & (data['date'] >= row['date']) & (data['date'] <= row['close_date'])
        temp_df = data[mask].sort_values(by=['date', 'cp_flag'])

        shared_cols = ['date', 'exdate', 'strike_price', 'close', 'strikeID'] # 'expiry_indicator',  'adj_open', 'adj_close', 'adj_volume',
        greeks_cols = ['impl_volatility', 'delta'] # , 'gamma', 'vega', 'theta'
        call_specific_cols = ['cp_flag', 'best_bid', 'best_offer'] + greeks_cols # , 'volume', 'open_interest'
        put_specific_cols = call_specific_cols

        calls = temp_df[temp_df['cp_flag'] == 'C'][shared_cols + call_specific_cols].rename(columns={col: col + '_c' for col in call_specific_cols})
        puts = temp_df[temp_df['cp_flag'] == 'P'][shared_cols + put_specific_cols].rename(columns={col: col + '_p' for col in put_specific_cols})

        merged_df = pd.merge(calls, puts, on=shared_cols, how='outer')

        if dropna_greeks:
            greeks_cols_c = [col + '_c' for col in greeks_cols]
            greeks_cols_p = [col + '_p' for col in greeks_cols]
            merged_df = merged_df.dropna(subset=greeks_cols_c + greeks_cols_p, how='any')

        merged_df['delta_sum'] = merged_df['delta_c'].fillna(0) + merged_df['delta_p'].fillna(0)
        merged_df['shares_held'] = -1 * merged_df['delta_sum']

        merged_df = merged_df.sort_values(by='date')
        merged_df['sharechange'] = merged_df['shares_held'].diff()

        simulations[row['date'].strftime('%Y-%m-%d')] = merged_df

    return simulations

In [60]:
%%time
simulations = create_simulations(options, data, dropna_greeks=True)

CPU times: total: 8min 34s
Wall time: 8min 34s


In [61]:
len(trading_days) - len(simulations) # Adds up to contracts with missing greeks

158

In [62]:
filtered_simulations = {}

for key, df in simulations.items():
    # Ensure 'date' column is in datetime64 dtype
    df['date'] = pd.to_datetime(df['date'])
    
    # Find the range of trading days for each simulation
    start_date = df['date'].min()
    end_date = df['date'].max()
    
    # Generate the expected range of trading days
    expected_trading_days = trading_days[(trading_days >= start_date) & (trading_days <= end_date)]
    
    # Check if all expected trading days are present in the DataFrame
    actual_trading_days = df['date'].unique()
    actual_trading_days = pd.to_datetime(actual_trading_days)  # Ensure this is in datetime format for comparison
    
    # Use .isin() to check if each item in expected_trading_days is in actual_trading_days, then check if all are True
    if expected_trading_days.isin(actual_trading_days).all():
        # If there are no gaps, add the DataFrame to the filtered_simulations dictionary
        filtered_simulations[key] = df

In [63]:
len(trading_days) - len(filtered_simulations)

325

In [64]:
for key, df in list(filtered_simulations.items())[0:1]: 
    print(f"DataFrame for {key}:")
    print(df.columns)
    display(df)  
    print("\n")

DataFrame for 2018-01-03:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'shares_held', 'sharechange'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,shares_held,sharechange
0,2018-01-03,2018-02-02,270.0,270.47,20180202_270.0,C,2.65,2.69,0.072398,0.562218,P,1.77,1.8,0.069568,-0.442522,0.119696,-0.119696,
1,2018-01-04,2018-02-02,270.0,271.61,20180202_270.0,C,3.45,3.49,0.077848,0.63269,P,1.43,1.46,0.074706,-0.366922,0.265768,-0.265768,-0.146072
2,2018-01-05,2018-02-02,270.0,273.42,20180202_270.0,C,4.8,4.84,0.08382,0.727658,P,1.02,1.04,0.081783,-0.270119,0.457539,-0.457539,-0.191771
3,2018-01-08,2018-02-02,270.0,273.92,20180202_270.0,C,5.17,5.2,0.089623,0.74924,P,0.81,0.82,0.082307,-0.234696,0.514544,-0.514544,-0.057005
4,2018-01-09,2018-02-02,270.0,274.54,20180202_270.0,C,5.67,5.72,0.093639,0.773171,P,0.83,0.84,0.091492,-0.223283,0.549888,-0.549888,-0.035344
5,2018-01-10,2018-02-02,270.0,274.12,20180202_270.0,C,5.27,5.33,0.092791,0.75907,P,0.85,0.87,0.089866,-0.235707,0.523363,-0.523363,0.026525
6,2018-01-11,2018-02-02,270.0,276.12,20180202_270.0,C,6.76,6.91,0.090857,0.854846,P,0.56,0.58,0.097319,-0.162272,0.692574,-0.692574,-0.169211
7,2018-01-12,2018-02-02,270.0,277.92,20180202_270.0,C,8.45,8.6,0.102281,0.890014,P,0.43,0.44,0.107499,-0.121856,0.768158,-0.768158,-0.075584
8,2018-01-16,2018-02-02,270.0,276.97,20180202_270.0,C,7.82,7.97,0.130638,0.827166,P,0.6,0.61,0.121851,-0.157003,0.670163,-0.670163,0.097995
9,2018-01-17,2018-02-02,270.0,279.61,20180202_270.0,C,10.03,10.25,0.131682,0.904279,P,0.38,0.39,0.135023,-0.101488,0.802791,-0.802791,-0.132628






### PnL and Misc for future calcs

In [75]:
def calculate_realized_PL(df, long_op=True):
    df = df.reset_index(drop=True)
    
    # Vectorized initial operations for stock
    df['stock_pos'] = np.where(long_op, df['shares_held'], -df['shares_held'])
    df = df.drop(columns=['shares_held'])
    df['pos_change'] = np.where(long_op, df['sharechange'], -df['sharechange'])
    df = df.drop(columns=['sharechange'])
    df.loc[0, 'pos_change'] = df.loc[0, 'stock_pos']
    
    df['change_cost_basis'] = df['pos_change'] * df['close']
    df['stock_cost_basis'] = df['change_cost_basis'].cumsum()
    df['daily_stock_value'] = df['stock_pos'] * df['close']
    df['stock_PL'] = df['daily_stock_value'] - df['stock_cost_basis']

    # Initial option value and vectorized daily option value calculation
    df['option_cost_basis'] = df.loc[0, 'best_offer_c'] + df.loc[0, 'best_offer_p'] if long_op else -df.loc[0, 'best_bid_c'] - df.loc[0, 'best_bid_p']
    df['change_cost_basis_op'] = 0.0
    df.loc[0, 'change_cost_basis_op'] = df.loc[0, 'option_cost_basis']
    df['daily_option_value'] = np.where(long_op, df['best_bid_c'] + df['best_bid_p'], -(df['best_offer_c'] + df['best_offer_p']))
    df['option_PL'] = df['daily_option_value']- df['option_cost_basis']

    # Column to track total positions, PL, and cash flow after positions are closed
    df['total_cost_basis'] = df['stock_cost_basis'] + df['option_cost_basis']
    df['total_pos_value'] = df['daily_stock_value'] + df['daily_option_value']
    df['total_PL'] = df['stock_PL'] + df['option_PL']
    df['realized_stock_PL'] = 0.0
    df['realized_option_PL'] = 0.0
    df['realized_PL'] = 0.0

    # Misc
    df['UID'] = df['strikeID'] + '_' + str(df.loc[0, 'date'].date())
    df['to_open'] = 0
    df.loc[0, 'to_open'] = 1
#    df['gross_trades_value'] = abs(df['to_open'] * df['option_cost_basis']) + abs(df['change_cost_basis']) # Need to do this at end

    # Close positions on final day
    final_row_index = len(df) - 1
    df.loc[final_row_index, 'realized_stock_PL'] = df.loc[final_row_index, 'stock_PL']
    df.loc[final_row_index, 'realized_option_PL'] = df.loc[final_row_index, 'option_PL']
    df.loc[final_row_index, 'realized_PL'] = df.loc[final_row_index, 'total_PL']
#    df.loc[final_row_index, 'gross_trades_value'] = abs(df.loc[final_row_index, 'daily_option_value']) + abs(df.loc[final_row_index - 1, 'stock_pos']) * df.loc[final_row_index, 'close'] 

    final_close_price = df.loc[final_row_index, 'close']
    df.loc[final_row_index, 'stock_pos'] = 0
    df.loc[final_row_index, 'pos_change'] = - df.loc[final_row_index - 1, 'pos_change'] if final_row_index > 0 else 0
    df.loc[final_row_index, 'change_cost_basis'] = df.loc[final_row_index, 'pos_change'] * final_close_price
    df.loc[final_row_index, 'stock_cost_basis'] = 0
    df.loc[final_row_index, 'daily_stock_value'] = 0
    df.loc[final_row_index, 'stock_PL'] = 0

    df.loc[final_row_index, 'option_cost_basis'] = 0
    df.loc[final_row_index, 'change_cost_basis_op'] = -df.loc[final_row_index, 'daily_option_value']
    df.loc[final_row_index, 'daily_option_value'] = 0
    df.loc[final_row_index, 'option_PL'] = 0

    df.loc[final_row_index, 'total_cost_basis'] = 0
    df.loc[final_row_index, 'total_pos_value'] = 0
    df.loc[final_row_index, 'total_PL'] = 0
    
    return df

*Cash - total cost basis + net realized PL* gives you how much cash is available to use

In [79]:
simulations_long = {date: calculate_realized_PL(df.copy(), long_op=True) for date, df in filtered_simulations.items()}
simulations_short = {date: calculate_realized_PL(df.copy(), long_op=False) for date, df in filtered_simulations.items()}

In [80]:
for key, df in list(simulations_short.items())[0:1]: 
    print(f"DataFrame for {key}:")
    print(df.columns)
    display(df)  
    print("\n")

DataFrame for 2018-01-03:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'stock_pos', 'pos_change', 'change_cost_basis',
       'stock_cost_basis', 'daily_stock_value', 'stock_PL',
       'option_cost_basis', 'change_cost_basis_op', 'daily_option_value',
       'option_PL', 'total_cost_basis', 'total_pos_value', 'total_PL',
       'realized_stock_PL', 'realized_option_PL', 'realized_PL', 'UID',
       'to_open'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open
0,2018-01-03,2018-02-02,270.0,270.47,20180202_270.0,C,2.65,2.69,0.072398,0.562218,P,1.77,1.8,0.069568,-0.442522,0.119696,0.119696,0.119696,32.374177,32.374177,32.374177,0.0,-4.42,-4.42,-4.49,-0.07,27.954177,27.884177,-0.07,0.0,0.0,0.0,20180202_270.0_2018-01-03,1
1,2018-01-04,2018-02-02,270.0,271.61,20180202_270.0,C,3.45,3.49,0.077848,0.63269,P,1.43,1.46,0.074706,-0.366922,0.265768,0.265768,0.146072,39.674616,72.048793,72.185246,0.136453,-4.42,0.0,-4.95,-0.53,67.628793,67.235246,-0.393547,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
2,2018-01-05,2018-02-02,270.0,273.42,20180202_270.0,C,4.8,4.84,0.08382,0.727658,P,1.02,1.04,0.081783,-0.270119,0.457539,0.457539,0.191771,52.434027,124.48282,125.100313,0.617494,-4.42,0.0,-5.88,-1.46,120.06282,119.220313,-0.842506,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
3,2018-01-08,2018-02-02,270.0,273.92,20180202_270.0,C,5.17,5.2,0.089623,0.74924,P,0.81,0.82,0.082307,-0.234696,0.514544,0.514544,0.057005,15.61481,140.097629,140.943892,0.846263,-4.42,0.0,-6.02,-1.6,135.677629,134.923892,-0.753737,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
4,2018-01-09,2018-02-02,270.0,274.54,20180202_270.0,C,5.67,5.72,0.093639,0.773171,P,0.83,0.84,0.091492,-0.223283,0.549888,0.549888,0.035344,9.703342,149.800971,150.966252,1.16528,-4.42,0.0,-6.56,-2.14,145.380971,144.406252,-0.97472,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
5,2018-01-10,2018-02-02,270.0,274.12,20180202_270.0,C,5.27,5.33,0.092791,0.75907,P,0.85,0.87,0.089866,-0.235707,0.523363,0.523363,-0.026525,-7.271033,142.529938,143.464266,0.934327,-4.42,0.0,-6.2,-1.78,138.109938,137.264266,-0.845673,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
6,2018-01-11,2018-02-02,270.0,276.12,20180202_270.0,C,6.76,6.91,0.090857,0.854846,P,0.56,0.58,0.097319,-0.162272,0.692574,0.692574,0.169211,46.722541,189.25248,191.233533,1.981053,-4.42,0.0,-7.49,-3.07,184.83248,183.743533,-1.088947,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
7,2018-01-12,2018-02-02,270.0,277.92,20180202_270.0,C,8.45,8.6,0.102281,0.890014,P,0.43,0.44,0.107499,-0.121856,0.768158,0.768158,0.075584,21.006305,210.258785,213.486471,3.227687,-4.42,0.0,-9.04,-4.62,205.838785,204.446471,-1.392313,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
8,2018-01-16,2018-02-02,270.0,276.97,20180202_270.0,C,7.82,7.97,0.130638,0.827166,P,0.6,0.61,0.121851,-0.157003,0.670163,0.670163,-0.097995,-27.141675,183.11711,185.615046,2.497936,-4.42,0.0,-8.58,-4.16,178.69711,177.035046,-1.662064,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
9,2018-01-17,2018-02-02,270.0,279.61,20180202_270.0,C,10.03,10.25,0.131682,0.904279,P,0.38,0.39,0.135023,-0.101488,0.802791,0.802791,0.132628,37.084115,220.201225,224.468392,4.267167,-4.42,0.0,-10.64,-6.22,215.781225,213.828392,-1.952833,0.0,0.0,0.0,20180202_270.0_2018-01-03,0






## Trading Strat / Positions / Limits

### Trading Strat

In [81]:
# Replace with actual model free IV calculations. Make sure to select for only the dates present in simulations
# (use loop below with df.loc[0, 'date']

MF_IV = pd.Series([0.120] * len(filtered_simulations))
MF_IV

0      0.12
1      0.12
2      0.12
3      0.12
4      0.12
       ... 
968    0.12
969    0.12
970    0.12
971    0.12
972    0.12
Length: 973, dtype: float64

In [82]:
temp_data = []

for key, df in filtered_simulations.items():
    temp_data.append({'date': key, 'BS_Call_IV': df.loc[0, 'impl_volatility_c']})

IV_compare = pd.DataFrame(temp_data)

IV_compare['MF_Call_IV'] = MF_IV.reset_index(drop=True)
IV_compare['IV_diff'] = IV_compare['MF_Call_IV'] - IV_compare['BS_Call_IV']

In [83]:
def trade_strategy_1(x):
    if x > 0.002:
        return 1
    elif x < -0.002:
        return -1
    else:
        return 0

def trade_strategy_2(x):
    if x > 0.005:
        return 1
    else:
        return 0

In [84]:
strat_dict = {'trade_1': trade_strategy_1, 'trade_2': trade_strategy_2}

In [85]:
for key, func in strat_dict.items():
    IV_compare[key] = IV_compare['IV_diff'].apply(func)
IV_compare

Unnamed: 0,date,BS_Call_IV,MF_Call_IV,IV_diff,trade_1,trade_2
0,2018-01-03,0.072398,0.12,0.047602,1,1
1,2018-01-04,0.071179,0.12,0.048821,1,1
2,2018-01-10,0.080641,0.12,0.039359,1,1
3,2018-01-11,0.076890,0.12,0.043110,1,1
4,2018-01-12,0.079350,0.12,0.040650,1,1
...,...,...,...,...,...,...
968,2023-02-22,0.202075,0.12,-0.082075,-1,0
969,2023-02-23,0.196720,0.12,-0.076720,-1,0
970,2023-02-24,0.201088,0.12,-0.081088,-1,0
971,2023-02-27,0.194675,0.12,-0.074675,-1,0


In [86]:
def generate_trades_dfs(strat_dict = strat_dict, initial_df = IV_compare):
    
    trades_dfs = {}
    
    for key in strat_dict.keys():

        dfs_to_combine = []
        
        for index, row in initial_df.iterrows():
            date = row['date']
            trade = row[key]
            iv_diff = row['IV_diff']
            
            if trade == 1 and date in simulations_long:
                df_to_add = simulations_long[date].copy()
            elif trade == -1 and date in simulations_short:
                df_to_add = simulations_short[date].copy()
            else:
                # Skip if 'trade' is 0 or the date is not in the dictionaries
                continue
            
            # Add 'trade' & 'IV_diff' column
            df_to_add['IV_diff'] = iv_diff # Needed for position calculation 
            df_to_add[key] = trade  # Include the 'trade' value
            dfs_to_combine.append(df_to_add)
        
        # Concatenate all collected DataFrames
        trades_dfs[key] = pd.concat(dfs_to_combine, ignore_index=True)
        trades_dfs[key] = trades_dfs[key].sort_values(by=['date', 'exdate', 'strike_price', 'to_open']).reset_index(drop=True)

    return trades_dfs

In [87]:
%%time
trades_dfs = generate_trades_dfs()

trades_dfs['trade_1']

CPU times: total: 609 ms
Wall time: 584 ms


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open,IV_diff,trade_1
0,2018-01-03,2018-02-02,270.0,270.47,20180202_270.0,C,2.65,2.69,0.072398,0.562218,P,1.77,1.80,0.069568,-0.442522,0.119696,-0.119696,-0.119696,-32.374177,-32.374177,-32.374177,0.000000,4.49,4.49,4.42,-0.07,-27.884177,-27.954177,-0.070000,0.000000,0.00,0.000000,20180202_270.0_2018-01-03,1,0.047602,1
1,2018-01-04,2018-02-02,270.0,271.61,20180202_270.0,C,3.45,3.49,0.077848,0.632690,P,1.43,1.46,0.074706,-0.366922,0.265768,-0.265768,-0.146072,-39.674616,-72.048793,-72.185246,-0.136453,4.49,0.00,4.88,0.39,-67.558793,-67.305246,0.253547,0.000000,0.00,0.000000,20180202_270.0_2018-01-03,0,0.047602,1
2,2018-01-04,2018-02-02,272.0,271.61,20180202_272.0,C,2.13,2.17,0.071179,0.500445,P,2.12,2.15,0.068201,-0.508652,-0.008207,0.008207,0.008207,2.229103,2.229103,2.229103,0.000000,4.32,4.32,4.25,-0.07,6.549103,6.479103,-0.070000,0.000000,0.00,0.000000,20180202_272.0_2018-01-04,1,0.048821,1
3,2018-01-05,2018-02-02,270.0,273.42,20180202_270.0,C,4.80,4.84,0.083820,0.727658,P,1.02,1.04,0.081783,-0.270119,0.457539,-0.457539,-0.191771,-52.434027,-124.482820,-125.100313,-0.617494,4.49,0.00,5.82,1.33,-119.992820,-119.280313,0.712506,0.000000,0.00,0.000000,20180202_270.0_2018-01-03,0,0.047602,1
4,2018-01-05,2018-02-02,272.0,273.42,20180202_272.0,C,3.27,3.32,0.076904,0.622425,P,1.48,1.52,0.074806,-0.379226,0.243199,-0.243199,-0.251406,-68.739429,-66.510325,-66.495471,0.014855,4.32,0.00,4.75,0.43,-62.190325,-61.745471,0.444855,0.000000,0.00,0.000000,20180202_272.0_2018-01-04,0,0.048821,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14541,2023-02-28,2023-03-24,399.0,396.26,20230324_399.0,C,7.03,7.05,0.207028,0.465189,P,8.85,8.87,0.170603,-0.573872,-0.108683,0.000000,-0.041990,-16.638957,0.000000,0.000000,0.000000,0.00,15.92,0.00,0.00,0.000000,0.000000,0.000000,-0.416764,3.79,3.373236,20230324_399.0_2023-02-21,0,-0.091530,-1
14542,2023-02-28,2023-03-24,399.0,396.26,20230324_399.0,C,7.03,7.05,0.207028,0.465189,P,8.85,8.87,0.170603,-0.573872,-0.108683,0.000000,-0.041990,-16.638957,0.000000,0.000000,0.000000,0.00,15.92,0.00,0.00,0.000000,0.000000,0.000000,-0.398548,2.83,2.431452,20230324_399.0_2023-02-22,0,-0.082075,-1
14543,2023-02-28,2023-03-24,401.0,396.26,20230324_401.0,C,5.99,6.01,0.202116,0.424099,P,9.85,9.87,0.164628,-0.623364,-0.199265,0.000000,-0.037696,-14.937417,0.000000,0.000000,0.000000,0.00,15.88,0.00,0.00,0.000000,0.000000,0.000000,-0.085759,1.74,1.654241,20230324_401.0_2023-02-23,0,-0.076720,-1
14544,2023-02-28,2023-03-31,396.0,396.26,20230331_396.0,C,9.55,9.58,0.207305,0.519384,P,8.15,8.17,0.174174,-0.498412,0.020972,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00,17.75,0.00,0.00,0.000000,0.000000,0.000000,0.000000,-0.05,-0.050000,20230331_396.0_2023-02-28,1,-0.087305,-1


In [88]:
KAPITAL = 1e7

In [92]:
def pos_size(IV_diff, strike_price, option_cost_basis, UID, key):
    factor = .01* strike_price # Adjust this based on whatever

    if option_cost_basis == 0:
        # Get the first matching 'option_cost_basis' safely
        filtered_df = trades_dfs[key].loc[trades_dfs[key]['UID'] == UID, 'option_cost_basis']

        option_cost_basis = filtered_df.iloc[0] 

    posSize = round((abs(IV_diff) / factor) * KAPITAL / abs(option_cost_basis+.000000000001)) # some too close to zero, trigger infinity
    
    return posSize
    
    # kept it simple instead of my earlier complex one

In [90]:
for key, df in trades_dfs.items():
    trades_dfs[key] = df.drop(columns=[col for col in df.columns if col.endswith('_p') or col.endswith('_c')]).copy()

In [93]:
for key, df in trades_dfs.items():

    df['pos_size'] = df.apply(lambda row: pos_size(row['IV_diff'], row['strike_price'], row['option_cost_basis'], row['UID'], key), axis=1)
    lot_size = 100 * df['pos_size']
    
    for col in ['stock_pos',	'pos_change',	'change_cost_basis',	'stock_cost_basis',	'daily_stock_value',	'stock_PL',	'option_cost_basis',
                'change_cost_basis_op', 'daily_option_value',	'option_PL',	'total_cost_basis',	'total_pos_value',	'total_PL',	'realized_stock_PL',
                'realized_option_PL',	'realized_PL']:
        df['sized_' + col] = lot_size * df[col]

In [94]:
trades_dfs['trade_1'].head()

Unnamed: 0,date,exdate,strike_price,close,strikeID,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open,IV_diff,trade_1,pos_size,sized_stock_pos,sized_pos_change,sized_change_cost_basis,sized_stock_cost_basis,sized_daily_stock_value,sized_stock_PL,sized_option_cost_basis,sized_change_cost_basis_op,sized_daily_option_value,sized_option_PL,sized_total_cost_basis,sized_total_pos_value,sized_total_PL,sized_realized_stock_PL,sized_realized_option_PL,sized_realized_PL
0,2018-01-03,2018-02-02,270.0,270.47,20180202_270.0,0.119696,-0.119696,-0.119696,-32.374177,-32.374177,-32.374177,0.0,4.49,4.49,4.42,-0.07,-27.884177,-27.954177,-0.07,0.0,0.0,0.0,20180202_270.0_2018-01-03,1,0.047602,1,39266,-469998.3,-469998.3,-127120400.0,-127120400.0,-127120400.0,0.0,17630434.0,17630434.0,17355572.0,-274862.0,-109490000.0,-109764900.0,-274862.0,0.0,0.0,0.0
1,2018-01-04,2018-02-02,270.0,271.61,20180202_270.0,0.265768,-0.265768,-0.146072,-39.674616,-72.048793,-72.185246,-0.136453,4.49,0.0,4.88,0.39,-67.558793,-67.305246,0.253547,0.0,0.0,0.0,20180202_270.0_2018-01-03,0,0.047602,1,39266,-1043565.0,-573566.3,-155786300.0,-282906800.0,-283442600.0,-535798.1,17630434.0,0.0,19161808.0,1531374.0,-265276400.0,-264280800.0,995575.9,0.0,0.0,0.0
2,2018-01-04,2018-02-02,272.0,271.61,20180202_272.0,-0.008207,0.008207,0.008207,2.229103,2.229103,2.229103,0.0,4.32,4.32,4.25,-0.07,6.549103,6.479103,-0.07,0.0,0.0,0.0,20180202_272.0_2018-01-04,1,0.048821,1,41548,34098.44,34098.44,9261478.0,9261478.0,9261478.0,0.0,17948736.0,17948736.0,17657900.0,-290836.0,27210210.0,26919380.0,-290836.0,0.0,0.0,0.0
3,2018-01-05,2018-02-02,270.0,273.42,20180202_270.0,0.457539,-0.457539,-0.191771,-52.434027,-124.48282,-125.100313,-0.617494,4.49,0.0,5.82,1.33,-119.99282,-119.280313,0.712506,0.0,0.0,0.0,20180202_270.0_2018-01-03,0,0.047602,1,39266,-1796573.0,-753008.0,-205887400.0,-488794200.0,-491218900.0,-2424650.0,17630434.0,0.0,22852812.0,5222378.0,-471163800.0,-468366100.0,2797728.0,0.0,0.0,0.0
4,2018-01-05,2018-02-02,272.0,273.42,20180202_272.0,0.243199,-0.243199,-0.251406,-68.739429,-66.510325,-66.495471,0.014855,4.32,0.0,4.75,0.43,-62.190325,-61.745471,0.444855,0.0,0.0,0.0,20180202_272.0_2018-01-04,0,0.048821,1,41548,-1010443.0,-1044542.0,-285598600.0,-276337100.0,-276275400.0,61718.18,17948736.0,0.0,19735300.0,1786564.0,-258388400.0,-256540100.0,1848282.0,0.0,0.0,0.0


In [95]:
for key, df in trades_dfs.items():
    # Calculate the multiplication of 'identifier' and 'pos_size'
    df['signed_pos']  = - df[key] * df['pos_size']
    
trades_dfs['trade_1'].head()

Unnamed: 0,date,exdate,strike_price,close,strikeID,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open,IV_diff,trade_1,pos_size,sized_stock_pos,sized_pos_change,sized_change_cost_basis,sized_stock_cost_basis,sized_daily_stock_value,sized_stock_PL,sized_option_cost_basis,sized_change_cost_basis_op,sized_daily_option_value,sized_option_PL,sized_total_cost_basis,sized_total_pos_value,sized_total_PL,sized_realized_stock_PL,sized_realized_option_PL,sized_realized_PL,signed_pos
0,2018-01-03,2018-02-02,270.0,270.47,20180202_270.0,0.119696,-0.119696,-0.119696,-32.374177,-32.374177,-32.374177,0.0,4.49,4.49,4.42,-0.07,-27.884177,-27.954177,-0.07,0.0,0.0,0.0,20180202_270.0_2018-01-03,1,0.047602,1,39266,-469998.3,-469998.3,-127120400.0,-127120400.0,-127120400.0,0.0,17630434.0,17630434.0,17355572.0,-274862.0,-109490000.0,-109764900.0,-274862.0,0.0,0.0,0.0,-39266
1,2018-01-04,2018-02-02,270.0,271.61,20180202_270.0,0.265768,-0.265768,-0.146072,-39.674616,-72.048793,-72.185246,-0.136453,4.49,0.0,4.88,0.39,-67.558793,-67.305246,0.253547,0.0,0.0,0.0,20180202_270.0_2018-01-03,0,0.047602,1,39266,-1043565.0,-573566.3,-155786300.0,-282906800.0,-283442600.0,-535798.1,17630434.0,0.0,19161808.0,1531374.0,-265276400.0,-264280800.0,995575.9,0.0,0.0,0.0,-39266
2,2018-01-04,2018-02-02,272.0,271.61,20180202_272.0,-0.008207,0.008207,0.008207,2.229103,2.229103,2.229103,0.0,4.32,4.32,4.25,-0.07,6.549103,6.479103,-0.07,0.0,0.0,0.0,20180202_272.0_2018-01-04,1,0.048821,1,41548,34098.44,34098.44,9261478.0,9261478.0,9261478.0,0.0,17948736.0,17948736.0,17657900.0,-290836.0,27210210.0,26919380.0,-290836.0,0.0,0.0,0.0,-41548
3,2018-01-05,2018-02-02,270.0,273.42,20180202_270.0,0.457539,-0.457539,-0.191771,-52.434027,-124.48282,-125.100313,-0.617494,4.49,0.0,5.82,1.33,-119.99282,-119.280313,0.712506,0.0,0.0,0.0,20180202_270.0_2018-01-03,0,0.047602,1,39266,-1796573.0,-753008.0,-205887400.0,-488794200.0,-491218900.0,-2424650.0,17630434.0,0.0,22852812.0,5222378.0,-471163800.0,-468366100.0,2797728.0,0.0,0.0,0.0,-39266
4,2018-01-05,2018-02-02,272.0,273.42,20180202_272.0,0.243199,-0.243199,-0.251406,-68.739429,-66.510325,-66.495471,0.014855,4.32,0.0,4.75,0.43,-62.190325,-61.745471,0.444855,0.0,0.0,0.0,20180202_272.0_2018-01-04,0,0.048821,1,41548,-1010443.0,-1044542.0,-285598600.0,-276337100.0,-276275400.0,61718.18,17948736.0,0.0,19735300.0,1786564.0,-258388400.0,-256540100.0,1848282.0,0.0,0.0,0.0,-41548


### PL Positions

In [96]:
PL_temp_dfs = {}  
for key, df in trades_dfs.items():
    # Group by 'dates' and sum the columns of interest
    columns_to_sum = ['signed_pos'] 
    for col in ['stock_pos', 'change_cost_basis',	'stock_cost_basis',	'daily_stock_value',	'stock_PL',	'option_cost_basis', 'change_cost_basis_op',
                'daily_option_value',	'option_PL',	'total_cost_basis',	'total_pos_value',	'total_PL',	'realized_stock_PL',
                'realized_option_PL',	'realized_PL']:
        columns_to_sum.append('sized_' + col)
    
    
    grouped_df = df[['date'] + columns_to_sum].groupby('date').sum().reset_index()
    
    # Reindex the resulting DataFrame to match 'trading_days', filling missing values with 0 or other appropriate value
    pl_df = grouped_df.set_index('date').reindex(trading_days).fillna(0).reset_index()
    pl_df.rename(columns={'index': 'date'}, inplace=True)
    
    # Assign the processed DataFrame to PL_dfs with the same key
    PL_temp_dfs[key] = pl_df
    
PL_temp_dfs['trade_1'][70:85]

Unnamed: 0,date,signed_pos,sized_stock_pos,sized_change_cost_basis,sized_stock_cost_basis,sized_daily_stock_value,sized_stock_PL,sized_option_cost_basis,sized_change_cost_basis_op,sized_daily_option_value,sized_option_PL,sized_total_cost_basis,sized_total_pos_value,sized_total_PL,sized_realized_stock_PL,sized_realized_option_PL,sized_realized_PL
70,2018-04-13,322521.0,8306508.0,-356426300.0,2243816000.0,2202471000.0,-41345730.0,-316550117.0,9239900.0,-259742670.0,56807447.0,1927266000.0,1942728000.0,15461710.0,-13606880.0,11454128.0,-2152757.0
71,2018-04-16,305649.0,13297850.0,1394053000.0,3568583000.0,3554915000.0,-13667550.0,-305684893.0,1550360.0,-262791624.0,42893269.0,3262898000.0,3292124000.0,29225720.0,-9569996.0,9314864.0,-255132.4
72,2018-04-17,288289.0,16413260.0,1146776000.0,4410394000.0,4434699000.0,24305230.0,-282144633.0,23787122.0,-286271993.0,-4127360.0,4128249000.0,4148427000.0,20177870.0,59077.04,-246862.0,-187785.0
73,2018-04-18,268008.0,14895670.0,37107910.0,4000435000.0,4027641000.0,27205870.0,-254805576.0,27313517.0,-257316010.0,-2510434.0,3745629000.0,3770325000.0,24695440.0,382011.4,25540.0,407551.4
74,2018-04-19,245966.0,11611790.0,-451052100.0,3110873000.0,3122294000.0,11421290.0,-233137221.0,14858280.0,-217171368.0,15965853.0,2877736000.0,2905123000.0,27387140.0,-6558925.0,6810075.0,251150.4
75,2018-04-20,222071.0,8244467.0,-897761900.0,2213111000.0,2198057000.0,-15053590.0,-233137221.0,0.0,-193165484.0,39971737.0,1979974000.0,2004892000.0,24918150.0,0.0,0.0,0.0
76,2018-04-23,222071.0,6259544.0,124963000.0,1687881000.0,1668607000.0,-19274680.0,-198319767.0,34817454.0,-146375602.0,51944165.0,1489561000.0,1522231000.0,32669490.0,3891312.0,0.0,3891312.0
77,2018-04-24,193833.0,-788515.6,-1479783000.0,-174112000.0,-207363800.0,-33251850.0,-172228302.0,14926305.0,-121918725.0,50309577.0,-346340300.0,-329282600.0,17057720.0,-8494587.0,11165160.0,2670573.0
78,2018-04-25,182577.0,337684.9,483471800.0,113898700.0,89023880.0,-24874770.0,-158679771.0,2413641.0,-118033784.0,40645987.0,-44781120.0,-29009910.0,15771210.0,-8889616.0,11134890.0,2245274.0
79,2018-04-26,168225.0,4021101.0,854335000.0,1087181000.0,1070859000.0,-16321820.0,-148144031.0,1210152.0,-113190470.0,34953561.0,939037100.0,957668800.0,18631740.0,-7647958.0,9325588.0,1677630.0


In [97]:
divvies = {}

for key, df in PL_temp_dfs.items():
    # Ensure 'date' in both DataFrames is in datetime format
    df['date'] = pd.to_datetime(df['date'])
    spy_divdata['date'] = pd.to_datetime(spy_divdata['date'])
    spy_divdata['pay_date'] = pd.to_datetime(spy_divdata['pay_date'])
    
    # First Merge: Merge to align 'pos_size' with 'spy_divdata' based on 'date'
    # Assuming 'pos_size' exists in your df
    temp_merged = pd.merge(spy_divdata, df[['date', 'signed_pos']], how='left', on='date')

    # Correctly multiply 'signed_pos' by 'dividend' column
    temp_merged['div'] = temp_merged['signed_pos'] * temp_merged['dividend']
    
    # Update the DataFrame in divvies with the final merged DataFrame
    divvies[key] = temp_merged

divvies['trade_1'].head(50)

Unnamed: 0,date,dividend,pay_date,signed_pos,div
0,2018-03-16,1.09678,2018-04-30,36078.0,39569.62884
1,2018-06-15,1.246,2018-07-31,-75945.0,-94627.47
2,2018-09-21,1.323,2018-10-31,-153959.0,-203687.757
3,2018-12-21,1.4354,2019-01-31,131807.0,189195.7678
4,2019-03-15,1.2331,2019-04-30,-33701.0,-41556.7031
5,2019-06-21,1.4316,2019-07-31,54925.0,78630.63
6,2019-09-20,1.38362,2019-10-31,-6338.0,-8769.38356
7,2019-12-20,1.57,2020-01-31,-68563.0,-107643.91
8,2020-03-20,1.40556,2020-04-30,375538.0,527841.19128
9,2020-06-19,1.3662,2020-07-31,342295.0,467643.429


In [98]:
for key, pl_df in PL_temp_dfs.items():
    
    # Get the corresponding DataFrame from divvies
    div_df = divvies[key]
    pl_df['date'] = pd.to_datetime(pl_df['date'])
    # Perform the left merge
    # Note: Ensure 'date' in PL_temp_dfs and 'pay_date' in divvies are of compatible types (preferably datetime)
    merged_df = pd.merge(pl_df, div_df[['pay_date', 'div']], how='left', left_on='date', right_on='pay_date')
    
    # Optionally, you can drop the 'pay_date' column after the merge if it's no longer needed
    merged_df.drop(columns=['pay_date'], inplace=True)
    
    # Assuming 'div' can be NaN, fill NaNs with 0 for accurate calculations
    merged_df['div'] = merged_df['div'].fillna(0)
    
    # Update 'sized_realized_stock_PL' and 'sized_realized_PL' by adding 'div'
    merged_df['sized_realized_stock_PL'] = merged_df['sized_realized_stock_PL'] + merged_df['div']
    merged_df['sized_realized_PL'] = merged_df['sized_realized_PL'] + merged_df['div']
    
    # Update the PL_temp_dfs dictionary with the merged and updated result
    PL_temp_dfs[key] = merged_df

In [99]:
PL_temp_dfs['trade_1'][70:85]

Unnamed: 0,date,signed_pos,sized_stock_pos,sized_change_cost_basis,sized_stock_cost_basis,sized_daily_stock_value,sized_stock_PL,sized_option_cost_basis,sized_change_cost_basis_op,sized_daily_option_value,sized_option_PL,sized_total_cost_basis,sized_total_pos_value,sized_total_PL,sized_realized_stock_PL,sized_realized_option_PL,sized_realized_PL,div
70,2018-04-13,322521.0,8306508.0,-356426300.0,2243816000.0,2202471000.0,-41345730.0,-316550117.0,9239900.0,-259742670.0,56807447.0,1927266000.0,1942728000.0,15461710.0,-13606880.0,11454128.0,-2152757.0,0.0
71,2018-04-16,305649.0,13297850.0,1394053000.0,3568583000.0,3554915000.0,-13667550.0,-305684893.0,1550360.0,-262791624.0,42893269.0,3262898000.0,3292124000.0,29225720.0,-9569996.0,9314864.0,-255132.4,0.0
72,2018-04-17,288289.0,16413260.0,1146776000.0,4410394000.0,4434699000.0,24305230.0,-282144633.0,23787122.0,-286271993.0,-4127360.0,4128249000.0,4148427000.0,20177870.0,59077.04,-246862.0,-187785.0,0.0
73,2018-04-18,268008.0,14895670.0,37107910.0,4000435000.0,4027641000.0,27205870.0,-254805576.0,27313517.0,-257316010.0,-2510434.0,3745629000.0,3770325000.0,24695440.0,382011.4,25540.0,407551.4,0.0
74,2018-04-19,245966.0,11611790.0,-451052100.0,3110873000.0,3122294000.0,11421290.0,-233137221.0,14858280.0,-217171368.0,15965853.0,2877736000.0,2905123000.0,27387140.0,-6558925.0,6810075.0,251150.4,0.0
75,2018-04-20,222071.0,8244467.0,-897761900.0,2213111000.0,2198057000.0,-15053590.0,-233137221.0,0.0,-193165484.0,39971737.0,1979974000.0,2004892000.0,24918150.0,0.0,0.0,0.0,0.0
76,2018-04-23,222071.0,6259544.0,124963000.0,1687881000.0,1668607000.0,-19274680.0,-198319767.0,34817454.0,-146375602.0,51944165.0,1489561000.0,1522231000.0,32669490.0,3891312.0,0.0,3891312.0,0.0
77,2018-04-24,193833.0,-788515.6,-1479783000.0,-174112000.0,-207363800.0,-33251850.0,-172228302.0,14926305.0,-121918725.0,50309577.0,-346340300.0,-329282600.0,17057720.0,-8494587.0,11165160.0,2670573.0,0.0
78,2018-04-25,182577.0,337684.9,483471800.0,113898700.0,89023880.0,-24874770.0,-158679771.0,2413641.0,-118033784.0,40645987.0,-44781120.0,-29009910.0,15771210.0,-8889616.0,11134890.0,2245274.0,0.0
79,2018-04-26,168225.0,4021101.0,854335000.0,1087181000.0,1070859000.0,-16321820.0,-148144031.0,1210152.0,-113190470.0,34953561.0,939037100.0,957668800.0,18631740.0,-7647958.0,9325588.0,1677630.0,0.0


In [100]:
PL_dfs = {}  # New dictionary to store the processed DataFrames

for key, df in PL_temp_dfs.items():
    # Create a new DataFrame for PL_dfs
    pl_df = pd.DataFrame(index=df.index)
    
    # Assuming 'date' is the column you want to keep as is
    pl_df['date'] = df['date']
    
    # Sum 'sized_change_cost_basis' and 'sized_change_cost_basis_op'
    pl_df['gross_trades_value'] = abs(df['sized_change_cost_basis']) + abs(df['sized_change_cost_basis_op'])
    
    # Calculate cumulative sum of 'sized_realized_stock_PL'
    pl_df['stock_PL'] = df['sized_realized_stock_PL'].cumsum()
    pl_df['option_PL'] = df['sized_realized_option_PL'].cumsum()
    pl_df['net_PL'] = df['sized_realized_PL'].cumsum()

    # Positions & Cost Basis
    pl_df['cash'] = KAPITAL - df['sized_total_cost_basis'] + df['sized_total_cost_basis'] + pl_df['net_PL']
    pl_df['position_value'] = pl_df['cash'] + df['sized_total_pos_value']
    
    # Assign the processed DataFrame to the new dictionary
    PL_dfs[key] = pl_df

In [101]:
PL_dfs['trade_1'][60:85]

Unnamed: 0,date,gross_trades_value,stock_PL,option_PL,net_PL,cash,position_value
60,2018-03-29,668169000.0,-107170700.0,163178525.0,56007850.0,66007850.0,343033500.0
61,2018-04-02,1178280000.0,-107170700.0,163178525.0,56007850.0,66007850.0,-851941000.0
62,2018-04-03,927617800.0,-107170700.0,163178525.0,56007850.0,66007850.0,26070200.0
63,2018-04-04,891609500.0,-107170700.0,163178525.0,56007850.0,66007850.0,865471400.0
64,2018-04-05,838511500.0,-107170700.0,163178525.0,56007850.0,66007850.0,1676528000.0
65,2018-04-06,2378697000.0,-107170700.0,163178525.0,56007850.0,66007850.0,-725237400.0
66,2018-04-09,670755200.0,-107170700.0,163178525.0,56007850.0,66007850.0,-109992300.0
67,2018-04-10,2095662000.0,-107170700.0,163178525.0,56007850.0,66007850.0,1945706000.0
68,2018-04-11,672627300.0,-107170700.0,163178525.0,56007850.0,66007850.0,1288116000.0
69,2018-04-12,1359193000.0,-124905900.0,174785445.0,49879560.0,59879560.0,2570559000.0


In [None]:
os.makedirs('simdata', exist_ok=True)

for strat, df in PL_dfs.items():
    csv_path = f'simdata/PL_{strat}.csv'
    df_long.to_csv(csv_path, index=False)

### Other things to consider (WIP)

In [104]:
# Aggregate same strike/expiration options to further reduce trading costs once positions have been determined, etc.
for key, df in trades_dfs.items():
    df = df.drop(columns=[col for col in df.columns if col.endswith('_p') or col.endswith('_c')])

aggregations = {
    'exdate': lambda x: x.iloc[0] if all(date == x.iloc[0] for date in x) else 'Variable',
    'strike_price': lambda x: x.iloc[0] if all(strike == x.iloc[0] for strike in x) else None,
    'close': lambda x: x.iloc[0] if all(strike == x.iloc[0] for strike in x) else None,
    'delta_sum':
    stock_pos
    pos_change
    change_cost_basis
    stock_cost_basis
    daily_stock_value
    stock_PL
    option_cost_basis
    daily_option_value
    option_PL
    total_cost_basis
    total_pos_value	total_PL
    realized_stock_PL
    realized_option_PL
    realized_PL
    UID
    to_open
    IV_diff	
}

SyntaxError: invalid syntax (1279976394.py, line 11)