In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [25]:
# Read DATA.csv (first column becomes the index) and convert the
# 'expiration_C' column to pandas timestamps so it can be joined on dates.
df = (
    pd.read_csv('DATA.csv', index_col=0)
    .assign(expiration_C=lambda frame: pd.to_datetime(frame['expiration_C']))
)

In [26]:
# Read NASDAQ.csv and convert its 'Date' column to pandas timestamps.
nasdaq = pd.read_csv('NASDAQ.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

In [27]:
# Left join: keep every row of df and attach the NASDAQ row whose 'Date'
# equals that row's 'expiration_C'.
newdf = df.merge(nasdaq, how='left', left_on='expiration_C', right_on='Date')

In [28]:
# Number of distinct expiration dates present in the merged frame.
newdf.expiration_C.nunique()

56

In [29]:
# Adjusted strike = strike / implied_S_y (row-wise ratio).
newdf['strike_adj'] = newdf['strike'].div(newdf['implied_S_y'])

In [30]:
# Summary statistics of the adjusted strike.
newdf.strike_adj.describe()

count    10898.000000
mean         0.990330
std          0.155167
min          0.504835
25%          0.879193
50%          0.989446
75%          1.096986
max          1.535656
Name: strike_adj, dtype: float64

In [31]:
# P&L of a long call held to expiration, computed row by row:
#   payoff  = max(close price from NASDAQ - strike, 0)
#   premium = mid quote, (bid + ask) / 2
#   pnl     = payoff - premium
# np.maximum is the element-wise maximum; np.max would collapse the whole
# column to a single scalar and give every row the same payoff.
call_payoff = np.maximum(newdf['Close/Last'] - newdf['strike'], 0)
call_premium = (newdf['bid_1545_C'] + newdf['ask_1545_C']) / 2
newdf['pnl_long_Call'] = call_payoff - call_premium

In [32]:
# Frequency of each strike across all rows: no single strike value is
# present for every date.
newdf['strike'].value_counts()

8000.0    35
9000.0    34
8500.0    34
8100.0    33
8200.0    33
          ..
8770.0     1
8760.0     1
8740.0     1
8730.0     1
9370.0     1
Name: strike, Length: 861, dtype: int64

In [33]:
# Different dates carry different sets of strikes: compare the number of
# rows for two sample expiration dates.
for expiry in ('2018-02-09', '2022-12-09'):
    print(len(newdf.loc[newdf['expiration_C'] == expiry]))

118
241


In [34]:
pip install cvxpy

Note: you may need to restart the kernel to use updated packages.


The optimization problem is

$$
\min_{\sum_i w_i = 1,\; w_i \geq 0} \left\{ -\mu^T w + \frac{\gamma}{2} w^T \Sigma w + \frac{\lambda}{2} w^T w \right\}
$$

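Taking $\gamma = 1$, the two quadratic terms combine into $\frac{1}{2} w^T (\Sigma + \lambda I_n) w$, so in the code we replace $\Sigma$ by $\Sigma + \lambda I_n$.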

In [35]:
import numpy as np
import cvxpy as cp

# Placeholder inputs for the quadratic program:
#   Sigma - random PSD n x n matrix, to be substituted by the covariance matrix
#   mu    - random n-vector, to be substituted by the expected return
n = 400
np.random.seed(1)
mu = np.random.randn(n)
# P is a random square factor; P.T @ P is symmetric positive semidefinite.
P = np.random.randn(n, n)
Sigma = P.T @ P
lam = 1.
# Adding lam * I folds the (lam / 2) * w^T w penalty into the quadratic form.
Sigma_1 = Sigma + lam * np.identity(n)

# Constraints
# h = 0_n, A = 1_n, b = 1
# w >= h  is equivalent to  w_i >= 0 for all i
# A @ w == b  is equivalent to  sum(w_i) = 1
h = np.zeros(n)
A = np.ones(n)
b = 1

# Define and solve the CVXPY problem:
#   minimize (1/2) * w^T Sigma_1 w - mu^T w   subject to the constraints above.
w = cp.Variable(n)
objective = cp.Minimize((1/2)*cp.quad_form(w, Sigma_1) - mu @ w)
constraints = [w >= h, A @ w == b]
prob = cp.Problem(objective, constraints)
prob.solve()

# Print result.
print("\nThe optimal value is", prob.value)
print("A solution w is")
print(w.value)


The optimal value is -0.16726010355959875
A solution w is
[ 3.32007043e-03  6.38534056e-03  7.42725740e-03  5.17166029e-03
  2.75412994e-03 -6.14404385e-25  7.04497008e-03  6.68660947e-03
  1.90364686e-24  1.42443013e-24  1.86894750e-03  9.51735230e-24
  3.52163073e-03  1.24801879e-03  8.93899358e-03 -4.89090159e-24
  6.66180020e-03  1.05975189e-23  3.22537509e-03 -1.75312734e-24
  4.43363687e-24  4.29820607e-03  3.80434434e-03  5.65774670e-03
  3.21593876e-24 -3.91877108e-24  6.12439052e-03 -2.61097537e-24
 -7.60152484e-25  4.89048128e-03 -9.21931174e-24  5.63728803e-03
  1.78631102e-24  2.31422759e-24  1.62577943e-24 -7.79979521e-24
  1.96069560e-03  8.53365142e-03  4.89773287e-03 -6.08035397e-24
  1.36181410e-24 -5.12136870e-24  3.43682732e-03  1.02910477e-03
  3.99857361e-04  8.45284486e-24 -1.19645107e-24  9.47194576e-03
  3.54285144e-03 -6.33830961e-25  7.18685189e-03  1.13145111e-23
  7.28484059e-24  2.84514510e-24  4.16165575e-03  6.28804806e-03
  5.18320411e-03  9.70880952e-0