In [36]:
import numpy as np
import pandas as pd

In [50]:
df_spx = pd.read_parquet("data/spx_ivs_2023-02.parquet")


In [77]:
def calculate_V(df_one_day, variables): #df_one_day only contains data from one day. variables is a list of needed ... todo save it as a vector

    # underlying price at time t
    S_t = variables[0]

    # strike price
    K = variables[1]

    # call price C(t, tau, K) depending of an underlying asset with price S(t), date to maturity tau, strike price K
    C = variables[2]

    # put price C(t, tau, K) depending of an underlying asset with price S(t), date to maturity tau, strike price K
    P = variables[3]

    # formula for integrand in integral from S_t to infinity
    puts = (2 * (1 + np.log(S_t / K)) / np.square(K) * P).dropna()

    # formula for integrand in integral from S_t to infinity
    calls = (2 * (1 - np.log(K / S_t)) / np.square(K) * C).dropna()

    x = K.copy()

    # concat works as dataset is sorted in a way that puts come before calls.
    y = pd.concat([puts, calls]).copy()

    #return trap_integral(y,x)
    return np.trapezoid(y,x)





In [39]:
def calculate_W(df_one_day, variables):

    S_t = variables[0]
    K = variables[1]
    C = variables[2]
    P = variables[3]

    # formula for integrand in integral from S_t to infinity
    puts = ((6 * np.log(S_t / K) + 3 * (np.log(S_t / K)) ** 2 ) / (K ** 2) * P).dropna()

    calls = (( 6 * np.log(K / S_t) - 3 * (np.log(K / S_t)) ** 2) / (K ** 2) * C).dropna()


    puts_integral = np.trapezoid(puts, K[df_one_day["putcall"] == "P"])
    calls_integral = np.trapezoid(calls, K[df_one_day["putcall"] == "C"])


    return calls_integral - puts_integral




In [40]:
def calculate_X(df_one_day, variables):
    S_t = variables[0]
    K = variables[1]
    C = variables[2]
    P = variables[3]

    # formula for integrand in integral from S_t to infinity
    puts = ((12 * np.log(S_t / K) ** 2 + 4 * np.log(S_t / K) ** 3) / (K ** 2) * P).dropna()

    calls = ((12 * np.log(K / S_t) ** 2 - 4 * np.log(K / S_t) ** 3 ) / (K ** 2) * C).dropna()

    puts_integral = np.trapezoid(puts, K[df_one_day["putcall"] == "P"])
    calls_integral = np.trapezoid(calls, K[df_one_day["putcall"] == "C"])

    return calls_integral + puts_integral



In [41]:
def calculate_mu(df_one_day, e_rt, variables):
    return e_rt - 1 - e_rt / 2 * calculate_V(df_one_day, variables) - e_rt / 6 * calculate_W(df_one_day, variables) - e_rt / 24 * calculate_X(df_one_day, variables)


In [42]:
df_riskfree_yields = pd.read_csv("data/riskfree_30d_2023-02.csv")

df_riskfree_yields['exp_rt'] = np.exp((df_riskfree_yields['yld_pct_annual'] / 100) * (df_riskfree_yields['daystomaturity'] / 365))

#calculate_mu(next(iter(df.groupby("loctimestamp")))[1], df_riskfree_yields['exp_rt'].iloc[0])


#Calculate the first four moments for SPX:


In [86]:
def calculate_first_four_moments(df, df_riskfree_yields):
    rows = []
    min_days = 5
    #check for at least min_days of data
    if df.groupby('loctimestamp').ngroups < min_days:
        print("Not enough data for calculating first four moments.")
        return pd.DataFrame()
    for timestamp, df_day in df.groupby('loctimestamp'):
        #print("calculating day ", timestamp)

        # underlying price at time t
        S_t = df_day["underlyingprice"].copy()

        # strike price
        K = df_day["strike"].copy()

        # call price C(t, tau, K) depending of an underlying asset with price S(t), date to maturity tau, strike price K
        C = df_day[df_day["putcall"] == "C"]["implPrice"].copy()

        # put price C(t, tau, K) depending of an underlying asset with price S(t), date to maturity tau, strike price K
        P = df_day[df_day["putcall"] == "P"]["implPrice"].copy()

        variables = [S_t, K, C, P]

        e_rt = df_riskfree_yields[df_riskfree_yields['date'] == timestamp.strftime("%Y-%m-%d")]["exp_rt"].iloc[0]

        V = calculate_V(df_day, variables)
        W = calculate_W(df_day, variables)
        X = calculate_X(df_day, variables)

        # calculate first four moments:
        mu = calculate_mu(df_day, e_rt, variables)
        impl_Var = calculate_V(df_day, variables) * e_rt - calculate_mu(df_day, e_rt, variables) ** 2
        skew = (e_rt * W - 3*mu*e_rt*V + 2*mu**3) / ((e_rt*V - mu**2) ** (3 / 2))
        kurt = (e_rt*X - 4*mu*e_rt*W + 6*e_rt*mu**2*V -3*mu**4) / ((e_rt*V - mu**2)**2)

        rows.append({
            "loctimestamp": timestamp,
            "mu": mu,
            "impl_Var": impl_Var,
            "skew": skew,
            "kurt": kurt
        })

    return pd.DataFrame(rows)

In [87]:
calculate_first_four_moments(df_spx, df_riskfree_yields)



Unnamed: 0,loctimestamp,mu,impl_Var,skew,kurt
0,2023-02-01,0.002449,0.002886,-1.890367,13.690297
1,2023-02-02,0.002273,0.003315,-1.920767,14.00841
2,2023-02-03,0.002444,0.002812,-1.956828,14.071976
3,2023-02-06,0.002251,0.00331,-1.8567,13.43053
4,2023-02-07,0.002434,0.002927,-1.91468,14.255292
5,2023-02-08,0.002297,0.003261,-1.958047,14.242735
6,2023-02-09,0.002095,0.003718,-1.935017,13.924228
7,2023-02-10,0.002206,0.003433,-1.946166,14.288918
8,2023-02-13,0.002034,0.003885,-1.857297,13.72224
9,2023-02-14,0.002452,0.002883,-1.966343,14.373541


In [90]:
df_sp500 = pd.read_parquet("data/sp500_merged_ivs_2023-02.parquet")
for ticker, df_ticker in df_sp500.groupby("Symbol"):
    print(f"calculating for ticker {ticker}")

    # filter days with NaN values in important columns
    bad_days = df_ticker.groupby('loctimestamp').filter(
        lambda x: x[['strike', 'underlyingprice', 'implPrice']].isna().any().any()
    )['loctimestamp'].unique()

    # Remove all rows for those bad days
    df_clean = df_ticker[~df_ticker['loctimestamp'].isin(bad_days)]

    calculate_first_four_moments(df_clean, df_riskfree_yields) #.to_csv(f"data/sp500_moments/{ticker}_first_four_moments.csv", index=False)

calculating for ticker A
calculating for ticker AAL
calculating for ticker AAP
calculating for ticker AAPL
calculating for ticker ABBV
calculating for ticker ABC
calculating for ticker ABT
calculating for ticker ACGL
calculating for ticker ACN
calculating for ticker ADBE
calculating for ticker ADI
calculating for ticker ADM
calculating for ticker ADP
calculating for ticker ADSK
calculating for ticker AEE
calculating for ticker AEP
calculating for ticker AES
calculating for ticker AFL
calculating for ticker AIG
calculating for ticker AIZ
calculating for ticker AJG
calculating for ticker AKAM
calculating for ticker ALB
calculating for ticker ALGN
calculating for ticker ALK
calculating for ticker ALL
calculating for ticker ALLE
calculating for ticker AMAT
calculating for ticker AMCR
calculating for ticker AMD
calculating for ticker AME
calculating for ticker AMGN
calculating for ticker AMP
calculating for ticker AMT
calculating for ticker AMZN
calculating for ticker ANET
calculating for t

  skew = (e_rt * W - 3*mu*e_rt*V + 2*mu**3) / ((e_rt*V - mu**2) ** (3 / 2))


calculating for ticker BK
calculating for ticker BKNG
calculating for ticker BKR
calculating for ticker BLK
calculating for ticker BMY
calculating for ticker BR
calculating for ticker BRK.B
calculating for ticker BRO
calculating for ticker BSX
calculating for ticker BWA
calculating for ticker BXP
calculating for ticker C
calculating for ticker CAG
calculating for ticker CAH
calculating for ticker CARR
calculating for ticker CAT
calculating for ticker CB
calculating for ticker CBOE
calculating for ticker CBRE
calculating for ticker CCI
calculating for ticker CCL
calculating for ticker CDAY
calculating for ticker CDNS
calculating for ticker CDW
calculating for ticker CE
calculating for ticker CEG
calculating for ticker CF
calculating for ticker CFG
calculating for ticker CHD
calculating for ticker CHRW
calculating for ticker CHTR
calculating for ticker CI
calculating for ticker CINF
calculating for ticker CL
calculating for ticker CLX
calculating for ticker CMA
calculating for ticker CMC

calculating day  2023-02-22 00:00:00



KeyboardInterrupt

