In [1]:
import pandas as pd
import numpy as np

In [102]:
df_raw = (
    pd
    .read_csv('Data\\aapl.csv', parse_dates = ['Date'])
    .rename(columns = {
        'Date': 'date', 
        'Adj Close': 'price'
    })
    .set_index('date')
    [['price']]
)
df_raw.head()

Unnamed: 0_level_0,price
date,Unnamed: 1_level_1
2021-01-04,126.683441
2021-01-05,128.249725
2021-01-06,123.932663
2021-01-07,128.161636
2021-01-08,129.267792


In [103]:
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 753 entries, 2021-01-04 to 2023-12-29
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   price   753 non-null    float64
dtypes: float64(1)
memory usage: 11.8 KB


In [104]:
df_data = df_raw.copy()
df_data.tail()

Unnamed: 0_level_0,price
date,Unnamed: 1_level_1
2023-12-22,192.868149
2023-12-26,192.320206
2023-12-27,192.41983
2023-12-28,192.848206
2023-12-29,191.80217


In [105]:
df_data['return'] = np.log(df_data['price']).diff()
df_data['var_01'] = df_data['return'] ** 2 # Assumes zero mean. Very common.
df_data['var_21'] = df_data['return'].rolling(21).std() ** 2 # Why 21?: n_workingdays 
df_data['var_63'] = df_data['return'].rolling(63).std() ** 2 
# All are based on daily daily so it's daily variance no need to scale 
# For this purpose we use var_01
df_data['var'] = df_data['var_01']
df_data.tail()

Unnamed: 0_level_0,price,return,var_01,var_21,var_63,var
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-12-22,192.868149,-0.005563,3.094505e-05,8.1e-05,0.000107,3.094505e-05
2023-12-26,192.320206,-0.002845,8.094411e-06,7.8e-05,9.7e-05,8.094411e-06
2023-12-27,192.41983,0.000518,2.682e-07,7.8e-05,9.5e-05,2.682e-07
2023-12-28,192.848206,0.002224,4.945192e-06,7.8e-05,9.5e-05,4.945192e-06
2023-12-29,191.80217,-0.005439,2.958168e-05,7.8e-05,9.6e-05,2.958168e-05


In [106]:
df_data = df_data.drop(columns = [
    'var_01', 
    'var_21', 
    'var_63'
])
df_data.tail()

Unnamed: 0_level_0,price,return,var
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-04,126.683441,,
2021-01-05,128.249725,0.012288,1.509939e-04
2021-01-06,123.932663,-0.034241,1.172444e-03
2021-01-07,128.161636,0.033554,1.125862e-03
2021-01-08,129.267792,0.008594,7.385523e-05
...,...,...,...
2023-12-22,192.868149,-0.005563,3.094505e-05
2023-12-26,192.320206,-0.002845,8.094411e-06
2023-12-27,192.419830,0.000518,2.682000e-07
2023-12-28,192.848206,0.002224,4.945192e-06


In [115]:
# Rolling window
_m = 21
_beta = .94 # See ref on market risk

In [116]:
df_known = df_data[:'2022'].copy()
df_known['var_ha'] = df_known['var'].expanding().mean() # Expanding is continuously expanding window size
df_known['var_sma'] = df_known['var'].rolling(_m).mean() # Assume 21

In [156]:
np.nan == np.nan

False

In [182]:
df_known
def compute_ema(df):
    df['ema'] = np.nan
    for i in range(len(df)):
        if np.isnan(df.loc[df.index[i], 'var']):
            df.loc[df.index[i], 'ema'] = np.nan
        elif (~ np.isnan(df.loc[df.index[i], 'var'])) & (np.isnan(df.loc[df.index[i-1], 'var'])):
            df.loc[df.index[i], 'ema'] = df.loc[df.index[i], 'var']
        else:
            df.loc[df.index[i], 'ema'] = (1 - _beta) * df.loc[df.index[i], 'var'] + _beta * df.loc[df.index[i-1], 'ema']
    return df['ema']
df_known['ema'] = compute_ema(df_known)
df_known.tail()

Unnamed: 0_level_0,price,return,var,var_ha,var_sma,ema
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-23,130.631378,-0.002802,8e-06,0.000375,0.000446,0.000499
2022-12-27,128.818405,-0.013976,0.000195,0.000375,0.000437,0.000481
2022-12-28,124.865593,-0.031166,0.000971,0.000376,0.000449,0.00051
2022-12-29,128.402344,0.027931,0.00078,0.000377,0.000465,0.000526
2022-12-30,128.71933,0.002466,6e-06,0.000376,0.000358,0.000495


In [181]:
assert (
    np.isclose(
        df_known['ema'].dropna(), 
        df_known['var'].ewm(alpha = 1 - _beta, adjust = False).mean().dropna()
    )
    .all()
)

In [183]:
df_known['ema'] = df_known['var'].ewm(alpha = 1 - _beta, adjust = False).mean()
df_known

Unnamed: 0_level_0,price,return,var,var_ha,var_sma,ema
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-04,126.683441,,,,,
2021-01-05,128.249725,0.012288,0.000151,0.000151,,0.000151
2021-01-06,123.932663,-0.034241,0.001172,0.000662,,0.000212
2021-01-07,128.161636,0.033554,0.001126,0.000816,,0.000267
2021-01-08,129.267792,0.008594,0.000074,0.000631,,0.000256
...,...,...,...,...,...,...
2022-12-23,130.631378,-0.002802,0.000008,0.000375,0.000446,0.000499
2022-12-27,128.818405,-0.013976,0.000195,0.000375,0.000437,0.000481
2022-12-28,124.865593,-0.031166,0.000971,0.000376,0.000449,0.000510
2022-12-29,128.402344,0.027931,0.000780,0.000377,0.000465,0.000526


In [114]:
df_known = df_data[:'2022'].copy()
df_unknown = df_data['2023':][[]].copy()
df_unknown.tail()

2023-12-22
2023-12-26
2023-12-27
2023-12-28
2023-12-29


In [113]:
df = df_unknown.copy()
for i in range(len(df)):
    if i == 0:
        df.loc[df.index[i], 'ema'] = df_known['var'][-1]
    else:
        df.loc[df.index[i], 'ema'] = (1 - _beta) * df_known['var'][-1]
df
    
    
#     df.loc[df.index[i], 'ema'].isna():
#         df.loc[df.index[i], 'ema'] = np.nan
#     elif (~ df.loc[df.index[i], 'ema'].isna()) & (df.loc[df.index[i-1], 'ema'].isna()):
#         df.loc[df.index[i], 'ema'] = df.loc[df.index[i], 'var']
#     else:
#         df.loc[df.index[i], 'ema'] = _beta
# df

  df.loc[df.index[i], 'ema'] = df_known['var'][-1]


Unnamed: 0_level_0,ema
date,Unnamed: 1_level_1
2023-01-03,0.000006
2023-01-04,0.000000
2023-01-05,0.000000
2023-01-06,0.000000
2023-01-09,0.000000
...,...
2023-12-22,0.000000
2023-12-26,0.000000
2023-12-27,0.000000
2023-12-28,0.000000


In [None]:
df['var_ha'] = df['var'].expanding().mean() # Expanding is continuously expanding window size
df['var_sma'] = df['var'].rolling(_m).mean() # Assume 21

In [81]:
for i in range(len(df)):
    if df.loc[df.index[i], 'ema'].isna():
        df.loc[df.index[i], 'ema'] = np.nan
    elif (~ df.loc[df.index[i], 'ema'].isna()) & (df.loc[df.index[i-1], 'ema'].isna()):
        df.loc[df.index[i], 'ema'] = df.loc[df.index[i], 'var']
    else:
        df.loc[df.index[i], 'ema'] = _beta
df

Unnamed: 0_level_0,price,return,var,var_ha,ema
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01-04,126.683441,,,,
2021-01-05,128.249725,0.012288,1.509939e-04,0.000151,1.0
2021-01-06,123.932663,-0.034241,1.172444e-03,0.000662,1.0
2021-01-07,128.161636,0.033554,1.125862e-03,0.000816,1.0
2021-01-08,129.267792,0.008594,7.385523e-05,0.000631,1.0
...,...,...,...,...,...
2023-12-22,192.868149,-0.005563,3.094505e-05,0.000308,1.0
2023-12-26,192.320206,-0.002845,8.094411e-06,0.000307,1.0
2023-12-27,192.419830,0.000518,2.682000e-07,0.000307,1.0
2023-12-28,192.848206,0.002224,4.945192e-06,0.000306,1.0
