In [153]:
import numpy as np
import pandas as pd
import plotly.express as px
import datetime

In [154]:
df = pd.read_csv('Stocks/abb.us.txt', index_col='Date')
df.index = pd.to_datetime(df.index)

df['log_rtn'] = np.log(df['Close']).diff()
df['intraday_log_rtn'] = np.log(df['Close'])-np.log(df['Open'])

df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,OpenInt,log_rtn,intraday_log_rtn
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2005-02-25,4.4844,4.5556,4.4765,4.5082,319324,0,,0.005293
2005-02-28,4.5873,4.5873,4.4685,4.5082,534773,0,0.0,-0.017394
2005-03-01,4.5479,4.5873,4.5399,4.5717,409945,0,0.013987,0.00522
2005-03-02,4.5717,4.5954,4.5399,4.5636,239933,0,-0.001773,-0.001773
2005-03-03,4.6112,4.6666,4.5873,4.627,962639,0,0.013797,0.003421


In [155]:
# Compute rolling volatility

df['sigma_30'] = df['log_rtn'].rolling(window=30).std(ddof=1) * np.sqrt(252)
df['sigma_7'] = df['log_rtn'].rolling(window=7).std(ddof=1) * np.sqrt(252)


In [156]:
fig = px.line(df)
fig.show()

## Time Series Forecasting

In [157]:
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3201 entries, 2005-02-25 to 2017-11-10
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Open              3201 non-null   float64
 1   High              3201 non-null   float64
 2   Low               3201 non-null   float64
 3   Close             3201 non-null   float64
 4   Volume            3201 non-null   int64  
 5   OpenInt           3201 non-null   int64  
 6   log_rtn           3200 non-null   float64
 7   intraday_log_rtn  3201 non-null   float64
 8   sigma_30          3171 non-null   float64
 9   sigma_7           3194 non-null   float64
dtypes: float64(8), int64(2)
memory usage: 275.1 KB


In [158]:
#train_start = '2000-01-01'
#train_end = '2013-12-31'

#test_start = '2014-01-01'
#test_end = '2014-12-31'

#forecast_start = '2015-01-01'
#forecast_end = '2016-01-01'

train_start = np.min(df.index) + datetime.timedelta(50)
test_end = np.max(df.index)
test_start = test_end - datetime.timedelta(50)
train_end = test_start - datetime.timedelta(0)

df_train = df.loc[train_start:train_end]
df_test = df.loc[test_start:test_end]

fig = px.line(df_train, x=df_train.index, y='Close')
fig.add_scatter(x = df_test.index, y = df_test['Close'] )
fig.show()

In [159]:
from sklearn.tree import DecisionTreeRegressor
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from sklearn.ensemble import RandomForestRegressor
from skforecast.ForecasterAutoregMultiVariate import ForecasterAutoregMultiVariate



# Define the forecaster
forecaster = ForecasterAutoreg(
    # Add the sklearn regressor and lags
    regressor = DecisionTreeRegressor(random_state = 123),
    lags = 30
)

# Fit the model using train data
forecaster.fit(y = df_train['sigma_30'])

# Predict the test period
predicted_test = forecaster.predict(steps = len(df_test))

df_test['Predicted'] = predicted_test.values

fig = px.line({})
fig.add_scatter(x = df_train.index, y = df_train['sigma_30'] )
fig.add_scatter(x = df_test.index, y = df_test['sigma_30'] )
fig.add_scatter(x = df_test.index, y = df_test['Predicted'] )

fig.show()

df_test



Series has DatetimeIndex index but no frequency. Index is overwritten with a RangeIndex of step 1.


Series has DatetimeIndex index but no frequency. Index is overwritten with a RangeIndex of step 1.


`last_window` has DatetimeIndex index but no frequency. Index is overwritten with a RangeIndex of step 1.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,Open,High,Low,Close,Volume,OpenInt,log_rtn,intraday_log_rtn,sigma_30,sigma_7,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-09-21,24.6,24.715,24.57,24.63,502996,0,0.001219,0.001219,0.132899,0.053684,0.133955
2017-09-22,24.64,24.745,24.64,24.73,647646,0,0.004052,0.003646,0.117101,0.048958,0.123443
2017-09-25,24.72,24.7875,24.63,24.65,1293314,0,-0.00324,-0.002836,0.118193,0.052782,0.123306
2017-09-26,24.58,24.6,24.46,24.56,955732,0,-0.003658,-0.000814,0.119246,0.060507,0.127076
2017-09-27,24.44,24.62,24.43,24.59,1114636,0,0.001221,0.006119,0.117637,0.043266,0.132854
2017-09-28,24.52,24.71,24.51,24.68,1115317,0,0.003653,0.006504,0.110899,0.04803,0.133955
2017-09-29,24.53,24.775,24.49,24.75,1365636,0,0.002832,0.008929,0.101298,0.049932,0.134237
2017-10-02,24.84,24.97,24.79,24.96,1367702,0,0.008449,0.004819,0.096733,0.067734,0.134661
2017-10-03,24.98,25.1,24.97,25.09,2259496,0,0.005195,0.004394,0.096866,0.06958,0.135705
2017-10-04,25.07,25.2,25.06,25.12,1363228,0,0.001195,0.001992,0.088386,0.059777,0.135705


In [160]:
from scipy.signal import savgol_filter

steps_param = len(df_test)

# Define the forecaster
forecaster = ForecasterAutoregMultiVariate(
    # Add the sklearn regressor and lags
    regressor = DecisionTreeRegressor(random_state = 123),
    level              = 'sigma_30',
    lags               = 7,
    steps              = steps_param,
    transformer_series = None,
    transformer_exog   = None,
    weight_func        = None
)

# Fit the model using train data
forecaster.fit(df_train[['sigma_30', 'log_rtn', 'intraday_log_rtn']])

# Predict the test period
predicted_test = forecaster.predict(steps = steps_param)

print(predicted_test)
df_test['Predicted'] = predicted_test.values



##### Kalman filter
x_est = 0
P = 1
# Kalman filter parameters
A = 1  # State transition matrix
H = 1  # Measurement matrix
Q = 1  # Process noise covariance
R = 10  # Measurement noise covariance

# Storage for filtered values
filtered_values = []

# Apply Kalman filter
for z in df_test['Predicted']:
    # Prediction
    x_pred = A * x_est
    P_pred = A * P * A + Q
    
    # Update
    K = P_pred * H / (H * P_pred * H + R)
    x_est = x_pred + K * (z - H * x_pred)
    P = (1 - K * H) * P_pred
    
    # Store filtered value
    filtered_values.append(x_est)

# Add filtered values to DataFrame
df_test['filtered'] = filtered_values



####### Savgol Filter


print(df_test['Predicted'].values)

yy = savgol_filter(df_test['Predicted'].values, steps_param, 2)
df_test['filtered2'] = yy


####### Plot


fig = px.line({})
fig.add_scatter(x = df_train.index, y = df_train['sigma_30'] )
fig.add_scatter(x = df_test.index, y = df_test['sigma_30'], name='True' )
fig.add_scatter(x = df_test.index, y = df_test['Predicted'], name='Prediction' )
fig.add_scatter(x = df_test.index, y = df_test['filtered'], name='Filtered' )
fig.add_scatter(x = df_test.index, y = df_test['filtered2'], name='Filtered 2' )

fig.show()

df_test


Series has DatetimeIndex index but no frequency. Index is overwritten with a RangeIndex of step 1.



    sigma_30
7   0.133961
8   0.125788
9   0.173590
10  0.122025
11  0.124609
12  0.121160
13  0.137209
14  0.150165
15  0.152842
16  0.150453
17  0.153449
18  0.165660
19  0.125788
20  0.148717
21  0.127101
22  0.127306
23  0.170597
24  0.173590
25  0.174844
26  0.158235
27  0.151764
28  0.164147
29  0.132868
30  0.127101
31  0.160649
32  0.169888
33  0.131650
34  0.165883
35  0.166055
36  0.237025
37  0.237450
38  0.145829
39  0.163952
40  0.148371
41  0.163952
42  0.111912
43  0.135029
[0.13396095 0.12578821 0.17359023 0.12202493 0.12460857 0.12115983
 0.13720902 0.15016523 0.15284162 0.15045279 0.15344923 0.16566003
 0.12578821 0.14871712 0.12710096 0.1273057  0.1705968  0.17359023
 0.17484369 0.15823483 0.1517637  0.16414658 0.13286818 0.12710096
 0.16064888 0.16988794 0.13165033 0.16588323 0.16605544 0.23702535
 0.23744981 0.14582935 0.16395206 0.14837073 0.16395206 0.11191248
 0.13502902]



Series has DatetimeIndex index but no frequency. Index is overwritten with a RangeIndex of step 1.


`last_window` has DatetimeIndex index but no frequency. Index is overwritten with a RangeIndex of step 1.


`last_window` has DatetimeIndex index but no frequency. Index is overwritten with a RangeIndex of step 1.



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the do

Unnamed: 0_level_0,Open,High,Low,Close,Volume,OpenInt,log_rtn,intraday_log_rtn,sigma_30,sigma_7,Predicted,filtered,filtered2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-09-21,24.6,24.715,24.57,24.63,502996,0,0.001219,0.001219,0.132899,0.053684,0.133961,0.022327,0.12771
2017-09-22,24.64,24.745,24.64,24.73,647646,0,0.004052,0.003646,0.117101,0.048958,0.125788,0.044108,0.130194
2017-09-25,24.72,24.7875,24.63,24.65,1293314,0,-0.00324,-0.002836,0.118193,0.052782,0.17359,0.074789,0.132582
2017-09-26,24.58,24.6,24.46,24.56,955732,0,-0.003658,-0.000814,0.119246,0.060507,0.122025,0.086694,0.134873
2017-09-27,24.44,24.62,24.43,24.59,1114636,0,0.001221,0.006119,0.117637,0.043266,0.124609,0.096565,0.137068
2017-09-28,24.52,24.71,24.51,24.68,1115317,0,0.003653,0.006504,0.110899,0.04803,0.12116,0.103081,0.139166
2017-09-29,24.53,24.775,24.49,24.75,1365636,0,0.002832,0.008929,0.101298,0.049932,0.137209,0.112205,0.141168
2017-10-02,24.84,24.97,24.79,24.96,1367702,0,0.008449,0.004819,0.096733,0.067734,0.150165,0.122403,0.143074
2017-10-03,24.98,25.1,24.97,25.09,2259496,0,0.005195,0.004394,0.096866,0.06958,0.152842,0.130602,0.144883
2017-10-04,25.07,25.2,25.06,25.12,1363228,0,0.001195,0.001992,0.088386,0.059777,0.150453,0.135956,0.146596
