In [91]:
#import necessary libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
from pandas.tseries.offsets import BDay
import random
import math
import numpy as np
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from scipy.stats import norm
import plotly.graph_objects as go
import warnings

warnings.filterwarnings('ignore')

First lets establish a prophet model and use it to predict the market

In [92]:
# We are analyzing the S&P 500 Index thru the prophet API,
# thus we need to garner the relevant data using yfinance

today = datetime.today().strftime('%Y-%m-%d')
start = '2016-01-01'

sp_df = yf.download('^GSPC', start, today)
sp_df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,2038.199951,2038.199951,1989.680054,2012.660034,2012.660034,4304880000
2016-01-05,2013.780029,2021.939941,2004.170044,2016.709961,2016.709961,3706620000
2016-01-06,2011.709961,2011.709961,1979.050049,1990.260010,1990.260010,4336660000
2016-01-07,1985.319946,1985.319946,1938.829956,1943.089966,1943.089966,5076590000
2016-01-08,1945.969971,1960.400024,1918.459961,1922.030029,1922.030029,4664940000
...,...,...,...,...,...,...
2022-08-10,4181.020020,4211.029785,4177.259766,4210.240234,4210.240234,3998590000
2022-08-11,4227.399902,4257.910156,4201.410156,4207.270020,4207.270020,3925060000
2022-08-12,4225.020020,4280.470215,4219.779785,4280.149902,4280.149902,3252290000
2022-08-15,4269.370117,4301.790039,4256.899902,4297.140137,4297.140137,3087740000


In [93]:
# data checks
sp_df.info()
sp_df.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1667 entries, 2016-01-04 to 2022-08-16
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1667 non-null   float64
 1   High       1667 non-null   float64
 2   Low        1667 non-null   float64
 3   Close      1667 non-null   float64
 4   Adj Close  1667 non-null   float64
 5   Volume     1667 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 91.2 KB


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [94]:
# prophet is a time series model so we must index by date
sp_df.reset_index(inplace = True)
sp_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [95]:
# prophet models require soley a date and y component
# here that y will be Adj Close

df = sp_df[['Date', 'Adj Close']]

# rename for prophet usage
df.rename(columns = {'Date' : 'ds', 'Adj Close' : 'y'}, inplace = True)
df

Unnamed: 0,ds,y
0,2016-01-04,2012.660034
1,2016-01-05,2016.709961
2,2016-01-06,1990.260010
3,2016-01-07,1943.089966
4,2016-01-08,1922.030029
...,...,...
1662,2022-08-10,4210.240234
1663,2022-08-11,4207.270020
1664,2022-08-12,4280.149902
1665,2022-08-15,4297.140137


In [96]:
# quickly vizualizing data before prophet use
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x = df['ds'], y = df['y']))

fig1.update_layout(
    title_text = "S&P 500 Pricing with Prophet"
)

# adding sliders and buttons for interaction
fig1.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig1.show()

In [97]:
# build prophet model for price forcasting
m = Prophet(seasonality_mode='multiplicative')
m.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


<prophet.forecaster.Prophet at 0x7fc1e10eae50>

In [98]:
# here we create a year's worth of time series for the prophet model
future = m.make_future_dataframe(periods = 365)
business_days = BDay().onOffset
filter = pd.to_datetime(future['ds']).map(business_days)
future = future[filter]
future.tail(10)

Unnamed: 0,ds
2018,2023-08-03
2019,2023-08-04
2022,2023-08-07
2023,2023-08-08
2024,2023-08-09
2025,2023-08-10
2026,2023-08-11
2029,2023-08-14
2030,2023-08-15
2031,2023-08-16


In [99]:
# finally, we get to our predictions
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
1918,2023-08-03,4000.258561,1524.952905,6390.61744
1919,2023-08-04,4005.774462,1462.900186,6395.520654
1920,2023-08-07,4015.438379,1511.499669,6420.931435
1921,2023-08-08,4021.449452,1456.415012,6452.618319
1922,2023-08-09,4027.927434,1478.163566,6476.3692
1923,2023-08-10,4029.872339,1435.457301,6518.682399
1924,2023-08-11,4036.552557,1482.558702,6509.996002
1925,2023-08-14,4049.414224,1412.089495,6666.292662
1926,2023-08-15,4056.273717,1336.002605,6620.58577
1927,2023-08-16,4063.44134,1372.959104,6681.047034


In [100]:
# lets specifically get the next days worth!
next_day = ((datetime.today() + timedelta(1)).strftime('%Y-%m-%d'))
forecast[forecast['ds'] == next_day]

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
1668,2022-08-18,856.610146,4126.729881,4345.038354,856.610146,856.610146,3.951829,3.951829,3.951829,3.97413,3.97413,3.97413,-0.022301,-0.022301,-0.022301,0.0,0.0,0.0,4241.787254


In [101]:
plot_plotly(m, forecast)

Now, lets approach this problem with a more traditional/basic method in finance: Monte Carlo Simulation

In [102]:
data = pd.DataFrame()
data = sp_df['Adj Close']
data

0       2012.660034
1       2016.709961
2       1990.260010
3       1943.089966
4       1922.030029
           ...     
1662    4210.240234
1663    4207.270020
1664    4280.149902
1665    4297.140137
1666    4305.200195
Name: Adj Close, Length: 1667, dtype: float64

In [103]:
log_returns = np.log(1 + data.pct_change())
log_returns

0            NaN
1       0.002010
2      -0.013202
3      -0.023986
4      -0.010898
          ...   
1662    0.021067
1663   -0.000706
1664    0.017174
1665    0.003962
1666    0.001874
Name: Adj Close, Length: 1667, dtype: float64

In [104]:
#returns stats
u = log_returns.mean()
var = log_returns.var()
drift = u - (0.5 * var)
stdev = log_returns.std()

# trading days per year and num of sims
t_intervals = 252
iterations = 10

daily_returns = np.exp(drift + stdev * norm.ppf(np.random.rand(t_intervals, iterations)))

s0 = data.iloc[-1]
s0

4305.2001953125

In [105]:
price_list = np.zeros_like(daily_returns)
price_list[0] = s0
price_list

array([[4305.20019531, 4305.20019531, 4305.20019531, ..., 4305.20019531,
        4305.20019531, 4305.20019531],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       ...,
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ]])

In [106]:
for t in range(1, t_intervals):
    price_list[t] = price_list[t-1] * daily_returns[t]

price_list

columns = []
for i in range(0,10):
     columns.append("sim" + str(i))

df1 = pd.DataFrame(price_list, columns = columns)
df1['date'] = pd.date_range(start= today, periods=len(df1), freq=BDay())
df1

Unnamed: 0,sim0,sim1,sim2,sim3,sim4,sim5,sim6,sim7,sim8,sim9,date
0,4305.200195,4305.200195,4305.200195,4305.200195,4305.200195,4305.200195,4305.200195,4305.200195,4305.200195,4305.200195,2022-08-17
1,4349.672989,4291.320507,4320.849225,4444.867271,4280.189797,4269.755756,4297.995469,4214.330927,4356.577310,4335.818242,2022-08-18
2,4417.340422,4366.736996,4306.781913,4421.996954,4222.855468,4362.025227,4294.067169,4287.716377,4259.955226,4339.182093,2022-08-19
3,4395.521389,4382.051729,4237.364784,4260.083306,4192.199941,4402.807469,4323.608301,4271.303678,4257.545531,4261.437582,2022-08-22
4,4416.204373,4347.820458,4282.252407,4248.755700,4057.099269,4347.537502,4405.361941,4267.252442,4287.341546,4252.417507,2022-08-23
...,...,...,...,...,...,...,...,...,...,...,...
247,3693.594840,5670.128710,4987.813532,5110.786572,3758.924728,4445.650502,4052.662007,4568.505944,3106.724536,5702.675512,2023-07-28
248,3637.467560,5668.367473,5057.143454,5123.377598,3714.275695,4533.338491,4137.923962,4649.422451,3101.108301,5725.492214,2023-07-31
249,3610.428653,5685.074602,5003.163810,5236.873748,3798.436497,4544.465030,4051.797740,4635.244522,3092.238774,5643.619598,2023-08-01
250,3686.580471,5619.074174,4984.594175,5216.582699,3735.941354,4606.539360,4084.579785,4653.162140,3027.755836,5755.582387,2023-08-02


In [107]:
# Let's visualize the Monte Carlo Simulations
fig2 = go.Figure()
for name in columns:
    fig2.add_trace(go.Scatter(x = df1['date'], y = df1[name]))

fig2.update_layout(
    title_text = "S&P 500 Pricing with Monte Carlo"
)

# adding sliders and buttons for interaction
fig2.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig2.show()

For another comparison, lets approach Monte Carlo in a more nuanced way: Euler Discretization

In [108]:
# annual stdev of returns
stdev_a = log_returns.std() * 252 ** 0.5

# risk-free rate of return (bonds)
r = 0.026

# time horizon (years)
T = 1

# intervals within horizon (trading days)
t_intervals = 252
delta_t = t / t_intervals

In [109]:
Z = np.random.standard_normal((t_intervals + 1, iterations))
S = np.zeros_like(Z)
S0 = data.iloc[-1]
S[0] = S0
S

array([[4305.20019531, 4305.20019531, 4305.20019531, ..., 4305.20019531,
        4305.20019531, 4305.20019531],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       ...,
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ]])

We need to use the following recursive formula for simulation:
$$
S_t = S_{t-1} \cdot exp((r - 0.5 \cdot stdev^2) \cdot delta_t + stdev \cdot delta_t^{0.5} \cdot Z_t)
$$

In [110]:
for t in range(1, t_intervals + 1):
    S[t] = S[t-1] * np.exp((r - 0.5 * stdev ** 2) * delta_t + stdev * delta_t ** 0.5 * Z[t])
S

array([[   4305.20019531,    4305.20019531,    4305.20019531, ...,
           4305.20019531,    4305.20019531,    4305.20019531],
       [   4380.3068107 ,    4428.82901755,    4461.84275512, ...,
           4401.82243229,    4428.4819527 ,    4410.33410464],
       [   4354.74493357,    4571.16337555,    4551.67354284, ...,
           4542.00283336,    4526.53249711,    4453.97298269],
       ...,
       [2828921.67666557, 3526102.23396324, 2532765.14940394, ...,
        3263106.94898868, 2631648.22094258, 2621339.27912118],
       [2911447.39614688, 3606298.83967801, 2602749.96439487, ...,
        3361714.51158014, 2663585.75069877, 2713217.43592207],
       [2973103.28583378, 3749707.78261374, 2673639.6990097 , ...,
        3491408.11605231, 2724231.47567238, 2800669.92632158]])

In [111]:
df2 = pd.DataFrame(S, columns = columns)
df2['date'] = pd.date_range(start= today, periods=len(df2), freq=BDay())
df2

Unnamed: 0,sim0,sim1,sim2,sim3,sim4,sim5,sim6,sim7,sim8,sim9,date
0,4.305200e+03,4.305200e+03,4.305200e+03,4.305200e+03,4.305200e+03,4.305200e+03,4.305200e+03,4.305200e+03,4.305200e+03,4.305200e+03,2022-08-17
1,4.380307e+03,4.428829e+03,4.461843e+03,4.435721e+03,4.371310e+03,4.300872e+03,4.400677e+03,4.401822e+03,4.428482e+03,4.410334e+03,2022-08-18
2,4.354745e+03,4.571163e+03,4.551674e+03,4.562618e+03,4.476499e+03,4.401447e+03,4.568974e+03,4.542003e+03,4.526532e+03,4.453973e+03,2022-08-19
3,4.500446e+03,4.712402e+03,4.668428e+03,4.755238e+03,4.513102e+03,4.464764e+03,4.729722e+03,4.606136e+03,4.721987e+03,4.628830e+03,2022-08-22
4,4.632956e+03,4.814189e+03,4.816346e+03,4.894714e+03,4.670267e+03,4.527843e+03,4.876498e+03,4.873888e+03,4.829938e+03,4.751631e+03,2022-08-23
...,...,...,...,...,...,...,...,...,...,...,...
248,2.642290e+06,3.357569e+06,2.412647e+06,2.823736e+06,2.408522e+06,1.491800e+06,2.631034e+06,3.179422e+06,2.562582e+06,2.466026e+06,2023-07-31
249,2.717151e+06,3.410101e+06,2.483983e+06,2.918838e+06,2.502681e+06,1.556999e+06,2.684805e+06,3.260838e+06,2.620542e+06,2.514172e+06,2023-08-01
250,2.828922e+06,3.526102e+06,2.532765e+06,2.993724e+06,2.551258e+06,1.575157e+06,2.765371e+06,3.263107e+06,2.631648e+06,2.621339e+06,2023-08-02
251,2.911447e+06,3.606299e+06,2.602750e+06,3.078432e+06,2.610509e+06,1.620267e+06,2.819347e+06,3.361715e+06,2.663586e+06,2.713217e+06,2023-08-03


In [112]:
# Let's visualize the Euler Discretizations
fig3 = go.Figure()
for name in columns:
    fig3.add_trace(go.Scatter(x = df1['date'], y = df1[name]))

fig3.update_layout(
    title_text = "S&P 500 Pricing with Euler Discretization"
)

# adding sliders and buttons for interaction
fig3.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig3.show()