In [1]:
#import necessary libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
from pandas.tseries.offsets import BDay
import random
import math
import numpy as np
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from scipy.stats import norm
import plotly.graph_objects as go
import warnings

warnings.filterwarnings('ignore')



First lets establish a prophet model and use it to predict the market

In [2]:
# We are analyzing the S&P 500 Index thru the prophet API,
# thus we need to garner the relevant data using yfinance

today = datetime.today().strftime('%Y-%m-%d')
start = '2016-01-01'

sp_df = yf.download('^GSPC', start, today)
sp_df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,2038.199951,2038.199951,1989.680054,2012.660034,2012.660034,4304880000
2016-01-05,2013.780029,2021.939941,2004.170044,2016.709961,2016.709961,3706620000
2016-01-06,2011.709961,2011.709961,1979.050049,1990.260010,1990.260010,4336660000
2016-01-07,1985.319946,1985.319946,1938.829956,1943.089966,1943.089966,5076590000
2016-01-08,1945.969971,1960.400024,1918.459961,1922.030029,1922.030029,4664940000
...,...,...,...,...,...,...
2022-08-15,4269.370117,4301.790039,4256.899902,4297.140137,4297.140137,3087740000
2022-08-16,4290.459961,4325.279785,4277.770020,4305.200195,4305.200195,3792010000
2022-08-17,4280.399902,4302.180176,4253.080078,4274.040039,4274.040039,3293430000
2022-08-18,4273.129883,4292.529785,4261.979980,4283.740234,4283.740234,2871990000


In [3]:
# data checks
sp_df.info()
sp_df.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1670 entries, 2016-01-04 to 2022-08-19
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1670 non-null   float64
 1   High       1670 non-null   float64
 2   Low        1670 non-null   float64
 3   Close      1670 non-null   float64
 4   Adj Close  1670 non-null   float64
 5   Volume     1670 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 91.3 KB


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [4]:
# prophet is a time series model so we must index by date
sp_df.reset_index(inplace = True)
sp_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [5]:
# prophet models require soley a date and y component
# here that y will be Adj Close

df = sp_df[['Date', 'Adj Close']]

# rename for prophet usage
df.rename(columns = {'Date' : 'ds', 'Adj Close' : 'y'}, inplace = True)
df

Unnamed: 0,ds,y
0,2016-01-04,2012.660034
1,2016-01-05,2016.709961
2,2016-01-06,1990.260010
3,2016-01-07,1943.089966
4,2016-01-08,1922.030029
...,...,...
1665,2022-08-15,4297.140137
1666,2022-08-16,4305.200195
1667,2022-08-17,4274.040039
1668,2022-08-18,4283.740234


In [6]:
# quickly vizualizing data before prophet use
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x = df['ds'], y = df['y']))

fig1.update_layout(
    title_text = "S&P 500 Pricing with Prophet"
)

# adding sliders and buttons for interaction
fig1.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig1.show()

In [7]:
# build prophet model for price forcasting
m = Prophet(daily_seasonality = False, seasonality_mode='multiplicative')
for col in df.columns:
    if col not in ['ds', 'y']:
        m.add_regressor(col, mode = 'additive')
m.fit(df)

INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


<prophet.forecaster.Prophet at 0x7fec1462daf0>

In [8]:
# here we create a year's worth of time series for the prophet model
future = m.make_future_dataframe(periods = 365)
business_days = BDay().onOffset
filter = pd.to_datetime(future['ds']).map(business_days)
future = future[filter]
future.tail(10)

Unnamed: 0,ds
2022,2023-08-07
2023,2023-08-08
2024,2023-08-09
2025,2023-08-10
2026,2023-08-11
2029,2023-08-14
2030,2023-08-15
2031,2023-08-16
2032,2023-08-17
2033,2023-08-18


In [9]:
# finally, we get to our predictions
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
1920,2023-08-07,3996.514034,1486.346129,6763.76245
1921,2023-08-08,4002.500831,1529.553082,6814.128751
1922,2023-08-09,4009.206353,1506.702458,6857.808439
1923,2023-08-10,4011.289926,1477.812323,6841.320462
1924,2023-08-11,4017.740633,1475.468915,6853.690106
1925,2023-08-14,4030.340491,1504.314375,6911.002653
1926,2023-08-15,4037.021174,1418.738194,6874.456769
1927,2023-08-16,4044.261944,1437.417853,6930.54258
1928,2023-08-17,4046.704748,1433.834697,6959.417159
1929,2023-08-18,4053.313018,1392.528433,6958.598336


In [10]:
# lets specifically get the next days worth!
next_day = ((datetime.today() + timedelta(1)).strftime('%Y-%m-%d'))
forecast[forecast['ds'] == next_day]

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
1671,2022-08-23,761.647758,4155.569106,4365.708215,761.647758,761.647758,4.590085,4.590085,4.590085,4.586154,4.586154,4.586154,0.003931,0.003931,0.003931,0.0,0.0,0.0,4257.675532


In [11]:
plot_plotly(m, forecast)

Now, lets approach this problem with a more traditional/basic method in finance: Monte Carlo Simulation

In [12]:
data = pd.DataFrame()
data = sp_df['Adj Close']
data

0       2012.660034
1       2016.709961
2       1990.260010
3       1943.089966
4       1922.030029
           ...     
1665    4297.140137
1666    4305.200195
1667    4274.040039
1668    4283.740234
1669    4228.479980
Name: Adj Close, Length: 1670, dtype: float64

In [13]:
log_returns = np.log(1 + data.pct_change())
log_returns

0            NaN
1       0.002010
2      -0.013202
3      -0.023986
4      -0.010898
          ...   
1665    0.003962
1666    0.001874
1667   -0.007264
1668    0.002267
1669   -0.012984
Name: Adj Close, Length: 1670, dtype: float64

In [14]:
#returns stats
u = log_returns.mean()
var = log_returns.var()
drift = u - (0.5 * var)
stdev = log_returns.std()

# trading days per year and num of sims
t_intervals = 252
iterations = 10

daily_returns = np.exp(drift + stdev * norm.ppf(np.random.rand(t_intervals, iterations)))

s0 = data.iloc[-1]
s0

4228.47998046875

In [15]:
price_list = np.zeros_like(daily_returns)
price_list[0] = s0
price_list

array([[4228.47998047, 4228.47998047, 4228.47998047, ..., 4228.47998047,
        4228.47998047, 4228.47998047],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       ...,
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ]])

In [16]:
for t in range(1, t_intervals):
    price_list[t] = price_list[t-1] * daily_returns[t]

price_list

columns = []
for i in range(0,10):
     columns.append("sim" + str(i))

df1 = pd.DataFrame(price_list, columns = columns)
df1['date'] = pd.date_range(start= today, periods=len(df1), freq=BDay())
df1

Unnamed: 0,sim0,sim1,sim2,sim3,sim4,sim5,sim6,sim7,sim8,sim9,date
0,4228.479980,4228.479980,4228.479980,4228.479980,4228.479980,4228.479980,4228.479980,4228.479980,4228.479980,4228.479980,2022-08-22
1,4279.120402,4226.959592,4192.526343,4262.888071,4166.995831,4173.874974,4197.797470,4271.038167,4278.708264,4222.605731,2022-08-23
2,4373.497486,4200.391243,4149.204120,4266.493493,4153.483149,4088.517087,4204.968557,4197.006343,4184.133466,4288.755990,2022-08-24
3,4399.988366,4239.948457,4202.622572,4246.231358,4217.816570,4135.107188,4195.304199,4184.736346,4225.985996,4298.656220,2022-08-25
4,4387.605997,4271.054296,4185.616114,4284.548675,4148.029071,4096.794215,4181.270773,4139.441470,4182.149235,4252.998185,2022-08-26
...,...,...,...,...,...,...,...,...,...,...,...
247,5223.179776,8396.001931,5076.306264,6125.772833,3498.983376,4370.081936,4935.343601,5013.992391,4894.847256,4839.092695,2023-08-02
248,5226.581359,8307.300674,5033.902020,6085.436138,3496.390102,4359.699685,4980.462627,5008.252701,4984.085846,4793.899573,2023-08-03
249,5344.853219,8316.276775,5010.871597,6051.433698,3520.788502,4369.520631,4929.173066,5019.113052,4953.912321,4927.372877,2023-08-04
250,5322.588030,8408.945032,4972.581238,6105.874608,3506.276971,4294.429345,4891.084143,5094.621249,4996.434027,5000.296872,2023-08-07


In [17]:
# Let's visualize the Monte Carlo Simulations
fig2 = go.Figure()
for name in columns:
    fig2.add_trace(go.Scatter(x = df1['date'], y = df1[name]))

fig2.update_layout(
    title_text = "S&P 500 Pricing with Monte Carlo"
)

# adding sliders and buttons for interaction
fig2.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig2.show()

For another comparison, lets approach Monte Carlo in a more nuanced way: Euler Discretization

In [18]:
# annual stdev of returns
stdev_a = log_returns.std() * 252 ** 0.5

# risk-free rate of return (bonds)
r = 0.026

# time horizon (years)
T = 1

# intervals within horizon (trading days)
t_intervals = 252
delta_t = t / t_intervals

In [19]:
Z = np.random.standard_normal((t_intervals + 1, iterations))
S = np.zeros_like(Z)
S0 = data.iloc[-1]
S[0] = S0
S

array([[4228.47998047, 4228.47998047, 4228.47998047, ..., 4228.47998047,
        4228.47998047, 4228.47998047],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       ...,
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ]])

We need to use the following recursive formula for simulation:
$$
S_t = S_{t-1} \cdot exp((r - 0.5 \cdot stdev^2) \cdot delta_t + stdev \cdot delta_t^{0.5} \cdot Z_t)
$$

In [20]:
for t in range(1, t_intervals + 1):
    S[t] = S[t-1] * np.exp((r - 0.5 * stdev ** 2) * delta_t + stdev * delta_t ** 0.5 * Z[t])
S

array([[   4228.47998047,    4228.47998047,    4228.47998047, ...,
           4228.47998047,    4228.47998047,    4228.47998047],
       [   4287.48231825,    4313.79752029,    4372.71405028, ...,
           4374.87600307,    4337.08939705,    4339.06792998],
       [   4373.02024047,    4471.03915342,    4512.33786789, ...,
           4475.78318135,    4458.42236497,    4421.9638982 ],
       ...,
       [2859348.6502365 , 3147838.03893883, 2287074.55250109, ...,
        3634482.56496772, 3036811.27102122, 2938283.02695802],
       [2990585.51010711, 3247592.78089376, 2314381.081466  , ...,
        3740871.66288223, 3134689.25118611, 3022135.39366507],
       [3044271.56733896, 3284600.6990247 , 2391744.16194164, ...,
        3926044.61079658, 3252704.5182633 , 3113125.71972164]])

In [21]:
df2 = pd.DataFrame(S, columns = columns)
df2['date'] = pd.date_range(start= today, periods=len(df2), freq=BDay())
df2

Unnamed: 0,sim0,sim1,sim2,sim3,sim4,sim5,sim6,sim7,sim8,sim9,date
0,4.228480e+03,4.228480e+03,4.228480e+03,4.228480e+03,4.228480e+03,4.228480e+03,4.228480e+03,4.228480e+03,4.228480e+03,4.228480e+03,2022-08-22
1,4.287482e+03,4.313798e+03,4.372714e+03,4.282761e+03,4.308622e+03,4.403445e+03,4.365668e+03,4.374876e+03,4.337089e+03,4.339068e+03,2022-08-23
2,4.373020e+03,4.471039e+03,4.512338e+03,4.445035e+03,4.424944e+03,4.521746e+03,4.414251e+03,4.475783e+03,4.458422e+03,4.421964e+03,2022-08-24
3,4.497493e+03,4.611519e+03,4.595790e+03,4.635154e+03,4.532982e+03,4.677353e+03,4.507295e+03,4.633805e+03,4.604784e+03,4.547335e+03,2022-08-25
4,4.530178e+03,4.734327e+03,4.691801e+03,4.730225e+03,4.688577e+03,4.868912e+03,4.561555e+03,4.744622e+03,4.702714e+03,4.562004e+03,2022-08-26
...,...,...,...,...,...,...,...,...,...,...,...
248,2.670750e+06,2.998888e+06,2.164006e+06,2.287227e+06,2.632658e+06,3.651592e+06,2.182735e+06,3.509560e+06,2.899185e+06,2.718347e+06,2023-08-03
249,2.774584e+06,3.086270e+06,2.224542e+06,2.369889e+06,2.712802e+06,3.696138e+06,2.211433e+06,3.593972e+06,2.963894e+06,2.776117e+06,2023-08-04
250,2.859349e+06,3.147838e+06,2.287075e+06,2.444553e+06,2.751050e+06,3.900642e+06,2.273038e+06,3.634483e+06,3.036811e+06,2.938283e+06,2023-08-07
251,2.990586e+06,3.247593e+06,2.314381e+06,2.497402e+06,2.857722e+06,3.950001e+06,2.372681e+06,3.740872e+06,3.134689e+06,3.022135e+06,2023-08-08


In [22]:
# Let's visualize the Euler Discretizations
fig3 = go.Figure()
for name in columns:
    fig3.add_trace(go.Scatter(x = df1['date'], y = df1[name]))

fig3.update_layout(
    title_text = "S&P 500 Pricing with Euler Discretization"
)

# adding sliders and buttons for interaction
fig3.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig3.show()