In [2]:
#import necessary libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
from pandas.tseries.offsets import BDay
import random
import math
import numpy as np
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from scipy.stats import norm
import plotly.graph_objects as go
import warnings

# NOTE(review): 'ignore' with no category silences every warning for the rest of
# the session, which also hides deprecation and chained-assignment warnings
# raised by the cells below.
warnings.filterwarnings('ignore')



First, let's build a Prophet model and use it to forecast the market.

In [3]:
# Pull the daily S&P 500 index (^GSPC) history through yfinance;
# this raw frame is what the Prophet model is built from further down.

start = '2016-01-01'
today = datetime.today().strftime('%Y-%m-%d')

sp_df = yf.download('^GSPC', start=start, end=today)
sp_df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,2038.199951,2038.199951,1989.680054,2012.660034,2012.660034,4304880000
2016-01-05,2013.780029,2021.939941,2004.170044,2016.709961,2016.709961,3706620000
2016-01-06,2011.709961,2011.709961,1979.050049,1990.260010,1990.260010,4336660000
2016-01-07,1985.319946,1985.319946,1938.829956,1943.089966,1943.089966,5076590000
2016-01-08,1945.969971,1960.400024,1918.459961,1922.030029,1922.030029,4664940000
...,...,...,...,...,...,...
2022-08-09,4133.109863,4137.299805,4112.089844,4122.470215,4122.470215,3337150000
2022-08-10,4181.020020,4211.029785,4177.259766,4210.240234,4210.240234,3998590000
2022-08-11,4227.399902,4257.910156,4201.410156,4207.270020,4207.270020,3925060000
2022-08-12,4225.020020,4280.470215,4219.779785,4280.149902,4280.149902,3252290000


In [4]:
# Sanity checks: dtypes and non-null counts first, then missing values per column
sp_df.info()
sp_df.isna().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1666 entries, 2016-01-04 to 2022-08-15
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1666 non-null   float64
 1   High       1666 non-null   float64
 2   Low        1666 non-null   float64
 3   Close      1666 non-null   float64
 4   Adj Close  1666 non-null   float64
 5   Volume     1666 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 91.1 KB


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [5]:
# Prophet reads its timestamps from an ordinary column, so move the 'Date'
# index into the frame as a column.
# The guard keeps this cell idempotent: running it a second time would
# otherwise reset the already-reset integer index and add a stray 'index' column.
if 'Date' not in sp_df.columns:
    sp_df = sp_df.reset_index()
sp_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [6]:
# Prophet models require solely a date column ('ds') and a target column ('y');
# here that y is the adjusted close.
#
# Select and rename in one chained expression so a brand-new frame is created.
# Renaming a column slice of sp_df in place can trigger pandas'
# SettingWithCopyWarning because the slice may be a view of the parent frame.
df = sp_df[['Date', 'Adj Close']].rename(columns = {'Date' : 'ds', 'Adj Close' : 'y'})
df

Unnamed: 0,ds,y
0,2016-01-04,2012.660034
1,2016-01-05,2016.709961
2,2016-01-06,1990.260010
3,2016-01-07,1943.089966
4,2016-01-08,1922.030029
...,...,...
1661,2022-08-09,4122.470215
1662,2022-08-10,4210.240234
1663,2022-08-11,4207.270020
1664,2022-08-12,4280.149902


In [7]:
# Quick visual check of the raw series before it goes into Prophet
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x = df['ds'], y = df['y']))

# Range-selector presets as (count, label, step, stepmode); the final
# "all" button carries no count/label and is appended separately.
range_buttons = [
    dict(count = count, label = label, step = step, stepmode = stepmode)
    for count, label, step, stepmode in (
        (1, '1m', 'month', 'backward'),
        (6, '6m', 'month', 'backward'),
        (1, 'YTD', 'year', 'todate'),
        (1, '1y', 'year', 'backward'),
        (5, '5y', 'year', 'backward'),
    )
]
range_buttons.append(dict(step = 'all'))

# Title plus the interactive controls (preset buttons and range slider)
fig1.update_layout(
    title_text = "S&P 500 Pricing with Prophet",
    xaxis = dict(
        rangeselector = dict(buttons = range_buttons),
        rangeslider = dict(visible = True),
        type = 'date',
    ),
)

fig1.show()

In [8]:
# Fit a Prophet forecaster with multiplicative seasonality on the (ds, y) frame.
# fit() hands back the fitted model, so construction and fitting can be chained.
m = Prophet(seasonality_mode='multiplicative').fit(df)
m

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


<prophet.forecaster.Prophet at 0x7fc1d0aa6b80>

In [9]:
# Extend the training dates by 365 calendar days, then keep weekdays only,
# since the training data contains no weekend observations.
future = m.make_future_dataframe(periods = 365)

# dayofweek is Monday=0 ... Sunday=6, so "< 5" keeps Monday through Friday.
# This replaces BDay().onOffset (a deprecated alias of is_on_offset) mapped
# row by row, and no longer shadows the built-in `filter`.
is_weekday = pd.to_datetime(future['ds']).dt.dayofweek < 5
future = future[is_weekday]
future.tail(10)

Unnamed: 0,ds
2017,2023-08-02
2018,2023-08-03
2019,2023-08-04
2022,2023-08-07
2023,2023-08-08
2024,2023-08-09
2025,2023-08-10
2026,2023-08-11
2029,2023-08-14
2030,2023-08-15


In [10]:
# Run the fitted model over the future dates, then preview the point forecast
# together with its lower/upper uncertainty bounds
forecast = m.predict(future)
preview_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[preview_cols].tail(10)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
1917,2023-08-02,3999.11423,1475.963191,6551.942089
1918,2023-08-03,3999.66898,1445.116849,6584.924319
1919,2023-08-04,4005.047391,1509.844075,6611.947447
1920,2023-08-07,4014.08647,1410.620941,6587.707416
1921,2023-08-08,4019.661761,1399.781927,6631.291012
1922,2023-08-09,4026.19532,1402.858788,6689.331983
1923,2023-08-10,4027.866302,1402.64088,6670.361377
1924,2023-08-11,4034.38299,1367.317012,6727.078941
1925,2023-08-14,4046.636749,1373.092943,6817.836424
1926,2023-08-15,4053.099722,1278.133545,6766.349765


In [11]:
# Let's pull the forecast row for the next trading session.
# `future` was restricted to weekdays, so looking up "today + 1 calendar day"
# returns an empty frame whenever tomorrow is a Saturday or Sunday. Stepping
# one business day forward lands on the following Monday instead.
# (Market holidays are not accounted for.)
next_day = (pd.Timestamp.today().normalize() + BDay(1)).strftime('%Y-%m-%d')
forecast[forecast['ds'] == next_day]

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
1667,2022-08-17,752.752731,4128.708012,4347.780788,752.752731,752.752731,4.625698,4.625698,4.625698,4.665383,4.665383,4.665383,-0.039685,-0.039685,-0.039685,0.0,0.0,0.0,4234.759325


In [12]:
# Interactive plotly figure built from the fitted model `m` and its `forecast` frame
plot_plotly(m, forecast)

Now, let's approach this problem with a more traditional method in finance: Monte Carlo simulation.

In [13]:
# Adjusted-close series that drives the Monte Carlo simulation.
# The name is bound directly to the Series; an empty placeholder DataFrame
# would be overwritten on the very next line and is not needed.
data = sp_df['Adj Close']
data

0       2012.660034
1       2016.709961
2       1990.260010
3       1943.089966
4       1922.030029
           ...     
1661    4122.470215
1662    4210.240234
1663    4207.270020
1664    4280.149902
1665    4297.140137
Name: Adj Close, Length: 1666, dtype: float64

In [14]:
# Daily log returns, ln(1 + simple return); the first entry is NaN because
# the first price has no predecessor to compare against
simple_returns = data.pct_change()
log_returns = np.log(1 + simple_returns)
log_returns

0            NaN
1       0.002010
2      -0.013202
3      -0.023986
4      -0.010898
          ...   
1661   -0.004258
1662    0.021067
1663   -0.000706
1664    0.017174
1665    0.003962
Name: Adj Close, Length: 1666, dtype: float64

In [18]:
# Summary statistics of the historical daily log returns
u = log_returns.mean()
var = log_returns.var()
stdev = log_returns.std()
# NOTE(review): u is already the mean of *log* returns, so subtracting var/2
# may double-count the convexity adjustment - confirm the intended drift.
drift = u - (0.5 * var)

# trading days per year and num of sims
t_intervals = 252
iterations = 10

# Seeded generator so the simulated paths are reproducible from run to run.
# Standard normals are drawn directly instead of via norm.ppf(np.random.rand(...)):
# rand samples the half-open interval [0, 1), and norm.ppf(0.0) is -inf, which
# would turn a simulated gross return into exactly 0.
rng = np.random.default_rng(42)
shocks = rng.standard_normal((t_intervals, iterations))

# Simulated daily gross returns, one row per day and one column per simulation
daily_returns = np.exp(drift + stdev * shocks)

# Starting price for every path: the last observed adjusted close
s0 = data.iloc[-1]
s0

4297.14013671875

In [19]:
# Allocate the price paths with the same shape and dtype as the simulated
# returns, then seed the first row of every path with the starting price
price_list = np.zeros(daily_returns.shape, dtype = daily_returns.dtype)
price_list[0, :] = s0
price_list

array([[4297.14013672, 4297.14013672, 4297.14013672, ..., 4297.14013672,
        4297.14013672, 4297.14013672],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       ...,
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ],
       [   0.        ,    0.        ,    0.        , ...,    0.        ,
           0.        ,    0.        ]])

In [32]:
# Roll every path forward one day at a time:
# price on day t = price on day t-1 * simulated gross return for day t
for t in range(1, t_intervals):
    price_list[t] = price_list[t-1] * daily_returns[t]

# One label per simulated path, derived from the array width so the labels
# stay in sync with the number of simulations instead of a hard-coded 10.
columns = [f"sim{i}" for i in range(price_list.shape[1])]

# NOTE(review): this rebinds `df`, which earlier held the Prophet training
# frame - re-running the Prophet cells after this one will pick up this frame.
df = pd.DataFrame(price_list, columns = columns)
# Business-day calendar for the simulated horizon, starting at the download date
df['date'] = pd.date_range(start= today, periods=len(df), freq=BDay())
df

Unnamed: 0,sim0,sim1,sim2,sim3,sim4,sim5,sim6,sim7,sim8,sim9,date
0,4297.140137,4297.140137,4297.140137,4297.140137,4297.140137,4297.140137,4297.140137,4297.140137,4297.140137,4297.140137,2022-08-16
1,4348.760609,4261.953183,4415.844922,4283.965347,4325.529787,4320.512671,4289.930317,4312.926669,4309.417889,4205.800223,2022-08-17
2,4350.613752,4249.668020,4336.205956,4274.531585,4328.646272,4300.614907,4212.539558,4247.120528,4319.133741,4166.351548,2022-08-18
3,4335.405538,4284.195675,4283.733446,4294.802501,4352.872099,4215.334350,4282.917261,4237.077917,4327.121136,4226.502428,2022-08-19
4,4274.011858,4255.347700,4251.623992,4308.873850,4465.343364,4205.624985,4306.972513,4185.157637,4434.266978,4247.763945,2022-08-22
...,...,...,...,...,...,...,...,...,...,...,...
247,5271.417527,3777.332925,5142.110647,4397.582993,6108.337717,4100.883041,4459.219202,6424.252392,3731.751374,4446.305690,2023-07-27
248,5425.601259,3825.993258,5142.107562,4408.807740,6127.128701,4098.772954,4431.972660,6489.918538,3808.007385,4436.514424,2023-07-28
249,5430.228833,3781.844882,5198.767274,4423.954948,6193.552518,4086.645173,4376.084463,6532.301804,3794.147347,4512.414162,2023-07-31
250,5361.419052,3758.454480,5227.876116,4389.585717,6230.369103,4065.531858,4363.654562,6668.388366,3842.379012,4556.778164,2023-08-01


In [33]:
# Let's visualize the Monte Carlo Simulations
fig2 = go.Figure()
for name in columns:
    # label each trace with its simulation name so the legend is readable
    fig2.add_trace(go.Scatter(x = df['date'], y = df[name], name = name))

# This figure shows the simulated paths rather than the Prophet forecast,
# so the title says so.
fig2.update_layout(
    title_text = "S&P 500 Monte Carlo Simulations"
)

# adding sliders and buttons for interaction
fig2.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig2.show()