In [89]:
#import necessary libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
from pandas.tseries.offsets import BDay
import random
import math
import numpy as np
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objects as go
import warnings

# NOTE(review): this silences every Python warning for the rest of the session,
# so deprecation notices and pandas copy warnings raised by later cells are
# hidden. Consider narrowing the filter to specific categories/modules.
warnings.filterwarnings('ignore')

In [90]:
# Pull daily S&P 500 index (^GSPC) price history from Yahoo Finance with
# yfinance; this is the raw data the Prophet analysis below is built on.

start = '2016-01-01'
today = datetime.today().strftime('%Y-%m-%d')

sp_df = yf.download(tickers='^GSPC', start=start, end=today)
sp_df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,2038.199951,2038.199951,1989.680054,2012.660034,2012.660034,4304880000
2016-01-05,2013.780029,2021.939941,2004.170044,2016.709961,2016.709961,3706620000
2016-01-06,2011.709961,2011.709961,1979.050049,1990.260010,1990.260010,4336660000
2016-01-07,1985.319946,1985.319946,1938.829956,1943.089966,1943.089966,5076590000
2016-01-08,1945.969971,1960.400024,1918.459961,1922.030029,1922.030029,4664940000
...,...,...,...,...,...,...
2022-08-01,4112.379883,4144.950195,4096.020020,4118.629883,4118.629883,3540960000
2022-08-02,4104.209961,4140.470215,4079.810059,4091.189941,4091.189941,3880790000
2022-08-03,4107.959961,4167.660156,4107.959961,4155.169922,4155.169922,3544410000
2022-08-04,4154.850098,4161.290039,4135.419922,4151.939941,4151.939941,3565810000


In [91]:
# sanity checks on the download: column dtypes / non-null counts ...
sp_df.info()
# ... and the number of missing values per column
sp_df.isna().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1660 entries, 2016-01-04 to 2022-08-05
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1660 non-null   float64
 1   High       1660 non-null   float64
 2   Low        1660 non-null   float64
 3   Close      1660 non-null   float64
 4   Adj Close  1660 non-null   float64
 5   Volume     1660 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 90.8 KB


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [92]:
# Prophet wants the dates in an ordinary column rather than in the index,
# so move the `Date` index of the downloaded frame into a column.
# The guard (and the non-inplace call) keep this cell idempotent: re-running
# it no longer adds a stray `index` column to sp_df.
if 'Date' not in sp_df.columns:
    sp_df = sp_df.reset_index()
sp_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [93]:
# Prophet models require solely a date column (`ds`) and a value column (`y`);
# here y is the adjusted close.
# Select and rename in one chained, non-inplace step: renaming a column slice
# in place is what triggers pandas' SettingWithCopyWarning, and chaining also
# leaves sp_df untouched so the cell can be re-run safely.
df = (
    sp_df[['Date', 'Adj Close']]
    .rename(columns = {'Date' : 'ds', 'Adj Close' : 'y'})
)
df

Unnamed: 0,ds,y
0,2016-01-04,2012.660034
1,2016-01-05,2016.709961
2,2016-01-06,1990.260010
3,2016-01-07,1943.089966
4,2016-01-08,1922.030029
...,...,...
1655,2022-08-01,4118.629883
1656,2022-08-02,4091.189941
1657,2022-08-03,4155.169922
1658,2022-08-04,4151.939941


In [94]:
# quick interactive look at the price series before handing it to Prophet

# range-selector presets as (count, label, step, stepmode)
range_presets = [
    (1, '1m', 'month', 'backward'),
    (6, '6m', 'month', 'backward'),
    (1, 'YTD', 'year', 'todate'),
    (1, '1y', 'year', 'backward'),
    (5, '5y', 'year', 'backward'),
]
selector_buttons = [
    dict(count = count, label = label, step = step, stepmode = stepmode)
    for count, label, step, stepmode in range_presets
]
# final button resets the view to the full date range
selector_buttons.append(dict(step = 'all'))

fig = go.Figure(data = [go.Scatter(x = df['ds'], y = df['y'])])

# title plus the interactive x-axis controls (preset buttons and a range slider)
fig.update_layout(
    title_text = "S&P 500 Pricing",
    xaxis = dict(
        rangeselector = dict(buttons = selector_buttons),
        rangeslider = dict(visible = True),
        type = 'date',
    ),
)

fig.show()

In [95]:
# build the Prophet model for price forecasting, using multiplicative
# seasonality, and fit it on the ds/y frame prepared above
m = Prophet(seasonality_mode='multiplicative')
m.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


<prophet.forecaster.Prophet at 0x7f9c1cc997c0>

In [96]:
# Build the dates to predict on: the training dates plus 365 further calendar
# days, then keep only business days (Mon-Fri), matching the daily trading data.
future = m.make_future_dataframe(periods = 365)

# `is_on_offset` replaces the deprecated `onOffset` (removed in pandas 2.0).
# `ds` is already datetime64, so no extra pd.to_datetime conversion is needed.
business_days = BDay().is_on_offset
is_business_day = future['ds'].map(business_days)  # avoids shadowing builtin `filter`
future = future[is_business_day]
future.tail(10)

Unnamed: 0,ds
2012,2023-07-24
2013,2023-07-25
2014,2023-07-26
2015,2023-07-27
2016,2023-07-28
2019,2023-07-31
2020,2023-08-01
2021,2023-08-02
2022,2023-08-03
2023,2023-08-04


In [97]:
# run the fitted model over the future dates, then preview the last rows of
# the point forecast (yhat) together with its lower/upper bounds
forecast = m.predict(future)

summary_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[summary_cols].tail(10)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
1910,2023-07-24,4001.206232,1443.384469,6331.315171
1911,2023-07-25,4006.892676,1491.982407,6332.864228
1912,2023-07-26,4012.188194,1423.428498,6379.572675
1913,2023-07-27,4012.812392,1433.161085,6335.437144
1914,2023-07-28,4017.474769,1421.388218,6398.170238
1915,2023-07-31,4024.747064,1426.723632,6400.102336
1916,2023-08-01,4029.930629,1402.724449,6415.183866
1917,2023-08-02,4034.98871,1329.978339,6478.586187
1918,2023-08-03,4035.637355,1345.411489,6504.426357
1919,2023-08-04,4040.567146,1358.845718,6537.435666


In [98]:
# Let's specifically get the forecast for the next trading day.
# Use the next *business* day rather than "today + 1": weekend dates were
# filtered out of `future`, so a plain tomorrow that lands on a Saturday or
# Sunday would silently return an empty frame.
# (BDay skips weekends only; it does not know about market holidays.)
next_day = (pd.Timestamp.today() + BDay(1)).strftime('%Y-%m-%d')
forecast[forecast['ds'] == next_day]

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
1660,2022-08-08,597.875152,4082.706372,4302.857458,597.875152,597.875152,6.022992,6.022992,6.022992,6.15228,6.15228,6.15228,-0.129288,-0.129288,-0.129288,0.0,0.0,0.0,4198.872652


In [99]:
# interactive plotly chart of the fitted model together with its forecast
plot_plotly(m, forecast)