In [21]:
#import necessary libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import random
import math
import numpy as np
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objects as go
import warnings

warnings.filterwarnings('ignore')

In [22]:
# We are analyzing the S&P 500 Index thru the prophet API,
# thus we need to garner the relevant data using yfinance

today = datetime.today().strftime('%Y-%m-%d')
start = '1980-01-01'

sp_df = yf.download('^GSPC', start, today)
sp_df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-02,0.000000,108.430000,105.290001,105.760002,105.760002,40610000
1980-01-03,0.000000,106.080002,103.260002,105.220001,105.220001,50480000
1980-01-04,0.000000,107.080002,105.089996,106.519997,106.519997,39130000
1980-01-07,0.000000,107.800003,105.800003,106.809998,106.809998,44500000
1980-01-08,0.000000,109.290001,106.290001,108.949997,108.949997,53390000
...,...,...,...,...,...,...
2022-08-01,4112.379883,4144.950195,4096.020020,4118.629883,4118.629883,3540960000
2022-08-02,4104.209961,4140.470215,4079.810059,4091.189941,4091.189941,3880790000
2022-08-03,4107.959961,4167.660156,4107.959961,4155.169922,4155.169922,3544410000
2022-08-04,4154.850098,4161.290039,4135.419922,4151.939941,4151.939941,3565810000


In [23]:
# data checks
sp_df.info()
sp_df.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10741 entries, 1980-01-02 to 2022-08-05
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       10741 non-null  float64
 1   High       10741 non-null  float64
 2   Low        10741 non-null  float64
 3   Close      10741 non-null  float64
 4   Adj Close  10741 non-null  float64
 5   Volume     10741 non-null  int64  
dtypes: float64(5), int64(1)
memory usage: 587.4 KB


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [24]:
# prophet is a time series model so we must index by date
sp_df.reset_index(inplace = True)
sp_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [25]:
# prophet models require soley a date and y component
# here that y will be Adj Close

df = sp_df[['Date', 'Adj Close']]

# rename for prophet usage
df.rename(columns = {'Date' : 'ds', 'Adj Close' : 'y'}, inplace = True)
df

Unnamed: 0,ds,y
0,1980-01-02,105.760002
1,1980-01-03,105.220001
2,1980-01-04,106.519997
3,1980-01-07,106.809998
4,1980-01-08,108.949997
...,...,...
10736,2022-08-01,4118.629883
10737,2022-08-02,4091.189941
10738,2022-08-03,4155.169922
10739,2022-08-04,4151.939941


In [26]:
# quickly vizualizing data before prophet use
fig = go.Figure()
fig.add_trace(go.Scatter(x = df['ds'], y = df['y']))

fig.update_layout(
    title_text = "S&P 500 Pricing"
)

# adding sliders and buttons for interaction
fig.update_layout(
    xaxis = dict(
        rangeselector = dict(
            buttons = list([
                dict(
                    count = 1,
                    label = '1m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 6,
                    label = '6m',
                    step = 'month',
                    stepmode = 'backward'
                ),
                dict(
                    count = 1,
                    label = 'YTD',
                    step = 'year',
                    stepmode = 'todate'
                ),
                dict(
                    count = 1,
                    label = '1y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(
                    count = 5,
                    label = '5y',
                    step = 'year',
                    stepmode = 'backward'
                ),
                dict(step = 'all')
            ])
        ),
        rangeslider = dict(
            visible =True
        ),
        type = 'date'
    )
)

fig.show()

In [27]:
# build prophet model for price forcasting
m = Prophet(seasonality_mode='multiplicative')
m.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:cmdstanpy:start chain 1
INFO:cmdstanpy:finish chain 1


<prophet.forecaster.Prophet at 0x7f9c1ce83850>

In [28]:
# here we create a year's worth of time series for the prophet model
future = m.make_future_dataframe(periods = 365)
future

Unnamed: 0,ds
0,1980-01-02
1,1980-01-03
2,1980-01-04
3,1980-01-07
4,1980-01-08
...,...
11101,2023-08-01
11102,2023-08-02
11103,2023-08-03
11104,2023-08-04


In [32]:
# finally, we get to our predictions
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10)

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
11096,2023-07-27,4421.450803,4221.725479,4615.068486
11097,2023-07-28,4419.956023,4225.334982,4611.939465
11098,2023-07-29,-5273.07031,-5465.835207,-5058.208845
11099,2023-07-30,-5273.899274,-5485.251243,-5069.786053
11100,2023-07-31,4424.012028,4215.002717,4610.017456
11101,2023-08-01,4428.868438,4229.384543,4622.211511
11102,2023-08-02,4429.319831,4219.920474,4619.505941
11103,2023-08-03,4428.088868,4218.34419,4617.402132
11104,2023-08-04,4426.134599,4227.783724,4622.03705
11105,2023-08-05,-5279.821063,-5471.665411,-5072.136445


In [30]:
# lets specifically get the next days worth!
next_day = ((datetime.today() + timedelta(1)).strftime('%Y-%m-%d'))
forecast[forecast['ds'] == next_day]

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
10742,2022-08-07,1534.550159,-5093.848059,-4730.918817,1534.550159,1534.550159,-4.207968,-4.207968,-4.207968,-4.214009,-4.214009,-4.214009,0.006041,0.006041,0.006041,0.0,0.0,0.0,-4922.787152


In [31]:
plot_plotly(m, forecast)