In [1]:
#import necessary libraries
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import random
import math
import numpy as np
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objects as go
import warnings

# Silence every warning category for the rest of the session so library
# warnings do not clutter the cell outputs below.
warnings.simplefilter('ignore')

In [2]:
# Fetch S&P 500 index history (Yahoo Finance ticker ^GSPC) through yfinance;
# this frame is the raw input for the Prophet analysis that follows.

start = '1980-01-01'
today = datetime.today().date().isoformat()  # 'YYYY-MM-DD', same text as strftime('%Y-%m-%d')

sp_df = yf.download('^GSPC', start=start, end=today)
sp_df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-02,0.000000,108.430000,105.290001,105.760002,105.760002,40610000
1980-01-03,0.000000,106.080002,103.260002,105.220001,105.220001,50480000
1980-01-04,0.000000,107.080002,105.089996,106.519997,106.519997,39130000
1980-01-07,0.000000,107.800003,105.800003,106.809998,106.809998,44500000
1980-01-08,0.000000,109.290001,106.290001,108.949997,108.949997,53390000
...,...,...,...,...,...,...
2022-07-29,4087.330078,4140.149902,4079.219971,4130.290039,4130.290039,3817740000
2022-08-01,4112.379883,4144.950195,4096.020020,4118.629883,4118.629883,3540960000
2022-08-02,4104.209961,4140.470215,4079.810059,4091.189941,4091.189941,3880790000
2022-08-03,4107.959961,4167.660156,4107.959961,4155.169922,4155.169922,3544410000


In [3]:
# Sanity checks on the download: per-column dtypes and non-null counts,
# followed by a tally of missing values in each column.
sp_df.info()
sp_df.isna().sum()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10740 entries, 1980-01-02 to 2022-08-04
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       10740 non-null  float64
 1   High       10740 non-null  float64
 2   Low        10740 non-null  float64
 3   Close      10740 non-null  float64
 4   Adj Close  10740 non-null  float64
 5   Volume     10740 non-null  int64  
dtypes: float64(5), int64(1)
memory usage: 587.3 KB


Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [4]:
# Prophet is a time series model that reads its dates from a regular column,
# so move the 'Date' index back into the frame as a column.
# The guard keeps this cell idempotent: once 'Date' is already a column the
# index is no longer named 'Date', and a second reset would otherwise insert
# a spurious 'index' column into sp_df.
if sp_df.index.name == 'Date':
    sp_df = sp_df.reset_index()
sp_df.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [5]:
# Prophet models require solely a date column and a y column;
# here y is the adjusted close ('Adj Close').
#
# Select and rename in one chained expression so `df` is a fresh frame.
# Renaming in place on a frame that was just selected out of sp_df is the
# pattern that triggers pandas' SettingWithCopyWarning.
df = (
    sp_df[['Date', 'Adj Close']]
    .rename(columns={'Date': 'ds', 'Adj Close': 'y'})  # names Prophet expects
)
df

Unnamed: 0,ds,y
0,1980-01-02,105.760002
1,1980-01-03,105.220001
2,1980-01-04,106.519997
3,1980-01-07,106.809998
4,1980-01-08,108.949997
...,...,...
10735,2022-07-29,4130.290039
10736,2022-08-01,4118.629883
10737,2022-08-02,4091.189941
10738,2022-08-03,4155.169922


In [8]:
# Quick visual check of the series before handing it to Prophet.
fig = go.Figure(data=[go.Scatter(x=df['ds'], y=df['y'])])

# Range-selector buttons as (count, label, step, stepmode) rows, in display order.
range_buttons = [
    dict(count=count, label=label, step=step, stepmode=stepmode)
    for count, label, step, stepmode in (
        (1, '1m', 'month', 'backward'),
        (6, '6m', 'month', 'backward'),
        (1, 'YTD', 'year', 'todate'),
        (1, '1y', 'year', 'backward'),
        (5, '5y', 'year', 'backward'),
    )
]
# Final button resets the view to the full history.
range_buttons.append(dict(step='all'))

# Title plus the interactive x-axis controls (buttons and range slider).
fig.update_layout(
    title_text="S&P 500 Pricing",
    xaxis=dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type='date',
    ),
)

fig.show()