# Time Series Forecasting Assignment

In [22]:
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet

### Import the Microsoft stock price data set (MSFT_data.csv) into a Pandas dataframe.

In [23]:
msft_df = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/MSFT_data.csv')

In [24]:
msft_df['date'] = pd.to_datetime(msft_df['date'])

In [25]:
msft_df.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,27.35,27.71,27.31,27.55,33318306,MSFT
1,2013-02-11,27.65,27.92,27.5,27.86,32247549,MSFT
2,2013-02-12,27.88,28.0,27.75,27.88,35990829,MSFT
3,2013-02-13,27.93,28.11,27.88,28.03,41715530,MSFT
4,2013-02-14,27.92,28.06,27.87,28.04,32663174,MSFT


### Generate a line chart showing the observed values (closing prices).

In [26]:
px.line(msft_df, 'date', 'close', template='none')

### Decompose the time series and check it for stationarity. If the data is not stationary, difference the observations and store the results in a new Diff column.

In [27]:
observed = msft_df.set_index('date')['close']

In [28]:
decomposition = sm.tsa.seasonal_decompose(observed, model='additive', freq=252)

In [29]:
trend = decomposition.trend.reset_index()['close'].rename("trend")
seasonal = decomposition.seasonal.reset_index()['close'].rename("seasonal")
residuals = decomposition.resid.reset_index()['close'].rename("residuals")
t_s = (trend + seasonal).rename('trend_seasonal')

In [30]:
decompose_df = pd.concat([msft_df, trend, seasonal, residuals, t_s], 1)

In [31]:
decompose_melt = pd.melt(decompose_df, id_vars='date',
                         value_vars=['close', 'trend', 'seasonal', 'residuals', 'trend_seasonal'],
                         var_name='Variable',
                         value_name='Value')

px.line(decompose_melt, 'date', 'Value', color='Variable', 
        title='Observed, Trend, Seasonality, and Residuals',
        template='none')

In [33]:
msft_df['shift'] = msft_df['close'].shift().fillna(method='bfill')
msft_df['diff'] = msft_df['close'] - msft_df['shift']

### Forecast the time series 60 days into the future using double and triple exponential smoothing models.

In [34]:
model = sm.tsa.ExponentialSmoothing(msft_df['close'], trend='add').fit()
double_exp = model.forecast(60)

In [35]:
model = sm.tsa.ExponentialSmoothing(msft_df['close'], trend='add',
                                    seasonal='add', seasonal_periods=4).fit()
triple_exp = model.forecast(60) 

### Forecast the time series 60 days into the future using ARMA, ARIMA, and SARIMA models.

In [36]:
def forecast(data, field, model, periods):
  model_results = model.forecast(periods)[0]
  current_value = data[field].iloc[-1]
  forecasts = []

  for result in model_results:
    forecast = current_value + result
    forecasts.append(forecast)
    current_value = forecast
  
  forecasts = pd.Series(forecasts)
  forecasts.index = [x + periods for x in list(data[field].tail(periods).index)]

  return forecasts

In [37]:
model = ARMA(msft_df['diff'], order=(1,1)).fit()
ARMA_forecasts = forecast(msft_df, 'close', model, 60)

In [38]:
model = ARIMA(msft_df['close'], order=(1,1,1)).fit()
ARIMA_forecasts = pd.Series(model.forecast(60)[0])
ARIMA_forecasts.index = [x + 60 for x in list(msft_df['close'].tail(60).index)]

In [39]:
model = SARIMAX(msft_df['close'], order=(1,1,1), 
                seasonal_order=(1,1,1,1)).fit()

SARIMAX_forecasts = model.forecast(60)

### Forecast the time series 60 days into the future using the Facebook Prophet model.

In [40]:
series = msft_df[['date', 'close']]
series.columns = ['ds', 'y']

model = Prophet()
model.fit(series)

future = model.make_future_dataframe(60)
results = model.predict(future)
prophet_forecasts = results.iloc[-60:]['yhat']

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


### Combine the observed values and all the forecasts into a single data frame and generate a line chart to visually compare the different models.

In [41]:
future_dates = pd.DataFrame({'date': pd.date_range(start=msft_df.date.iloc[-1],
                                                   periods=61,
                                                   freq='D',
                                                   closed='right')})

fcast_df = pd.concat([pd.DataFrame(msft_df['date']), future_dates], ignore_index=True)
fcast_df = fcast_df.merge(msft_df[['date', 'close']], on='date', how='left')

In [42]:
fcast_df['double_exp'] = double_exp
fcast_df['triple_exp'] = triple_exp
fcast_df['ARMA'] = ARMA_forecasts
fcast_df['ARIMA'] = ARIMA_forecasts
fcast_df['SARIMAX'] = SARIMAX_forecasts
fcast_df['Prophet'] = prophet_forecasts

In [43]:
fcast_melt = pd.melt(fcast_df, id_vars='date',
                     value_vars=['close','double_exp','triple_exp','ARMA','ARIMA','SARIMAX','Prophet'],
                     var_name='Variable',
                     value_name='Value')

px.line(fcast_melt, 'date', 'Value', color='Variable', template='none') 

#Lecture Notes

In [45]:
df = pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/MSFT_data.csv')

In [46]:
df['date'] = pd.to_datetime(df['date'])

In [48]:
df.columns = ['Date', 'Open', 'High', 'Low', 'Observed', 'Volume', 'Ticket']

In [49]:
px.line(df, 'Date', 'Observed')

In [50]:
df = df[['Date', 'Observed']]

In [132]:
train = df.iloc[:-30]
test = df.iloc[-30:]

In [51]:
series = df.set_index('Date')

In [55]:
decomposition = sm.tsa.seasonal_decompose(series, model='additive', freq=252)

trend = decomposition.trend.reset_index()['Observed'].rename('Trend')
seasonality = decomposition.seasonal.reset_index()['Observed'].rename('Seasonality')
residuals = decomposition.resid.reset_index()['Observed'].rename('Residuals')

merged = pd.concat([df, trend, seasonality, residuals],1)
merged['T_S'] = merged['Trend'] + merged['Seasonality']

In [56]:
melt = pd.melt(merged, id_vars='Date',
               value_vars=['Observed', 'Trend', 'Seasonality', 'Residuals', 'T_S'],
               var_name='variable')

px.line(melt, 'Date', 'value', color='variable')

In [60]:
model = sm.tsa.ExponentialSmoothing(df['Observed']).fit()
single_exp = model.forecast(30)

In [61]:
model = sm.tsa.ExponentialSmoothing(df['Observed'], trend='additive').fit()
double_exp = model.forecast(30)

In [62]:
model = sm.tsa.ExponentialSmoothing(df['Observed'], trend='additive',
                                    seasonal='additive', seasonal_periods=4).fit()
triple_exp = model.forecast(30) 

In [97]:
def forecast(data, field, model, periods):
  model_results = model.forecast(periods)[0]
  current_value = data[field].iloc[-1]
  forecasts = []

  for result in model_results:
    forecast = current_value + result
    forecasts.append(forecast)
    current_value = forecast
  
  forecasts = pd.Series(forecasts)
  forecasts.index = [x + periods for x in list(data[field].tail(periods).index)]

  return forecasts

In [98]:
df['shift'] = df['Observed'].shift().fillna(method='bfill')
df['diff'] = df['Observed'] - df['shift']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [99]:
model = ARMA(df['diff'], order=(1,1)).fit()
arma_forecasts = forecast(df, 'Observed', model, 30)
# arma_forecasts

In [100]:
model = ARIMA(df['diff'], order=(1,1,1)).fit()
arima_forecasts = forecast(df, 'Observed', model, 30)

In [102]:
# arima_forecasts

In [128]:
model = SARIMAX(df['Observed'], order=(1,1,1), 
                seasonal_order=(1,1,1,4)).fit()

sarimax_forecasts = model.forecast(30)
# sarimax_forecasts

In [105]:
series = df[['Date', 'Observed']]
series.columns = ['ds', 'y']
# series

In [106]:
model = Prophet()
model.fit(series)

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7ff5dd5d8cf8>

In [107]:
future = model.make_future_dataframe(30)

In [108]:
results = model.predict(future)
results.head()

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2013-02-08,26.01198,24.978514,28.603377,26.01198,26.01198,0.737491,0.737491,0.737491,-0.210331,-0.210331,-0.210331,0.947822,0.947822,0.947822,0.0,0.0,0.0,26.749471
1,2013-02-11,26.239164,24.848214,28.770811,26.239164,26.239164,0.587495,0.587495,0.587495,-0.261213,-0.261213,-0.261213,0.848708,0.848708,0.848708,0.0,0.0,0.0,26.826659
2,2013-02-12,26.314892,25.134557,28.678613,26.314892,26.314892,0.519229,0.519229,0.519229,-0.302759,-0.302759,-0.302759,0.821988,0.821988,0.821988,0.0,0.0,0.0,26.834121
3,2013-02-13,26.39062,25.170323,28.713073,26.39062,26.39062,0.51651,0.51651,0.51651,-0.281461,-0.281461,-0.281461,0.797971,0.797971,0.797971,0.0,0.0,0.0,26.90713
4,2013-02-14,26.466348,25.01968,28.769242,26.466348,26.466348,0.457066,0.457066,0.457066,-0.319226,-0.319226,-0.319226,0.776293,0.776293,0.776293,0.0,0.0,0.0,26.923414


In [109]:
prophet_forecasts = results.iloc[-30:]['yhat']

In [129]:
future_dates = pd.DataFrame({'Date': pd.date_range(start=msft_df.date.iloc[-1],
                                                   periods=31,
                                                   freq='D',
                                                   closed='right')})

fcast_df = pd.concat([pd.DataFrame(df['Date']), future_dates], ignore_index=True)
fcast_df = fcast_df.merge(df[['Date', 'Observed']], on='Date', how='left')

In [130]:
fcast_df['double_exp'] = double_exp
fcast_df['triple_exp'] = triple_exp
fcast_df['ARMA'] = arma_forecasts
fcast_df['ARIMA'] = arima_forecasts
fcast_df['SARIMAX'] = sarimax_forecasts
fcast_df['Prophet'] = prophet_forecasts

In [131]:
fcast_melt = pd.melt(fcast_df, id_vars='Date',
                     value_vars=['Observed','double_exp','triple_exp','ARMA','ARIMA','SARIMAX','Prophet'],
                     var_name='Variable',
                     value_name='Value')

px.line(fcast_melt, 'Date', 'Value', color='Variable', template='none') 

In [133]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(df, test_size=0.2, shuffle=False)

In [134]:
train

Unnamed: 0,Date,Observed,shift,diff
0,2013-02-08,27.55,27.55,0.00
1,2013-02-11,27.86,27.55,0.31
2,2013-02-12,27.88,27.86,0.02
3,2013-02-13,28.03,27.88,0.15
4,2013-02-14,28.04,28.03,0.01
...,...,...,...,...
1002,2017-02-01,63.58,64.65,-1.07
1003,2017-02-02,63.17,63.58,-0.41
1004,2017-02-03,63.68,63.17,0.51
1005,2017-02-06,63.64,63.68,-0.04


In [135]:
test

Unnamed: 0,Date,Observed,shift,diff
1007,2017-02-08,63.34,63.43,-0.09
1008,2017-02-09,64.06,63.34,0.72
1009,2017-02-10,64.00,64.06,-0.06
1010,2017-02-13,64.72,64.00,0.72
1011,2017-02-14,64.57,64.72,-0.15
...,...,...,...,...
1254,2018-02-01,94.26,95.01,-0.75
1255,2018-02-02,91.78,94.26,-2.48
1256,2018-02-05,88.00,91.78,-3.78
1257,2018-02-06,91.33,88.00,3.33
