# Time Series Forecasting Assignment

In [1]:
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



### Import the Microsoft stock price data set (MSFT_data.csv) into a Pandas dataframe.

In [2]:
df =pd.read_csv('https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/MSFT_data.csv')

In [3]:
df

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,27.35,27.710,27.3100,27.55,33318306,MSFT
1,2013-02-11,27.65,27.920,27.5000,27.86,32247549,MSFT
2,2013-02-12,27.88,28.000,27.7500,27.88,35990829,MSFT
3,2013-02-13,27.93,28.110,27.8800,28.03,41715530,MSFT
4,2013-02-14,27.92,28.060,27.8700,28.04,32663174,MSFT
...,...,...,...,...,...,...,...
1254,2018-02-01,94.79,96.070,93.5813,94.26,47227882,MSFT
1255,2018-02-02,93.64,93.970,91.5000,91.78,47867753,MSFT
1256,2018-02-05,90.56,93.240,88.0000,88.00,51031465,MSFT
1257,2018-02-06,86.89,91.475,85.2500,91.33,67998564,MSFT


### Generate a line chart showing the observed values (closing prices).

In [4]:
def ilinechart(df, x, y, groups=None, title=''):
    fig = px.line(df, x=x, y=y, color=groups, title=title, 
                  template='none').update(layout=dict(title=dict(x=0.5)))
    
    fig.show()

ilinechart(df, 'date', 'close')

### Decompose the time series and check it for stationarity. If the data is not stationary, difference the observations and store the results in a new Diff column.

In [5]:
data = df[['date','close']]
data['diff'] = data.close - data.close.shift(1)
data['diff'] = data['diff'].fillna(method='bfill')
data = data[['date','diff']]
data



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,date,diff
0,2013-02-08,0.31
1,2013-02-11,0.31
2,2013-02-12,0.02
3,2013-02-13,0.15
4,2013-02-14,0.01
...,...,...
1254,2018-02-01,-0.75
1255,2018-02-02,-2.48
1256,2018-02-05,-3.78
1257,2018-02-06,3.33


In [6]:
series = data.set_index('date')
series = series[['diff']]
decomposition = sm.tsa.seasonal_decompose(series, model='additive', freq=252)

trend = decomposition.trend.reset_index()
seasonality = decomposition.seasonal.reset_index()
residuals = decomposition.resid.reset_index()

merged = data.merge(trend, on='date') # center moving average
merged = merged.merge(seasonality, on='date')
merged = merged.merge(residuals, on='date')

merged.columns = ['Date', 'Observed', 'Trend', 'Seasonality', 'Residuals']
merged['T+S'] = merged['Trend'] + merged['Seasonality']

merged.head()

Unnamed: 0,Date,Observed,Trend,Seasonality,Residuals,T+S
0,2013-02-08,0.31,,0.369835,,
1,2013-02-11,0.31,,-0.01894,,
2,2013-02-12,0.02,,0.594746,,
3,2013-02-13,0.15,,0.300381,,
4,2013-02-14,0.01,,0.212633,,


In [7]:
melted = pd.melt(merged, id_vars='Date', var_name='Variable', value_name='Value',
                         value_vars=['Observed', 'Trend', 'Seasonality', 'Residuals', 'T+S'])

ilinechart(melted, 'Date', 'Value', groups='Variable', title='Observed vs Components')

### Forecast the time series 60 days into the future using double and triple exponential smoothing models.

In [8]:
data['Observed'] = df['close']
model = sm.tsa.ExponentialSmoothing(data['Observed'], trend='additive').fit()
double_exp = model.forecast(60)

In [9]:
model = sm.tsa.ExponentialSmoothing(data['Observed'], trend='additive', 
                                    seasonal='additive', 
                                    seasonal_periods=4).fit()

triple_exp = model.forecast(60)

### Forecast the time series 60 days into the future using ARMA, ARIMA, and SARIMA models.

In [10]:
def forecast(data, field, model, periods):
    model_results = model.forecast(periods)[0]
    current_value = data[field].iloc[-1]
    forecasts = []
    
    for result in model_results:
        forecast = current_value + result
        forecasts.append(forecast)
        current_value = forecast
    
    forecasts = pd.Series(forecasts)
    forecasts.index = [x + periods 
                       for x in list(data[field].tail(periods).index)]
    
    return forecasts

In [11]:
data['Shift'] = data['Observed'].shift(1).fillna(method='bfill')
data['Diff'] = data['Observed'] - data['Shift']
model = ARMA(data['Diff'], order=(2, 1)).fit()
arma_forecasts = forecast(data, 'Observed', model, 60)
model = ARIMA(data['Diff'], order=(1, 1, 1)).fit()
arima_forecasts = forecast(data, 'Observed', model, 60)
model = SARIMAX(data['Observed'], order=(2, 1, 1), 
                seasonal_order=(1, 1, 1, 1)).fit()
sarima_forecasts = model.forecast(60)

### Forecast the time series 60 days into the future using the Facebook Prophet model.

In [12]:
series = df[['date', 'close']]
series.columns = ['ds', 'y']
model = Prophet()
model.fit(series)
future = model.make_future_dataframe(60)
results = model.predict(future)
prophet_forecasts = results.iloc[-60:]['yhat']

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


### Combine the observed values and all the forecasts into a single data frame and generate a line chart to visually compare the different models.

In [13]:
future_dates = pd.DataFrame({'date': pd.date_range(start=df.date.iloc[-1], periods=61, 
                                                   freq='D', closed='right')})

fcast_df = pd.concat([pd.DataFrame(df['date']), future_dates], ignore_index=True)
fcast_df = fcast_df.merge(df[['date', 'close']], on='date', how='left')
fcast_df

Unnamed: 0,date,close
0,2013-02-08,27.55
1,2013-02-11,27.86
2,2013-02-12,27.88
3,2013-02-13,28.03
4,2013-02-14,28.04
...,...,...
1314,2018-04-04 00:00:00,
1315,2018-04-05 00:00:00,
1316,2018-04-06 00:00:00,
1317,2018-04-07 00:00:00,


In [14]:
fcast_df['Double_Exp_Smooth'] = double_exp
fcast_df['Triple_Exp_Smooth'] = triple_exp
fcast_df['ARMA'] = arma_forecasts
fcast_df['ARIMA'] = arima_forecasts
fcast_df['SARIMA'] = sarima_forecasts
fcast_df['Prophet'] = prophet_forecasts

In [15]:
melted = pd.melt(fcast_df, id_vars='date', var_name='Variable', value_name='Value', 
                 value_vars=list(fcast_df.columns)[1:])

ilinechart(melted, 'date', 'Value', groups='Variable', 
                title='Forecast Comparison')