# Time Series Forecasting Assignment

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



In [2]:
def ilinechart(df, x, y, groups=None, title=''):
    fig = px.line(df, x=x, y=y, color=groups, title=title, template='none').update(layout=dict(title=dict(x=0.5)))
    fig.show()

### Import the Microsoft stock price data set (MSFT_data.csv) into a Pandas dataframe.

In [3]:
MSFT = 'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/MSFT_data.csv'
data = pd.read_csv(MSFT)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1259 entries, 0 to 1258
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    1259 non-null   object 
 1   open    1259 non-null   float64
 2   high    1259 non-null   float64
 3   low     1259 non-null   float64
 4   close   1259 non-null   float64
 5   volume  1259 non-null   int64  
 6   Name    1259 non-null   object 
dtypes: float64(4), int64(1), object(2)
memory usage: 69.0+ KB


In [4]:
data['date'] = pd.to_datetime(data['date'])

### Generate a line chart showing the observed values (closing prices).

In [5]:
ilinechart(data, 'date', 'close', title='Closing Prices Over Time')

### Decompose the time series and check it for stationarity. If the data is not stationary, difference the observations and store the results in a new Diff column.

In [6]:
data = data[['date','close']]

In [7]:
train = data.iloc[:-60]
test = data.iloc[-60:]  

In [8]:
series = train.set_index('date')

In [9]:
decompose = sm.tsa.seasonal_decompose(series, model='additive', freq=252)
trend = decompose.trend.reset_index()
seasonality = decompose.seasonal.reset_index()
residuals = decompose.resid.reset_index()

In [10]:
merged = train.merge(trend, on='date')
merged = merged.merge(seasonality, on='date')
merged = merged.merge(residuals, on='date')

merged.columns = ['date', 'close', 'trend', 'seasonality', 'residuals']
merged['t+s'] = merged['trend'] + merged['seasonality']
merged.head()

Unnamed: 0,date,close,trend,seasonality,residuals,t+s
0,2013-02-08,27.55,,-1.252286,,
1,2013-02-11,27.86,,-1.272395,,
2,2013-02-12,27.88,,-0.678818,,
3,2013-02-13,28.03,,-0.379607,,
4,2013-02-14,28.04,,-0.168144,,


In [11]:
melted = pd.melt(merged, id_vars='date', var_name='Variable', value_name='Value',
                         value_vars=['close', 'trend', 'seasonality', 'residuals', 't+s'])

ilinechart(melted, 'date', 'Value', groups='Variable', title='Observed vs Components')

### Forecast the time series 60 days into the future using double and triple exponential smoothing models.

In [12]:
modelDES = sm.tsa.ExponentialSmoothing(train['close'], trend='additive').fit()
double_exp = modelDES.forecast(60)
modelTES = sm.tsa.ExponentialSmoothing(train['close'], trend='additive', seasonal='additive', seasonal_periods=4).fit()
triple_exp = modelTES.forecast(60)


Optimization failed to converge. Check mle_retvals.



### Forecast the time series 60 days into the future using ARMA, ARIMA, and SARIMA models.

In [13]:
def forecast(data, field, model, periods):
    model_results = model.forecast(periods)[0]
    current_value = data[field].iloc[-1]
    forecasts = []
    
    for result in model_results:
        forecast = current_value + result
        forecasts.append(forecast)
        current_value = forecast
    
    forecasts = pd.Series(forecasts)
    forecasts.index = [x + periods 
                       for x in list(data[field].tail(periods).index)]
    
    return forecasts

In [14]:
train['shift'] = train['close'].shift(1).fillna(method='bfill')
train['diff'] = train['close'] - train['shift']



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [15]:
modelARMA = ARMA(train['diff'], order=(2, 1)).fit()
arma_forecasts = forecast(train, 'close', modelARMA, 60)

In [16]:
modelARIMA = ARIMA(train['diff'], order=(1,1,1)).fit()
arima_forecasts = forecast(train, 'close', modelARIMA, 60)

In [17]:
modelSARIMA = SARIMAX(train['close'], order=(2, 1, 1), seasonal_order=(1, 1, 1, 1)).fit()
sarima_forecasts = modelSARIMA.forecast(60)


Maximum Likelihood optimization failed to converge. Check mle_retvals



### Forecast the time series 60 days into the future using the Facebook Prophet model.

In [18]:
series1 = train[['date', 'close']]
series1.columns = ['ds', 'y']
modelPROPHET = Prophet()
modelPROPHET.fit(series1)
future = modelPROPHET.make_future_dataframe(60)
results = modelPROPHET.predict(future)
prophet_forecasts = results.iloc[-60:]['yhat']

INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


### Combine the observed values and all the forecasts into a single data frame and generate a line chart to visually compare the different models.

In [19]:
future_dates = pd.DataFrame({'date': pd.date_range(start=train.date.iloc[-1], periods=61, 
                                                   freq='D', closed='right')})

fcast_df = pd.concat([pd.DataFrame(train['date']), future_dates], ignore_index=True)
fcast_df = fcast_df.merge(data[['date', 'close']], on='date', how='left')
#fcast_df = fcast_df.merge(test[['date', 'close']], on='date', how='left')
#fcast_df.tail(55)

In [20]:
fcast_df['Double_Exp_Smooth'] = double_exp
fcast_df['Triple_Exp_Smooth'] = triple_exp
fcast_df['ARMA'] = arma_forecasts
fcast_df['ARIMA'] = arima_forecasts
fcast_df['SARIMA'] = sarima_forecasts
fcast_df['Prophet'] = prophet_forecasts
#fcast_df.tail(55)
melted = pd.melt(fcast_df, id_vars='date', var_name='Variable', value_name='Value', 
                 value_vars=list(fcast_df.columns)[1:])

ilinechart(melted, 'date', 'Value', groups='Variable', 
                title='Forecast Comparison')

In [21]:
print(data.shape)
print(train.shape)
print(test.shape)
print(fcast_df.shape)

(1259, 2)
(1199, 4)
(60, 2)
(1259, 8)


In [22]:
data.head()

Unnamed: 0,date,close
0,2013-02-08,27.55
1,2013-02-11,27.86
2,2013-02-12,27.88
3,2013-02-13,28.03
4,2013-02-14,28.04


In [23]:
train.head()

Unnamed: 0,date,close,shift,diff
0,2013-02-08,27.55,27.55,0.0
1,2013-02-11,27.86,27.55,0.31
2,2013-02-12,27.88,27.86,0.02
3,2013-02-13,28.03,27.88,0.15
4,2013-02-14,28.04,28.03,0.01


In [24]:
test.head()

Unnamed: 0,date,close
1199,2017-11-10,83.87
1200,2017-11-13,83.93
1201,2017-11-14,84.05
1202,2017-11-15,82.98
1203,2017-11-16,83.2


In [25]:
fcast_df.tail()

Unnamed: 0,date,close,Double_Exp_Smooth,Triple_Exp_Smooth,ARMA,ARIMA,SARIMA,Prophet
1254,2018-01-04,87.11,86.734117,98.670799,83.580564,88.68924,85.608066,83.336427
1255,2018-01-05,88.19,86.781313,98.835291,83.612805,88.772916,85.676531,83.414986
1256,2018-01-06,,86.82851,99.118877,83.64556,88.856647,85.74556,83.357538
1257,2018-01-07,,86.875707,99.71548,83.678807,88.940433,85.815128,83.341435
1258,2018-01-08,88.28,86.922903,99.710448,83.712527,89.024274,85.885215,83.324458


In [26]:
models = ['Double_Exp_Smooth', 'Triple_Exp_Smooth', 'ARMA', 'ARIMA', 'SARIMA', 'Prophet']

for model in models:
    diff = fcast_df['close'] - fcast_df[model]
    mae = diff.abs().mean()
    rmse = np.sqrt(np.mean(diff**2))
    print(f'Model {model} - MAE: {mae} | RMSE: {rmse}')

Model Double_Exp_Smooth - MAE: 1.1184156304966268 | RMSE: 1.457760390994032
Model Triple_Exp_Smooth - MAE: 7.303889944060277 | RMSE: 8.007305951699085
Model ARMA - MAE: 1.7102098438135498 | RMSE: 2.0982119689037018
Model ARIMA - MAE: 1.9119431158371407 | RMSE: 2.185379363719293
Model SARIMA - MAE: 0.9553247400817577 | RMSE: 1.2146182491193316
Model Prophet - MAE: 1.8388103246571494 | RMSE: 2.1176592122552873
