# Time Series Forecasting Assignment

In [0]:
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from fbprophet import Prophet

### Import the Microsoft stock price data set (MSFT_data.csv) into a Pandas dataframe.

In [0]:
# Source CSV with the daily MSFT price records used throughout the notebook.
MSFT_CSV_URL = "https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/MSFT_data.csv"
df = pd.read_csv(MSFT_CSV_URL)

In [0]:
# Peek at five randomly chosen rows to sanity-check the load.
df.sample(n=5)

Unnamed: 0,date,open,high,low,close,volume,Name
689,2015-11-03,52.93,54.39,52.9,54.15,36596931,MSFT
657,2015-09-18,43.5,43.99,43.33,43.48,63143684,MSFT
1213,2017-12-01,83.6,84.81,83.22,84.26,29532132,MSFT
729,2015-12-31,56.04,56.19,55.42,55.48,27334061,MSFT
420,2014-10-09,46.5,46.8,45.74,45.85,34426595,MSFT


In [0]:
# Convert the "date" column to pandas datetimes.
df["date"] = df["date"].pipe(pd.to_datetime)

### Generate a line chart showing the observed values (closing prices).

In [0]:
# Line chart of the observed closing price over time.
px.line(df, x="date", y="close")

### Decompose the time series and check it for stationarity. If the data is not stationary, difference the observations and store the results in a new Diff column.

In [0]:
# First difference of the closing price (the first row has no predecessor, so it is NaN).
# NOTE(review): no decomposition or stationarity test is run before differencing;
# adfuller is imported above but not used in this cell — confirm whether that
# step was intentionally skipped.
df["diff"] = df["close"] - df["close"].shift(1)

### Forecast the time series 60 days into the future using double and triple exponential smoothing models.

In [0]:
# Double exponential smoothing: additive trend, no seasonal component.
model = sm.tsa.ExponentialSmoothing(df["close"], trend="add").fit()
double_exp = model.forecast(60)

# Triple exponential smoothing: additive trend plus an additive seasonal term.
# NOTE(review): seasonal_periods=4 is kept as-is; confirm it matches a real cycle
# in the daily price data.
model = sm.tsa.ExponentialSmoothing(
    df["close"], trend="add", seasonal="add", seasonal_periods=4
).fit()
triple_exp = model.forecast(60)

### Forecast the time series 60 days into the future using ARMA, ARIMA, and SARIMA models.

In [0]:
def forecast(data, field, model, periods):
    """Convert a model's forecast of differences into forecast levels.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the observed series. Its index must support ``+ 1``
        (e.g. the default integer index) so the forecast can continue it.
    field : str
        Column of ``data`` with the observed levels; its last value seeds
        the running total.
    model : object
        Fitted model whose ``forecast(periods)`` returns a sequence whose
        first element is the iterable of forecast differences.
    periods : int
        Number of steps to request from ``model``.

    Returns
    -------
    pandas.Series
        Forecast levels, indexed by the integer labels that immediately
        follow the last label of ``data[field]``.
    """
    steps = model.forecast(periods)[0]
    last_observed = data[field].iloc[-1]

    # Prepending the last observed level and taking a cumulative sum reproduces
    # "previous level + predicted change" step by step; the seed is then dropped.
    levels = pd.Series([last_observed, *steps], dtype="float64").cumsum().iloc[1:]

    # Continue the index right after the last observed label. Sizing it from the
    # forecasts themselves keeps this working when `data` has fewer than
    # `periods` rows, where shifting the tail of the index would not fit.
    first_label = data[field].index[-1] + 1
    levels.index = pd.RangeIndex(first_label, first_label + len(levels))
    return levels

In [0]:
# Only the differenced column needs filling (its first row has no predecessor).
# Filling the whole frame would also turn any missing price or volume into 0,
# which would then be treated as a real observation downstream.
df["diff"] = df["diff"].fillna(0)

In [0]:
# Show the first five rows, including the new "diff" column.
df.head(n=5)

Unnamed: 0,date,open,high,low,close,volume,Name,diff
0,2013-02-08,27.35,27.71,27.31,27.55,33318306,MSFT,0.0
1,2013-02-11,27.65,27.92,27.5,27.86,32247549,MSFT,0.31
2,2013-02-12,27.88,28.0,27.75,27.88,35990829,MSFT,0.02
3,2013-02-13,27.93,28.11,27.88,28.03,41715530,MSFT,0.15
4,2013-02-14,27.92,28.06,27.87,28.04,32663174,MSFT,0.01


In [0]:
# ARMA(3, 1) fitted on the differenced closes; the forecast() helper turns the
# predicted changes back into price levels starting from the last close.
model = ARMA(endog=df["diff"], order=(3, 1)).fit()
ar_forecast = forecast(data=df, field="close", model=model, periods=60)

In [0]:
# Fit on the raw closes: the d=1 term already differences the series once, so
# feeding ARIMA the pre-differenced "diff" column would difference it twice.
model = ARIMA(df["close"], order=(3, 1, 1)).fit()

# NOTE(review): assumes the legacy statsmodels.tsa.arima_model API, where
# forecast() returns a tuple whose first element is the forecast expressed in
# the levels of the fitted series — confirm against the installed version.
arima_start = df.index[-1] + 1
arima_forecast = pd.Series(
    model.forecast(60)[0],
    index=pd.RangeIndex(arima_start, arima_start + 60),
)

In [0]:
# Display the ARIMA forecast series for inspection.
arima_forecast

In [0]:
# A seasonal period of 1 makes the seasonal AR/MA/differencing terms operate on
# the same lag as the non-seasonal ones, so the model is not identifiable (and
# newer statsmodels releases reject it outright). Use a period longer than 1.
# NOTE(review): 5 assumes a weekly cycle across trading days — tune if a
# different seasonality is intended.
model = SARIMAX(df["close"], order=(3, 1, 1), seasonal_order=(1, 1, 1, 5)).fit()
sarima_forecast = model.forecast(60)


Maximum Likelihood optimization failed to converge. Check mle_retvals



### Forecast the time series 60 days into the future using the Facebook Prophet model.

In [0]:
# Prophet expects the timestamp column to be named "ds" and the target "y".
series = df[["date", "close"]].rename(columns={"date": "ds", "close": "y"})

model = Prophet()
model.fit(series)

# Extend the history by 60 periods and predict over the whole range.
# NOTE(review): the default frequency here is presumably calendar days, whereas
# the other models step through trading-day observations — confirm the horizons
# are comparable.
future = model.make_future_dataframe(periods=60)
results = model.predict(future)

# Keep only the point forecasts for the 60 appended periods.
prophet_forecast = results["yhat"].iloc[-60:]

INFO:numexpr.utils:NumExpr defaulting to 2 threads.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


### Combine the observed values and all the forecasts into a single data frame and generate a line chart to visually compare the different models.

In [0]:
# The 60 calendar dates that follow the last observation. Starting one day after
# the final observed date yields the same dates as asking for 61 periods and
# dropping the left endpoint, without relying on date_range's `closed` keyword,
# which newer pandas releases no longer accept.
future_dates = pd.DataFrame(
    {
        "date": pd.date_range(
            start=df["date"].iloc[-1] + pd.Timedelta(days=1),
            periods=60,
            freq="D",
        )
    }
)

# Observed dates followed by the future dates, on a fresh 0..N integer index.
fcast_df = pd.concat(
    [pd.DataFrame(df["date"]), future_dates], ignore_index=True)

# Bring in the observed closes; future rows have no match and stay NaN.
fcast_df = fcast_df.merge(
    df[["date", "close"]], on="date", how="left"
)

# Each forecast is a Series, so assignment aligns on the integer row index.
# NOTE(review): assumes every forecast is labelled with the row numbers that
# follow the last observation — confirm for each model's output.
fcast_df["de"] = double_exp
fcast_df["te"] = triple_exp
fcast_df["ar"] = ar_forecast
fcast_df["arima"] = arima_forecast
fcast_df["sarima"] = sarima_forecast
# The Prophet forecast was computed earlier but never added to the comparison.
fcast_df["prophet"] = prophet_forecast

In [0]:
# Reshape to long form for plotting: one row per (date, series) pair.
# The Prophet forecast belongs in the comparison too, so "prophet" is included
# whenever that column exists on fcast_df; filtering on the columns actually
# present keeps the legend order fixed and avoids a KeyError when a series is absent.
forecast_columns = ["de", "te", "ar", "arima", "sarima", "prophet", "close"]
melted_fcasts = pd.melt(
    fcast_df,
    id_vars="date",
    value_vars=[col for col in forecast_columns if col in fcast_df.columns],
)

In [0]:
# One line per series (observed closes and each forecast), coloured by series name.
px.line(
    data_frame=melted_fcasts,
    x="date",
    y="value",
    color="variable",
)

In [0]:
# Hold out the final 60 rows as the test set; everything before them is training data.
train, test = df.iloc[:-60], df.iloc[-60:]

In [0]:
# Number of rows in the hold-out set.
test.shape[0]

60

In [0]:
# Last five training rows, to check where the split falls.
train.tail(n=5)

Unnamed: 0,date,open,high,low,close,volume,Name,diff
1194,2017-11-03,84.08,84.54,83.4,84.14,17633543,MSFT,0.09
1195,2017-11-06,84.2,84.7,84.0825,84.47,19860852,MSFT,0.33
1196,2017-11-07,84.77,84.9,83.93,84.27,17939727,MSFT,-0.2
1197,2017-11-08,84.14,84.61,83.83,84.56,18034170,MSFT,0.29
1198,2017-11-09,84.11,84.27,82.9,84.09,21171550,MSFT,-0.47


In [0]:
# First five hold-out rows; they should follow directly after the training tail.
test.head(n=5)

Unnamed: 0,date,open,high,low,close,volume,Name,diff
1199,2017-11-10,83.79,84.095,83.23,83.87,19397793,MSFT,-0.22
1200,2017-11-13,83.66,83.94,83.46,83.93,14196896,MSFT,0.06
1201,2017-11-14,83.5,84.1,82.98,84.05,18801280,MSFT,0.12
1202,2017-11-15,83.47,83.69,82.69,82.98,19383102,MSFT,-1.07
1203,2017-11-16,83.1,83.42,82.94,83.2,20962750,MSFT,0.22
