<a href="https://colab.research.google.com/github/liumOazed/Forecasting/blob/main/tensorflow_structural_time_series_template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import libraries and data


In [2]:
%cd /content/drive/MyDrive/Forecasting

/content/drive/MyDrive/Forecasting


In [2]:
#import libraries
import pandas as pd
import numpy as np

In [3]:
# Load the daily bike-sharing CSV; the "dteday" column becomes a parsed date index
data = pd.read_csv(
    "Daily Bike Sharing.csv",
    parse_dates=True,
    index_col="dteday",
)
data.head(1)

FileNotFoundError: ignored

In [1]:
# Attach Google Drive at /content/drive (Colab only).
# NOTE(review): other cells use paths under /content/drive, so this presumably
# has to run before them on a fresh runtime — confirm the cell order.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Keep the target ("cnt") plus the candidate regressors
selected_columns = ["cnt", "holiday", "workingday", "weathersit",
                    "temp", "atemp", "hum", "windspeed"]
dataset = data.loc[:, selected_columns]
dataset.head(1)

# Data Transformation

In [None]:
# Rename the target column from "cnt" to "y"
target_rename = {'cnt': 'y'}
dataset = dataset.rename(columns=target_rename)
dataset.head(1)

In [None]:
# Give the date index an explicit daily ("D") frequency, then display the index.
# NOTE(review): asfreq adds NaN rows for any dates missing from the data —
# confirm the series has no gaps.
dataset = dataset.asfreq("D")
dataset.index

# Visualization

In [None]:
# Line plot of the target series over the whole period
dataset["y"].plot(legend=True, figsize=(10, 7))

# Training and Test Set

In [None]:
# Hold out the last `test_days` rows as the test set; everything before is training
test_days = 31
training_set, test_set = dataset.iloc[:-test_days], dataset.iloc[-test_days:]
test_set.tail(1)

## Focus on regressors (Exogenous Variables)

Steps:
1. Convert the exogenous features to a matrix
2. Make sure they are stored as float64 (decimal) values
3. Initiate the Linear Regression from TensorFlow Probability
4. Define the seasonality components
5. Define the autoregressive and trend components
6. Create the model
7. Fit the model
8. Forecast using the trained model
9. Finally, extract the predictions

In [None]:
#get library
import tensorflow_probability as tfp

In [None]:
# Isolate the regressors: every column after the target, as a float64 array.
# A plain ndarray is used instead of np.asmatrix because NumPy no longer
# recommends the matrix class.
# The rows come from the full `dataset`, not only from the training rows.
exog = dataset.iloc[:, 1:].to_numpy(dtype=np.float64)
exog[1]

In [None]:
# Linear regression component over the exogenous regressors.
# NOTE(review): `exog` is built from the full `dataset`, so the design matrix
# presumably covers the forecast window as well as the training rows — confirm
# this matches what tfp.sts.LinearRegression expects.
regressors = tfp.sts.LinearRegression(design_matrix=exog, name="regressors")

In [None]:
# Seasonality

# Dependent variable: the training target as float64
y = training_set.loc[:, 'y'].astype(np.float64)
y.head(4)

In [None]:
# Day-of-week seasonality: 7 seasons, each lasting a single step
weekday_effect = tfp.sts.Seasonal(
    num_seasons=7,
    num_steps_per_season=1,
    observed_time_series=y,
    name='weekday_effect',
)

In [None]:
# Number of days in each month, one row per year covered by the data
common_year = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]  # 2011
leap_year = list(common_year)
leap_year[1] = 29  # 2012: February has 29 days
num_days_per_month = np.array([common_year, leap_year])

In [None]:
# Month-of-year seasonality: 12 seasons whose lengths come from
# num_days_per_month
monthly_effect = tfp.sts.Seasonal(
    num_seasons=12,
    num_steps_per_season=num_days_per_month,
    observed_time_series=y,
    name="monthly_effect",
)

In [None]:
# Trend component (semi-local linear trend)
trend = tfp.sts.SemiLocalLinearTrend(
    name='trend',
    observed_time_series=y,
)

In [None]:
# Autoregressive component.
# The order is a tuning choice: start with 1 and adjust it if needed.
autoregressive = tfp.sts.Autoregressive(
    order=1,
    observed_time_series=y,
    name='autoregressive',
)

# Tensorflow Structural TimeSeries Forecasting

In [None]:
# Forecasting model: the sum of all structural components defined so far
components = [
    regressors,
    weekday_effect,
    monthly_effect,
    autoregressive,
    trend,
]
model = tfp.sts.Sum(components, observed_time_series=y)

Hamiltonian Monte Carlo (Algorithm)


*   Simulation used for Bayesian inference
> Causal Inference Problem Statement: We know what happened, but we do not know what led to it.
Bayes' theorem can address this (it can tell us about this causal relationship):

The Problem:
It is not possible to solve the equation analytically, and thus we simulate the outcome.


In [None]:
# Fit the model with Hamiltonian Monte Carlo; the fixed seed keeps runs repeatable
samples, kernel_results = tfp.sts.fit_with_hmc(
    model=model,
    observed_time_series=y,
    num_results=100,
    num_warmup_steps=50,
    num_leapfrog_steps=15,
    num_variational_steps=150,
    seed=1502,
)

In [None]:
# Forecast as many steps ahead as there are rows in the test set
forecast = tfp.sts.forecast(
    model=model,
    observed_time_series=y,
    parameter_samples=samples,
    num_steps_forecast=len(test_set),
)

In [None]:
# Predictions: first column of the forecast mean, indexed by the test-set dates
forecast_mean = forecast.mean()[:, 0]
predictions_tfp = pd.Series(forecast_mean, name='TFP')
predictions_tfp.index = test_set.index
predictions_tfp[:2]

In [None]:
# Plot the training data from 2012-10-01 onward with the test actuals and the forecast
training_set.loc['2012-10-01':, 'y'].plot(figsize=(9, 6), legend=True)
test_set['y'].plot(legend=True)
predictions_tfp.plot(legend=True)

# Model assessment

In [None]:
# MAE and RMSE of the forecast against the test-set actuals, rounded to whole units
from sklearn.metrics import mean_squared_error, mean_absolute_error
mae = mean_absolute_error(test_set['y'], predictions_tfp)
rmse = np.sqrt(mean_squared_error(test_set['y'], predictions_tfp))
print(round(mae, 0))
print(round(rmse, 0))

In [None]:
#MAPE function
def MAPE(y_true, y_pred):
  """Mean absolute percentage error between actuals and predictions, in percent.

  Args:
    y_true: array-like of observed values.
    y_pred: array-like of predicted values; after flattening it must have the
      same number of elements as y_true (or a single element).

  Returns:
    The mean of |y_true - y_pred| / |y_true| times 100, taken over the
    entries where y_true is non-zero. Returns NaN when no such entry exists
    (including empty input).

  Raises:
    ValueError: if the flattened inputs cannot be broadcast together.
  """
  # Flatten both inputs so a column vector (n, 1) cannot silently broadcast
  # against a flat (n,) vector into an (n, n) matrix of errors.
  y_true = np.asarray(y_true, dtype=float).ravel()
  y_pred = np.asarray(y_pred, dtype=float).ravel()
  y_true, y_pred = np.broadcast_arrays(y_true, y_pred)
  # The percentage error is undefined where the actual value is zero; those
  # entries are skipped instead of turning the whole score into inf/NaN.
  nonzero = y_true != 0
  if not nonzero.any():
    return float("nan")
  return np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
# MAPE of the forecast against the test-set actuals (value is in percent)
MAPE(test_set['y'], predictions_tfp)

# Export forecasts

In [None]:
# Switch to the folder on Google Drive where the forecast CSV is written.
# NOTE(review): this is a hard-coded absolute path — confirm it exists on the mounted drive.
%cd /content/drive/MyDrive/Forecasting Models and Time Series for Business in Python/0. Ensemble

In [None]:
# Save the forecast, index included, to predictions_tfp.csv in the current directory
predictions_tfp.to_csv('predictions_tfp.csv', index = True)