<h1 style="text-align:center">   
      <font color = red >
          Visualization and Time Series Analysis + Forecasting with <b>SARIMAX</b> & <b>FB Prophet</b>
        </font>    
</h1>
<hr style="width:100%;height:5px;border-width:0;color:gray;background-color:gray">
<center><img src="https://media.giphy.com/media/xT5LMWNOjGqJzUfyve/giphy.gif"></center>

<p style="background-color:#C9DFEC;font-family:newtimeroman;color:#033E3E;font-size:200%;text-align:center;border-radius:40px 40px;">IMPORTING LIBRARIES</p>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
import warnings
# Plotly libraries
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.colors import n_colors
from plotly.subplots import make_subplots
from plotly.offline import init_notebook_mode, iplot
from plotly import tools
from IPython.display import display, HTML


import statsmodels.api as sm
from pylab import rcParams
import scipy.stats as ss
plt.style.use('fivethirtyeight')

from fbprophet import Prophet


In [None]:
import warnings
# Suppress all warnings for the rest of the notebook. This keeps cell output
# clean, but it also hides deprecation notices.
warnings.filterwarnings('ignore')

<p style="background-color:#C9DFEC;font-family:newtimeroman;color:#033E3E;font-size:200%;text-align:center;border-radius:40px 40px;">IMPORTING DATASET</p>

In [None]:
# Read the competition's train, test and sample-submission CSV files.
df = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-jan-2022/train.csv"
)
test = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-jan-2022/test.csv"
)
sub = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-jan-2022/sample_submission.csv"
)


<p style="background-color:#C9DFEC;font-family:newtimeroman;color:#033E3E;font-size:200%;text-align:center;border-radius:40px 40px;">UNDERSTANDING THE DATASET</p>

In [None]:
# information about df
# Prints column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Display the training frame (pandas truncates long frames when rendering).
df

In [None]:
# First rows of the training frame.
df.head()

In [None]:
# Last rows of the training frame.
df.tail()

<p style="background-color:#C9DFEC;font-family:newtimeroman;color:#033E3E;font-size:200%;text-align:center;border-radius:40px 40px;">FEATURE ENGINEERING</p>

In [None]:
# Convert the 'date' column of both frames to pandas datetimes so that the
# .dt accessors used further down are available.
for frame in (df, test):
    frame['date'] = pd.to_datetime(frame['date'])

In [None]:
# Extracting year and month data for future uses:
def to_date_column(df):
    """Add calendar feature columns derived from ``df["date"]``, in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime ``date`` column. The frame is modified in
        place; nothing is returned.

    Columns added
    -------------
    year, month, day
        Calendar components of ``date``.
    is_weekday
        Day-of-week number (Monday=0 ... Sunday=6). Despite its name this is
        not a boolean; the name is kept because later cells group on it.
    is_weekend
        True when the date falls on a Saturday (5) or Sunday (6).
    """
    dates = df["date"].dt
    day_of_week = dates.dayofweek

    df["year"] = dates.year
    df["month"] = dates.month
    df["day"] = dates.day
    df["is_weekday"] = day_of_week
    # The weekend test must look at the day-of-week number, not at the
    # timestamp itself: a Timestamp is never equal to 5 or 6.
    df["is_weekend"] = day_of_week.isin([5, 6])
# Add the calendar columns to both frames (each frame is modified in place).
to_date_column(df)
to_date_column(test)

<p style="background-color:#C9DFEC;font-family:newtimeroman;color:#033E3E;font-size:200%;text-align:center;border-radius:40px 40px;">VISUALIZING DATA</p>

**It's time to get deeper into our data and get some insights!**

In [None]:
# Re-check the schema now that the calendar columns have been added.
df.info()

In [None]:
# (number of rows, number of columns) of the training frame.
df.shape

In [None]:
# Summary statistics of the training frame.
df.describe()

In [None]:
# Units sold over time, one coloured line per country.
fig = px.line(
    data_frame=df,
    x="date",
    y="num_sold",
    color="country",
    title='Number of Products sold between 2015-2018',
)
fig.show()

In [None]:
# Inspecting time series and rolling mean:

# Total units sold per date, indexed by date. `crossing` holds the same
# aggregate under its original name, as an independent copy.
tseries = df.groupby('date')[['num_sold']].sum()
crossing = tseries.copy()

# Triangular-weighted rolling mean over 12 consecutive rows. The series has
# one row per distinct date, so the window spans 12 dates, not 12 months
# (presumably the dates are contiguous days; verify against the data).
rmean = crossing.rolling(12, win_type='triang').mean()

# Unpack the (Figure, Axes) pair and draw both lines on the same axes.
fig, ax = plt.subplots(figsize=(30, 15))

g = sns.lineplot(x=tseries.index, y='num_sold',
                 data=tseries, label="Actual Time Series", ax=ax)

g = sns.lineplot(x=rmean.index, y='num_sold',
                 data=rmean, label="Rolling Mean 12 Days", ax=ax)

ax.legend(fontsize='xx-large')
plt.show()

In [None]:
# Decomposing the data:

# Additive decomposition of the aggregated series.
decompose = sm.tsa.seasonal_decompose(
    tseries, model='additive', extrapolate_trend='freq')

# Plot: one stacked panel per component, labelled on the y axis.
fig, axes = plt.subplots(nrows=4, figsize=(16, 8))

panels = [
    (decompose.observed, 'Observed'),
    (decompose.trend, 'Trend'),
    (decompose.seasonal, 'Seasonal'),
    (decompose.resid, 'Residual'),
]
for axis, (component, ylabel) in zip(axes, panels):
    component.plot(ax=axis, legend=False)
    axis.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

In [None]:
# Total units sold per calendar month, split by store.
month_data = df.groupby(['month', 'store'], as_index=False)['num_sold'].sum()

fig = plt.subplots(figsize=(30, 15))
sns.barplot(
    data=month_data,
    x='month',
    y='num_sold',
    hue='store',
    palette='plasma',
)
plt.legend(fontsize='xx-large')
plt.show()

In [None]:
# Total units sold per calendar month, split by the is_weekday column.
month_data = df.groupby(['month', 'is_weekday'], as_index=False)['num_sold'].sum()

fig = plt.subplots(figsize=(30, 15))
sns.barplot(
    data=month_data,
    x='month',
    y='num_sold',
    hue='is_weekday',
    palette='plasma',
)
plt.legend(fontsize='xx-large')
plt.show()

In [None]:
# Total units sold per calendar month, split by country.
month_data = df.groupby(['month', 'country'], as_index=False)['num_sold'].sum()

fig = plt.subplots(figsize=(30, 15))
sns.barplot(
    data=month_data,
    x='month',
    y='num_sold',
    hue='country',
    palette='plasma',
)
plt.legend(fontsize='xx-large')
plt.show()

In [None]:
# Mean units sold for every (year, month, store) combination.
df_y_m_st = (
    df.groupby(['year', 'month', 'store'])['num_sold']
    .mean()
    .reset_index()
)

# One bubble per (month, store); bubble size and colour both encode the mean,
# and each year gets its own facet row.
fig = px.scatter(
    df_y_m_st,
    x='month',
    y='store',
    size='num_sold',
    color='num_sold',
    facet_row='year',
    title='Average Sales: Store Type Vs Year(Month)',
)

# Styling: pad the y tick labels and show month names instead of numbers.
fig.update_yaxes(ticksuffix='  ')
fig.update_xaxes(
    tickmode='array',
    tickvals=list(range(1, 13)),
    ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
)
fig.update_layout(
    height=900,
    xaxis_title='',
    yaxis_title='',
    margin={'t': 70, 'b': 0},
    plot_bgcolor='#fafafa',
    paper_bgcolor='#fafafa',
    title_font={'size': 29, 'color': '#444', 'family': "Lato, sans-serif"},
    font={'color': '#555'},
    hoverlabel={'bgcolor': "#f2f2f2", 'font_size': 13,
                'font_family': "Lato, sans-serif"},
)
fig.show()

In [None]:
# data
# Mean units sold for every (year, month, country) combination.
df_y_m_st = df.groupby(['year','month','country']).agg({"num_sold" : "mean"}).reset_index()

# chart
# One bubble per (month, country); size and colour both encode the mean and
# each year gets its own facet row. The title names the country dimension
# that is actually plotted on the y axis.
fig = px.scatter(df_y_m_st, x='month', y='country', color='num_sold', size='num_sold', 
                 facet_row='year', title='Average Sales: Country Vs Year(Month)')
# styling
# Pad the y tick labels and show month names instead of month numbers.
fig.update_yaxes(ticksuffix='  ')
fig.update_xaxes(tickmode = 'array', tickvals=[i for i in range(1,13)], 
                 ticktext=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
fig.update_layout(height=900, xaxis_title='', yaxis_title='',
                  margin=dict(t=70, b=0),
                  plot_bgcolor='#fafafa', paper_bgcolor='#fafafa',
                  title_font=dict(size=29, color='#444', family="Lato, sans-serif"),
                  font=dict(color='#555'), 
                  hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"))
fig.show()

In [None]:
# data
# Mean units sold for every (year, month, product) combination.
df_y_m_st = df.groupby(['year','month','product']).agg({"num_sold" : "mean"}).reset_index()

# chart
# One bubble per (month, product); size and colour both encode the mean and
# each year gets its own facet row. The title names the product dimension
# that is actually plotted on the y axis.
fig = px.scatter(df_y_m_st, x='month', y='product', color='num_sold', size='num_sold', 
                 facet_row='year', title='Average Sales: Product Vs Year(Month)')
# styling
# Pad the y tick labels and show month names instead of month numbers.
fig.update_yaxes(ticksuffix='  ')
fig.update_xaxes(tickmode = 'array', tickvals=[i for i in range(1,13)], 
                 ticktext=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
fig.update_layout(height=900, xaxis_title='', yaxis_title='',
                  margin=dict(t=70, b=0),
                  plot_bgcolor='#fafafa', paper_bgcolor='#fafafa',
                  title_font=dict(size=29, color='#444', family="Lato, sans-serif"),
                  font=dict(color='#555'), 
                  hoverlabel=dict(bgcolor="#f2f2f2", font_size=13, font_family="Lato, sans-serif"))
fig.show()

In [None]:
def f(row):
    """Return the three-letter ISO country code for one row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a ``'country'`` entry.

    Returns
    -------
    str
        ``'SWE'`` for Sweden, ``'NOR'`` for Norway, and ``'FIN'`` for any
        other value (the fallback the original if/else chain used).
    """
    # Look the country up by the row's value; comparing the list literal
    # ['country'] to a string can never be true and would send Norway to the
    # fallback.
    iso_by_country = {'Sweden': 'SWE', 'Norway': 'NOR'}
    return iso_by_country.get(row['country'], 'FIN')
# Row-wise apply of f: store each row's three-letter country code in 'ISO'.
df['ISO'] = df.apply(f, axis=1)

In [None]:
# Total units sold per year, split by store.
year_data = df.groupby(['year', 'store'], as_index=False)['num_sold'].sum()

fig = plt.subplots(figsize=(30, 15))
sns.barplot(
    data=year_data,
    x='year',
    y='num_sold',
    hue='store',
    palette='plasma',
)
plt.legend(fontsize='xx-large')
plt.show()

In [None]:
# Total units sold per year, split by calendar month.
year_data = df.groupby(['year', 'month'], as_index=False)['num_sold'].sum()

fig = plt.subplots(figsize=(30, 15))
sns.barplot(
    data=year_data,
    x='year',
    y='num_sold',
    hue='month',
    palette='plasma',
)
plt.legend(fontsize='xx-large')
plt.show()

In [None]:
# Total units sold per year, split by country.
year_data = df.groupby(['year', 'country'], as_index=False)['num_sold'].sum()

fig = plt.subplots(figsize=(30, 15))
sns.barplot(
    data=year_data,
    x='year',
    y='num_sold',
    hue='country',
    palette='plasma',
)
plt.legend(fontsize='xx-large')
plt.show()

# Forecasting With SARIMAX

**We are going to apply one of the most common forecasting methods: ARIMA (Autoregressive Integrated Moving Average), used here in its seasonal SARIMAX form.**

In [None]:
# Choosing train data:
# Label-based slice on the date index: rows from the start of 2015 through
# the end of 2018 (both ends inclusive).

train = tseries.loc['2015':'2018']

In [None]:
# Enumerate candidate SARIMA orders and print a few example pairings.

# Each of p, d and q may be 0 or 1.
p = d = q = range(2)

# Every non-seasonal (p, d, q) triple ...
pdq = [combo for combo in itertools.product(p, d, q)]

# ... and the same triples with a seasonal period of 12 appended.
seasonal_pdq = [(*combo, 12) for combo in itertools.product(p, d, q)]

print('Examples of parameter combinations for Seasonal ARIMA...')
for order_idx, seasonal_idx in ((1, 1), (1, 2), (2, 3), (2, 4)):
    print('SARIMAX: {} x {}'.format(pdq[order_idx], seasonal_pdq[seasonal_idx]))

In [None]:

# Seasonal ARIMA (1, 1, 1) x (1, 1, 1, 12) on the training series, with the
# stationarity and invertibility constraints switched off.
# NOTE(review): the seasonal period is 12 while the series is aggregated per
# date - confirm that this period is the intended one.
mod = sm.tsa.statespace.SARIMAX(
    endog=train,
    order=(1, 1, 1),
    seasonal_order=(1, 1, 1, 12),
    enforce_invertibility=False,
    enforce_stationarity=False,
)
results = mod.fit()

# Print only the second table of the fit summary.
print(results.summary().tables[1])

In [None]:
# Investigating the results:
# Draw the fitted model's built-in diagnostic plots.

results.plot_diagnostics(figsize=(16, 8))
plt.show()

In [None]:
# Validating forecasts:

# Non-dynamic predictions from 2018-01-01 onward, plus their interval bounds.
pred = results.get_prediction(start=pd.to_datetime('2018-01-01'),
                              dynamic=False)
pred_ci = pred.conf_int()

# Observed series first, then the predicted mean on the same axes.
ax = tseries['2015':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast',
                         alpha=.6, figsize=(14, 7))

# Shade the band between the lower (column 0) and upper (column 1) bounds.
lower_bound = pred_ci.iloc[:, 0]
upper_bound = pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.1)

ax.set(xlabel='Date', ylabel='Total Entries')

plt.legend()
plt.show()

In [None]:
# Forecasting.

# Forecast 50 steps past the end of the fitted series, with interval bounds.
pred_uc = results.get_forecast(steps=50)
pred_ci = pred_uc.conf_int()

# Full observed series first, then the forecast mean on the same axes.
ax = tseries.plot(label='Total Entries', figsize=(14, 7))
pred_uc.predicted_mean.plot(ax=ax, label='Forecast')

# Shade the band between the lower (column 0) and upper (column 1) bounds.
lower_bound = pred_ci.iloc[:, 0]
upper_bound = pred_ci.iloc[:, 1]
ax.fill_between(pred_ci.index, lower_bound, upper_bound, color='k', alpha=.15)

ax.set(xlabel='Date', ylabel='Total Entries')

plt.legend()
plt.show()

# Simple Forecasting With Prophet

**Developed by the Core Data Science team at Facebook, Prophet is a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality effects.**

**We're going to forecast with another model to see how it performs against the other one we used.**

**Prophet imposes the strict condition that the input columns be named ds (the date column in our case) and y (the number of items sold in our case), so we should rename the columns in our DataFrame:**

In [None]:
# Choosing train data:
# Slice the date-indexed series again (2015 through 2018, inclusive) so this
# section starts from the aggregated daily totals.

train = tseries.loc['2015':'2018']

In [None]:
# Prepare the training frame for Prophet: move the date index back into a
# column, then rename the columns to the names Prophet requires (ds and y).
train = (
    train
    .reset_index(level=0)
    .rename(columns={'date': 'ds', 'num_sold': 'y'})
)

In [None]:

# Rename the test frame's date column to 'ds', matching the training frame.
test = test.rename(columns={'date': 'ds'})

In [None]:
# Additive-seasonality Prophet model with a 0.90 interval width
# (0.80 is the default).
model = Prophet(
    seasonality_mode='additive',
    interval_width=0.90,
)

model.fit(train)

In [None]:
# Extend the training dates by 60 month-start ('MS') periods, i.e. 5 years.
# NOTE(review): the training series has one row per date while the future
# steps are monthly - confirm that this frequency is intended.
future = model.make_future_dataframe(
    freq='MS',
    periods=60,
)

future.tail()

In [None]:
# Predict over the history plus the future dates.
forecast = model.predict(future)

# Show the point forecast together with its lower and upper interval bounds.
forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
forecast[forecast_columns].head()

In [None]:
# Plotting the trend with changepoints in the series using fbprophet:
# NOTE(review): this import lives mid-notebook; consider moving it to the
# import cell at the top.

from fbprophet.plot import add_changepoints_to_plot

# Draw the forecast, then overlay the changepoints on the figure's current axes.
fig = model.plot(forecast)

add_changepoints_to_plot(fig.gca(), model, forecast)

plt.show()

In [None]:
# Plotting the components of the series:
# One panel per component of the fitted Prophet forecast.

model.plot_components(forecast)
plt.show()

In [None]:
# Collapse the per-sale rows to one row per country (total units sold).
# Passing the raw frame hands the map thousands of rows for the same location
# instead of a single value per country.
country_totals = df.groupby(['country', 'ISO'], as_index=False)['num_sold'].sum()

# Colour each country by its total; 'ISO' holds three-letter country codes.
fig = px.choropleth(country_totals, locations="ISO",
                    color="num_sold", 
                    hover_name="country",
                    color_continuous_scale=px.colors.sequential.Plasma)
fig.show()