### Case Studies - Seasonal Decomposition

In [None]:
import pandas as pd
import numpy as  np

import seaborn as sns
import matplotlib.pyplot as plt

# Plot styling: seaborn "ticks" theme for every figure in the notebook.
sns.set(style="ticks")

# Keep displayed frames short: pandas truncates output beyond 12 rows.
pd.set_option("display.max_rows", 12)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

#### https://www.ons.gov.uk/peoplepopulationandcommunity/leisureandtourism/timeseries/gmaa/ott


In [None]:
# Load the GMAA series. The first 6 lines of the file are skipped and the two
# remaining columns are named explicitly; column 0 is parsed as dates.
df = pd.read_csv(
    "http://people.bu.edu/kalathur/datasets/GMAA-071119.csv",
    skiprows=6,
    header=None,
    names=['period', 'value'],
    parse_dates=[0],
)
df

In [None]:
# Promote 'period' to the index. Written without inplace=True and guarded so
# that re-running this cell is a no-op instead of raising KeyError (after the
# first run 'period' is no longer a column).
if 'period' in df.columns:
    df = df.set_index('period')
df

In [None]:
# Summary statistics of the observed series.
df['value'].describe()

In [None]:
# Completeness check: compare the number of rows with the number of calendar
# months spanned by the index (both end points included).
min_date, max_date = df.index.min(), df.index.max()

num_of_actual_points = len(df.index)
months_between = 12 * (max_date.year - min_date.year) + (max_date.month - min_date.month)
num_of_expected_points = months_between + 1

date_fmt = "%d.%m.%Y"
print("Date range: {} - {}".format(min_date.strftime(date_fmt), max_date.strftime(date_fmt)))
print("Number of data points: {} of expected {}".format(num_of_actual_points, num_of_expected_points))

In [None]:
# Whole series on one wide figure; legend pinned to the upper-left corner.
fig, ax = plt.subplots(figsize=(18, 6))
df.plot(ax=ax)
ax.legend(loc='upper left');

In [None]:
# Label-based slice of the date index: every row from 2017 through 2018.
zoom_range = df.loc['2017':'2018']
zoom_range

In [None]:
# Same plot as above, restricted to the zoomed-in window.
fig, ax = plt.subplots(figsize=(18, 6))
zoom_range.plot(ax=ax)
ax.legend(loc='upper left');

- The data shows periodic fluctuations each year and an overall upward trend.
- Thus, the time series has trend and yearly seasonality components, and we can try to separate them using the statsmodels package.
- Judging from the plots, an additive model looks like the better fit for this data.


In [None]:
from statsmodels.tsa import stattools as stt
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
decompfreq = 12  # observations per seasonal cycle: 12 months seasonality
model = 'additive'

# Missing values are filled by linear interpolation before decomposing.
# The cycle length is passed as `period`: the older `freq` keyword was
# deprecated in statsmodels 0.11 and is rejected by later releases.
decomposition = seasonal_decompose(df.value.interpolate("linear"),
                                   period=decompfreq, model=model)

In [None]:
# Gather the three decomposition components side by side, one column each.
df_decomposed = pd.DataFrame(dict(
    trend=decomposition.trend,
    seasonal=decomposition.seasonal,
    residual=decomposition.resid,
))

df_decomposed

In [None]:
# Show only the rows where every component has a value.
df_decomposed.dropna(axis=0, how='any')

In [None]:
fig, ax = plt.subplots(figsize=(18,6))

# Plot the observed values as a Series: DataFrame.plot names each line after
# its column and ignores `label`, so the legend would read "value" rather
# than "observed".
df['value'].plot(ax=ax, label="observed", color='lightgrey')

# Components keep their column names (trend / seasonal / residual) as labels.
df_decomposed.plot(ax=ax)

plt.legend(loc='upper left');

#### https://fred.stlouisfed.org/series/TOTALSA
 - in millions of units

In [None]:
# Load the vehicle sales series; the DATE column is parsed as dates and
# becomes the index.
vehicle_sales = pd.read_csv(
    'http://people.bu.edu/kalathur/datasets/total_vehiclesales.csv',
    index_col='DATE',
    parse_dates=['DATE'],
)

vehicle_sales

In [None]:
# Line plot of the raw series, labelled through the Axes object directly.
fig, ax = plt.subplots(figsize=(18, 6))
ax.set_ylabel('Sales')

vehicle_sales.plot(ax=ax)

ax.set_title('Monthly Vehicle Sales');

In [None]:
# 12 observations per seasonal cycle. The cycle length is passed as `period`:
# the older `freq` keyword was deprecated in statsmodels 0.11 and is rejected
# by later releases.
vehicle_sales_decomp = seasonal_decompose(vehicle_sales, period=12)

In [None]:
# Short aliases for the three components of the vehicle-sales decomposition.
trend, seasonal, residual = (
    vehicle_sales_decomp.trend,
    vehicle_sales_decomp.seasonal,
    vehicle_sales_decomp.resid,
)

In [None]:
# Display the seasonal component of the vehicle-sales decomposition.
seasonal

In [None]:
# Each component may come back as a one-column DataFrame or as a plain Series,
# depending on the statsmodels version; `.TOTALSA` attribute access only works
# for the former. `.squeeze()` yields a Series in both cases.
df_decomposed = pd.DataFrame({'trend': vehicle_sales_decomp.trend.squeeze(),
                              'seasonal' : vehicle_sales_decomp.seasonal.squeeze(),
                              'residual' : vehicle_sales_decomp.resid.squeeze()})
df_decomposed

In [None]:
# Show only the rows where every component has a value.
df_decomposed.dropna(axis=0, how='any')

In [None]:
fig, ax = plt.subplots(figsize=(18,12))

# Plot the observed values as a Series: DataFrame.plot names each line after
# its column and ignores `label`, so "observed" would never reach the legend.
vehicle_sales['TOTALSA'].plot(ax=ax, label="observed", color='lightgrey')

# The components are labelled by their column names (trend / seasonal /
# residual); a single `label` cannot apply to a multi-column frame.
df_decomposed.plot(ax=ax)
plt.legend(loc='upper left');

In [None]:
fig, ax = plt.subplots(figsize=(18,12))

# Reduce both operands to Series before subtracting. If `trend` is a Series,
# `DataFrame - Series` aligns the Series index against the frame's columns and
# produces only NaN; Series - Series aligns on the date index as intended.
detrended = vehicle_sales.squeeze() - trend.squeeze()

ax.plot(detrended,
        color='Green', label='Detrended data')

ax.plot(seasonal.squeeze(),
        color='Coral', label='Seasonal component')

plt.legend(loc='upper left');