In [1]:
import pandas as pd
import altair as alt

### Read and Clean the data

In [2]:
# Load the monthly series, parsing 'Month' as datetimes, and give the
# passenger-count column a name without the leading '#'.
raw_passengers = pd.read_csv('AirPassengers.csv', parse_dates=['Month'])
df = raw_passengers.rename(columns={'#Passengers': 'Passengers'})
df.head()

Unnamed: 0,Month,Passengers
0,1949-01-01,112
1,1949-02-01,118
2,1949-03-01,132
3,1949-04-01,129
4,1949-05-01,121


### Plot the data to learn more about the pattern/seasonality

In [3]:
# Line chart of the raw series: time on x, passenger count on y.
passengers_line = alt.Chart(df).mark_line()
passengers_line.encode(x='Month:T', y='Passengers:Q')

### Use a centered moving average with a 1 year/12 month window

In [4]:
# Estimate the trend as a 12-month rolling mean of the passenger counts.
# Select the 'Passengers' column explicitly: rolling over the whole frame also
# covers the datetime 'Month' column and assigns a DataFrame to a single column,
# which only works on pandas versions that silently drop non-numeric columns.
# NOTE: the window is even, so it cannot sit symmetrically around a row; the
# first non-null value lands on row 6 and is the mean of rows 0-11.
df['trend'] = df['Passengers'].rolling(window=12, center=True).mean()

In [5]:
# First ten rows: the leading trend values are NaN until the window is full.
df.head(n=10)

Unnamed: 0,Month,Passengers,trend
0,1949-01-01,112,
1,1949-02-01,118,
2,1949-03-01,132,
3,1949-04-01,129,
4,1949-05-01,121,
5,1949-06-01,135,
6,1949-07-01,148,126.666667
7,1949-08-01,148,126.916667
8,1949-09-01,136,127.583333
9,1949-10-01,119,128.333333


In [6]:
# Last ten rows: the trailing trend values are NaN for the same reason.
df.tail(n=10)

Unnamed: 0,Month,Passengers,trend
134,1960-03-01,419,463.333333
135,1960-04-01,461,467.083333
136,1960-05-01,472,471.583333
137,1960-06-01,535,473.916667
138,1960-07-01,622,476.166667
139,1960-08-01,606,
140,1960-09-01,508,
141,1960-10-01,461,
142,1960-11-01,390,
143,1960-12-01,432,


In [7]:
# Fold the two series into long form (columns 'key' and 'value') so that both
# can be drawn as separately coloured lines on one chart.
series_to_compare = ['Passengers', 'trend']
folded = alt.Chart(df).transform_fold(series_to_compare)
trend_lines = folded.mark_line().encode(
    x='Month:T',
    y='value:Q',
    color='key:N',
)
trend_lines.properties(title='Trend')

### Remove the trend by dividing the series by it (multiplicative model)

In [8]:
# Divide the observed series by its trend to get the detrended ratio.
df['detrend'] = df['Passengers'].div(df['trend'])

detrended_line = alt.Chart(df).mark_line().encode(x='Month:T', y='detrend:Q')
detrended_line.properties(title='Detrended')

### From the Month column, extract the month number for grouping

In [9]:
# Calendar month number (1-12) taken from the datetime column; used as the
# grouping key for the seasonal average.
month_number = df['Month'].dt.month
df['Month_int'] = month_number
df.head(n=10)

Unnamed: 0,Month,Passengers,trend,detrend,Month_int
0,1949-01-01,112,,,1
1,1949-02-01,118,,,2
2,1949-03-01,132,,,3
3,1949-04-01,129,,,4
4,1949-05-01,121,,,5
5,1949-06-01,135,,,6
6,1949-07-01,148,126.666667,1.168421,7
7,1949-08-01,148,126.916667,1.16612,8
8,1949-09-01,136,127.583333,1.06597,9
9,1949-10-01,119,128.333333,0.927273,10


### Average the detrended values by calendar month to get the seasonal component

In [10]:
# Mean detrended ratio per calendar month, one row per month number.
# NaN detrend values are skipped by the mean.
month_and_ratio = df[['Month_int', 'detrend']]
avg_seasonality = (
    month_and_ratio
    .groupby('Month_int', as_index=False)
    .mean()
    .rename(columns={'detrend': 'avg_seasonality'})
)
avg_seasonality

Unnamed: 0,Month_int,avg_seasonality
0,1,0.914743
1,2,0.887863
2,3,1.010943
3,4,0.978781
4,5,0.983574
5,6,1.115711
6,7,1.23636
7,8,1.222717
8,9,1.06344
9,10,0.92451


### Join the data back to the original dataframe

In [11]:
# Attach each month's average seasonality to every row of the series.
# Drop any 'avg_seasonality' column left over from an earlier run of this cell
# first: without that, re-running the cell makes merge emit
# 'avg_seasonality_x' / 'avg_seasonality_y' and later cells fail with a KeyError.
df = (
    df.drop(columns='avg_seasonality', errors='ignore')
    .merge(avg_seasonality, on='Month_int')
    # Re-sort by date so the rows are in chronological order after the merge.
    .sort_values(by='Month')
)
df.head()

Unnamed: 0,Month,Passengers,trend,detrend,Month_int,avg_seasonality
0,1949-01-01,112,,,1,0.914743
12,1949-02-01,118,,,2,0.887863
24,1949-03-01,132,,,3,1.010943
36,1949-04-01,129,,,4,0.978781
48,1949-05-01,121,,,5,0.983574


### The averaged seasonality covers only a single period, so charting it directly would not show the pattern repeating. Concatenate several copies of it for plotting.

In [12]:
# Tile the per-month average seasonality end to end so the chart shows the
# pattern repeating. The result is the same as before; the dead bare-expression
# line and the per-iteration DataFrame construction were removed.
n_cycles = 11  # number of back-to-back copies to plot
one_cycle = avg_seasonality[['avg_seasonality']]
extend_seasonality = pd.concat([one_cycle] * n_cycles, ignore_index=True)
# Expose the running 0..N-1 position as column 'X' for the chart's x axis.
extend_seasonality = extend_seasonality.reset_index().rename(columns={'index': 'X'})

alt.Chart(extend_seasonality).mark_line().encode(
    x='X:Q',
    y='avg_seasonality:Q'
).properties(title='Average Seasonality')

### Extract the random noise

In [13]:
# Residual: what is left of the observed value after dividing out the product
# of trend and average seasonality.
trend_times_season = df['trend'].mul(df['avg_seasonality'])
df['random_noise'] = df['Passengers'].div(trend_times_season)

noise_line = alt.Chart(df).mark_line().encode(x='Month:T', y='random_noise:Q')
noise_line.properties(title='Random Noise')

### Reconstruct the original time series

In [14]:
# Multiply the three components back together; rows where the trend is NaN
# stay NaN.
df['reconstructed'] = (
    df['trend']
    .mul(df['avg_seasonality'])
    .mul(df['random_noise'])
)
df.head(n=10)

Unnamed: 0,Month,Passengers,trend,detrend,Month_int,avg_seasonality,random_noise,reconstructed
0,1949-01-01,112,,,1,0.914743,,
12,1949-02-01,118,,,2,0.887863,,
24,1949-03-01,132,,,3,1.010943,,
36,1949-04-01,129,,,4,0.978781,,
48,1949-05-01,121,,,5,0.983574,,
60,1949-06-01,135,,,6,1.115711,,
72,1949-07-01,148,126.666667,1.168421,7,1.23636,0.945049,148.0
84,1949-08-01,148,126.916667,1.16612,8,1.222717,0.953712,148.0
96,1949-09-01,136,127.583333,1.06597,9,1.06344,1.002379,136.0
108,1949-10-01,119,128.333333,0.927273,10,0.92451,1.002988,119.0


In [15]:
# Draw the observed series and the recomposed series as two charts placed
# side by side.
original = (
    alt.Chart(df)
    .mark_line()
    .encode(x='Month:T', y='Passengers:Q')
    .properties(title='Original')
)
reconstructed = (
    alt.Chart(df)
    .mark_line(color='red')
    .encode(x='Month:T', y='reconstructed:Q')
    .properties(title='Reconstructed')
)
alt.hconcat(original, reconstructed)