# Feature Engineering with Time Series

In [None]:
import seaborn as sns
import pandas as pd

In [None]:
# Load seaborn's bundled 'flights' example dataset; later cells use its
# 'year', 'month' and 'passengers' columns.
df = sns.load_dataset('flights')
df.head()

#### Technique 1: Put a timestamp in the index column

In [None]:
# Build "<month> <year>" strings per row and parse them into timestamps
ts = pd.to_datetime(df['month'].astype(str) + ' ' + df['year'].astype(str))
# Replace the existing index with the timestamps (modifies df in place)
df.set_index(ts, inplace=True)
df.head()

In [None]:
# Plot the raw passenger counts against the index
df['passengers'].plot()

#### Technique 2: Calculate Differences

In [None]:
# First difference: each value minus the previous row's value.
# This removes the trend; the first row has no predecessor and becomes NaN.
df['diff'] = df['passengers'].diff()
df['diff'].plot()

In [None]:
# Percent change relative to the previous row. Expressing the change as a
# ratio removes the change in variation as well as the trend.
df['pct'] = df['passengers'].pct_change()
df['pct'].plot()

#### Technique 3: Extract Seasonality

In [None]:
# Average percent change for each value of 'month' across all rows;
# this is used as the seasonal component.
season = df.groupby('month')['pct'].mean()
season

In [None]:
# Subtract each month's average percent change from the observed percent change.
# groupby(...).transform('mean') yields that monthly mean as a float Series
# already aligned with df's rows, so the subtraction does not depend on the
# dtype of the 'month' column (Series.replace substitutes values in place of
# the month labels and is deprecated for categorical columns).
df['no-season'] = (
    df['pct'] - df.groupby('month', observed=True)['pct'].transform('mean')
)
df['no-season'].plot()  # no trend, no variation change, no seasonality

In [None]:
# Distribution of the de-seasonalised percent changes
df['no-season'].hist()  # we end up with something close to random noise

#### Technique 4: Interpolate Gaps

In [None]:
# Work on a float copy of the passenger counts so that missing values (NaN)
# can be stored without relying on an implicit dtype change during assignment
# (relevant if 'passengers' is stored as integers).
df['passenger_gap'] = df['passengers'].astype(float)
# Blank out every row from January to December 1953 to simulate a gap
df.loc['1953-01':'1953-12', 'passenger_gap'] = float('nan')
df['passenger_gap'].plot()

Many advanced time series models (exponential smoothing, ARIMA) assume that each value depends on the one before it, so the series must not contain gaps:

$y_{t+1} = f(y_t)$

In [None]:
# Back-fill: each missing value takes the next valid observation.
# Series.bfill() replaces the deprecated fillna(method='bfill');
# .ffill() is the forward-fill counterpart.
df['passenger_gap'].bfill().plot()

In [None]:
# Fill the missing values by linear interpolation and plot the result.
# The result is not assigned, so df itself is left unchanged.
df['passenger_gap'].interpolate(method='linear').plot()

In [None]:
# Passenger counts for the years on either side of 1953.
before = df.loc[df['year'].eq(1952), 'passengers']
after = df.loc[df['year'].eq(1954), 'passengers']

# Average the two years element by element. Plain arrays are used so the
# values pair up by position instead of by their (different) index labels.
patch = (before.to_numpy() + after.to_numpy()) / 2
patch

In [None]:
# Write the values in `patch` into 'passenger_gap' for the rows where year is 1953.
# NOTE(review): the assignment is positional, so `patch` presumably has exactly
# one value per 1953 row, in row order. Confirm.
df.loc[df['year']==1953, 'passenger_gap'] = patch

In [None]:
# Re-plot 'passenger_gap' to inspect its current contents
df['passenger_gap'].plot()