**Basic Feature Engineering with Time Series Dataset using Python**

Date Time Features

In [7]:
import pandas as pd 
import numpy as np 

In [3]:
# Daily minimum temperatures CSV; the first column becomes the index and is parsed as dates.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/daily-min-temperatures.csv'
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0.
# Squeezing the one-column frame after the read gives the same Series on old and new versions.
data = pd.read_csv(url, header=0, index_col=0, parse_dates=True).squeeze("columns")

In [4]:
# Build the date-time feature table straight from the series' DatetimeIndex.
# Vectorised attribute access replaces three Python-level loops, and avoids
# `data[i]`, an integer lookup on a date-indexed Series that only works through
# the positional fallback deprecated in recent pandas releases.
df = pd.DataFrame({
    'Month': data.index.month,        # calendar month of each observation
    'Day': data.index.day,            # day of month of each observation
    'Temperature': data.to_numpy(),   # observed values, in row order
})

In [36]:
# Preview the first five rows of the Month / Day / Temperature feature table.
df.head()

Unnamed: 0,Month,Day,Temperature
0,1,1,20.7
1,1,2,17.9
2,1,3,18.8
3,1,4,14.6
4,1,5,15.8


Lag Features

* Single Shift

In [8]:
# Display the loaded date-indexed series (pandas abbreviates the middle rows).
data

Date
1981-01-01    20.7
1981-01-02    17.9
1981-01-03    18.8
1981-01-04    14.6
1981-01-05    15.8
              ... 
1990-12-27    14.0
1990-12-28    13.6
1990-12-29    13.5
1990-12-30    15.7
1990-12-31    13.0
Name: Temp, Length: 3650, dtype: float64

In [9]:
# Keep only the observed values: the result is a single-column frame (column 0)
# with a default integer index instead of the dates.
temp = pd.DataFrame(data.to_numpy())
temp.head()

Unnamed: 0,0
0,20.7
1,17.9
2,18.8
3,14.6
4,15.8


In [10]:
# Lag feature: place the series shifted down by one row next to the unshifted
# series, so each row pairs an observation with the one immediately before it.
# The first row of the lagged column has no predecessor and is NaN.
final_temp = pd.concat([temp.shift(periods=1), temp], axis='columns')
final_temp.columns = ['t-1', 't+1']
final_temp.head()

Unnamed: 0,t-1,t+1
0,,20.7
1,20.7,17.9
2,17.9,18.8
3,18.8,14.6
4,14.6,15.8


* Multiple Shifts 

In [25]:
# Several lag features at once: the series shifted down by 3, 2 and 1 rows,
# followed by the unshifted series as the last column.
mul_temps = pd.concat([temp.shift(lag) for lag in (3, 2, 1)] + [temp], axis=1)
mul_temps.columns = ['t-3', 't-2', 't-1', 't+1']
# `head()` must be the cell's last expression for the table to render; the
# trailing `print()` that used to follow it suppressed the output entirely.
mul_temps.head()




Rolling Window Statistics 

* Single Shift

In [14]:
# Shift down one row first so that a window ending at row i only contains
# earlier observations, then take a rolling window over two shifted values.
shifted = temp.shift(periods=1)
windows = shifted.rolling(2)

In [17]:
# Mean of each two-value rolling window; rows without a full window are NaN.
mean = windows.mean()
mean.head()

Unnamed: 0,0
0,
1,
2,19.3
3,18.35
4,16.7


In [18]:
# Put the rolling mean next to the unshifted series so that each row holds the
# window statistic alongside the observation of that row.
rws = pd.concat([mean, temp], axis='columns')
rws.columns = ['means', 't+1']
rws.head()

Unnamed: 0,means,t+1
0,,20.7
1,,17.9
2,19.3,18.8
3,18.35,14.6
4,16.7,15.8


* Multiple Shifts

In [19]:
# Window width, also used to derive the shift below.
digit = 3
# NOTE(review): shifting by `digit - 1` (2 rows) means the window ending at row i
# covers rows i-4 .. i-2 of `temp`, so the observation directly before row i is
# left out. The earlier rolling-window cell shifts by 1 instead; confirm the
# extra gap is intended.
nshift = temp.shift(periods=digit - 1)
nwindow = nshift.rolling(digit)
# Summary statistics of every full window; rows without a full window are NaN.
min_val, mean_val, max_val = nwindow.min(), nwindow.mean(), nwindow.max()

In [21]:
# Combine the three window statistics with the unshifted series, one column each.
resut_frame = pd.concat([min_val, mean_val, max_val, temp], axis='columns')
resut_frame.columns = ['Min', 'Mean', 'Max', 'T+1']
# Ten rows are shown because the first four contain only NaN statistics.
resut_frame.head(10)

Unnamed: 0,Min,Mean,Max,T+1
0,,,,20.7
1,,,,17.9
2,,,,18.8
3,,,,14.6
4,17.9,19.133333,20.7,15.8
5,14.6,17.1,18.8,15.8
6,14.6,16.4,18.8,15.8
7,14.6,15.4,15.8,17.4
8,15.8,15.8,15.8,21.8
9,15.8,16.333333,17.4,20.0


In [38]:
# Emits a single blank line; this cell computes nothing.
print()




Expanding Window Statistics 

In [39]:
# Expanding window: the statistics at row i summarise every observation from
# the start of the series up to and including row i.
ex = temp.expanding()
a, b, c = ex.min(), ex.mean(), ex.max()
# Shifting up by one row pairs each row's statistics with the following observation.
final = pd.concat([a, b, c, temp.shift(periods=-1)], axis='columns')
final.columns = ['Min', 'Mean', 'Max', 'T+1']
final.head()

Unnamed: 0,Min,Mean,Max,T+1
0,20.7,20.7,20.7,17.9
1,17.9,19.3,20.7,18.8
2,17.9,19.133333,20.7,14.6
3,14.6,18.0,20.7,15.8
4,14.6,17.56,20.7,15.8
