# Feature Engineering with Time and Date data

### Parse dates

In [1]:
import numpy as np
import pandas as pd

# Load the semicolon-separated file. It has no header row, so pandas labels the
# columns with the integers 0, 1, 2, ...
df = pd.read_csv("./data/time-data.csv", sep=";", header=None)

# Parse column 0 into datetimes with one vectorized call instead of invoking
# pd.to_datetime once per row through .apply: faster, and the whole column is
# parsed under a single format rather than a per-row guess.
df[0] = pd.to_datetime(df[0])
df.head()

Unnamed: 0,0,1,2,3
0,2013-01-01 22:00:00,1,23.1,1
1,2013-02-01 22:15:00,1,12.2,0
2,2013-01-05 22:30:00,1,11.1,2
3,2013-05-01 23:00:00,1,10.4,2
4,2013-06-08 23:15:00,1,54.2,2


### Convert to Unix time, day of week, one-hot weekday flags, working day, day of month, day of year, and week of year

In [2]:
# ISO-8601 calendar components of the timestamp column: year, week number and
# weekday, where the weekday runs from 1 (Monday) to 7 (Sunday).
iso_calendar = df[0].dt.isocalendar()
iso_calendar

Unnamed: 0,year,week,day
0,2013,1,2
1,2013,5,5
2,2013,1,6
3,2013,18,3
4,2013,23,6
5,2013,1,2
6,2013,27,7
7,2013,1,3
8,2013,1,3
9,2013,10,4


In [3]:
# Seconds since the Unix epoch (1970-01-01 00:00:00). Subtracting the epoch and
# floor-dividing by one second works for any datetime storage resolution,
# whereas dividing the raw integer representation by 10**9 is only correct for
# nanosecond-resolution columns.
# NOTE(review): assumes column 0 is timezone-naive -- confirm against the data.
df["UNIX"] = (df[0] - pd.Timestamp("1970-01-01")) // pd.Timedelta(seconds=1)

# Day of week as an ordinal (Monday=0, Sunday=6). An ordinal encoding implies an
# ordering between the days.
df["DOW"] = df[0].dt.dayofweek
# Better: one 0/1 indicator column per weekday.
df["Monday"] = (df["DOW"] == 0).astype(int)
df["Tuesday"] = (df["DOW"] == 1).astype(int)
df["Wednesday"] = (df["DOW"] == 2).astype(int)
# ... (the remaining weekdays follow the same pattern)

# 1 for Monday through Friday (DOW 0-4), 0 for Saturday and Sunday.
df["WorkingDay"] = (df["DOW"] < 5).astype(int)
df["DOM"] = df[0].dt.day  # day of month
df["DOY"] = df[0].dt.dayofyear  # day of year
df["WOY"] = df[0].dt.isocalendar().week  # ISO-8601 week of year
df.head()

Unnamed: 0,0,1,2,3,UNIX,DOW,Monday,Tuesday,Wednesday,WorkingDay,DOM,DOY,WOY
0,2013-01-01 22:00:00,1,23.1,1,1357077600,1,0,1,0,1,1,1,1
1,2013-02-01 22:15:00,1,12.2,0,1359756900,4,0,0,0,1,1,32,5
2,2013-01-05 22:30:00,1,11.1,2,1357425000,5,0,0,0,0,5,5,1
3,2013-05-01 23:00:00,1,10.4,2,1367449200,2,0,0,1,1,1,121,18
4,2013-06-08 23:15:00,1,54.2,2,1370733300,5,0,0,0,0,8,159,23


### Add Feature-Crossing between DayOfWeek and DayOfMonth

In [4]:
# Cross day-of-week with day-of-month as an element-wise product; the intent is
# to expose weekends that fall at the end of a month.
# Because Monday is encoded as DOW == 0, every Monday row yields 0 here.
df["DOWxDOM"] = df["DOW"].mul(df["DOM"])
df.head()

Unnamed: 0,0,1,2,3,UNIX,DOW,Monday,Tuesday,Wednesday,WorkingDay,DOM,DOY,WOY,DOWxDOM
0,2013-01-01 22:00:00,1,23.1,1,1357077600,1,0,1,0,1,1,1,1,1
1,2013-02-01 22:15:00,1,12.2,0,1359756900,4,0,0,0,1,1,32,5,4
2,2013-01-05 22:30:00,1,11.1,2,1357425000,5,0,0,0,0,5,5,1,25
3,2013-05-01 23:00:00,1,10.4,2,1367449200,2,0,0,1,1,1,121,18,2
4,2013-06-08 23:15:00,1,54.2,2,1370733300,5,0,0,0,0,8,159,23,40
