In [1]:
%load_ext autoreload
%autoreload 2

# Load Modules

In [2]:
import sys
sys.path.append('..')

from datefeatures import DateComponents

import numpy as np
from randdate import randdate
from datetime import datetime

# Example 1

In [3]:
# Build a two-column matrix of random dates (one randdate(10) call per column).
X = np.c_[
    np.array(randdate(10)),
    np.array(randdate(10))
]

# Turn each date column into features. Only the month group is switched on;
# every other component flag is explicitly disabled.
cmp = DateComponents(
    year=False,
    month=True,
    day=False,
    hour=False,
    minute=False,
    second=False,
)

# Fit and transform as two separate steps on the same data.
cmp.fit(X)
Z = cmp.transform(X)

# Preview the first rows of the resulting feature table.
Z.head()

Unnamed: 0,0_na,0_eoq,0_quarter,0_eom,0_month,1_na,1_eoq,1_quarter,1_eom,1_month
0,False,0,3,0,8,False,0,4,0,10
1,False,0,4,0,11,False,0,4,0,11
2,False,0,1,0,2,False,0,1,0,3
3,False,0,4,0,10,False,0,4,0,11
4,False,0,1,0,1,False,0,2,0,6


# Example 2

In [4]:
# Larger input for the timing runs: three date columns, each produced by its
# own randdate(n_samples) call.
n_samples = 100000
X = np.c_[tuple(np.array(randdate(n_samples)) for _ in range(3))]

In [5]:
cmp = DateComponents(year=True, month=False, day=False, hour=False, minute=False, second=False)
%time Z = cmp.fit_transform(X)

CPU times: user 269 ms, sys: 17.3 ms, total: 286 ms
Wall time: 295 ms


In [6]:
cmp = DateComponents(year=False, month=False, day=False, hour=True, minute=True, second=False)
%time Z = cmp.fit_transform(X)

CPU times: user 178 ms, sys: 8.9 ms, total: 187 ms
Wall time: 192 ms


In [7]:
cmp = DateComponents(year=True, month=True, day=True, hour=True, minute=True, second=True, microsecond=True)
%time Z = cmp.fit_transform(X)

CPU times: user 1.19 s, sys: 109 ms, total: 1.29 s
Wall time: 1.47 s


# Example 3

In [8]:
# Keep the sample tiny so that every row fits into the head() previews
n_samples = 5

# Generate fake dates as a single-column 2-D array (indexed as X[row, 0])
X = np.c_[np.array(randdate(n_samples))]

# Emulate a missing value: overwrite the date in the second row with NaN
X[1,0] = np.nan

## Example 3a -- without correction

In [9]:
# Explicitly switch off the missing-value handling of the transformer
cmp = DateComponents(missing=False)

# What will happen? X still contains the NaN entry set above; the resulting
# dtypes and values of Z are inspected in the cells that follow.
Z = cmp.fit_transform(X)

In [10]:
# Column dtypes without missing-value handling: per the output, the numeric
# features are float64, the eoq/eom flags are bool, and there is no 0_na column.
Z.dtypes

0_eoq           bool
0_quarter    float64
0_eom           bool
0_month      float64
0_dy         float64
0_dw         float64
0_week       float64
0_day        float64
dtype: object

In [11]:
# Preview: row 1 (the emulated missing date) shows NaN in the numeric columns,
# while its eoq/eom flags read False.
Z.head()

Unnamed: 0,0_eoq,0_quarter,0_eom,0_month,0_dy,0_dw,0_week,0_day
0,False,3.0,False,8.0,231.0,2.0,34.0,19.0
1,False,,False,,,,,
2,False,4.0,False,11.0,323.0,4.0,46.0,19.0
3,False,2.0,False,6.0,179.0,1.0,26.0,28.0
4,False,4.0,False,12.0,352.0,3.0,51.0,18.0


## Example 3b -- with missing value correction

In [12]:
# Same X as in Example 3a (NaN in row 1), now with missing-value handling
# enabled and the year features requested as well.
cmp = DateComponents(
    missing=True,
    year=True,
)
Z = cmp.fit_transform(X)

In [13]:
# Column dtypes with missing-value handling: per the output, the features are
# int8/int16 and a boolean 0_na indicator column is present.
Z.dtypes

0_na          bool
0_leap        int8
0_year       int16
0_eoq         int8
0_quarter     int8
0_eom         int8
0_month       int8
0_dy         int16
0_dw          int8
0_week        int8
0_day         int8
dtype: object

In [14]:
# Preview: row 1 (the emulated missing date) has 0_na=True and every feature
# column holds -1.
Z.head()

Unnamed: 0,0_na,0_leap,0_year,0_eoq,0_quarter,0_eom,0_month,0_dy,0_dw,0_week,0_day
0,False,0,1970,0,3,0,8,231,2,34,19
1,True,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,False,0,1993,0,4,0,11,323,4,46,19
3,False,0,2005,0,2,0,6,179,1,26,28
4,False,0,1986,0,4,0,12,352,3,51,18


# Example 4

In [15]:
# One fixed timestamp, reshaped into a single-row 2-D array, to check the
# time-of-day features against known values.
X = np.array(
    datetime(year=2016, month=1, day=1, hour=23, minute=59, second=58, microsecond=12345)
).reshape(1, -1)

# Date-level flags off; hour, minute, second and microsecond on.
cmp = DateComponents(
    year=False,
    month=False,
    day=False,
    hour=True,
    minute=True,
    second=True,
    microsecond=True,
)
Z = cmp.fit_transform(X)

# Only one row, so display the whole frame rather than head().
Z

Unnamed: 0,0_na,0_hour,0_min,0_sec,0_ms
0,False,23,59,58,12345
