# TimeCircle
* Enumerates different frequencies, limited to multiples of seconds, minutes, hours, or days/weeks
* Annual or monthly phenomena (e.g. day of month) are **not** modelled (see `DateCircle`)
* The implementation is very simple and is based on POSIX seconds

In [1]:
# Auto-reload imported modules so that edits to the local package are picked up live
%load_ext autoreload
%autoreload 2

# Load Modules

In [2]:
import sys
sys.path.append('..')

from datefeatures import TimeCircle

import numpy as np
import pandas as pd
from randdate import randdate
from datetime import datetime

from sklearn.pipeline import Pipeline, FeatureUnion
from mlxtend.feature_selection import ColumnSelector

# Example 1

In [3]:
# Build a two-column array of random dates (one date variable per column)
X = np.column_stack([np.array(randdate(10)), np.array(randdate(10))])

# Fit the transformer, then transform the date variables into features
cmp = TimeCircle()
cmp.fit(X)
Z = cmp.transform(X)

Z.head()

Unnamed: 0,0_na,0_s5_sin,0_s5_cos,0_m15_sin,0_m15_cos,0_h3_sin,0_h3_cos,0_d1_sin,0_d1_cos,0_d7_sin,...,1_s5_sin,1_s5_cos,1_m15_sin,1_m15_cos,1_h3_sin,1_h3_cos,1_d1_sin,1_d1_cos,1_d7_sin,1_d7_cos
0,False,-0.503609,-0.863932,-0.084234,0.996446,0.007028,-0.999975,0.924215,-0.381872,0.923831,...,0.9636,-0.267349,-0.460414,0.887704,0.885268,-0.465082,0.967204,-0.254002,0.186611,-0.982434
1,False,0.982161,-0.188043,-0.834465,-0.55106,0.337315,0.941392,-0.999075,0.042995,0.999981,...,0.894454,0.44716,-0.494664,0.869085,0.886774,-0.462203,0.253609,0.967307,0.400598,-0.916254
2,False,-0.971883,0.235465,0.979661,-0.200662,-0.930191,-0.367075,0.515907,0.856645,-0.502283,...,-0.998931,0.046219,0.80401,0.594616,-0.824526,0.565824,0.120874,-0.992668,-0.449414,0.893324
3,False,-0.95626,-0.292519,-0.802875,-0.596148,-0.983097,-0.183086,-0.844035,0.536288,-0.143045,...,-0.615619,0.788044,-0.562244,0.826972,-0.456307,-0.889822,0.327323,-0.944913,0.810635,-0.585551
4,False,-0.976312,-0.216367,-0.29955,-0.954081,-0.972176,-0.234251,-0.847524,0.530756,-0.932735,...,0.494253,0.869318,-0.002872,-0.999996,0.25905,0.965864,-0.68357,0.729885,-0.107313,0.994225


In [4]:
# Names of the output columns produced by the fitted transformer
cmp.feature_names_

['0_na',
 '0_s5_sin',
 '0_s5_cos',
 '0_m15_sin',
 '0_m15_cos',
 '0_h3_sin',
 '0_h3_cos',
 '0_d1_sin',
 '0_d1_cos',
 '0_d7_sin',
 '0_d7_cos',
 '1_na',
 '1_s5_sin',
 '1_s5_cos',
 '1_m15_sin',
 '1_m15_cos',
 '1_h3_sin',
 '1_h3_cos',
 '1_d1_sin',
 '1_d1_cos',
 '1_d7_sin',
 '1_d7_cos']

# Example 2

In [5]:
# Build a two-column array of random dates
X = np.column_stack([np.array(randdate(10)), np.array(randdate(10))])

# Emulate a missing value in the first column
X[1, 0] = np.nan

# Transform the date variables into features: daily, 2-day and weekly cycles, sine only
cmp = TimeCircle(out=['sin'], freq={'d': [1, 2, 7]})
cmp.fit(X)
Z = cmp.transform(X)

Z.head()

Unnamed: 0,0_na,0_d1_sin,0_d2_sin,0_d7_sin,1_na,1_d1_sin,1_d2_sin,1_d7_sin
0,False,0.478868,-0.247096,-0.988309,False,-0.04657,-0.023291,0.976387
1,True,,,,False,0.211852,0.106532,-0.461153
2,False,0.852715,-0.872461,-0.996919,False,-0.373765,0.190363,0.482514
3,False,-0.252926,0.991838,-0.036521,False,0.942564,-0.816708,0.878801
4,False,0.976563,0.626405,0.91398,False,0.757311,0.909135,-0.994837


# Example 3

In [6]:
# Larger input for timing: three independently generated columns of random dates
n_samples = 100000
X = np.column_stack([np.array(randdate(n_samples)) for _col in range(3)])

In [7]:
# Frequencies to encode, grouped by time unit (key = unit, value = list of multiples)
freq = dict(
    # seconds: 1-59, e.g. range(1, 60)
    s=[1, 2, 3, 4, 6, 10, 12, 15, 20, 30, 40, 45],
    # minutes: 1-59, e.g. range(1, 60)
    m=[1, 2, 3, 4, 6, 10, 12, 15, 20, 30, 40, 45],
    # hours: 1-23, e.g. range(1, 24)
    h=[1, 2, 3, 4, 6, 9, 12, 15, 18],
    # days: any number of days, e.g. 1-7, n*7
    d=[1, 2, 3, 7, 14, 21, 28],
)

In [8]:
cmp = TimeCircle(freq = freq, out=['sin', 'cos'])
%time Z = cmp.fit_transform(X)

CPU times: user 2min 20s, sys: 3.35 s, total: 2min 23s
Wall time: 2min 30s


# Example 4

In [9]:
# Single column of random dates with one missing entry
n_samples = 5
X = np.column_stack([np.array(randdate(n_samples))])
X[1, 0] = np.nan

# Wrap the transformer in a one-step scikit-learn pipeline
pipe = Pipeline([
    ('pre', TimeCircle(freq={'d': [1, 2, 7]}, out=['sin', 'cos'])),
])

Z = pipe.fit_transform(X)
Z

Unnamed: 0,0_na,0_d1_sin,0_d1_cos,0_d2_sin,0_d2_cos,0_d7_sin,0_d7_cos
0,False,-0.675738,-0.737142,0.931971,-0.362532,0.992998,0.118128
1,True,,,,,,
2,False,0.223663,0.974667,-0.112546,-0.993646,0.40463,-0.91448
3,False,0.91388,-0.405985,-0.838446,-0.544984,-0.998217,0.059687
4,False,-0.630566,0.776135,0.334563,-0.942373,-0.344147,-0.938916


# Example 5

In [10]:
# Toy frame: one column of random dates plus one column of standard-normal numbers
n_samples = 5
X = (
    pd.DataFrame(randdate(n_samples), columns=['this_date'])
    .assign(some_numbers=np.random.randn(n_samples))
)
X

Unnamed: 0,this_date,some_numbers
0,1975-10-18 03:31:08.036662,-1.111482
1,1975-06-23 13:20:24.185933,-1.126819
2,1990-05-02 02:53:26.341770,1.925467
3,1990-06-04 02:08:51.374228,-1.980767
4,1977-09-12 22:55:33.223736,-0.99464


In [11]:
# Branch 1: select the date column (by its column name) and expand it into features
date_branch = Pipeline([
    ('sel1', ColumnSelector(cols='this_date')),
    ('pre1', TimeCircle(freq={'d': [1, 2, 7]}, out=['sin', 'cos'])),
])

# Branch 2: select the numeric column as-is
number_branch = ColumnSelector(cols='some_numbers')

# Make the pipeline: process column by column and join the branch outputs
pipe = Pipeline([
    ('col_by_col', FeatureUnion([
        ('dates', date_branch),
        ('numbers', number_branch),
    ])),
    # do some other stuff ..
])

Z = pipe.fit_transform(X)
Z

array([[False, 0.7963556261324076, 0.6048286672498758,
        0.4445060926186075, 0.8957758277744148, 0.9372963410056191,
        -0.3485334548324998, -1.1114823032670549],
       [False, -0.3436723924503427, -0.9390896052387427,
        -0.9846546616044487, 0.1745141752999696, -0.8120846540775755,
        -0.583539642707935, -1.1268189419554575],
       [False, 0.6865770198051138, 0.7270570788291167,
        0.36942043877598546, 0.9292623630679112, -0.709992687601227,
        0.7042090481900857, 1.9254668474273708],
       [False, 0.5330838995690439, 0.8460623830547376,
        -0.2774325295862602, -0.9607451230827918, -0.5047731420079254,
        -0.8632520345226223, -1.9807673113873312],
       [False, -0.2775086247485878, 0.9607231459635744,
        -0.1401371721500503, 0.9901320987533871, -0.9652047998360441,
        -0.26149511347912796, -0.994639887116065]], dtype=object)

In [12]:
# Look the fitted TimeCircle step up by name rather than by position
union = pipe.named_steps['col_by_col']
time_circle = dict(union.transformer_list)['dates'].named_steps['pre1']

# Feature names from the date branch, followed by the pass-through numeric column
colnam = list(time_circle.feature_names_) + ['some_numbers']
colnam

['0_na',
 '0_d1_sin',
 '0_d1_cos',
 '0_d2_sin',
 '0_d2_cos',
 '0_d7_sin',
 '0_d7_cos',
 'some_numbers']

In [13]:
# Z is an object-dtype array (it mixes booleans and floats), so a frame built
# from it would keep every column as `object`. infer_objects() converts each
# column back to its natural dtype (bool / float) so numeric operations work.
pd.DataFrame(Z, columns=colnam).infer_objects()

Unnamed: 0,0_na,0_d1_sin,0_d1_cos,0_d2_sin,0_d2_cos,0_d7_sin,0_d7_cos,some_numbers
0,False,0.796356,0.604829,0.444506,0.895776,0.937296,-0.348533,-1.11148
1,False,-0.343672,-0.93909,-0.984655,0.174514,-0.812085,-0.58354,-1.12682
2,False,0.686577,0.727057,0.36942,0.929262,-0.709993,0.704209,1.92547
3,False,0.533084,0.846062,-0.277433,-0.960745,-0.504773,-0.863252,-1.98077
4,False,-0.277509,0.960723,-0.140137,0.990132,-0.965205,-0.261495,-0.99464
