In [3]:
import pandas as pd
import numpy as np
from datetime import datetime

# Third day of time series with `pandas`

Shifting dates with offsets

In [2]:
from pandas.tseries.offsets import Day, MonthEnd

In [9]:
# Fixed mid-month reference date for the offset examples; the bare name on the
# last line displays it.
now = datetime(year=2018, month=11, day=17)
now

datetime.datetime(2018, 11, 17, 0, 0)

In [6]:
# Multiplying an offset scales it: this moves `now` forward by three days.
now + 3 * Day()

Timestamp('2018-11-20 00:00:00')

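But if you add an anchored offset (one tied to a calendar boundary such as the month end), the first increment rolls the date forward to the next anchor point rather than adding a full period.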

In [7]:
# `now` is mid-month, so a single MonthEnd step stops at the end of the
# current month instead of jumping a full month ahead.
now + MonthEnd()

Timestamp('2018-11-30 00:00:00')

In [8]:
# Two steps: the first rolls to the current month's end, the second moves on
# to the following month's end.
now + MonthEnd(2)

Timestamp('2018-12-31 00:00:00')

How does that work?

In [10]:
# rollforward moves a date that is not on the anchor to the next month end.
offset = MonthEnd()
offset.rollforward(now)

Timestamp('2018-11-30 00:00:00')

In [12]:
# rollback is the mirror image: it moves the date to the previous month end.
offset.rollback(now)

Timestamp('2018-10-31 00:00:00')

Using that with `groupby`

In [14]:
# 20 standard-normal draws on a 4-day grid starting 2000-01-05, so the data
# spans three calendar months. No seed is set in this cell, so the values
# differ between runs.
ts = pd.Series(
    np.random.randn(20),
    # Uppercase 'D' is the canonical day alias; the lowercase 'd' spelling is
    # deprecated in recent pandas releases.
    index=pd.date_range('1/5/2000', periods=20, freq='4D')
)
ts

2000-01-05    2.181299
2000-01-09   -1.205468
2000-01-13    0.440147
2000-01-17   -0.129908
2000-01-21    0.239876
2000-01-25   -0.637307
2000-01-29    0.346576
2000-02-02    1.652116
2000-02-06    0.038279
2000-02-10    0.623481
2000-02-14    1.291240
2000-02-18   -1.666739
2000-02-22    0.108602
2000-02-26    0.201050
2000-03-01   -0.518738
2000-03-05   -0.856472
2000-03-09    0.205815
2000-03-13    0.028720
2000-03-17    1.512677
2000-03-21   -1.681857
Freq: 4D, dtype: float64

And because a function passed to `groupby` is called on each value of the index, with its return value used as the group key...

In [15]:
# Each index label is mapped to its month end, which becomes the group key,
# so this averages the values month by month.
ts.groupby(offset.rollforward).mean()

2000-01-31    0.176459
2000-02-29    0.321147
2000-03-31   -0.218309
dtype: float64

But this is easier to do with `resample`

## Time zones

`pandas` wraps the third-party library `pytz`, which exposes the *Olson database*. The benefit of this is that it grants access to historical time zone data, such as past UTC offsets and daylight-saving rules.

In [16]:
import pytz

In [17]:
# Peek at the last few entries of pytz's list of commonly used zone names.
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [18]:
# Look up a tzinfo object by its Olson name.
# NOTE(review): the repr shows the zone's local-mean-time (LMT) entry rather
# than a current UTC offset; presumably that is pytz's default for a zone not
# yet attached to a datetime. Confirm against the pytz documentation.
tz = pytz.timezone('America/New_York')
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

And now Pandas

In [19]:
# Six naive (zone-less) daily timestamps at 09:30, paired with one random
# value each; the bare name on the last line displays the series.
rng = pd.date_range(start='3/8/2012 9:30', periods=6, freq='D')
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

2012-03-08 09:30:00    0.439153
2012-03-09 09:30:00    0.976141
2012-03-10 09:30:00   -0.748481
2012-03-11 09:30:00    0.713979
2012-03-12 09:30:00    0.042264
2012-03-13 09:30:00    1.534542
Freq: D, dtype: float64

In [20]:
# The index is naive, so `tz` is None; print() is used because a bare None
# as the last expression would produce no cell output.
print(ts.index.tz)

None


In [21]:
# Passing `tz` at construction yields a timezone-aware index directly.
pd.date_range('3/8/2012 9:30', periods=6, freq='D', tz='UTC')

DatetimeIndex(['2012-03-08 09:30:00+00:00', '2012-03-09 09:30:00+00:00',
               '2012-03-10 09:30:00+00:00', '2012-03-11 09:30:00+00:00',
               '2012-03-12 09:30:00+00:00', '2012-03-13 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

If you need to add a time zone to existing naive data, use `tz_localize`:

In [22]:
# tz_localize labels the naive timestamps as UTC; the wall-clock times
# themselves (09:30) are unchanged.
ts_utc = ts.tz_localize('UTC')

In [23]:
# Display the localized series; every label now carries a +00:00 offset.
ts_utc

2012-03-08 09:30:00+00:00    0.439153
2012-03-09 09:30:00+00:00    0.976141
2012-03-10 09:30:00+00:00   -0.748481
2012-03-11 09:30:00+00:00    0.713979
2012-03-12 09:30:00+00:00    0.042264
2012-03-13 09:30:00+00:00    1.534542
Freq: D, dtype: float64

In [24]:
# The index dtype now records the zone alongside the datetime type.
ts_utc.index

DatetimeIndex(['2012-03-08 09:30:00+00:00', '2012-03-09 09:30:00+00:00',
               '2012-03-10 09:30:00+00:00', '2012-03-11 09:30:00+00:00',
               '2012-03-12 09:30:00+00:00', '2012-03-13 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [25]:
# tz_convert re-expresses the same instants in another zone. In the output the
# offset changes from -05:00 to -04:00 on 2012-03-11, where US daylight
# saving time begins.
ts_utc.tz_convert('America/New_York')

2012-03-08 04:30:00-05:00    0.439153
2012-03-09 04:30:00-05:00    0.976141
2012-03-10 04:30:00-05:00   -0.748481
2012-03-11 05:30:00-04:00    0.713979
2012-03-12 05:30:00-04:00    0.042264
2012-03-13 05:30:00-04:00    1.534542
Freq: D, dtype: float64

In [26]:
# Localize the same naive times as New York wall-clock time instead, then view
# them in UTC; the UTC hour changes across the DST transition.
ts_eastern = ts.tz_localize('America/New_York')
ts_eastern.tz_convert('UTC')

2012-03-08 14:30:00+00:00    0.439153
2012-03-09 14:30:00+00:00    0.976141
2012-03-10 14:30:00+00:00   -0.748481
2012-03-11 13:30:00+00:00    0.713979
2012-03-12 13:30:00+00:00    0.042264
2012-03-13 13:30:00+00:00    1.534542
Freq: D, dtype: float64

In [27]:
# Conversion works between any two zones, not only to or from UTC.
ts_eastern.tz_convert('Europe/Berlin')

2012-03-08 15:30:00+01:00    0.439153
2012-03-09 15:30:00+01:00    0.976141
2012-03-10 15:30:00+01:00   -0.748481
2012-03-11 14:30:00+01:00    0.713979
2012-03-12 14:30:00+01:00    0.042264
2012-03-13 14:30:00+01:00    1.534542
Freq: D, dtype: float64

TZ methods are also available on `DatetimeIndex` objects.

In [28]:
# Localization applied to the index itself rather than to the Series.
ts.index.tz_localize('Asia/Shanghai')

DatetimeIndex(['2012-03-08 09:30:00+08:00', '2012-03-09 09:30:00+08:00',
               '2012-03-10 09:30:00+08:00', '2012-03-11 09:30:00+08:00',
               '2012-03-12 09:30:00+08:00', '2012-03-13 09:30:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq='D')

## Operations with timezone-aware stuff

I'll leave that for another time. I'm not really doing timezone aware stuff these days.