In [1]:
from pandas import DataFrame, Series
import pandas as pd
import sys
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline


from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse

# Time Zone Handling

Working with time zones is generally considered one of the most unpleasant parts of
time series manipulation. In particular, daylight saving time (DST) transitions are a
common source of complication. As such, many time series users choose to work with
time series in coordinated universal time or UTC, which is the successor to Greenwich
Mean Time and is the current international standard. Time zones are expressed as
offsets from UTC; for example, New York is four hours behind UTC during daylight
saving time and five hours behind the rest of the year.

In Python, time zone information comes from the third-party pytz library, which exposes
the Olson database, a compilation of world time zone information. This is especially
important for historical data because the DST transition dates (and even UTC offsets)
have been changed numerous times depending on the whims of local governments. In
the United States, the DST transition times have been changed many times since 1900!

For detailed information about the pytz library, you’ll need to look at that library’s documentation.
As far as this book is concerned, pandas wraps pytz’s functionality so you
can ignore its API outside of the time zone names. Time zone names can be found
interactively and in the docs:

In [2]:
import pytz

In [3]:
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

To get a time zone object from pytz, use pytz.timezone:

In [4]:
tz = pytz.timezone('US/Eastern')

In [5]:
tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

Methods in pandas will accept either time zone names or these objects. I recommend
just using the names.

## Localization and Conversion

By default, time series in pandas are time zone naive. Consider the following time series:

In [7]:
# Six consecutive daily timestamps starting 2012-03-09 09:30; no tz is passed,
# so the resulting index is time zone naive.
rng = pd.date_range(start='3/9/2012 9:30', periods=6, freq='D')
# One standard-normal draw per timestamp (unseeded, so values vary between runs).
ts = Series(data=np.random.randn(rng.size), index=rng)

The index’s tz field is None:

In [8]:
print(ts.index.tz)

None


Date ranges can be generated with a time zone set:

In [9]:
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

Conversion from naive to localized is handled by the tz_localize method:

In [10]:
ts_utc = ts.tz_localize('UTC')

In [11]:
ts_utc

2012-03-09 09:30:00+00:00    0.218814
2012-03-10 09:30:00+00:00    0.430408
2012-03-11 09:30:00+00:00    0.633672
2012-03-12 09:30:00+00:00   -0.069116
2012-03-13 09:30:00+00:00    0.955906
2012-03-14 09:30:00+00:00   -0.592763
Freq: D, dtype: float64

In [12]:
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

Once a time series has been localized to a particular time zone, it can be converted to
another time zone using tz_convert:

In [13]:
ts_utc.tz_convert('US/Eastern')

2012-03-09 04:30:00-05:00    0.218814
2012-03-10 04:30:00-05:00    0.430408
2012-03-11 05:30:00-04:00    0.633672
2012-03-12 05:30:00-04:00   -0.069116
2012-03-13 05:30:00-04:00    0.955906
2012-03-14 05:30:00-04:00   -0.592763
Freq: D, dtype: float64

In the case of the above time series, which straddles a DST transition in the US/Eastern
time zone, we could localize to US/Eastern time and convert to, say, UTC or Berlin time:

In [14]:
ts_eastern = ts.tz_localize('US/Eastern')

In [15]:
ts_eastern.tz_convert('UTC')

2012-03-09 14:30:00+00:00    0.218814
2012-03-10 14:30:00+00:00    0.430408
2012-03-11 13:30:00+00:00    0.633672
2012-03-12 13:30:00+00:00   -0.069116
2012-03-13 13:30:00+00:00    0.955906
2012-03-14 13:30:00+00:00   -0.592763
Freq: D, dtype: float64

In [16]:
ts_eastern.tz_convert('Europe/Berlin')

2012-03-09 15:30:00+01:00    0.218814
2012-03-10 15:30:00+01:00    0.430408
2012-03-11 14:30:00+01:00    0.633672
2012-03-12 14:30:00+01:00   -0.069116
2012-03-13 14:30:00+01:00    0.955906
2012-03-14 14:30:00+01:00   -0.592763
Freq: D, dtype: float64

tz_localize and tz_convert are also instance methods on DatetimeIndex:

In [17]:
ts.index.tz_localize('Asia/Shanghai')

DatetimeIndex(['2012-03-09 09:30:00+08:00', '2012-03-10 09:30:00+08:00',
               '2012-03-11 09:30:00+08:00', '2012-03-12 09:30:00+08:00',
               '2012-03-13 09:30:00+08:00', '2012-03-14 09:30:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq='D')

## Operations with Time Zone-Aware Timestamp Objects

As with time series and date ranges, individual Timestamp objects can be
localized from naive to time zone-aware and converted from one time zone to another:


In [18]:
# Start from a naive wall-clock time, declare it to be UTC, then display the
# same instant as seen in the US/Eastern zone.
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize(tz='utc')
stamp_utc.tz_convert(tz='US/Eastern')


Timestamp('2011-03-11 23:00:00-0500', tz='US/Eastern')

You can also pass a time zone when creating the Timestamp:


In [20]:
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')
stamp_moscow


Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

Time zone-aware Timestamp objects internally store a UTC timestamp value as nanoseconds
since the UNIX epoch (January 1, 1970); this UTC value is invariant between
time zone conversions:

In [21]:
stamp_utc.value

1299902400000000000

In [22]:
stamp_utc.tz_convert('US/Eastern').value

1299902400000000000

When performing time arithmetic using pandas’s DateOffset objects, daylight saving
time transitions are respected where possible:


In [28]:
from pandas.tseries.offsets import Hour

In [29]:
# 30 minutes before the spring-forward DST transition, which in 2012 took
# place in US/Eastern on March 11 at 02:00 local time (not March 12).
stamp = pd.Timestamp('2012-03-11 01:30', tz='US/Eastern')
stamp



Timestamp('2012-03-11 01:30:00-0500', tz='US/Eastern')

In [30]:
# One elapsed hour later the wall clock reads 03:30: the 02:00-03:00 hour
# does not exist on this date, and the UTC offset changes from -05:00 to -04:00.
stamp + Hour()


Timestamp('2012-03-11 03:30:00-0400', tz='US/Eastern')

In [31]:
# 00:30 EDT on the 2012 fall-back date, i.e. 90 minutes before the DST
# transition. This wall-clock time is unambiguous, so localizing a naive
# stamp gives the same result as passing tz to the constructor.
naive_wall_time = pd.Timestamp('2012-11-04 00:30')
stamp = naive_wall_time.tz_localize('US/Eastern')
stamp


Timestamp('2012-11-04 00:30:00-0400', tz='US/Eastern')

In [32]:
stamp + 2 * Hour()


Timestamp('2012-11-04 01:30:00-0500', tz='US/Eastern')

## Operations between Different Time Zones

If two time series with different time zones are combined, the result will be in UTC. Since
the timestamps are stored under the hood in UTC, this is a straightforward operation
that requires no conversion:


In [33]:
rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')

In [34]:
ts = Series(np.random.randn(len(rng)), index=rng)

In [35]:
ts

2012-03-07 09:30:00   -1.217231
2012-03-08 09:30:00   -0.288108
2012-03-09 09:30:00    0.743463
2012-03-12 09:30:00    1.027897
2012-03-13 09:30:00   -0.809047
2012-03-14 09:30:00   -2.024228
2012-03-15 09:30:00   -0.047942
2012-03-16 09:30:00   -0.729712
2012-03-19 09:30:00    1.252719
2012-03-20 09:30:00    0.130683
Freq: B, dtype: float64

In [36]:
# Take the first seven rows by position and interpret their naive
# timestamps as London wall-clock time.
ts1 = ts.iloc[:7].tz_localize('Europe/London')

In [37]:
ts1

2012-03-07 09:30:00+00:00   -1.217231
2012-03-08 09:30:00+00:00   -0.288108
2012-03-09 09:30:00+00:00    0.743463
2012-03-12 09:30:00+00:00    1.027897
2012-03-13 09:30:00+00:00   -0.809047
2012-03-14 09:30:00+00:00   -2.024228
2012-03-15 09:30:00+00:00   -0.047942
Freq: B, dtype: float64

In [38]:
# Drop the first two rows by position and express the remaining instants
# in Moscow time; the underlying UTC instants are unchanged.
ts2 = ts1.iloc[2:].tz_convert('Europe/Moscow')

In [39]:
result = ts1 + ts2

In [40]:
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='B')