In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime

# 在Python中，时区信息来自第三方库pytz，它使Python可以使用Olson数据库（汇编了世界时区信息）。
import pytz  # pytz是三方库，已经被封装到了pandas中

In [2]:
pytz.all_timezones[-5:]  # last five entries of pytz's full list of time zone names

['UTC', 'Universal', 'W-SU', 'WET', 'Zulu']

In [3]:
pytz.common_timezones[-5:]  # last five entries of pytz's list of commonly used time zone names

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

In [4]:
pytz.country_timezones['cn']  # time zone names pytz lists for the country code 'cn'

['Asia/Shanghai', 'Asia/Urumqi']

In [5]:
# Look up the pytz time zone object for a zone name and show its repr.
tz = pytz.timezone('America/New_York')
tz

<DstTzInfo 'America/New_York' LMT-1 day, 19:04:00 STD>

### 时区的本地化和转换

In [6]:
# Six consecutive daily timestamps at 09:30; no tz is passed, so the index is naive.
rng = pd.date_range(start='3/9/2012 9:30', periods=6, freq='D')
# Unseeded standard-normal draws: the displayed values differ on every run.
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

2012-03-09 09:30:00    0.316188
2012-03-10 09:30:00    0.532225
2012-03-11 09:30:00    0.968479
2012-03-12 09:30:00   -1.094454
2012-03-13 09:30:00   -0.460244
2012-03-14 09:30:00   -0.623301
Freq: D, dtype: float64

In [7]:
display(ts.index.tz)  # None: the index of ts has no time zone attached

None


In [8]:
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')  # passing tz produces a timezone-aware date range directly

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [9]:
ts_utc = ts.tz_localize('UTC')  # attach UTC to the naive index; the wall-clock values are kept as-is
ts_utc

2012-03-09 09:30:00+00:00    0.316188
2012-03-10 09:30:00+00:00    0.532225
2012-03-11 09:30:00+00:00    0.968479
2012-03-12 09:30:00+00:00   -1.094454
2012-03-13 09:30:00+00:00   -0.460244
2012-03-14 09:30:00+00:00   -0.623301
Freq: D, dtype: float64

In [10]:
ts_utc.index  # the index dtype now carries the zone: datetime64[ns, UTC]

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [11]:
ts_utc.tz_convert('America/New_York')  # convert from UTC to New York time; the offset moves from -05:00 to -04:00 across the 2012-03-11 DST switch

2012-03-09 04:30:00-05:00    0.316188
2012-03-10 04:30:00-05:00    0.532225
2012-03-11 05:30:00-04:00    0.968479
2012-03-12 05:30:00-04:00   -1.094454
2012-03-13 05:30:00-04:00   -0.460244
2012-03-14 05:30:00-04:00   -0.623301
Freq: D, dtype: float64

In [12]:
ts_eastern = ts.tz_localize('America/New_York')  # interpret the naive timestamps as New York local time
ts_eastern.tz_convert('UTC')  # convert New York time to UTC

2012-03-09 14:30:00+00:00    0.316188
2012-03-10 14:30:00+00:00    0.532225
2012-03-11 13:30:00+00:00    0.968479
2012-03-12 13:30:00+00:00   -1.094454
2012-03-13 13:30:00+00:00   -0.460244
2012-03-14 13:30:00+00:00   -0.623301
dtype: float64

In [13]:
ts_eastern.tz_convert('Europe/Berlin')  # convert New York time to Berlin time

2012-03-09 15:30:00+01:00    0.316188
2012-03-10 15:30:00+01:00    0.532225
2012-03-11 14:30:00+01:00    0.968479
2012-03-12 14:30:00+01:00   -1.094454
2012-03-13 14:30:00+01:00   -0.460244
2012-03-14 14:30:00+01:00   -0.623301
dtype: float64

In [14]:
ts.index.tz_localize('Asia/Shanghai')  # tz_localize and tz_convert are also instance methods of DatetimeIndex

DatetimeIndex(['2012-03-09 09:30:00+08:00', '2012-03-10 09:30:00+08:00',
               '2012-03-11 09:30:00+08:00', '2012-03-12 09:30:00+08:00',
               '2012-03-13 09:30:00+08:00', '2012-03-14 09:30:00+08:00'],
              dtype='datetime64[ns, Asia/Shanghai]', freq=None)

### 单独时间戳（Timestamp）的时区操作

In [15]:
stamp = pd.Timestamp('2011-03-12 04:00')  # naive timestamp (no time zone)
stamp_utc = stamp.tz_localize('utc')  # attach UTC to it
stamp_utc.tz_convert('America/New_York')  # time zone conversion

Timestamp('2011-03-11 23:00:00-0500', tz='America/New_York')

In [16]:
stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')  # set the time zone when constructing the Timestamp
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

In [17]:
print(stamp_utc.value)
print(stamp_utc.tz_convert('America/New_York').value)  # converting the time zone leaves the timestamp's underlying value unchanged

1299902400000000000
1299902400000000000


In [18]:
# 当使用pandas的DateOffset对象执行时间算术运算时，运算过程会自动关注是否存在夏令时转变期
from pandas.tseries.offsets import Hour

# 夏令时转变前的30分钟
stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')
display(stamp)
display(stamp + Hour())
print()

# 夏令时转变前的90分钟
stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')
display(stamp)
display(stamp + 2 * Hour())

2012-03-12 01:30:00-04:00
2012-03-12 02:30:00-04:00

2012-11-04 00:30:00-04:00
2012-11-04 01:30:00-05:00


### 不同时区间的操作

In [19]:
# Ten business-day timestamps at 09:30 (weekends are skipped); the index is naive.
rng = pd.date_range(start='3/7/2012 9:30', periods=10, freq='B')
# Unseeded standard-normal draws: the displayed values differ on every run.
ts = pd.Series(data=np.random.randn(rng.size), index=rng)
ts

2012-03-07 09:30:00    1.860038
2012-03-08 09:30:00    0.064317
2012-03-09 09:30:00    0.898020
2012-03-12 09:30:00   -1.722892
2012-03-13 09:30:00    0.090212
2012-03-14 09:30:00   -0.137632
2012-03-15 09:30:00   -1.985252
2012-03-16 09:30:00   -0.651331
2012-03-19 09:30:00   -0.440635
2012-03-20 09:30:00   -0.917682
Freq: B, dtype: float64

In [20]:
# First seven observations (by position), stamped as London local time.
ts1 = ts.iloc[:7].tz_localize('Europe/London')
# The same instants from position 2 onward, expressed in Moscow local time.
ts2 = ts1.iloc[2:].tz_convert('Europe/Moscow')
# Combining series with different time zones aligns them on the instant;
# the resulting index is in UTC.
result = ts1.add(ts2)
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [21]:
result  # NaN for the first two rows, where only ts1 has an observation (ts2 starts at position 2)

2012-03-07 09:30:00+00:00         NaN
2012-03-08 09:30:00+00:00         NaN
2012-03-09 09:30:00+00:00    1.796041
2012-03-12 09:30:00+00:00   -3.445784
2012-03-13 09:30:00+00:00    0.180424
2012-03-14 09:30:00+00:00   -0.275265
2012-03-15 09:30:00+00:00   -3.970503
dtype: float64