In [1]:
import pandas as pd
import numpy as np

## 时间日期

时间戳 timestamp：固定的时刻 -> pd.Timestamp
固定时期 period：比如 2016年3月份
时间间隔 interval：由起始时间和结束时间来表示，固定时期是时间间隔的一个特例

**时间日期在 Pandas 里的作用**

* 分析金融数据，如股票交易数据
* 分析服务器日志


### Python datetime

Python 标准库的 datetime 模块提供了时间日期的处理功能，这是时间日期处理的基础。

In [17]:
from datetime import datetime
from datetime import timedelta

In [4]:
# Capture the current local date and time; the value differs on every run.
now = datetime.now()
now

datetime.datetime(2016, 4, 21, 15, 16, 10, 148000)

In [5]:
# The calendar fields of a datetime are exposed as plain attributes.
now.year, now.month, now.day

(2016, 4, 21)

### 时间差

In [9]:
# Subtracting one datetime from another yields a timedelta.
date2 = datetime(year=2016, month=3, day=16)
date1 = datetime(year=2016, month=3, day=20)
delta = date1 - date2
delta

datetime.timedelta(4)

In [10]:
# Whole-day component of the timedelta.
delta.days

4

In [15]:
# Full duration expressed in seconds, returned as a float.
delta.total_seconds()

345600.0

In [16]:
# Adding a timedelta to a datetime shifts it forward and returns a new datetime.
date2 + delta

datetime.datetime(2016, 3, 20, 0, 0)

In [19]:
# The first timedelta argument is a number of days; fractional values are
# allowed, so 4.5 days moves the result to noon of the fourth following day.
date2 + timedelta(days=4.5)

datetime.datetime(2016, 3, 20, 12, 0)

### 字符串和 datetime 转换

关于 datetime 格式字符串的定义，可以参阅 Python 官方文档中 strftime() 和 strptime() 的说明

In [28]:
# Fixed sample value (2016-03-20 08:30) used by the formatting cells that follow.
date = datetime(2016, 3, 20, 8, 30)
date

datetime.datetime(2016, 3, 20, 8, 30)

In [29]:
# str() renders the datetime as 'YYYY-MM-DD HH:MM:SS'.
str(date)

'2016-03-20 08:30:00'

In [30]:
# strftime() formats the datetime with an explicit pattern; this pattern
# produces the same text as str(date).
date.strftime('%Y-%m-%d %H:%M:%S')

'2016-03-20 08:30:00'

In [31]:
# strptime() parses a string into a datetime according to the given pattern.
datetime.strptime('2016-03-20 09:30', '%Y-%m-%d %H:%M')

datetime.datetime(2016, 3, 20, 9, 30)

## Pandas 里的时间序列

Pandas 里使用 Timestamp 来表达时间

In [33]:
# Four consecutive days used as the index labels of the sample series.
dates = [
    datetime(2016, 3, 1),
    datetime(2016, 3, 2),
    datetime(2016, 3, 3),
    datetime(2016, 3, 4),
]
# Draw the sample values from a locally seeded generator so the cell gives the
# same numbers on every run and NumPy's global random state is left untouched.
# Re-run the cell to refresh the recorded output.
rng = np.random.RandomState(42)
s = pd.Series(rng.randn(4), index=dates)
s

2016-03-01    0.221321
2016-03-02    0.590888
2016-03-03   -0.394489
2016-03-04    1.121639
dtype: float64

In [35]:
# A Series indexed by datetime objects ends up with a DatetimeIndex.
type(s.index)

pandas.tseries.index.DatetimeIndex

In [37]:
# Each element of that index is a pandas Timestamp.
type(s.index[0])

pandas.tslib.Timestamp

## 日期范围

### 生成日期范围

In [41]:
# Daily range between two dates; both endpoints are included.
pd.date_range('20160320', '20160331')

DatetimeIndex(['2016-03-20', '2016-03-21', '2016-03-22', '2016-03-23',
               '2016-03-24', '2016-03-25', '2016-03-26', '2016-03-27',
               '2016-03-28', '2016-03-29', '2016-03-30', '2016-03-31'],
              dtype='datetime64[ns]', freq='D')

In [42]:
# Daily range defined by a start date and a number of entries.
pd.date_range(start='20160320', periods=10)

DatetimeIndex(['2016-03-20', '2016-03-21', '2016-03-22', '2016-03-23',
               '2016-03-24', '2016-03-25', '2016-03-26', '2016-03-27',
               '2016-03-28', '2016-03-29'],
              dtype='datetime64[ns]', freq='D')

In [47]:
# Normalize the timestamps: the time-of-day part of the start value is dropped,
# so every entry falls on midnight.
pd.date_range(start='2016-03-20 16:23:32', periods=10, normalize=True)

DatetimeIndex(['2016-03-20', '2016-03-21', '2016-03-22', '2016-03-23',
               '2016-03-24', '2016-03-25', '2016-03-26', '2016-03-27',
               '2016-03-28', '2016-03-29'],
              dtype='datetime64[ns]', freq='D')

### 时间频率

In [49]:
# Weekly frequency; the recorded output reports it as 'W-SUN'.
pd.date_range(start='20160320', periods=10, freq='W')

DatetimeIndex(['2016-03-20', '2016-03-27', '2016-04-03', '2016-04-10',
               '2016-04-17', '2016-04-24', '2016-05-01', '2016-05-08',
               '2016-05-15', '2016-05-22'],
              dtype='datetime64[ns]', freq='W-SUN')

In [52]:
# Monthly frequency: each entry is the last calendar day of a month.
pd.date_range(start='20160320', periods=10, freq='M')

DatetimeIndex(['2016-03-31', '2016-04-30', '2016-05-31', '2016-06-30',
               '2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [50]:
# Index made up of the last business day of each month.
pd.date_range(start='20160320', periods=10, freq='BM')

DatetimeIndex(['2016-03-31', '2016-04-29', '2016-05-31', '2016-06-30',
               '2016-07-29', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30', '2016-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [55]:
# Hourly frequency: one entry every 4 hours.
pd.date_range(start='20160320', periods=10, freq='4H')

DatetimeIndex(['2016-03-20 00:00:00', '2016-03-20 04:00:00',
               '2016-03-20 08:00:00', '2016-03-20 12:00:00',
               '2016-03-20 16:00:00', '2016-03-20 20:00:00',
               '2016-03-21 00:00:00', '2016-03-21 04:00:00',
               '2016-03-21 08:00:00', '2016-03-21 12:00:00'],
              dtype='datetime64[ns]', freq='4H')

## 时期及算术运算

pd.Period 表示时期，比如某几天、某个月或某几个月等。例如统计每个月的销售额时，就可以用时期作为单位。

In [63]:
# A bare year with no explicit frequency creates an annual period.
p1 = pd.Period(2010)
p1

Period('2010', 'A-DEC')

In [64]:
# Adding an integer shifts the period by that many units of its frequency
# (here: two years).
p2 = p1 + 2
p2

Period('2012', 'A-DEC')

In [65]:
# Distance between two periods of the same frequency.
# NOTE(review): the recorded output is the integer 2; newer pandas versions
# may return an offset object instead. Confirm against the installed version.
p2 - p1

2L

In [70]:
# Rebind p1 to a monthly period; with freq='M' a bare year resolves to
# January of that year.
p1 = pd.Period(2016, freq='M')
p1

Period('2016-01', 'M')

In [71]:
# Three months after p1.
p1 + 3

Period('2016-04', 'M')

### 时期序列

In [73]:
# Twelve consecutive monthly periods starting at 2016-01.
pd.period_range(start='2016-01', periods=12, freq='M')

PeriodIndex(['2016-01', '2016-02', '2016-03', '2016-04', '2016-05', '2016-06',
             '2016-07', '2016-08', '2016-09', '2016-10', '2016-11', '2016-12'],
            dtype='int64', freq='M')

In [74]:
# Monthly periods from start to end; both endpoints are included.
pd.period_range(start='2016-01', end='2016-10', freq='M')

PeriodIndex(['2016-01', '2016-02', '2016-03', '2016-04', '2016-05', '2016-06',
             '2016-07', '2016-08', '2016-09', '2016-10'],
            dtype='int64', freq='M')