# 日期的范围、频率以及移动

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
from pandas import DataFrame, Series
from pandas.tseries.offsets import Day, Hour, Minute, MonthEnd

In [9]:
# Six irregularly spaced days in January 2011 serve as the index of a
# random-valued demo series.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = Series(np.random.randn(len(dates)), index=dates)
ts

2011-01-02   -0.898493
2011-01-05   -0.042470
2011-01-07   -0.196897
2011-01-08   -0.937957
2011-01-10    1.979414
2011-01-12   -1.508051
dtype: float64

In [4]:
# Iterate over the daily resampler: each step yields (bin label, sub-Series).
# Days that are absent from ts show up as an empty Series in the printed output.
for k, v in ts.resample('D'):
    print(k, v)


2011-01-02 00:00:00 2011-01-02   -0.100168
dtype: float64
2011-01-03 00:00:00 Series([], dtype: float64)
2011-01-04 00:00:00 Series([], dtype: float64)
2011-01-05 00:00:00 2011-01-05   -0.669363
dtype: float64
2011-01-06 00:00:00 Series([], dtype: float64)
2011-01-07 00:00:00 2011-01-07   -0.554793
dtype: float64
2011-01-08 00:00:00 2011-01-08    0.004142
dtype: float64
2011-01-09 00:00:00 Series([], dtype: float64)
2011-01-10 00:00:00 2011-01-10    0.069639
dtype: float64
2011-01-11 00:00:00 Series([], dtype: float64)
2011-01-12 00:00:00 2011-01-12   -0.931133
dtype: float64


## 生成日期范围

In [11]:
# Daily range between the two endpoints (no freq given, so one entry per day).
index = pd.date_range(start='4/1/2012', end='6/1/2012')
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [12]:
pd.date_range(start='4/1/2012', periods=20) # 20 consecutive days counted forward from the start date


DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [13]:
pd.date_range(end='6/1/2012', periods=20) # 20 consecutive days counted backwards from the end date


DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

In [14]:
# NOTE(review): recent pandas releases spell this alias 'BME' — confirm against the installed version.
pd.date_range('1/1/2000', '12/1/2000', freq='BM') # BM = business month end: each entry is the last business day of its month


DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [15]:
pd.date_range('5/2/2012 12:56:31', periods=5) # the time of day of the start timestamp is kept on every entry


DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

In [16]:
pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True) # normalize=True drops the time of day (entries fall on midnight)


DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

## 频率和日期偏移量

In [18]:
# A one-hour offset and a four-hour offset built from the same Hour class.
hour, four_hours = Hour(), Hour(4)


In [21]:
pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h') # 4-hour step, both endpoints inclusive; an end of 1/4/2000 00:00 would be generated as well


DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [22]:
# Offsets of different units can be added; the displayed result is expressed in minutes.
Hour(2) + Minute(30)


<150 * Minutes>

In [23]:
# A compound frequency string: '1h30min' produces entries 90 minutes apart.
pd.date_range('1/1/2000', periods=10, freq='1h30min')


DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')

### 时间序列基础频率

- `D`：       每日
- `B`：       每工作日
- `H`：       每小时
- `T/min`：   每分钟
- `S`：       每秒
- `L/ms`：    每毫秒
- `U`：       每微秒
- `M`：       每月最后一个日历日
- `BM`：      每月最后一个工作日
- `MS`：      每月第一个日历日
- `BMS`：     每月第一个工作日
- `W-MON`：   每周从指定的星期几开始算起
- `WOM-1MON`：产生每月的第一、第二、第x周的周几。WOM-3FRI表示每月第三个星期五。
- `Q-JAN`：   对于以指定月份结束的年度，每季度最后一月的最后一个日历日。
- `BQ-JAN`：  对于以指定月份结束的年度，每季度最后一月的最后一个工作日。
- `QS-JAN`：  对于以指定月份结束的年度，每季度最后一月的第一个日历日。
- `BQS-JAN`： 对于以指定月份结束的年度，每季度最后一月的第一个工作日。
- `A-JAN`：   每年指定月份的最后一个日历日 
- `BA-JAN`：  每年指定月份的最后一个工作日
- `AS-JAN`：  每年指定月份的第一个日历日
- `BAS-JAN`： 每年指定月份的第一个工作日


## WOM日期（Week Of Month）

In [24]:
# WOM-3FRI = "week of month": the third Friday of every month in the window.
rng = pd.date_range(start='10/1/2017', end='12/31/2017', freq='WOM-3FRI')
rng

DatetimeIndex(['2017-10-20', '2017-11-17', '2017-12-15'], dtype='datetime64[ns]', freq='WOM-3FRI')

## 移动（超前和滞后）数据

In [25]:
# Four random observations stamped on consecutive month ends of 2000.
# The frequency is given as the MonthEnd offset object (imported at the top)
# rather than the 'M' string alias: the object is accepted wherever 'M' is,
# and keeps working on pandas releases that renamed the string alias.
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4, freq=MonthEnd()))
ts

2000-01-31    1.325051
2000-02-29   -1.895597
2000-03-31   -0.774225
2000-04-30   -1.099923
Freq: M, dtype: float64

In [26]:
ts.shift(2) # values move two positions later; the index labels stay where they are


2000-01-31         NaN
2000-02-29         NaN
2000-03-31    1.325051
2000-04-30   -1.895597
Freq: M, dtype: float64

In [27]:
# Negative periods move the values two positions earlier; the tail becomes NaN.
ts.shift(-2)


2000-01-31   -0.774225
2000-02-29   -1.099923
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

In [28]:
ts / ts.shift(1) - 1 # rate of change relative to the previous record, e.g. a stock's rise or fall

# Equivalent to (ts - ts.shift(1)) / ts.shift(1)

2000-01-31         NaN
2000-02-29   -2.430584
2000-03-31   -0.591566
2000-04-30    0.420675
Freq: M, dtype: float64

In [29]:
# With freq given, the index labels are moved instead of the values: every
# timestamp advances by two month ends. The MonthEnd offset object (imported at
# the top) replaces the 'M' string alias so the call also works on pandas
# releases that renamed that alias.
ts.shift(2, freq=MonthEnd())


2000-03-31    1.325051
2000-04-30   -1.895597
2000-05-31   -0.774225
2000-06-30   -1.099923
Freq: M, dtype: float64

In [32]:
ts.shift(3, freq='D') # every timestamp of the original ts is moved 3 days later


2000-02-03    1.325051
2000-03-03   -1.895597
2000-04-03   -0.774225
2000-05-03   -1.099923
dtype: float64

In [35]:
ts.shift(1, freq='3D') # one step of '3D' gives the same result as three steps of 'D'


2000-02-03    1.325051
2000-03-03   -1.895597
2000-04-03   -0.774225
2000-05-03   -1.099923
dtype: float64

## 通过偏移量对日期进行位移

In [36]:
# Reference date for the offset arithmetic; adding a 3-day offset to a plain
# datetime displays a pandas Timestamp.
now = datetime(year=2011, month=11, day=17)
now + Day(3)

Timestamp('2011-11-20 00:00:00')

In [37]:
now + MonthEnd() # moves to the end of the current month


Timestamp('2011-11-30 00:00:00')

In [38]:
now + MonthEnd(2) # moves to the end of the following month


Timestamp('2011-12-31 00:00:00')

In [39]:
# Month-end offset reused below for rolling dates onto a month boundary.
offset = MonthEnd()
# First line printed: rollforward(now), the month end on or after `now`; for
# this mid-month date it matches now + MonthEnd(). Second line: rollback(now),
# the month end on or before `now`. A date already sitting on a month end is
# returned unchanged by both, which is where they differ from adding the offset.
print(offset.rollforward(now), offset.rollback(now), sep='\n')

2011-11-30 00:00:00
2011-10-31 00:00:00


In [41]:
# Twenty random observations spaced four days apart, starting 2000-01-15 and
# spanning three calendar months. The frequency uses the canonical uppercase
# alias '4D' (the one pandas itself reports as "Freq: 4D"); the lowercase
# spelling is not accepted uniformly across pandas versions.
ts = Series(np.random.randn(20),
            index=pd.date_range('1/15/2000', periods=20, freq='4D'))
ts

2000-01-15    0.369232
2000-01-19    1.330381
2000-01-23    0.536198
2000-01-27   -0.535385
2000-01-31   -0.897669
2000-02-04   -0.916960
2000-02-08   -0.530554
2000-02-12    0.036835
2000-02-16    0.738474
2000-02-20   -1.173877
2000-02-24   -1.805578
2000-02-28   -1.191742
2000-03-03    0.902833
2000-03-07   -0.004377
2000-03-11   -0.957413
2000-03-15    0.981054
2000-03-19    1.141097
2000-03-23   -0.295033
2000-03-27   -0.709023
2000-03-31    0.134466
Freq: 4D, dtype: float64

In [42]:
ts.groupby(offset.rollforward).mean() # group by the result of rollforward on each index label, then average each group
# Neat trick: passing the bound method as the grouping key buckets the rows by month end.

2000-01-31    0.160552
2000-02-29   -0.691915
2000-03-31    0.149201
dtype: float64

In [43]:
# The older, more direct spelling passed the aggregation through how=.
# resample() no longer accepts that keyword and raises TypeError. The call is
# kept to demonstrate the failure, but the error is caught and printed so that
# Restart & Run All can continue past this cell.
# Replacement: call the aggregation as a method, i.e. resample(...).mean().
try:
    ts.resample('M', how='mean')
except TypeError as err:
    print('TypeError:', err)

TypeError: resample() got an unexpected keyword argument 'how'

In [44]:
# Monthly mean via the method form of resample. The rule is the MonthEnd offset
# object (imported at the top) instead of the 'M' string alias, so the cell
# also runs on pandas releases that renamed that alias; bins are still labelled
# by month end.
ts.resample(MonthEnd()).mean()

2000-01-31    0.160552
2000-02-29   -0.691915
2000-03-31    0.149201
Freq: M, dtype: float64