In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime

In [2]:
# A Period represents a span of time (a time interval), not a single instant.
# 'Y' is the annual frequency ending in December; older pandas versions
# display it as 'A-DEC'. The 'A' / 'A-DEC' spelling is deprecated since
# pandas 2.2, while 'Y' is accepted by both old and new versions.
# This Period covers the whole span from 2007-01-01 through 2007-12-31.
p = pd.Period(2007, freq='Y')
p

Period('2007', 'A-DEC')

In [3]:
# Periods support plain integer arithmetic: adding or subtracting n shifts
# the period by n units of its own frequency.
display(
    p + 5,
    p - 2,
)

Period('2012', 'A-DEC')

Period('2005', 'A-DEC')

In [4]:
# When two periods share the same frequency, their difference is the number
# of frequency units between them. Reusing p.freq guarantees that the two
# frequencies match and avoids the 'A-DEC' string alias, which is deprecated
# since pandas 2.2.
pd.Period('2014', freq=p.freq) - p

<7 * YearEnds: month=12>

In [5]:
# period_range builds a regular sequence of periods. With freq='M' the two
# endpoint dates are mapped to the months that contain them.
rng = pd.period_range(start='2000-01-01', end='2000-06-30', freq='M')
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [6]:
# A PeriodIndex can serve as the index of any pandas data structure.
# The number of random values is derived from the index (as the later cells
# do) instead of being hard-coded, so the two lengths can never disagree.
pd.Series(np.random.randn(len(rng)), index=rng)

2000-01    0.071456
2000-02   -0.434606
2000-03   -0.418830
2000-04   -1.470933
2000-05    1.105196
2000-06    0.384940
Freq: M, dtype: float64

In [7]:
# A PeriodIndex can be built directly from an array of period strings.
values = [
    '2001Q3',
    '2002Q2',
    '2003Q1',
]
index = pd.PeriodIndex(data=values, freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

### 区间频率的转换

In [8]:
# An annual period whose last month is December (older pandas versions
# display this frequency as 'A-DEC'). The offset object is used instead of
# the 'A-DEC' string alias, which is deprecated since pandas 2.2.
p = pd.Period('2007', freq=pd.offsets.YearEnd(month=12))
display(p)
display(p.asfreq('M', how='start'))  # annual -> monthly: first month of the year
display(p.asfreq('M', how='end'))    # annual -> monthly: last month of the year

Period('2007', 'A-DEC')

Period('2007-01', 'M')

Period('2007-12', 'M')

In [9]:
# An annual period whose last month is June (older pandas versions display
# this frequency as 'A-JUN'), i.e. a fiscal year running July -> June.
# The offset object is used instead of the 'A-JUN' string alias, which is
# deprecated since pandas 2.2.
p = pd.Period('2007', freq=pd.offsets.YearEnd(month=6))
display(p)
display(p.asfreq('M', how='start'))  # annual -> monthly: first month of the fiscal year
display(p.asfreq('M', how='end'))    # annual -> monthly: last month of the fiscal year

Period('2007', 'A-JUN')

Period('2006-07', 'M')

Period('2007-06', 'M')

In [10]:
# A monthly period.
p = pd.Period('Aug-2007', 'M')
display(p)
# Monthly -> annual with the year ending in June: August 2007 falls in the
# year that ends in June 2008, so the result is the 2008 period.
# The offset object replaces the 'A-JUN' string alias, which is deprecated
# since pandas 2.2.
display(p.asfreq(pd.offsets.YearEnd(month=6)))

Period('2007-08', 'M')

Period('2008', 'A-JUN')

In [11]:
# A Series indexed by annual (December year-end) periods. 'Y' is used in
# place of the 'A-DEC' alias, which is deprecated since pandas 2.2; older
# pandas versions still display the frequency as 'A-DEC'.
rng = pd.period_range('2006', '2009', freq='Y')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
display(ts)
# asfreq on a period-indexed Series converts the index and keeps the values:
display(ts.asfreq('M', how='start'))  # first month of each year
display(ts.asfreq('B', how='end'))    # last business day of each year

2006    0.071904
2007    0.883457
2008    0.683092
2009   -0.153910
Freq: A-DEC, dtype: float64

2006-01    0.071904
2007-01    0.883457
2008-01    0.683092
2009-01   -0.153910
Freq: M, dtype: float64

2006-12-29    0.071904
2007-12-31    0.883457
2008-12-31    0.683092
2009-12-31   -0.153910
Freq: B, dtype: float64

### 季度区间频率

In [12]:
# Quarterly periods: with 'Q-JAN' the fiscal year ends in January, so 2012Q4
# runs from 2011-11-01 through 2012-01-31.
p = pd.Period('2012Q4', freq='Q-JAN')
display(
    p,
    p.asfreq('D', how='start'),  # first day of the quarter
    p.asfreq('D', how='end'),    # last day of the quarter
)

Period('2012Q4', 'Q-JAN')

Period('2011-11-01', 'D')

Period('2012-01-31', 'D')

In [13]:
# Timestamp for 4 PM on the second-to-last business day of the quarter:
#   1. asfreq('B', 'end')      -> last business day of the quarter
#   2. - 1                     -> the business day before it
#   3. asfreq('min', 'start')  -> first minute (00:00) of that day
#   4. + 16 * 60               -> advance 16 hours' worth of minutes -> 16:00
# 'min' replaces the 'T' minute alias, which is deprecated since pandas 2.2;
# 'min' is accepted by old and new versions alike.
p = pd.Period('2012Q4', freq='Q-JAN')
p4pm = (p.asfreq('B', 'end') - 1).asfreq('min', 'start') + 16 * 60
display(p4pm)
display(p4pm.to_timestamp())

Period('2012-01-30 16:00', 'T')

Timestamp('2012-01-30 16:00:00')

In [14]:
# A Series indexed by fiscal quarters (fiscal year ending in January).
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')
ts = pd.Series(np.arange(len(rng)), index=rng)
display(ts)

# Re-index every quarter at 4 PM on its second-to-last business day:
# last business day -> minus one business day -> first minute of that day
# -> plus 16 * 60 minutes (16:00).
# 'min' replaces the 'T' minute alias, which is deprecated since pandas 2.2.
new_rng = (rng.asfreq('B', 'end') - 1).asfreq('min', 'start') + 16 * 60
ts.index = new_rng.to_timestamp()
display(ts)

2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int64

2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int64

### 时间戳与时间区间的相互转换

In [15]:
# Three month-end timestamps. The MonthEnd offset object is used instead of
# the 'M' string alias: for date_range, 'M' is deprecated since pandas 2.2
# (renamed 'ME'), whereas the offset object works on old and new versions.
rng = pd.date_range('2000-01-01', periods=3, freq=pd.offsets.MonthEnd())
ts = pd.Series(np.random.randn(len(rng)), index=rng)
display(ts)

# to_period() without arguments lets pandas infer the period frequency from
# the timestamp index (monthly here).
pts = ts.to_period()
display(pts)

2000-01-31    0.429991
2000-02-29   -1.446119
2000-03-31    0.075729
Freq: M, dtype: float64

2000-01    0.429991
2000-02   -1.446119
2000-03    0.075729
Freq: M, dtype: float64

In [16]:
# Six consecutive daily timestamps straddling the January/February boundary.
rng = pd.date_range(start='1/29/2000', periods=6, freq='D')
ts2 = pd.Series(data=np.random.randn(len(rng)), index=rng)

display(
    ts2,
    ts2.to_period(),          # inferred frequency: daily periods
    ts2.to_period(freq='M'),  # explicit target frequency: several days share one monthly period
)

2000-01-29    0.906331
2000-01-30   -0.873559
2000-01-31    0.036995
2000-02-01    0.503518
2000-02-02    0.152255
2000-02-03   -1.709124
Freq: D, dtype: float64

2000-01-29    0.906331
2000-01-30   -0.873559
2000-01-31    0.036995
2000-02-01    0.503518
2000-02-02    0.152255
2000-02-03   -1.709124
Freq: D, dtype: float64

2000-01    0.906331
2000-01   -0.873559
2000-01    0.036995
2000-02    0.503518
2000-02    0.152255
2000-02   -1.709124
Freq: M, dtype: float64

In [17]:
# Convert periods back to timestamps; how='end' anchors each value at the
# last nanosecond of its period (end of the day / end of the month).
display(
    ts2.to_period().to_timestamp(how='end'),
    ts2.to_period('M').to_timestamp(how='end'),
)

2000-01-29 23:59:59.999999999    0.906331
2000-01-30 23:59:59.999999999   -0.873559
2000-01-31 23:59:59.999999999    0.036995
2000-02-01 23:59:59.999999999    0.503518
2000-02-02 23:59:59.999999999    0.152255
2000-02-03 23:59:59.999999999   -1.709124
Freq: D, dtype: float64

2000-01-31 23:59:59.999999999    0.906331
2000-01-31 23:59:59.999999999   -0.873559
2000-01-31 23:59:59.999999999    0.036995
2000-02-29 23:59:59.999999999    0.503518
2000-02-29 23:59:59.999999999    0.152255
2000-02-29 23:59:59.999999999   -1.709124
dtype: float64

### 从数组生成PeriodIndex

In [18]:
# Load the macroeconomic sample data set. The year and the quarter of each
# observation are stored in two separate columns.
data = pd.read_csv('../data/examples/macrodata.csv')
display(
    data.head(5),
    data['year'],
    data['quarter'],
)

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


0      1959.0
1      1959.0
2      1959.0
3      1959.0
4      1960.0
        ...  
198    2008.0
199    2008.0
200    2009.0
201    2009.0
202    2009.0
Name: year, Length: 203, dtype: float64

0      1.0
1      2.0
2      3.0
3      4.0
4      1.0
      ... 
198    3.0
199    4.0
200    1.0
201    2.0
202    3.0
Name: quarter, Length: 203, dtype: float64

In [19]:
# Combine the separate year and quarter columns into a quarterly PeriodIndex.
# Passing year=/quarter= to the PeriodIndex constructor is deprecated since
# pandas 2.2 in favour of PeriodIndex.from_fields, so use from_fields when it
# exists and fall back to the constructor on older versions that lack it.
# The columns are read as floats (1959.0, 1.0, ...), so they are cast to
# integers before being used as period fields.
if hasattr(pd.PeriodIndex, 'from_fields'):
    index = pd.PeriodIndex.from_fields(
        year=data.year.astype(int),
        quarter=data.quarter.astype(int),
        freq='Q-DEC',
    )
else:
    index = pd.PeriodIndex(
        year=data.year.astype(int),
        quarter=data.quarter.astype(int),
        freq='Q-DEC',
    )
display(index)

# Use the quarterly periods as the row labels of the data set.
data.index = index
display(data)

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
1959Q1,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.980,139.7,2.82,5.8,177.146,0.00,0.00
1959Q2,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.150,141.7,3.08,5.1,177.830,2.34,0.74
1959Q3,1959.0,3.0,2775.488,1751.8,289.226,491.260,1916.4,29.350,140.5,3.82,5.3,178.657,2.74,1.09
1959Q4,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.370,140.0,4.33,5.6,179.386,0.27,4.06
1960Q1,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.540,139.6,3.50,5.2,180.007,2.31,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2008Q3,2008.0,3.0,13324.600,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.270,-3.16,4.33
2008Q4,2008.0,4.0,13141.920,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
2009Q1,2009.0,1.0,12925.410,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
2009Q2,2009.0,2.0,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19
