# 时期及其算术运算

In [2]:
import numpy as np
import pandas as pd
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
from pandas import DataFrame, Series
from pandas.tseries.offsets import Day, Hour, Minute, MonthEnd

In [2]:
p = pd.Period(2007, freq='A-DEC')
p

Period('2007', 'A-DEC')

In [3]:
p + 5 


Period('2012', 'A-DEC')

In [4]:
p - 2


Period('2005', 'A-DEC')

In [5]:
pd.Period('2014', freq='A-DEC') - p


<7 * YearEnds: month=12>

In [6]:
rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')
rng

PeriodIndex(['2000-01', '2000-02', '2000-03', '2000-04', '2000-05', '2000-06'], dtype='period[M]', freq='M')

In [7]:
Series(np.random.randn(6), index=rng)


2000-01   -0.068650
2000-02   -0.410605
2000-03   -0.026792
2000-04   -0.813343
2000-05   -0.686088
2000-06   -0.316644
Freq: M, dtype: float64

In [8]:
values = ['2001Q3', '2002Q2', '2003Q1']
index = pd.PeriodIndex(values, freq='Q-DEC')
index

PeriodIndex(['2001Q3', '2002Q2', '2003Q1'], dtype='period[Q-DEC]', freq='Q-DEC')

## 时期的频率转换

In [9]:
p = pd.Period('2007', freq='A-DEC')


In [10]:
p.asfreq('M', how='start')


Period('2007-01', 'M')

In [11]:
p.asfreq('M', how='end')


Period('2007-12', 'M')

In [12]:
p = pd.Period('2007', freq='A-JUN') # 截至到2007/6/30的一年时间
p.start_time

Timestamp('2006-07-01 00:00:00')

In [13]:
p.asfreq('M', 'start')

# cc：所以，在年度上，以月为步长，开始月为2006年7月


Period('2006-07', 'M')

In [14]:
p.asfreq('M', 'end')


Period('2007-06', 'M')

In [16]:
p = pd.Period('2007-08', 'M')
p.asfreq('A-JUN')
p

Period('2007-08', 'M')

In [17]:
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
ts

2006    1.330618
2007   -0.646707
2008    0.785307
2009   -0.543137
Freq: A-DEC, dtype: float64

In [18]:
ts.asfreq('M', how='start')


2006-01    1.330618
2007-01   -0.646707
2008-01    0.785307
2009-01   -0.543137
Freq: M, dtype: float64

In [19]:
ts.asfreq('B', how='end') # 每年最后一个工作日


2006-12-29    1.330618
2007-12-31   -0.646707
2008-12-31    0.785307
2009-12-31   -0.543137
Freq: B, dtype: float64

## 按季度计算的时期频率

In [20]:
# Q代表季度为单位（开始时间要减去1个季度，Q4请忽略，这里没卵用。），
# JAN代表1月，结合Q，取1/31。
p = pd.Period('2012Q4', freq='Q-JAN')
p

Period('2012Q4', 'Q-JAN')

In [21]:
p.asfreq('D', 'start')


Period('2011-11-01', 'D')

In [22]:
p.asfreq('D', 'end')


Period('2012-01-31', 'D')

In [23]:
p.asfreq('B', 'e')


Period('2012-01-31', 'B')

In [24]:
# 2012/01/30
# 频率变成分钟
# 再加16小时
p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60
p4pm

Period('2012-01-30 16:00', 'T')

In [25]:
rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN') # 以Q为单位
ts = Series(np.arange(len(rng)), index=rng)
ts

2011Q3    0
2011Q4    1
2012Q1    2
2012Q2    3
2012Q3    4
2012Q4    5
Freq: Q-JAN, dtype: int64

In [26]:
new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60 # 最后一个工作日减1的下午4点
ts.index = new_rng.to_timestamp()
ts

2010-10-28 16:00:00    0
2011-01-28 16:00:00    1
2011-04-28 16:00:00    2
2011-07-28 16:00:00    3
2011-10-28 16:00:00    4
2012-01-30 16:00:00    5
dtype: int64

## 将Timestamp转换为Period（及其反向过程）

In [28]:
rng = pd.date_range('1/1/2000', periods=3, freq='M')
ts = Series(np.random.randn(3), index=rng)
pts = ts.to_period()
ts

2000-01-31   -0.742597
2000-02-29   -0.591384
2000-03-31    0.421001
Freq: M, dtype: float64

In [29]:
print(type(pts))
pts # 因为rng是以月为单位，to_period以后把日省略。

<class 'pandas.core.series.Series'>


2000-01   -0.742597
2000-02   -0.591384
2000-03    0.421001
Freq: M, dtype: float64

In [3]:
rng = pd.date_range('1/29/2000', periods=6, freq='D')
ts2 = Series(np.random.randn(6), index=rng)
print(ts2)
print(ts2.to_period('M'))

2000-01-29    0.888721
2000-01-30    0.736044
2000-01-31   -1.538739
2000-02-01    0.271469
2000-02-02   -0.456637
2000-02-03    0.663493
Freq: D, dtype: float64
2000-01    0.888721
2000-01    0.736044
2000-01   -1.538739
2000-02    0.271469
2000-02   -0.456637
2000-02    0.663493
Freq: M, dtype: float64


In [31]:
pts = ts.to_period()
pts

2000-01   -0.742597
2000-02   -0.591384
2000-03    0.421001
Freq: M, dtype: float64

In [32]:
pts.to_timestamp(how='end') # 还原到timestamp


2000-01-31 23:59:59.999999999   -0.742597
2000-02-29 23:59:59.999999999   -0.591384
2000-03-31 23:59:59.999999999    0.421001
dtype: float64

## 通过数组创建PeriodIndex

In [33]:
data = pd.read_csv('data/macrodata.csv')
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [34]:
index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC') # 根据年份和季度构造索引，12/31为年度结束日。
index

PeriodIndex(['1959Q1', '1959Q2', '1959Q3', '1959Q4', '1960Q1', '1960Q2',
             '1960Q3', '1960Q4', '1961Q1', '1961Q2',
             ...
             '2007Q2', '2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3',
             '2008Q4', '2009Q1', '2009Q2', '2009Q3'],
            dtype='period[Q-DEC]', length=203, freq='Q-DEC')

In [35]:
data.index = index
print(data.infl.head())
print(data.infl.tail())

1959Q1    0.00
1959Q2    2.34
1959Q3    2.74
1959Q4    0.27
1960Q1    2.31
Freq: Q-DEC, Name: infl, dtype: float64
2008Q3   -3.16
2008Q4   -8.79
2009Q1    0.94
2009Q2    3.37
2009Q3    3.56
Freq: Q-DEC, Name: infl, dtype: float64
