In [144]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series

In [146]:
'''频率不同的时间序列的运算'''
ts1=Series(np.random.randn(3),
          index=pd.date_range('2012-6-13',periods=3,freq='W-WED'))
ts1

2012-06-13    0.477332
2012-06-20   -0.275172
2012-06-27   -1.181320
Freq: W-WED, dtype: float64

In [147]:
ts1.resample('B').ffill()

2012-06-13    0.477332
2012-06-14    0.477332
2012-06-15    0.477332
2012-06-18    0.477332
2012-06-19    0.477332
2012-06-20   -0.275172
2012-06-21   -0.275172
2012-06-22   -0.275172
2012-06-25   -0.275172
2012-06-26   -0.275172
2012-06-27   -1.181320
Freq: B, dtype: float64

In [148]:
dates=pd.DatetimeIndex(['2012-6-12','2012-6-17','2012-6-18',
                        '2012-6-21','2012-6-22','2012-6-29'])
ts2 = Series(np.random.randn(6),index=dates)
ts2

2012-06-12   -1.517583
2012-06-17    0.930697
2012-06-18    0.343850
2012-06-21   -1.373333
2012-06-22    0.402291
2012-06-29   -0.013764
dtype: float64

In [149]:
ts1.reindex(ts2.index).ffill()

2012-06-12   NaN
2012-06-17   NaN
2012-06-18   NaN
2012-06-21   NaN
2012-06-22   NaN
2012-06-29   NaN
dtype: float64

In [150]:
ts2+ts1.reindex(ts2.index).ffill()

2012-06-12   NaN
2012-06-17   NaN
2012-06-18   NaN
2012-06-21   NaN
2012-06-22   NaN
2012-06-29   NaN
dtype: float64

In [152]:
'''
使用period
'''
gdp= Series([1.78,1.94,2.08,2.01,2.15,2.31,2.46],
           index = pd.period_range('1984Q2',periods=7,freq='Q-SEP'))
infl = Series([0.025,0.045,0.037,0.04],
             index = pd.period_range('1982',periods=4, freq='A-DEC'))
gdp

1984Q2    1.78
1984Q3    1.94
1984Q4    2.08
1985Q1    2.01
1985Q2    2.15
1985Q3    2.31
1985Q4    2.46
Freq: Q-SEP, dtype: float64

In [153]:
infl

1982    0.025
1983    0.045
1984    0.037
1985    0.040
Freq: A-DEC, dtype: float64

In [155]:
infl_q = infl.asfreq('Q-SEP',how='end')
infl_q

1983Q1    0.025
1984Q1    0.045
1985Q1    0.037
1986Q1    0.040
Freq: Q-SEP, dtype: float64

In [158]:
'''
时间和“最当前”数据选取
'''
#生成一个交易日内的日期范围和时间序列
rng = pd.date_range('2012-06-01 09:30','2012-06-01 15:59',freq='T')
#生成5天的时间点
rng=rng.append([rng+pd.offsets.BDay(i) for i in range(1,4)])
ts=Series(np.arange(len(rng),dtype=float),index=rng)
ts

2012-06-01 09:30:00       0.0
2012-06-01 09:31:00       1.0
2012-06-01 09:32:00       2.0
2012-06-01 09:33:00       3.0
2012-06-01 09:34:00       4.0
                        ...  
2012-06-06 15:55:00    1555.0
2012-06-06 15:56:00    1556.0
2012-06-06 15:57:00    1557.0
2012-06-06 15:58:00    1558.0
2012-06-06 15:59:00    1559.0
Length: 1560, dtype: float64

In [159]:
from datetime import time
ts[time(10,0)]

2012-06-01 10:00:00      30.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     810.0
2012-06-06 10:00:00    1200.0
dtype: float64

In [161]:
ts.at_time(time(10,0))

2012-06-01 10:00:00      30.0
2012-06-04 10:00:00     420.0
2012-06-05 10:00:00     810.0
2012-06-06 10:00:00    1200.0
dtype: float64

In [162]:
ts.between_time(time(10,0),time(10,1))

2012-06-01 10:00:00      30.0
2012-06-01 10:01:00      31.0
2012-06-04 10:00:00     420.0
2012-06-04 10:01:00     421.0
2012-06-05 10:00:00     810.0
2012-06-05 10:01:00     811.0
2012-06-06 10:00:00    1200.0
2012-06-06 10:01:00    1201.0
dtype: float64

In [163]:
indexer=np.sort(np.random.permutation(len(ts))[700:])
irr_ts=ts.copy()
irr_ts[indexer]=np.nan
irr_ts['2012-06-01 09:50':'2012-06-01 10:00']

2012-06-01 09:50:00     NaN
2012-06-01 09:51:00     NaN
2012-06-01 09:52:00    22.0
2012-06-01 09:53:00    23.0
2012-06-01 09:54:00    24.0
2012-06-01 09:55:00    25.0
2012-06-01 09:56:00     NaN
2012-06-01 09:57:00     NaN
2012-06-01 09:58:00    28.0
2012-06-01 09:59:00    29.0
2012-06-01 10:00:00    30.0
dtype: float64

In [164]:
selection = pd.date_range('2012-06-01 10:00',periods=4,freq='B')
irr_ts.asof(selection)

2012-06-01 10:00:00      30.0
2012-06-04 10:00:00     419.0
2012-06-05 10:00:00     806.0
2012-06-06 10:00:00    1199.0
Freq: B, dtype: float64

In [166]:
'''
拼接多个数据源
'''
data1=DataFrame(np.ones((6,3),dtype=float),
                columns=['a','b','c'],
                index=pd.date_range('6/12/2012',periods=6))
data2=DataFrame(np.ones((6,3),dtype=float)*2,
               columns=['a','b','c'],
               index=pd.date_range('6/13/2012',periods=6))
spliced=pd.concat([data1.loc[:'2012-06-14'],data2.loc['2012-06-15':]])
spliced

Unnamed: 0,a,b,c
2012-06-12,1.0,1.0,1.0
2012-06-13,1.0,1.0,1.0
2012-06-14,1.0,1.0,1.0
2012-06-15,2.0,2.0,2.0
2012-06-16,2.0,2.0,2.0
2012-06-17,2.0,2.0,2.0
2012-06-18,2.0,2.0,2.0


In [167]:
data2=DataFrame(np.ones((6,4),dtype=float)*2,
               columns=['a','b','c','d'],
               index=pd.date_range('6/13/2012',periods=6))
spliced=pd.concat([data1.loc[:'2012-06-14'], data2.loc['2012-06-15':]])
spliced

Unnamed: 0,a,b,c,d
2012-06-12,1.0,1.0,1.0,
2012-06-13,1.0,1.0,1.0,
2012-06-14,1.0,1.0,1.0,
2012-06-15,2.0,2.0,2.0,2.0
2012-06-16,2.0,2.0,2.0,2.0
2012-06-17,2.0,2.0,2.0,2.0
2012-06-18,2.0,2.0,2.0,2.0


In [168]:
spliced_filled = spliced.combine_first(data2)
spliced_filled

Unnamed: 0,a,b,c,d
2012-06-12,1.0,1.0,1.0,
2012-06-13,1.0,1.0,1.0,2.0
2012-06-14,1.0,1.0,1.0,2.0
2012-06-15,2.0,2.0,2.0,2.0
2012-06-16,2.0,2.0,2.0,2.0
2012-06-17,2.0,2.0,2.0,2.0
2012-06-18,2.0,2.0,2.0,2.0


In [169]:
cp_spliced = spliced.copy()
cp_spliced[['a','c']]=data1[['a','c']]
cp_spliced

Unnamed: 0,a,b,c,d
2012-06-12,1.0,1.0,1.0,
2012-06-13,1.0,1.0,1.0,
2012-06-14,1.0,1.0,1.0,
2012-06-15,1.0,2.0,1.0,2.0
2012-06-16,1.0,2.0,1.0,2.0
2012-06-17,1.0,2.0,1.0,2.0
2012-06-18,,2.0,,2.0
