# 时间日期，时间序列

In [1]:
import pandas as pd
import numpy as np

In [2]:
from datetime import datetime
from datetime import timedelta

In [4]:
now = datetime.now() #当前时间

In [5]:
now

datetime.datetime(2018, 7, 12, 10, 18, 13, 454082)

In [6]:
now.year, now.month, now.day

(2018, 7, 12)

In [7]:
data1 = datetime(2016,4,24)
data2 = datetime(2016,4,20)
delta = data1 - data2 #时间差

In [8]:
delta.days

4

In [9]:
date = datetime(2016, 3, 20, 8, 30)
date

datetime.datetime(2016, 3, 20, 8, 30)

In [10]:
str(date)

'2016-03-20 08:30:00'

In [11]:
date.strftime('%y/%m/%d %H:%M:%S')

'16/03/20 08:30:00'

In [13]:
datetime.strptime('2016-03-20 08:30', '%Y-%m-%d %H:%M')#时间格式转换

datetime.datetime(2016, 3, 20, 8, 30)

In [14]:
dates = [datetime(2016, 2, 1), datetime(2016, 2, 2), datetime(2016, 2, 3), datetime(2016, 2, 4)]

In [15]:
# 创建时间序列

In [16]:
s = pd.Series(np.random.randn(4), index=dates)
s

2016-02-01   -0.654616
2016-02-02   -0.398746
2016-02-03    1.304034
2016-02-04    0.921586
dtype: float64

In [17]:
type(s.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [18]:
#生成时间戳的方法
pd.date_range('20160320', '20160330')

DatetimeIndex(['2016-03-20', '2016-03-21', '2016-03-22', '2016-03-23',
               '2016-03-24', '2016-03-25', '2016-03-26', '2016-03-27',
               '2016-03-28', '2016-03-29', '2016-03-30'],
              dtype='datetime64[ns]', freq='D')

In [19]:
pd.date_range('20160320', periods=10)

DatetimeIndex(['2016-03-20', '2016-03-21', '2016-03-22', '2016-03-23',
               '2016-03-24', '2016-03-25', '2016-03-26', '2016-03-27',
               '2016-03-28', '2016-03-29'],
              dtype='datetime64[ns]', freq='D')

In [20]:
pd.date_range('20160320 16:32:22', '20160330', normalize=True)#正则化去掉时间

DatetimeIndex(['2016-03-20', '2016-03-21', '2016-03-22', '2016-03-23',
               '2016-03-24', '2016-03-25', '2016-03-26', '2016-03-27',
               '2016-03-28', '2016-03-29', '2016-03-30'],
              dtype='datetime64[ns]', freq='D')

In [22]:
#时间频率,月份，周，几个小时
pd.date_range(start='20160320', periods=10, freq='M')

DatetimeIndex(['2016-03-31', '2016-04-30', '2016-05-31', '2016-06-30',
               '2016-07-31', '2016-08-31', '2016-09-30', '2016-10-31',
               '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [23]:
pd.date_range(start='20160320', periods=10, freq='W')

DatetimeIndex(['2016-03-20', '2016-03-27', '2016-04-03', '2016-04-10',
               '2016-04-17', '2016-04-24', '2016-05-01', '2016-05-08',
               '2016-05-15', '2016-05-22'],
              dtype='datetime64[ns]', freq='W-SUN')

In [24]:
pd.date_range(start='20160320', periods=10, freq='4H')

DatetimeIndex(['2016-03-20 00:00:00', '2016-03-20 04:00:00',
               '2016-03-20 08:00:00', '2016-03-20 12:00:00',
               '2016-03-20 16:00:00', '2016-03-20 20:00:00',
               '2016-03-21 00:00:00', '2016-03-21 04:00:00',
               '2016-03-21 08:00:00', '2016-03-21 12:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [25]:
##时期

In [28]:
p = pd.Period(2016)
p

Period('2016', 'A-DEC')

In [29]:
p = pd.Period(2016, freq='M')

In [30]:
p

Period('2016-01', 'M')

In [31]:
#时期的序列
pd.period_range('2018-01', periods=10, freq='M')

PeriodIndex(['2018-01', '2018-02', '2018-03', '2018-04', '2018-05', '2018-06',
             '2018-07', '2018-08', '2018-09', '2018-10'],
            dtype='period[M]', freq='M')

In [32]:
#按季度分时间序列
pd.period_range('2016Q1', periods=10, freq='Q')

PeriodIndex(['2016Q1', '2016Q2', '2016Q3', '2016Q4', '2017Q1', '2017Q2',
             '2017Q3', '2017Q4', '2018Q1', '2018Q2'],
            dtype='period[Q-DEC]', freq='Q-DEC')

In [33]:
#改变频率
p.asfreq('A-MAR')

Period('2016', 'A-MAR')

In [34]:
p = pd.Period('2016Q4', 'Q-JAN')# 以季节分时间，以每年1月份为结束
p.asfreq('M', how='start'), p.asfreq('M', how='end')

(Period('2015-11', 'M'), Period('2016-01', 'M'))

In [35]:
#获取该季度倒数第二个工作日下午4.20,换成工作日-分钟数
(p.asfreq('B') - 1).asfreq('T') + 16 * 60 + 20

Period('2016-01-29 16:19', 'T')

In [36]:
##timestamp和period的相互转化
s = pd.Series(np.random.randn(5), index=pd.date_range('2016-04-01', periods=5, freq='M'))


In [37]:
s

2016-04-30    0.747359
2016-05-31    0.115710
2016-06-30    0.316145
2016-07-31    0.166059
2016-08-31   -1.268916
Freq: M, dtype: float64

In [38]:
s.to_period()

2016-04    0.747359
2016-05    0.115710
2016-06    0.316145
2016-07    0.166059
2016-08   -1.268916
Freq: M, dtype: float64

In [40]:
####重采样resample，一开始的数据是以每分钟为单位，需要获取每五分钟的数据
ts = pd.Series(np.random.randint(0, 50, 60), index=pd.date_range('2016-4-25 09:30', periods=60, freq='T'))

In [44]:
ts.resample('5min', label='right').sum()# index为区间右端

2016-04-25 09:35:00    138
2016-04-25 09:40:00    163
2016-04-25 09:45:00    152
2016-04-25 09:50:00    128
2016-04-25 09:55:00    128
2016-04-25 10:00:00    152
2016-04-25 10:05:00     88
2016-04-25 10:10:00     59
2016-04-25 10:15:00    172
2016-04-25 10:20:00    139
2016-04-25 10:25:00     88
2016-04-25 10:30:00    160
Freq: 5T, dtype: int32

In [45]:
# 对于金融数据，股价的开盘价，最高价，最低价，收盘价
ts.resample('5min').ohlc()

Unnamed: 0,open,high,low,close
2016-04-25 09:30:00,14,42,14,21
2016-04-25 09:35:00,44,44,22,22
2016-04-25 09:40:00,21,48,9,38
2016-04-25 09:45:00,27,38,13,13
2016-04-25 09:50:00,38,42,10,17
2016-04-25 09:55:00,38,44,10,20
2016-04-25 10:00:00,15,44,6,6
2016-04-25 10:05:00,29,29,0,16
2016-04-25 10:10:00,31,41,31,31
2016-04-25 10:15:00,13,45,10,36


In [46]:
##通过groupby重采样
ts = pd.Series(np.random.randint(0, 50, 100), index=pd.date_range('2016-03-01', periods=100, freq='D'))
ts

2016-03-01    47
2016-03-02     9
2016-03-03    25
2016-03-04    33
2016-03-05    36
2016-03-06    46
2016-03-07    34
2016-03-08    15
2016-03-09    17
2016-03-10    26
2016-03-11    38
2016-03-12     8
2016-03-13    46
2016-03-14    39
2016-03-15    44
2016-03-16    25
2016-03-17    42
2016-03-18    44
2016-03-19    31
2016-03-20     5
2016-03-21    49
2016-03-22    11
2016-03-23    10
2016-03-24    36
2016-03-25    40
2016-03-26     9
2016-03-27    19
2016-03-28    46
2016-03-29    42
2016-03-30    40
              ..
2016-05-10    46
2016-05-11    24
2016-05-12    43
2016-05-13     7
2016-05-14    41
2016-05-15    23
2016-05-16    41
2016-05-17    31
2016-05-18    34
2016-05-19    14
2016-05-20     2
2016-05-21    20
2016-05-22    30
2016-05-23     0
2016-05-24    25
2016-05-25     7
2016-05-26    40
2016-05-27     1
2016-05-28    42
2016-05-29    18
2016-05-30    16
2016-05-31    42
2016-06-01     6
2016-06-02    47
2016-06-03     6
2016-06-04    20
2016-06-05    18
2016-06-06    

In [47]:
ts.groupby(lambda x: x.month).sum()#按月份重采样

3    932
4    602
5    737
6    164
dtype: int32

In [48]:
ts.groupby(ts.index.to_period('M')).sum()#转化成按月的时期

2016-03    932
2016-04    602
2016-05    737
2016-06    164
Freq: M, dtype: int32

In [56]:
df = pd.DataFrame(np.random.randint(1, 50, 2), index=pd.date_range('2016-4-22', periods=2, freq='W-FRI'))

In [57]:
df

Unnamed: 0,0
2016-04-22,31
2016-04-29,26


In [58]:
df.resample('D').ffill()# 以天采样后，缺失的值以上一个值填补

Unnamed: 0,0
2016-04-22,31
2016-04-23,31
2016-04-24,31
2016-04-25,31
2016-04-26,31
2016-04-27,31
2016-04-28,31
2016-04-29,26
