# datetime模块

In [40]:
import pandas as pd

In [43]:
import numpy as np

In [44]:
from pandas import Series,DataFrame

In [1]:
from datetime import datetime

In [2]:
# Capture the current local date and time as a datetime object.
now = datetime.now()

In [3]:
now

datetime.datetime(2017, 5, 15, 21, 24, 14, 653257)

In [4]:
now.year,now.month,now.day

(2017, 5, 15)

# timedelta：两个datetime之间的时间差

In [5]:
# Subtracting one datetime from another produces a timedelta.
delta = datetime(year=2011, month=1, day=7) - datetime(
    year=2008, month=6, day=24, hour=8, minute=15
)

In [6]:
delta     # 926 days, 15:45:00 (shown as days and leftover seconds)

datetime.timedelta(926, 56700)

In [8]:
delta.days

926

In [9]:
delta.seconds

56700

In [10]:
from datetime import timedelta

In [11]:
start = datetime(2011,1,7)

In [12]:
# timedelta's first positional argument is a number of days; spell it out.
start + timedelta(days=12)  # 12 days after start

datetime.datetime(2011, 1, 19, 0, 0)

In [13]:
# A timedelta can be multiplied by an integer: this steps back 2 * 12 = 24 days.
start - 2 * timedelta(12)

datetime.datetime(2010, 12, 14, 0, 0)

# 字符串和datetime的相互转换

In [14]:
stamp = datetime(2011,1,3)

In [15]:
stamp

datetime.datetime(2011, 1, 3, 0, 0)

In [16]:
str(stamp)

'2011-01-03 00:00:00'

## strftime格式化

In [17]:
stamp.strftime('%Y-%m-%d')    # strftime formats the datetime as a string in the given layout

'2011-01-03'

In [18]:
value = '2011-01-03'

## strptime已知格式日期解析

In [20]:
datetime.strptime(value,'%Y-%m-%d')    # strptime parses a string in a known format back into a datetime

datetime.datetime(2011, 1, 3, 0, 0)

In [22]:
datestrs = ['7/6/2011','8/6/2011']

In [23]:
# Parse every month/day/year string using one explicit format.
[datetime.strptime(date_text, '%m/%d/%Y') for date_text in datestrs]

[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

## dateutil中的parser.parse

In [24]:
from dateutil.parser import parse

In [25]:
parse('2011-01-03')

datetime.datetime(2011, 1, 3, 0, 0)

In [26]:
parse('Jan 31, 1997 10:45 PM')

datetime.datetime(1997, 1, 31, 22, 45)

### dayfirst = True

In [27]:
# dayfirst=True reads the string as day/month/year, i.e. 6 December 2011.
parse('6/12/2011', dayfirst=True)

datetime.datetime(2011, 12, 6, 0, 0)

### to_datetime

In [28]:
datestrs

['7/6/2011', '8/6/2011']

In [31]:
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)

### 处理缺失值

In [32]:
# Append a missing entry; to_datetime converts the None into NaT.
idx = pd.to_datetime(datestrs + [None])

In [33]:
idx

DatetimeIndex(['2011-07-06', '2011-08-06', 'NaT'], dtype='datetime64[ns]', freq=None)

In [34]:
idx[2] # NaT (Not a Time) is the NA value pandas uses for timestamp data

NaT

In [35]:
pd.isnull(idx)

array([False, False,  True], dtype=bool)

# 时间序列基础

In [36]:
from datetime import datetime

In [38]:
# Six irregularly spaced days in January 2011, used as a time-series index.
dates = [datetime(2011, 1, day_of_month) for day_of_month in (2, 5, 7, 8, 10, 12)]

In [39]:
dates

[datetime.datetime(2011, 1, 2, 0, 0),
 datetime.datetime(2011, 1, 5, 0, 0),
 datetime.datetime(2011, 1, 7, 0, 0),
 datetime.datetime(2011, 1, 8, 0, 0),
 datetime.datetime(2011, 1, 10, 0, 0),
 datetime.datetime(2011, 1, 12, 0, 0)]

In [45]:
# Six standard-normal draws indexed by the datetime list above.
# NOTE(review): no random seed is set in the visible cells, so the values change on every run.
ts = Series(np.random.randn(6),index = dates)

In [46]:
ts   # ts is now a time series: a Series whose index holds dates

2011-01-02   -1.207058
2011-01-05   -0.643014
2011-01-07    0.007802
2011-01-08    0.954995
2011-01-10    1.909089
2011-01-12   -1.198520
dtype: float64

In [47]:
type(ts)

pandas.core.series.Series

In [48]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [49]:
# Arithmetic aligns on the date index; dates absent from ts[::2] produce NaN.
ts + ts[::2]

2011-01-02   -2.414116
2011-01-05         NaN
2011-01-07    0.015604
2011-01-08         NaN
2011-01-10    3.818178
2011-01-12         NaN
dtype: float64

In [None]:
# pandas stores timestamps as NumPy datetime64 values at nanosecond resolution.

In [50]:
ts.index.dtype

dtype('<M8[ns]')

In [51]:
stamp = ts.index[0]

In [52]:
stamp

Timestamp('2011-01-02 00:00:00')

# 索引、选取、子集构造

In [56]:
ts

2011-01-02   -1.207058
2011-01-05   -0.643014
2011-01-07    0.007802
2011-01-08    0.954995
2011-01-10    1.909089
2011-01-12   -1.198520
dtype: float64

## 数据索引

In [53]:
stamp = ts.index[2]

In [54]:
ts[stamp]

0.0078017613960334083

In [55]:
# A date string is parsed and matched against the index (here 10 January 2011).
ts['1/10/2011']

1.9090892426847332

In [57]:
ts['20110110']

1.9090892426847332

## 数据切片

In [60]:
# 1000 consecutive daily observations starting on 2000-01-01.
longer_ts = Series(
    np.random.randn(1000),
    index=pd.date_range('1/1/2000', periods=1000),
)

In [63]:
longer_ts

2000-01-01   -2.020808
2000-01-02    0.397426
2000-01-03   -1.282812
2000-01-04   -1.013067
2000-01-05    0.488843
2000-01-06   -1.238329
2000-01-07    0.531672
2000-01-08    0.382196
2000-01-09    0.390241
2000-01-10   -1.297127
2000-01-11   -1.055768
2000-01-12    0.973685
2000-01-13   -1.395461
2000-01-14   -0.533250
2000-01-15   -0.814584
2000-01-16   -1.926431
2000-01-17    0.564374
2000-01-18   -1.268777
2000-01-19    0.211798
2000-01-20   -0.106225
2000-01-21    1.569911
2000-01-22    1.705875
2000-01-23   -1.556016
2000-01-24   -0.345982
2000-01-25   -1.913248
2000-01-26   -0.749158
2000-01-27   -0.998200
2000-01-28   -0.132859
2000-01-29   -0.076699
2000-01-30    0.734122
                ...   
2002-08-28    0.376341
2002-08-29   -2.225213
2002-08-30    1.176286
2002-08-31   -1.401522
2002-09-01    0.241813
2002-09-02   -0.754440
2002-09-03   -1.035894
2002-09-04    0.036172
2002-09-05   -0.034658
2002-09-06    0.370887
2002-09-07   -0.021864
2002-09-08   -0.961448
2002-09-09 

In [64]:
longer_ts['2001']

2001-01-01   -0.973301
2001-01-02    1.261070
2001-01-03    2.464958
2001-01-04   -1.452463
2001-01-05    1.708813
2001-01-06   -1.678703
2001-01-07   -0.184007
2001-01-08    0.953687
2001-01-09    0.274791
2001-01-10    0.038823
2001-01-11    1.102657
2001-01-12    0.638720
2001-01-13   -1.247243
2001-01-14    1.411852
2001-01-15    1.881724
2001-01-16    1.071559
2001-01-17   -1.513294
2001-01-18   -0.211476
2001-01-19   -0.212722
2001-01-20    0.304920
2001-01-21   -0.301219
2001-01-22    1.841523
2001-01-23   -1.396002
2001-01-24    2.153936
2001-01-25   -0.270836
2001-01-26    0.544841
2001-01-27    0.171885
2001-01-28   -1.274691
2001-01-29   -1.254326
2001-01-30   -1.326433
                ...   
2001-12-02   -0.346100
2001-12-03   -1.556345
2001-12-04   -0.812883
2001-12-05    0.617589
2001-12-06   -1.031129
2001-12-07   -0.743160
2001-12-08   -0.779981
2001-12-09    0.060173
2001-12-10   -0.895940
2001-12-11   -0.244878
2001-12-12   -0.129459
2001-12-13    0.687267
2001-12-14 

In [68]:
ts

2011-01-02   -1.207058
2011-01-05   -0.643014
2011-01-07    0.007802
2011-01-08    0.954995
2011-01-10    1.909089
2011-01-12   -1.198520
dtype: float64

In [67]:
ts[datetime(2011,1,7):]

2011-01-07    0.007802
2011-01-08    0.954995
2011-01-10    1.909089
2011-01-12   -1.198520
dtype: float64

In [69]:
# Slice endpoints need not exist in the index; every row between the two dates is returned.
ts['1/6/2011':'1/11/2011']

2011-01-07    0.007802
2011-01-08    0.954995
2011-01-10    1.909089
dtype: float64

## truncate

In [70]:
# Drop every observation dated after 9 January 2011.
ts.truncate(after = '1/9/2011')

2011-01-02   -1.207058
2011-01-05   -0.643014
2011-01-07    0.007802
2011-01-08    0.954995
dtype: float64

## 以上索引方式同样适用于DataFrame

In [73]:
# 100 weekly timestamps, each falling on a Wednesday, starting from January 2000.
dates = pd.date_range(start='1/1/2000', periods=100, freq='W-WED')

In [76]:
dates

DatetimeIndex(['2000-01-05', '2000-01-12', '2000-01-19', '2000-01-26',
               '2000-02-02', '2000-02-09', '2000-02-16', '2000-02-23',
               '2000-03-01', '2000-03-08', '2000-03-15', '2000-03-22',
               '2000-03-29', '2000-04-05', '2000-04-12', '2000-04-19',
               '2000-04-26', '2000-05-03', '2000-05-10', '2000-05-17',
               '2000-05-24', '2000-05-31', '2000-06-07', '2000-06-14',
               '2000-06-21', '2000-06-28', '2000-07-05', '2000-07-12',
               '2000-07-19', '2000-07-26', '2000-08-02', '2000-08-09',
               '2000-08-16', '2000-08-23', '2000-08-30', '2000-09-06',
               '2000-09-13', '2000-09-20', '2000-09-27', '2000-10-04',
               '2000-10-11', '2000-10-18', '2000-10-25', '2000-11-01',
               '2000-11-08', '2000-11-15', '2000-11-22', '2000-11-29',
               '2000-12-06', '2000-12-13', '2000-12-20', '2000-12-27',
               '2001-01-03', '2001-01-10', '2001-01-17', '2001-01-24',
      

In [77]:
# 100 weekly rows by four state columns, filled with standard-normal noise.
long_df = DataFrame(
    np.random.randn(100, 4),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)

In [78]:
long_df

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.218187,1.067762,1.204531,-0.454129
2000-01-12,-0.759546,0.433285,-0.901516,0.526798
2000-01-19,-0.822387,0.623513,0.810404,0.711967
2000-01-26,1.175960,-1.142243,1.159131,-0.461551
2000-02-02,-1.062192,1.775304,-0.404899,1.740111
2000-02-09,-0.061671,0.973445,-0.470620,0.455956
2000-02-16,1.105713,-0.475636,2.730268,1.206502
2000-02-23,0.568751,-0.298150,0.468654,0.468447
2000-03-01,-0.796056,0.240534,1.265104,-0.271049
2000-03-08,-0.049877,-2.152746,-0.083958,0.154545


In [79]:
# Partial-string indexing: select every row whose date falls in May 2001.
# .loc is used because the .ix indexer was deprecated in pandas 0.20 and
# removed in pandas 1.0, so the original call fails on current versions.
long_df.loc['5-2001']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-0.33206,-1.650119,0.927918,1.497812
2001-05-09,0.574398,0.697028,-0.049334,-0.270528
2001-05-16,0.818809,-0.606428,2.805374,-2.092615
2001-05-23,1.403139,0.559583,-0.681701,-1.040336
2001-05-30,0.139066,-0.995271,0.766633,-1.365663


# 带有重复索引的时间序列

In [81]:
# Index with a repeated timestamp: 2 January 2000 appears three times.
dates = pd.DatetimeIndex(['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000'])

In [82]:
dates

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', '2000-01-02',
               '2000-01-03'],
              dtype='datetime64[ns]', freq=None)

In [83]:
dup_ts = Series(np.arange(5),index = dates)

In [84]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int64

In [85]:
dup_ts.index.is_unique

False

In [86]:
dup_ts['1/3/2000']

4

In [87]:
dup_ts['1/2/2000']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int64

In [88]:
# Group rows that share a timestamp; level=0 refers to the index itself.
grouped = dup_ts.groupby(level=0)

In [89]:
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int64

In [90]:
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

# 日期的范围、频率及移动

In [91]:
ts

2011-01-02   -1.207058
2011-01-05   -0.643014
2011-01-07    0.007802
2011-01-08    0.954995
2011-01-10    1.909089
2011-01-12   -1.198520
dtype: float64

In [98]:
# Resample to daily frequency; days with no observation show up as NaN.
ts.resample('D').mean()

2011-01-02   -1.207058
2011-01-03         NaN
2011-01-04         NaN
2011-01-05   -0.643014
2011-01-06         NaN
2011-01-07    0.007802
2011-01-08    0.954995
2011-01-09         NaN
2011-01-10    1.909089
2011-01-11         NaN
2011-01-12   -1.198520
Freq: D, dtype: float64

# 生成日期范围

In [99]:
index = pd.date_range('4/1/2012','6/1/2012')

In [100]:
index

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20',
               '2012-04-21', '2012-04-22', '2012-04-23', '2012-04-24',
               '2012-04-25', '2012-04-26', '2012-04-27', '2012-04-28',
               '2012-04-29', '2012-04-30', '2012-05-01', '2012-05-02',
               '2012-05-03', '2012-05-04', '2012-05-05', '2012-05-06',
               '2012-05-07', '2012-05-08', '2012-05-09', '2012-05-10',
               '2012-05-11', '2012-05-12', '2012-05-13', '2012-05-14',
               '2012-05-15', '2012-05-16', '2012-05-17', '2012-05-18',
               '2012-05-19', '2012-05-20', '2012-05-21', '2012-05-22',
               '2012-05-23', '2012-05-24', '2012-05-25', '2012-05-26',
      

In [102]:
pd.date_range(start='4/1/2012',periods=20)

DatetimeIndex(['2012-04-01', '2012-04-02', '2012-04-03', '2012-04-04',
               '2012-04-05', '2012-04-06', '2012-04-07', '2012-04-08',
               '2012-04-09', '2012-04-10', '2012-04-11', '2012-04-12',
               '2012-04-13', '2012-04-14', '2012-04-15', '2012-04-16',
               '2012-04-17', '2012-04-18', '2012-04-19', '2012-04-20'],
              dtype='datetime64[ns]', freq='D')

In [103]:
pd.date_range(end='6/1/2012',periods=20)

DatetimeIndex(['2012-05-13', '2012-05-14', '2012-05-15', '2012-05-16',
               '2012-05-17', '2012-05-18', '2012-05-19', '2012-05-20',
               '2012-05-21', '2012-05-22', '2012-05-23', '2012-05-24',
               '2012-05-25', '2012-05-26', '2012-05-27', '2012-05-28',
               '2012-05-29', '2012-05-30', '2012-05-31', '2012-06-01'],
              dtype='datetime64[ns]', freq='D')

# BM（business month end，每月最后一个工作日）

In [104]:
# 'BM' yields the last business day of each month inside the range.
# NOTE(review): recent pandas releases deprecate 'BM' in favour of 'BME' — confirm against the installed version.
pd.date_range('1/1/2000','12/1/2000',freq = 'BM')

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-28',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-29', '2000-10-31', '2000-11-30'],
              dtype='datetime64[ns]', freq='BM')

In [105]:
pd.date_range('5/2/2012 12:56:31',periods=5)

DatetimeIndex(['2012-05-02 12:56:31', '2012-05-03 12:56:31',
               '2012-05-04 12:56:31', '2012-05-05 12:56:31',
               '2012-05-06 12:56:31'],
              dtype='datetime64[ns]', freq='D')

In [106]:
# normalize=True resets the time of day to midnight, so only dates remain.
pd.date_range('5/2/2012 12:56:31',periods=5,normalize=True)

DatetimeIndex(['2012-05-02', '2012-05-03', '2012-05-04', '2012-05-05',
               '2012-05-06'],
              dtype='datetime64[ns]', freq='D')

# 频率和日期偏移量

In [108]:
# Offset classes used to build hour- and minute-based frequencies.
from pandas.tseries.offsets import Hour, Minute

In [109]:
hour = Hour()

In [110]:
hour

<Hour>

In [111]:
four_hours = Hour(4)

In [112]:
four_hours

<4 * Hours>

In [113]:
pd.date_range('1/1/2000','1/3/2000 23:59',freq='4h')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 04:00:00',
               '2000-01-01 08:00:00', '2000-01-01 12:00:00',
               '2000-01-01 16:00:00', '2000-01-01 20:00:00',
               '2000-01-02 00:00:00', '2000-01-02 04:00:00',
               '2000-01-02 08:00:00', '2000-01-02 12:00:00',
               '2000-01-02 16:00:00', '2000-01-02 20:00:00',
               '2000-01-03 00:00:00', '2000-01-03 04:00:00',
               '2000-01-03 08:00:00', '2000-01-03 12:00:00',
               '2000-01-03 16:00:00', '2000-01-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [114]:
Hour(2)+Minute(30)

<150 * Minutes>

In [115]:
pd.date_range('1/1/2000',periods=10,freq='1h30min')

DatetimeIndex(['2000-01-01 00:00:00', '2000-01-01 01:30:00',
               '2000-01-01 03:00:00', '2000-01-01 04:30:00',
               '2000-01-01 06:00:00', '2000-01-01 07:30:00',
               '2000-01-01 09:00:00', '2000-01-01 10:30:00',
               '2000-01-01 12:00:00', '2000-01-01 13:30:00'],
              dtype='datetime64[ns]', freq='90T')

# WOM日期(week of month)

In [118]:
rng = pd.date_range('1/1/2012','9/1/2012',freq='WOM-3FRI')  # third Friday of every month

In [117]:
rng

DatetimeIndex(['2012-01-20', '2012-02-17', '2012-03-16', '2012-04-20',
               '2012-05-18', '2012-06-15', '2012-07-20', '2012-08-17'],
              dtype='datetime64[ns]', freq='WOM-3FRI')

# 移动数据

In [120]:
# Four random values stamped with calendar month-end dates (freq='M').
# This rebinds ts, replacing the six-point January 2011 series used earlier.
# NOTE(review): recent pandas releases deprecate 'M' in favour of 'ME' — confirm against the installed version.
ts = Series(np.random.randn(4),
           index = pd.date_range('1/1/2000',periods=4,freq='M'))

In [121]:
ts

2000-01-31    2.095914
2000-02-29   -1.704402
2000-03-31    0.219914
2000-04-30    0.608540
Freq: M, dtype: float64

In [122]:
# Shift the values two periods forward; the index stays put and the first two slots become NaN.
ts.shift(2)

2000-01-31         NaN
2000-02-29         NaN
2000-03-31    2.095914
2000-04-30   -1.704402
Freq: M, dtype: float64

In [123]:
ts.shift(-2)

2000-01-31    0.219914
2000-02-29    0.608540
2000-03-31         NaN
2000-04-30         NaN
Freq: M, dtype: float64

In [124]:
# With freq given, the timestamps move two month-ends forward and no values are lost.
ts.shift(2,freq='M')

2000-03-31    2.095914
2000-04-30   -1.704402
2000-05-31    0.219914
2000-06-30    0.608540
Freq: M, dtype: float64

In [125]:
ts.shift(3,freq='D')

2000-02-03    2.095914
2000-03-03   -1.704402
2000-04-03    0.219914
2000-05-03    0.608540
dtype: float64

In [126]:
ts.shift(1,freq='3D')

2000-02-03    2.095914
2000-03-03   -1.704402
2000-04-03    0.219914
2000-05-03    0.608540
dtype: float64

In [127]:
# Advance every timestamp by 90 minutes, leaving the values untouched.
# 'min' is the same minute alias used by the '1h30min' frequency string
# elsewhere in this notebook; the single-letter 'T' spelling is deprecated
# in recent pandas releases, while 'min' is accepted by old and new versions.
ts.shift(1, freq='90min')

2000-01-31 01:30:00    2.095914
2000-02-29 01:30:00   -1.704402
2000-03-31 01:30:00    0.219914
2000-04-30 01:30:00    0.608540
Freq: M, dtype: float64