# 时间操作

In [1]:
# Build a timestamp with the standard-library datetime module.
import datetime
# %matplotlib notebook

dt = datetime.datetime(2017, 11, 24, hour=10, minute=30)
print(dt)

2017-11-24 10:30:00


### pandas构造时间

In [2]:
import pandas as pd

In [3]:
# Parse date strings with pandas. The results are bound to descriptive names
# rather than `datetime` / `to_datetime`, so the standard-library `datetime`
# module imported in the first cell is not shadowed by a Timestamp.
date_from_iso = pd.to_datetime('2017-11-24')  # year-month-day
date_from_us = pd.to_datetime('11/23/2017')   # month/day/year
print(date_from_iso)
print(date_from_us)

2017-11-24 00:00:00
2017-11-23 00:00:00


In [4]:
# Use an unambiguous ISO 8601 string. Slash-separated dates are parsed
# month-first by default, so a day-first literal such as '24/11/2017' only
# gives the intended date when the day number is greater than 12.
ts = pd.Timestamp('2017-11-24')

In [5]:
# Read the month and day-of-month components of the timestamp,
# printed one per line.
print(ts.month, ts.day, sep='\n')

11
24


In [7]:
# Shift the timestamp forward by five days.
five_days = pd.Timedelta(days=5)
print(ts + five_days)

2017-11-29 00:00:00


### 构造一个Series结构

In [8]:
# Three consecutive days stored as plain strings (not yet parsed as dates).
date_strings = ['2017-11-{} 00:00:00'.format(day) for day in (24, 25, 26)]
s = pd.Series(date_strings)
print(s)

0    2017-11-24 00:00:00
1    2017-11-25 00:00:00
2    2017-11-26 00:00:00
dtype: object


In [9]:
# Parse the string Series `s` into datetime values.
# Note: this rebinds `ts`, which held a single Timestamp in earlier cells,
# to a Series.
ts = pd.to_datetime(s)
print(ts)

0   2017-11-24
1   2017-11-25
2   2017-11-26
dtype: datetime64[ns]


In [10]:
# Use the .dt accessor to extract, for every element, the hour and the
# day of the week (`weekday`), printing the two resulting Series in turn.
print(ts.dt.hour, ts.dt.weekday, sep='\n')

0    0
1    0
2    0
dtype: int64
0    4
1    5
2    6
dtype: int64


In [11]:
# Build a Series of 3 timestamps starting at 2017-11-24, spaced 12 hours apart.
# The lowercase 'h' alias is used because the uppercase 'H' spelling is
# deprecated in recent pandas releases (FutureWarning since pandas 2.2).
series = pd.Series(pd.date_range(start='2017-11-24', periods=3, freq='12h'))
print(series)

0   2017-11-24 00:00:00
1   2017-11-24 12:00:00
2   2017-11-25 00:00:00
dtype: datetime64[ns]


### 用pandas分析csv的日期

In [18]:
# Load the flow measurements and preview the first rows.
data = pd.read_csv('./data/flowdata.csv')
print(data.head())

                  Time   L06_347  LS06_347  LS06_348
0  2009-01-01 00:00:00  0.137417  0.097500  0.016833
1  2009-01-01 03:00:00  0.131250  0.088833  0.016417
2  2009-01-01 06:00:00  0.113500  0.091250  0.016750
3  2009-01-01 09:00:00  0.135750  0.091500  0.016250
4  2009-01-01 12:00:00  0.140917  0.096167  0.017000


In [19]:
# Convert the 'Time' column to datetime values and make it the index.
# The guard keeps the cell idempotent: once 'Time' has been moved into the
# index it is no longer a column, and re-running the conversion would raise
# KeyError. `assign` builds a new frame instead of mutating `data` in place.
if 'Time' in data.columns:
    data = data.assign(Time=pd.to_datetime(data['Time'])).set_index('Time')
print(data.head())

                      L06_347  LS06_347  LS06_348
Time                                             
2009-01-01 00:00:00  0.137417  0.097500  0.016833
2009-01-01 03:00:00  0.131250  0.088833  0.016417
2009-01-01 06:00:00  0.113500  0.091250  0.016750
2009-01-01 09:00:00  0.135750  0.091500  0.016250
2009-01-01 12:00:00  0.140917  0.096167  0.017000


In [20]:
# Inspect the index of `data` after 'Time' was set as the index.
print(data.index)

DatetimeIndex(['2009-01-01 00:00:00', '2009-01-01 03:00:00',
               '2009-01-01 06:00:00', '2009-01-01 09:00:00',
               '2009-01-01 12:00:00', '2009-01-01 15:00:00',
               '2009-01-01 18:00:00', '2009-01-01 21:00:00',
               '2009-01-02 00:00:00', '2009-01-02 03:00:00',
               ...
               '2012-12-31 21:00:00', '2013-01-01 00:00:00',
               '2013-01-01 03:00:00', '2013-01-01 06:00:00',
               '2013-01-01 09:00:00', '2013-01-01 12:00:00',
               '2013-01-01 15:00:00', '2013-01-01 18:00:00',
               '2013-01-01 21:00:00', '2013-01-02 00:00:00'],
              dtype='datetime64[ns]', name='Time', length=11697, freq=None)


In [23]:
# Load the CSV again, this time letting read_csv use the first column as the
# index and parse it as dates in a single step.
data = pd.read_csv(
    './data/flowdata.csv',
    index_col=0,
    parse_dates=True,
)
print(data.head())

                      L06_347  LS06_347  LS06_348
Time                                             
2009-01-01 00:00:00  0.137417  0.097500  0.016833
2009-01-01 03:00:00  0.131250  0.088833  0.016417
2009-01-01 06:00:00  0.113500  0.091250  0.016750
2009-01-01 09:00:00  0.135750  0.091500  0.016250
2009-01-01 12:00:00  0.140917  0.096167  0.017000


In [24]:
# Slice the rows whose index falls between two explicit timestamps.
window_start = pd.Timestamp('2012-01-01 09:00')
window_end = pd.Timestamp('2012-01-02 19:00')
dt = data.loc[window_start:window_end]
print(dt)

                      L06_347  LS06_347  LS06_348
Time                                             
2012-01-01 09:00:00  0.330750  0.293583  0.029750
2012-01-01 12:00:00  0.295000  0.285167  0.031750
2012-01-01 15:00:00  0.301417  0.287750  0.031417
2012-01-01 18:00:00  0.322083  0.304167  0.038083
2012-01-01 21:00:00  0.355417  0.346500  0.080917
2012-01-02 00:00:00  1.069333  0.970000  0.071917
2012-01-02 03:00:00  0.886667  0.817417  0.070833
2012-01-02 06:00:00  1.231000  1.153083  0.150750
2012-01-02 09:00:00  1.647500  1.476667  0.076583
2012-01-02 12:00:00  1.111000  1.003833  0.062250
2012-01-02 15:00:00  0.738833  0.727083  0.052417
2012-01-02 18:00:00  0.641250  0.603417  0.047667


In [25]:
# The same kind of row slice, written with date strings as the bounds.
dt = data.loc['2012-01-01 09:00':'2012-01-01 19:00']
print(dt)

                      L06_347  LS06_347  LS06_348
Time                                             
2012-01-01 09:00:00  0.330750  0.293583  0.029750
2012-01-01 12:00:00  0.295000  0.285167  0.031750
2012-01-01 15:00:00  0.301417  0.287750  0.031417
2012-01-01 18:00:00  0.322083  0.304167  0.038083


In [26]:
# Show the last 10 rows of the frame.
print(data.tail(10))

                      L06_347  LS06_347  LS06_348
Time                                             
2012-12-31 21:00:00  0.846500  0.846500  0.170167
2013-01-01 00:00:00  1.688333  1.688333  0.207333
2013-01-01 03:00:00  2.693333  2.693333  0.201500
2013-01-01 06:00:00  2.220833  2.220833  0.166917
2013-01-01 09:00:00  2.055000  2.055000  0.175667
2013-01-01 12:00:00  1.710000  1.710000  0.129583
2013-01-01 15:00:00  1.420000  1.420000  0.096333
2013-01-01 18:00:00  1.178583  1.178583  0.083083
2013-01-01 21:00:00  0.898250  0.898250  0.077167
2013-01-02 00:00:00  0.860000  0.860000  0.075000


In [29]:
# Select every row from the year 2013 via partial-string indexing.
# .loc is required: in pandas 2.x, data['2013'] is treated as a column lookup
# (row selection through [] with a single date string was removed) and raises
# KeyError.
print(data.loc['2013'])

                      L06_347  LS06_347  LS06_348
Time                                             
2013-01-01 00:00:00  1.688333  1.688333  0.207333
2013-01-01 03:00:00  2.693333  2.693333  0.201500
2013-01-01 06:00:00  2.220833  2.220833  0.166917
2013-01-01 09:00:00  2.055000  2.055000  0.175667
2013-01-01 12:00:00  1.710000  1.710000  0.129583
2013-01-01 15:00:00  1.420000  1.420000  0.096333
2013-01-01 18:00:00  1.178583  1.178583  0.083083
2013-01-01 21:00:00  0.898250  0.898250  0.077167
2013-01-02 00:00:00  0.860000  0.860000  0.075000


In [30]:
# Select the rows from February 2012 through March 2012 using partial date
# strings as slice bounds.
feb_to_mar = data.loc['2012-02':'2012-03']
print(feb_to_mar)

                      L06_347  LS06_347  LS06_348
Time                                             
2012-02-01 00:00:00  0.150917  0.208083  0.022250
2012-02-01 03:00:00  0.140917  0.200250  0.022083
2012-02-01 06:00:00  0.130667  0.191250  0.020250
2012-02-01 09:00:00  0.135583  0.186750  0.020000
2012-02-01 12:00:00  0.131750  0.183750  0.020917
2012-02-01 15:00:00  0.133333  0.177417  0.020667
2012-02-01 18:00:00  0.119333  0.168917  0.020667
2012-02-01 21:00:00  0.124417  0.174500  0.019333
2012-02-02 00:00:00  0.116167  0.167500  0.019000
2012-02-02 03:00:00  0.107333  0.157167  0.017417
2012-02-02 06:00:00  0.147750  0.217750  0.017167
2012-02-02 09:00:00  0.230583  0.313333  0.017000
2012-02-02 12:00:00  0.122250  0.174333  0.018000
2012-02-02 15:00:00  0.104083  0.152583  0.017500
2012-02-02 18:00:00  0.090917  0.150250  0.017083
2012-02-02 21:00:00  0.090667  0.145750  0.015750
2012-02-03 00:00:00  0.093833  0.153250  0.014417
2012-02-03 03:00:00  0.113083  0.193250  0.014667


In [32]:
# Keep only the rows whose index month is January, regardless of year.
in_january = data.index.month == 1
dt = data[in_january]
print(dt.head())

                      L06_347  LS06_347  LS06_348
Time                                             
2009-01-01 00:00:00  0.137417  0.097500  0.016833
2009-01-01 03:00:00  0.131250  0.088833  0.016417
2009-01-01 06:00:00  0.113500  0.091250  0.016750
2009-01-01 09:00:00  0.135750  0.091500  0.016250
2009-01-01 12:00:00  0.140917  0.096167  0.017000


In [33]:
# Select rows by hour of day.
# Both comparisons are strict, so only rows with 8 < hour < 12 are kept.
# This is NOT equivalent to data.between_time('08:00', '12:00'), which
# includes both end points by default and would also return rows stamped
# exactly 08:00 or 12:00.
hour_of_day = data.index.hour
dt = data[(hour_of_day > 8) & (hour_of_day < 12)]
print(dt.head())

                      L06_347  LS06_347  LS06_348
Time                                             
2009-01-01 09:00:00  0.135750  0.091500  0.016250
2009-01-02 09:00:00  0.141917  0.097083  0.016417
2009-01-03 09:00:00  0.124583  0.084417  0.015833
2009-01-04 09:00:00  0.109000  0.105167  0.018000
2009-01-05 09:00:00  0.161500  0.114583  0.021583


In [34]:
# between_time() keeps rows whose time of day lies in the closed interval
# [08:00, 09:00]; show the first 10 matches.
morning = data.between_time('08:00', '09:00')
print(morning.head(10))

                      L06_347  LS06_347  LS06_348
Time                                             
2009-01-01 09:00:00  0.135750  0.091500  0.016250
2009-01-02 09:00:00  0.141917  0.097083  0.016417
2009-01-03 09:00:00  0.124583  0.084417  0.015833
2009-01-04 09:00:00  0.109000  0.105167  0.018000
2009-01-05 09:00:00  0.161500  0.114583  0.021583
2009-01-06 09:00:00  0.100083  0.065583  0.015500
2009-01-07 09:00:00  0.138500  0.093917  0.015000
2009-01-08 09:00:00  0.101333  0.066417  0.016833
2009-01-09 09:00:00  0.061750  0.059417  0.015167
2009-01-10 09:00:00  0.193500  0.147000  0.013000


### 重采样

In [35]:
# Resample into daily bins and average the readings in each bin; show the
# first five days. Variations on the same pattern:
#   3-day bins:   data.resample('3D').mean().head()
#   monthly bins: data.resample('M').mean().head()
#   daily max:    data.resample('D').max().head()
daily = data.resample('D')
dt = daily.mean().head()
print(dt)

             L06_347  LS06_347  LS06_348
Time                                    
2009-01-01  0.125010  0.092281  0.016635
2009-01-02  0.124146  0.095781  0.016406
2009-01-03  0.113562  0.085542  0.016094
2009-01-04  0.140198  0.102708  0.017323
2009-01-05  0.128812  0.104490  0.018167


In [36]:
# Daily maximum of each column; show the first five days.
daily_max = data.resample('D').max()
dt = daily_max.head()
print(dt)

             L06_347  LS06_347  LS06_348
Time                                    
2009-01-01  0.140917  0.097500  0.017583
2009-01-02  0.147833  0.101917  0.016833
2009-01-03  0.135833  0.092500  0.016833
2009-01-04  0.160417  0.113750  0.018417
2009-01-05  0.161500  0.115167  0.021583


In [37]:
# data.resample('M').mean().plot()