## 16. 시간


## 참고자료
* [Python 완전정복 시리즈] 2편 : Pandas DataFrame 완전정복 : https://wikidocs.net/book/7188

In [2]:
import pandas as pd
import numpy as np

## 특정시간 필터링 (at_time)

In [4]:
# Build ten timestamps starting at 2021-12-24, spaced six hours apart.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
i = pd.date_range('2021-12-24', periods=10, freq='6h')
df = pd.DataFrame({'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, index=i)
df

Unnamed: 0,col1
2021-12-24 00:00:00,1
2021-12-24 06:00:00,2
2021-12-24 12:00:00,3
2021-12-24 18:00:00,4
2021-12-25 00:00:00,5
2021-12-25 06:00:00,6
2021-12-25 12:00:00,7
2021-12-25 18:00:00,8
2021-12-26 00:00:00,9
2021-12-26 06:00:00,10


In [5]:
# Keep only the rows whose index time-of-day is exactly 06:00, on any date.
df.at_time('06:00')

Unnamed: 0,col1
2021-12-24 06:00:00,2
2021-12-25 06:00:00,6
2021-12-26 06:00:00,10


## 특정기간 필터링 (between_time)

In [6]:
# Build ten timestamps starting at 2021-12-24, spaced one hour apart.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
i = pd.date_range('2021-12-24', periods=10, freq='1h')
df = pd.DataFrame({'col1': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]}, index=i)
df

Unnamed: 0,col1
2021-12-24 00:00:00,1
2021-12-24 01:00:00,2
2021-12-24 02:00:00,3
2021-12-24 03:00:00,4
2021-12-24 04:00:00,5
2021-12-24 05:00:00,6
2021-12-24 06:00:00,7
2021-12-24 07:00:00,8
2021-12-24 08:00:00,9
2021-12-24 09:00:00,10


In [8]:
# Select the rows whose time-of-day lies between 03:00 and 06:00.
# With the default settings both boundary times are included in the result.
df.between_time(start_time='03:00',end_time='06:00')

Unnamed: 0,col1
2021-12-24 03:00:00,4
2021-12-24 04:00:00,5
2021-12-24 05:00:00,6
2021-12-24 06:00:00,7


In [9]:
# Select the rows strictly between 03:00 and 06:00, excluding both boundaries.
# `inclusive='neither'` replaces the deprecated `include_start=False,
# include_end=False` pair, which emits a FutureWarning on pandas 1.4+ and is
# no longer accepted by pandas 2.0 and later.
df.between_time(start_time='03:00', end_time='06:00', inclusive='neither')


Unnamed: 0,col1
2021-12-24 04:00:00,5
2021-12-24 05:00:00,6


In [10]:
# When start_time is later than end_time, the rows between the two times are
# excluded and everything outside that window (boundaries included) is returned.
df.between_time(start_time='06:00',end_time='03:00')

Unnamed: 0,col1
2021-12-24 00:00:00,1
2021-12-24 01:00:00,2
2021-12-24 02:00:00,3
2021-12-24 03:00:00,4
2021-12-24 06:00:00,7
2021-12-24 07:00:00,8
2021-12-24 08:00:00,9
2021-12-24 09:00:00,10


## 처음/끝 특정기간 필터링 (first / last) 

In [11]:
# Five dates, three days apart, beginning on 2021-12-06.
i = pd.date_range(start='2021-12-06', periods=5, freq='3D')
df = pd.DataFrame(data={'col1': [1, 2, 3, 4, 5]}, index=i)
df

Unnamed: 0,col1
2021-12-06,1
2021-12-09,2
2021-12-12,3
2021-12-15,4
2021-12-18,5


In [12]:
# NOTE(review): DataFrame.first is reported as deprecated in recent pandas
# releases -- confirm against the installed version before relying on it.
df.first('4D')

# Filters the data that falls within 4 days of the first date.
# This is a span of four calendar days, not the first four rows/dates.

Unnamed: 0,col1
2021-12-06,1
2021-12-09,2


In [13]:
# NOTE(review): DataFrame.last is reported as deprecated in recent pandas
# releases -- confirm against the installed version before relying on it.
df.last('4D')

# Filters the data that falls within the 4 days leading up to the last date.

Unnamed: 0,col1
2021-12-15,4
2021-12-18,5


## TimeZone변경 [표준시간대] (tz_convert)

In [14]:
# Create four hourly timestamps starting at 2021-12-29 09:00; the `tz`
# argument attaches the US/Eastern time zone at creation time.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
dr = pd.date_range(start='2021-12-29 09:00', freq='h', periods=4, tz='US/Eastern')
# Placeholder columns; they are filled with converted times in the next cell.
df = pd.DataFrame(index=dr, data={'Seoul': [0, 0, 0, 0], 'None': [0, 0, 0, 0]})
df

Unnamed: 0,Seoul,None
2021-12-29 09:00:00-05:00,0,0
2021-12-29 10:00:00-05:00,0,0
2021-12-29 11:00:00-05:00,0,0
2021-12-29 12:00:00-05:00,0,0


In [15]:
data1 = dr.tz_convert('Asia/Seoul')
# The target time zone can be given as a Region/City name.
data2 = dr.tz_convert(None)
# Passing None removes the time zone information
# (the recorded output shows the corresponding UTC wall-clock times).
df = pd.DataFrame(data={'Seoul':data1,'None':data2},index=dr)
df

Unnamed: 0,Seoul,None
2021-12-29 09:00:00-05:00,2021-12-29 23:00:00+09:00,2021-12-29 14:00:00
2021-12-29 10:00:00-05:00,2021-12-30 00:00:00+09:00,2021-12-29 15:00:00
2021-12-29 11:00:00-05:00,2021-12-30 01:00:00+09:00,2021-12-29 16:00:00
2021-12-29 12:00:00-05:00,2021-12-30 02:00:00+09:00,2021-12-29 17:00:00


## TimeZone설정 [표준시간대] (tz_localize)

In [16]:
# Same range as before but without the `tz` argument, so the index is
# timezone-naive.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
dr = pd.date_range(start='2021-12-29 09:00', freq='h', periods=4)
df = pd.DataFrame(index=dr, data={'Seoul': [0, 0, 0, 0], 'None': [0, 0, 0, 0]})
df

Unnamed: 0,Seoul,None
2021-12-29 09:00:00,0,0
2021-12-29 10:00:00,0,0
2021-12-29 11:00:00,0,0
2021-12-29 12:00:00,0,0


In [18]:
# The index has no time zone assigned, so tz_convert raises a TypeError here
# (this failure is intentional and demonstrates why tz_localize is needed).

df.tz_convert('Asia/Seoul')

TypeError: Cannot convert tz-naive timestamps, use tz_localize to localize

In [19]:
# Create a timezone-naive hourly range, then attach a time zone afterwards.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
dr = pd.date_range(start='2021-12-29 09:00', freq='h', periods=4)
dr = dr.tz_localize('US/Eastern')  # assign the time zone to the existing range
df = pd.DataFrame(index=dr, data={'Seoul': [0, 0, 0, 0], 'None': [0, 0, 0, 0]})
df

Unnamed: 0,Seoul,None
2021-12-29 09:00:00-05:00,0,0
2021-12-29 10:00:00-05:00,0,0
2021-12-29 11:00:00-05:00,0,0
2021-12-29 12:00:00-05:00,0,0


In [20]:
# Now that dr carries a time zone, tz_convert works:
# data1 is the index expressed in Seoul time, data2 has the time zone removed.
data1 = dr.tz_convert('Asia/Seoul')
data2 = dr.tz_convert(None)
# Rebuild the frame with the converted values as columns, keeping dr as the index.
df = pd.DataFrame(data={'Seoul':data1,'None':data2},index=dr)

In [21]:
# Setting the time zone at creation (tz=...) and attaching it afterwards
# (tz_localize) produce identical indexes.
# The lowercase 'h' alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases.
dr1 = pd.date_range(start='2021-12-29 09:00', freq='h', periods=4, tz='US/Eastern')
dr2 = pd.date_range(start='2021-12-29 09:00', freq='h', periods=4).tz_localize('US/Eastern')

dr1.equals(dr2)

True

## TimeStamp 변환 (to_timestamp)

In [22]:
# Build a PeriodIndex from 2021-10-04 00:00:00 to 2021-10-04 01:00:00 in
# 30-minute steps.
# 'min' is the minute alias; the older single-letter alias 'T' means the same
# thing but is deprecated in recent pandas releases.
period = pd.period_range(start='2021-10-04 00:00:00', end='2021-10-04 01:00:00', freq='30min')
# One integer per period: 0, 1, 2.
df = pd.DataFrame(data=range(len(period)), index=period)
df

Unnamed: 0,0
2021-10-04 00:00,0
2021-10-04 00:30,1
2021-10-04 01:00,2


In [23]:
# Convert the PeriodIndex to timestamps at second resolution, anchored at the
# start of each period. Lowercase 's' replaces the deprecated 'S' alias.
df.to_timestamp(freq="s", how='start')

Unnamed: 0,0
2021-10-04 00:00:00,0
2021-10-04 00:30:00,1
2021-10-04 01:00:00,2


In [24]:
# Convert the PeriodIndex to timestamps at minute resolution, anchored at the
# start of each period. 'min' replaces the deprecated 'T' alias.
df.to_timestamp(freq="min", how='start')

Unnamed: 0,0
2021-10-04 00:00:00,0
2021-10-04 00:30:00,1
2021-10-04 01:00:00,2


In [25]:
# Convert at hour resolution: the 00:30 period is truncated to 00:00, so two
# rows end up sharing the same timestamp.
# Lowercase 'h' replaces the deprecated 'H' alias.
df.to_timestamp(freq="h", how='start')

Unnamed: 0,0
2021-10-04 00:00:00,0
2021-10-04 00:00:00,1
2021-10-04 01:00:00,2


In [26]:
# Convert at day resolution: every period falls on the same day, so all rows
# get the timestamp 2021-10-04.
df.to_timestamp(freq="D",how='start')

Unnamed: 0,0
2021-10-04,0
2021-10-04,1
2021-10-04,2


In [27]:
# Convert at month resolution. In the recorded output every row is stamped
# 2021-10-31 (the month end), even though how='start' was requested.
df.to_timestamp(freq="M",how='start')

Unnamed: 0,0
2021-10-31,0
2021-10-31,1
2021-10-31,2


In [28]:
# Convert at year resolution. In the recorded output every row is stamped
# 2021-12-31 (the year end), even though how='start' was requested.
df.to_timestamp(freq="Y",how='start')

Unnamed: 0,0
2021-12-31,0
2021-12-31,1
2021-12-31,2


In [29]:
# Compare the two anchors: how='start' yields the first instant of each
# 30-minute period, how='end' yields its last nanosecond.
# 'min' replaces the deprecated 'T' alias.
print(df.to_timestamp(freq="min", how='start'))
print(df.to_timestamp(freq="min", how='end'))

                     0
2021-10-04 00:00:00  0
2021-10-04 00:30:00  1
2021-10-04 01:00:00  2
                               0
2021-10-04 00:29:59.999999999  0
2021-10-04 00:59:59.999999999  1
2021-10-04 01:29:59.999999999  2


## 인덱스 나누기 [리샘플링] (asfreq)

In [30]:
# Three timestamps at one-minute intervals starting from 2021-12-30.
idx = pd.date_range(start='2021-12-30', periods=3, freq='min')
# The middle value is deliberately missing (NaN) to show how asfreq treats it.
df = pd.DataFrame(data=[1, None, 3], index=idx, columns=['col'])
df

Unnamed: 0,col
2021-12-30 00:00:00,1.0
2021-12-30 00:01:00,
2021-12-30 00:02:00,3.0


In [32]:
# Re-index onto a 30-second grid; newly created rows are left as NaN.
# Lowercase 's' replaces the deprecated 'S' alias.
df.asfreq(freq='30s')

Unnamed: 0,col
2021-12-30 00:00:00,1.0
2021-12-30 00:00:30,
2021-12-30 00:01:00,
2021-12-30 00:01:30,
2021-12-30 00:02:00,3.0


In [33]:
# Re-index onto a 30-second grid, filling each new row from the next original
# row. The NaN that already existed at 00:01:00 is not filled, and it is also
# what gets copied back into the new 00:00:30 row.
# Lowercase 's' replaces the deprecated 'S' alias.
df.asfreq(freq='30s', method='bfill')

Unnamed: 0,col
2021-12-30 00:00:00,1.0
2021-12-30 00:00:30,
2021-12-30 00:01:00,
2021-12-30 00:01:30,3.0
2021-12-30 00:02:00,3.0


In [34]:
# Re-index onto a 30-second grid, filling each new row from the previous
# original row. The NaN that already existed at 00:01:00 is not filled, and it
# is also what gets copied forward into the new 00:01:30 row.
# Lowercase 's' replaces the deprecated 'S' alias.
df.asfreq(freq='30s', method='ffill')

Unnamed: 0,col
2021-12-30 00:00:00,1.0
2021-12-30 00:00:30,1.0
2021-12-30 00:01:00,
2021-12-30 00:01:30,
2021-12-30 00:02:00,3.0


In [35]:
# Re-index onto a 30-second grid and put '-' into the newly created rows only;
# the NaN that already existed at 00:01:00 is left untouched.
# Lowercase 's' replaces the deprecated 'S' alias.
df.asfreq(freq='30s', fill_value='-')

Unnamed: 0,col
2021-12-30 00:00:00,1.0
2021-12-30 00:00:30,-
2021-12-30 00:01:00,
2021-12-30 00:01:30,-
2021-12-30 00:02:00,3.0


In [36]:
# Three consecutive days starting on 2021-12-20, with values 1, 2, 3.
idx = pd.date_range(start='2021-12-20', periods=3, freq='D')
df = pd.DataFrame({'col': [1, 2, 3]}, index=idx)
df

Unnamed: 0,col
2021-12-20,1
2021-12-21,2
2021-12-22,3


In [37]:
# Re-index the daily data onto an 8-hour grid; the inserted rows are NaN.
# Lowercase 'h' replaces the deprecated 'H' alias.
df.asfreq(freq='8h')

Unnamed: 0,col
2021-12-20 00:00:00,1.0
2021-12-20 08:00:00,
2021-12-20 16:00:00,
2021-12-21 00:00:00,2.0
2021-12-21 08:00:00,
2021-12-21 16:00:00,
2021-12-22 00:00:00,3.0


In [38]:
# Same 8-hour grid, but normalize=True resets every output time to midnight,
# so the index shows only dates (with repeated entries per day).
# Lowercase 'h' replaces the deprecated 'H' alias.
df.asfreq(freq='8h', normalize=True)

Unnamed: 0,col
2021-12-20,1.0
2021-12-20,
2021-12-20,
2021-12-21,2.0
2021-12-21,
2021-12-21,
2021-12-22,3.0


## 리샘플링 (resample)

In [39]:
# resample is similar to asfreq but offers more features and options.

# Ten rows at one-minute intervals starting from 2021-12-30, valued 0..9.
idx = pd.date_range(start='2021-12-30', periods=10, freq='min')
df = pd.DataFrame(data=list(range(10)), index=idx, columns=['col'])
df

Unnamed: 0,col
2021-12-30 00:00:00,0
2021-12-30 00:01:00,1
2021-12-30 00:02:00,2
2021-12-30 00:03:00,3
2021-12-30 00:04:00,4
2021-12-30 00:05:00,5
2021-12-30 00:06:00,6
2021-12-30 00:07:00,7
2021-12-30 00:08:00,8
2021-12-30 00:09:00,9


In [42]:
# resample alone returns a Resampler object; an aggregation must be applied
# to get data back. '3min' replaces the deprecated '3T' alias.
df.resample(rule='3min')

<pandas.core.resample.DatetimeIndexResampler object at 0x12877a6e0>

In [43]:
# Printing the Resampler shows its settings (freq, closed, label, origin, ...).
# '3min' replaces the deprecated '3T' alias.
print(df.resample(rule='3min'))

DatetimeIndexResampler [freq=<3 * Minutes>, axis=0, closed=left, label=left, convention=start, origin=start_day]


In [45]:
# Group the rows into 3-minute bins and sum each bin.
# '3min' replaces the deprecated '3T' alias.
df.resample(rule='3min').sum()

Unnamed: 0,col
2021-12-30 00:00:00,3
2021-12-30 00:03:00,12
2021-12-30 00:06:00,21
2021-12-30 00:09:00,9


In [46]:
# closed='left': each bin includes its left edge and excludes its right edge.
# The result matches the call without `closed`.
# '3min' replaces the deprecated '3T' alias.
df.resample(rule='3min', closed='left').sum()

Unnamed: 0,col
2021-12-30 00:00:00,3
2021-12-30 00:03:00,12
2021-12-30 00:06:00,21
2021-12-30 00:09:00,9


In [47]:
# closed='right': each bin excludes its left edge and includes its right edge,
# so the first row (00:00) lands in a bin labelled 23:57 of the previous day.
# '3min' replaces the deprecated '3T' alias.
df.resample(rule='3min', closed='right').sum()

Unnamed: 0,col
2021-12-29 23:57:00,0
2021-12-30 00:00:00,6
2021-12-30 00:03:00,15
2021-12-30 00:06:00,24


In [48]:
# label='left': each bin is labelled with its left edge.
# The result matches the call without `label`.
# '3min' replaces the deprecated '3T' alias.
df.resample(rule='3min', label='left').sum()

Unnamed: 0,col
2021-12-30 00:00:00,3
2021-12-30 00:03:00,12
2021-12-30 00:06:00,21
2021-12-30 00:09:00,9


In [49]:
# label='right': the bins are unchanged but each one is labelled with its
# right edge, so the labels move three minutes later.
# '3min' replaces the deprecated '3T' alias.
df.resample(rule='3min', label='right').sum()

Unnamed: 0,col
2021-12-30 00:03:00,3
2021-12-30 00:06:00,12
2021-12-30 00:09:00,21
2021-12-30 00:12:00,9


In [50]:
# kind='timestamp' makes the resampled result carry a DatetimeIndex.
# '3min' replaces the deprecated '3T' alias.
# NOTE(review): the `kind` argument itself is reported as deprecated in recent
# pandas releases -- confirm against the installed version.
df.resample(rule='3min', kind='timestamp').sum().index

DatetimeIndex(['2021-12-30 00:00:00', '2021-12-30 00:03:00',
               '2021-12-30 00:06:00', '2021-12-30 00:09:00'],
              dtype='datetime64[ns]', freq='3T')

In [51]:
# kind='period' makes the resampled result carry a PeriodIndex instead.
# '3min' replaces the deprecated '3T' alias.
# NOTE(review): the `kind` argument itself is reported as deprecated in recent
# pandas releases -- confirm against the installed version.
df.resample(rule='3min', kind='period').sum().index

PeriodIndex(['2021-12-30 00:00', '2021-12-30 00:03', '2021-12-30 00:06',
             '2021-12-30 00:09'],
            dtype='period[3T]')

In [52]:
# Two timestamps five minutes apart, with values 1 and 6.
idx = pd.date_range(start='2021-12-30', periods=2, freq='5min')
df = pd.DataFrame({'col': [1, 6]}, index=idx)
# Move the datetime index into an ordinary column named 'index'.
df2 = df.reset_index()
df2

Unnamed: 0,index,col
0,2021-12-30 00:00:00,1
1,2021-12-30 00:05:00,6


In [53]:
# `on` resamples by a datetime column instead of the index: here the 'index'
# column is binned per minute, and the empty minutes sum to 0.
df2.resample(rule='min',on='index').sum()

Unnamed: 0_level_0,col
index,Unnamed: 1_level_1
2021-12-30 00:00:00,1
2021-12-30 00:01:00,0
2021-12-30 00:02:00,0
2021-12-30 00:03:00,0
2021-12-30 00:04:00,0
2021-12-30 00:05:00,6


In [54]:
# Nine timestamps three minutes apart starting from 2021-12-31, valued 1..9.
idx = pd.date_range(start='2021-12-31', periods=9, freq='3min')
df = pd.DataFrame(data=list(range(1, 10)), index=idx, columns=['col'])
df

Unnamed: 0,col
2021-12-31 00:00:00,1
2021-12-31 00:03:00,2
2021-12-31 00:06:00,3
2021-12-31 00:09:00,4
2021-12-31 00:12:00,5
2021-12-31 00:15:00,6
2021-12-31 00:18:00,7
2021-12-31 00:21:00,8
2021-12-31 00:24:00,9


In [55]:
# Sum over 7-minute bins with default settings; the bins start at 00:00.
df.resample(rule='7min').sum()

Unnamed: 0,col
2021-12-31 00:00:00,6
2021-12-31 00:07:00,9
2021-12-31 00:14:00,13
2021-12-31 00:21:00,17


In [56]:
# origin='start_day' anchors the bins at midnight of the first day; the result
# is identical to the call without `origin`.
df.resample(rule='7min',origin='start_day').sum()

Unnamed: 0,col
2021-12-31 00:00:00,6
2021-12-31 00:07:00,9
2021-12-31 00:14:00,13
2021-12-31 00:21:00,17


In [57]:
# origin='start' anchors the bins at the first timestamp of the data. That is
# also 00:00 here, so the result matches origin='start_day'.
df.resample(rule='7min',origin='start').sum()

Unnamed: 0,col
2021-12-31 00:00:00,6
2021-12-31 00:07:00,9
2021-12-31 00:14:00,13
2021-12-31 00:21:00,17


In [58]:
# origin='epoch' anchors the bins at 1970-01-01; for this data the bin edges
# shift so that the first bin starts at 23:55 of the previous day.
df.resample(rule='7min',origin='epoch').sum()

Unnamed: 0,col
2021-12-30 23:55:00,1
2021-12-31 00:02:00,5
2021-12-31 00:09:00,15
2021-12-31 00:16:00,15
2021-12-31 00:23:00,9


In [59]:
# origin='end' anchors the bins at the last timestamp (00:24) and counts
# backwards; in the recorded output each bin is labelled by its right edge.
df.resample(rule='7min',origin='end').sum()

Unnamed: 0,col
2021-12-31 00:03:00,3
2021-12-31 00:10:00,7
2021-12-31 00:17:00,11
2021-12-31 00:24:00,24


In [60]:
# origin='end_day' anchors the bins at the midnight that ends the last day and
# counts backwards; in the recorded output the labels are 00:05, 00:12, 00:19, 00:26.
df.resample(rule='7min',origin='end_day').sum()

Unnamed: 0,col
2021-12-31 00:05:00,3
2021-12-31 00:12:00,12
2021-12-31 00:19:00,13
2021-12-31 00:26:00,17


In [61]:
# An explicit timestamp may be used as the origin; anchoring at 2021-12-30
# makes the first bin start at 23:55 of the previous day for this data.
df.resample(rule='7min',origin='2021-12-30').sum()

Unnamed: 0,col
2021-12-30 23:55:00,1
2021-12-31 00:02:00,5
2021-12-31 00:09:00,15
2021-12-31 00:16:00,15
2021-12-31 00:23:00,9


In [62]:
# Baseline again (default settings) for comparison with the `offset` example below.
df.resample(rule='7min').sum()

Unnamed: 0,col
2021-12-31 00:00:00,6
2021-12-31 00:07:00,9
2021-12-31 00:14:00,13
2021-12-31 00:21:00,17


In [64]:
# offset='4min' shifts the bin edges by four minutes relative to the baseline,
# so the bins become 23:57, 00:04, 00:11, 00:18.
df.resample(rule='7min',offset='4min').sum()

Unnamed: 0,col
2021-12-30 23:57:00,3
2021-12-31 00:04:00,7
2021-12-31 00:11:00,11
2021-12-31 00:18:00,24


## 기간/데이터 쉬프트 (shift)

In [65]:
# Five index entries, two days apart, starting on 2022-01-01.
idx = pd.date_range(start='2022-01-01', periods=5, freq='2D')
data = {
    'col1': [10, 20, 30, 40, 50],
    'col2': [1, 3, 6, 7, 9],
    'col3': [43, 13, 82, 47, 31],
}
df = pd.DataFrame(data, index=idx)
df

Unnamed: 0,col1,col2,col3
2022-01-01,10,1,43
2022-01-03,20,3,13
2022-01-05,30,6,82
2022-01-07,40,7,47
2022-01-09,50,9,31


In [66]:
# Shift the values down by two rows; the index stays put and the first two
# rows become NaN.
df.shift(periods=2)

Unnamed: 0,col1,col2,col3
2022-01-01,,,
2022-01-03,,,
2022-01-05,10.0,1.0,43.0
2022-01-07,20.0,3.0,13.0
2022-01-09,30.0,6.0,82.0


In [67]:
# Shift the values two columns to the right: col3 receives col1's values and
# the first two columns become NaN.
df.shift(periods=2, axis=1)

Unnamed: 0,col1,col2,col3
2022-01-01,,,10
2022-01-03,,,20
2022-01-05,,,30
2022-01-07,,,40
2022-01-09,,,50


In [68]:
# Same column shift, but the vacated cells are filled with '-' instead of NaN.
df.shift(periods=2, axis=1, fill_value='-')

Unnamed: 0,col1,col2,col3
2022-01-01,-,-,10
2022-01-03,-,-,20
2022-01-05,-,-,30
2022-01-07,-,-,40
2022-01-09,-,-,50


In [69]:
# With `freq` given, the index is shifted instead of the data: every date
# moves forward by 3 days and the values stay on their rows.
df.shift(periods=3, freq='D')

Unnamed: 0,col1,col2,col3
2022-01-04,10,1,43
2022-01-06,20,3,13
2022-01-08,30,6,82
2022-01-10,40,7,47
2022-01-12,50,9,31


In [70]:
# freq='infer' uses the index's own frequency (2 days here), so every date
# moves forward by 3 x 2 = 6 days.
df.shift(periods=3, freq='infer')

Unnamed: 0,col1,col2,col3
2022-01-07,10,1,43
2022-01-09,20,3,13
2022-01-11,30,6,82
2022-01-13,40,7,47
2022-01-15,50,9,31


## period로 변환 (to_period)