## 시계열 자료 다루기

In [1]:
import numpy as np
import pandas as pd


### DatetimeIndex 인덱스

In [6]:
# The same calendar day (2021-08-12) written in six different notations.
date_str = ["2021, 8, 12","2021-8-12","20210812","2021.8.12","081221","8/12/21"]
# Parse every string on its own. pandas >= 2.0 infers one format from the
# first element of a list and raises ValueError when later elements use a
# different notation, so the mixed list cannot be passed to pd.to_datetime
# as a whole. Per-element parsing works on both old and new pandas.
idx = pd.DatetimeIndex([pd.Timestamp(text) for text in date_str])
idx

DatetimeIndex(['2021-08-12', '2021-08-12', '2021-08-12', '2021-08-12',
               '2021-08-12', '2021-08-12'],
              dtype='datetime64[ns]', freq=None)

In [7]:
# Six consecutive days (2021-08-10 .. 2021-08-15), each in a different notation.
date_str = ["2021, 8, 10","2021-8-11","20210812","2021.8.13","081421","8/15/21"]
# Parse every string on its own. pandas >= 2.0 infers one format from the
# first element of a list and raises ValueError when later elements use a
# different notation, so the mixed list cannot be passed to pd.to_datetime
# as a whole. Per-element parsing works on both old and new pandas.
idx = pd.DatetimeIndex([pd.Timestamp(text) for text in date_str])
idx

DatetimeIndex(['2021-08-10', '2021-08-11', '2021-08-12', '2021-08-13',
               '2021-08-14', '2021-08-15'],
              dtype='datetime64[ns]', freq=None)

In [8]:
# Fix the legacy global NumPy seed so the random values below are reproducible.
np.random.seed(2021)
# Six uniform samples labelled by the DatetimeIndex `idx` from the previous cell.
s = pd.Series(data=np.random.rand(6), index=idx)
s

2021-08-10    0.605978
2021-08-11    0.733369
2021-08-12    0.138947
2021-08-13    0.312673
2021-08-14    0.997243
2021-08-15    0.128162
dtype: float64

In [9]:
# Daily range from 1 Aug 2021 through 31 Aug 2021 (both endpoints appear in the output).
pd.date_range(start="20210801", end="2021-08-31")

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Same daily range, specified by a start date and a count of 31 periods.
pd.date_range(start="20210801", periods=31)

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

`pd.date_range`의 `freq` 인수로 줄 수 있는 주요 값은 다음과 같다.

s: 초

T: 분 (pandas 2.2 이상에서는 `min` 사용 권장)

H: 시간 (pandas 2.2 이상에서는 `h` 사용 권장)

D: 일(day)

B: 주말이 아닌 평일

W: 매주 일요일 (W-SUN과 같음)

W-MON: 주(월요일)

M: 각 달(month)의 마지막 날 (pandas 2.2 이상에서는 `ME` 사용 권장)

MS: 각 달의 첫날

BM: 주말이 아닌 평일 중에서 각 달의 마지막 날 (pandas 2.2 이상에서는 `BME` 사용 권장)

BMS: 주말이 아닌 평일 중에서 각 달의 첫날

WOM-2THU: 각 달의 두번째 목요일

Q-JAN: 각 분기의 첫달(1·4·7·10월)의 마지막 날 (pandas 2.2 이상에서는 `QE-JAN` 사용 권장)

Q-DEC: 각 분기의 마지막 달(3·6·9·12월)의 마지막 날 (pandas 2.2 이상에서는 `QE-DEC` 사용 권장)

In [14]:
# Keep only business days (freq="B"), i.e. skip Saturdays and Sundays.
pd.date_range(start="2021-08-01", end="2021-08-31", freq="B")

DatetimeIndex(['2021-08-02', '2021-08-03', '2021-08-04', '2021-08-05',
               '2021-08-06', '2021-08-09', '2021-08-10', '2021-08-11',
               '2021-08-12', '2021-08-13', '2021-08-16', '2021-08-17',
               '2021-08-18', '2021-08-19', '2021-08-20', '2021-08-23',
               '2021-08-24', '2021-08-25', '2021-08-26', '2021-08-27',
               '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='B')

In [15]:
# Keep only Sundays: the weekly alias "W" is anchored on Sunday.
pd.date_range(start="2021-08-01", end="2021-08-31", freq="W")

DatetimeIndex(['2021-08-01', '2021-08-08', '2021-08-15', '2021-08-22',
               '2021-08-29'],
              dtype='datetime64[ns]', freq='W-SUN')

### resample 연산

In [17]:
# 100 standard-normal draws on consecutive days starting 1 Jan 2021.
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start="2021-1-1", periods=100),
)
# Show the last ten observations.
ts.tail(n=10)

2021-04-01   -1.517695
2021-04-02   -1.037998
2021-04-03    0.662567
2021-04-04   -0.624228
2021-04-05   -0.643762
2021-04-06   -0.686254
2021-04-07    0.441222
2021-04-08    0.855880
2021-04-09    0.147717
2021-04-10   -1.594633
Freq: D, dtype: float64

In [20]:
# First observation of each calendar month, labelled by the month-end date.
# An explicit MonthEnd offset replaces the "M" alias, which pandas 2.2
# deprecated in favour of "ME"; the offset object is accepted by old and new
# pandas versions alike and selects the same month-end bins.
ts.resample(pd.offsets.MonthEnd()).first()

2021-01-31    0.541335
2021-02-28    0.514857
2021-03-31   -0.075244
2021-04-30   -1.517695
Freq: M, dtype: float64