# 시계열 자료 다루기
> 시계열 자료는 인덱스가 날짜 혹은 시간인 데이터를 말한다

In [1]:
import numpy as np
import pandas as pd

## DatetimeIndex 인덱스

In [11]:
# Date strings written in several different layouts (dashes, dots, compact
# digits, US-style slashes); pd.to_datetime turns them into one DatetimeIndex.
# NOTE(review): element-wise parsing of mixed layouts may behave differently
# (warn or raise) depending on the installed pandas version — confirm.
date_str = ['2021-8,12', '2021-8-12', '20210812', '2021.8.12', '081221', '8/14/21']
idx = pd.to_datetime(date_str)
# Bare expression so the notebook displays the resulting index.
idx

DatetimeIndex(['2021-08-12', '2021-08-12', '2021-08-12', '2021-08-12',
               '2021-08-12', '2021-08-14'],
              dtype='datetime64[ns]', freq=None)

In [12]:
# Seed NumPy's global RNG so the six random draws below are reproducible.
np.random.seed(2021)
# Series of six standard-normal values, indexed by the dates parsed into `idx`.
s = pd.Series(data=np.random.randn(6), index=idx)
s

2021-08-12    1.488609
2021-08-12    0.676011
2021-08-12   -0.418451
2021-08-12   -0.806521
2021-08-12    0.555876
2021-08-14   -0.705504
dtype: float64

In [13]:
# Every calendar day from the start date through the end date (both included).
pd.date_range(start="2021-8-1", end="2021-8-31")

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [14]:
# Same daily range, specified as a start date plus a number of periods
# instead of an explicit end date.
pd.date_range(start="2021-8-1", periods=31)

DatetimeIndex(['2021-08-01', '2021-08-02', '2021-08-03', '2021-08-04',
               '2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08',
               '2021-08-09', '2021-08-10', '2021-08-11', '2021-08-12',
               '2021-08-13', '2021-08-14', '2021-08-15', '2021-08-16',
               '2021-08-17', '2021-08-18', '2021-08-19', '2021-08-20',
               '2021-08-21', '2021-08-22', '2021-08-23', '2021-08-24',
               '2021-08-25', '2021-08-26', '2021-08-27', '2021-08-28',
               '2021-08-29', '2021-08-30', '2021-08-31'],
              dtype='datetime64[ns]', freq='D')

In [19]:
# The freq argument restricts which dates are generated.
# 'B' = business days: weekdays only, Saturdays and Sundays are skipped.
pd.date_range(start='20211001', periods=22, freq='B')

DatetimeIndex(['2021-10-01', '2021-10-04', '2021-10-05', '2021-10-06',
               '2021-10-07', '2021-10-08', '2021-10-11', '2021-10-12',
               '2021-10-13', '2021-10-14', '2021-10-15', '2021-10-18',
               '2021-10-19', '2021-10-20', '2021-10-21', '2021-10-22',
               '2021-10-25', '2021-10-26', '2021-10-27', '2021-10-28',
               '2021-10-29', '2021-11-01'],
              dtype='datetime64[ns]', freq='B')

In [20]:
# 'W' = weekly frequency; with no explicit anchor it resolves to W-SUN,
# so the range contains every Sunday between the two dates.
# Use the documented uppercase alias: the lowercase 'w' spelling is not in
# the offset-alias table and is deprecated in newer pandas releases.
sundays = pd.date_range(start='20210801', end='20210831', freq='W')
# Bare expression so the notebook still displays the index.
sundays

DatetimeIndex(['2021-08-01', '2021-08-08', '2021-08-15', '2021-08-22',
               '2021-08-29'],
              dtype='datetime64[ns]', freq='W-SUN')

### resample 연산
> 시간 간격을 재조정하는 리샘플링(resampling)이 가능하다

In [28]:
# Reproducible sample series: ten seeded standard-normal values on
# consecutive days starting 2021-01-01.
np.random.seed(2021)
daily_index = pd.date_range(start="2021-1-1", periods=10, freq="D")
ts = pd.Series(data=np.random.randn(10), index=daily_index)
# tail(20) asks for more rows than exist, so the whole series is shown.
ts.tail(20)

2021-01-01    1.488609
2021-01-02    0.676011
2021-01-03   -0.418451
2021-01-04   -0.806521
2021-01-05    0.555876
2021-01-06   -0.705504
2021-01-07    1.130858
2021-01-08    0.645002
2021-01-09    0.106414
2021-01-10    0.422155
Freq: D, dtype: float64

In [30]:
# Mean of all observations in each weekly bin. Bins close on Sunday (W-SUN)
# and are labelled with that Sunday's date — this is a per-week average,
# not an average of weekend values only.
# 'W' is the documented alias; lowercase 'w' is deprecated in newer pandas.
ts.resample('W').mean()

2021-01-03    0.582056
2021-01-10    0.192611
Freq: W-SUN, dtype: float64

In [32]:
# Sum of all observations in each weekly bin. Bins close on Sunday (W-SUN)
# and are labelled with that Sunday's date — this is a per-week total,
# not a total of weekend values only.
# 'W' is the documented alias; lowercase 'w' is deprecated in newer pandas.
ts.resample('W').sum()

2021-01-03    1.746169
2021-01-10    1.348279
Freq: W-SUN, dtype: float64

In [34]:
# First observation inside each calendar-month bin; the bin is labelled
# with the month-end date.
# NOTE(review): pandas >= 2.2 prefers the 'ME' alias for month-end; 'M' is
# kept to match the pandas version that produced the output below — confirm
# the target version before changing it.
ts.resample(rule='M').first()

2021-01-31    1.488609
Freq: M, dtype: float64