# DatetimeIndex 
- pd.to_datetime() : 문자열을 날짜/시간의 자료형인 datetime 자료형으로 바꿔주는 함수
- pd.date_range() : 시작일과 종료일 또는 시작일과 기간을 입력하면 범위 내의 인덱스를 자동으로 생성
- 시계열 자료 : 인덱스에 날짜나 시간이 오는 데이터
- 판다스에서 시계열 자료를 생성하려면 인덱스를 DatetimeIndex 자료형으로 만들어야 한다.

In [2]:
import numpy as np
import pandas as pd

In [6]:
# Dates written as "year, month, day" text; at this point they are still
# ordinary strings held in a plain Python list.
date_str = [
    '2018, 1, 1',
    '2018, 1, 4',
    '2018, 1, 5',
    '2018, 1, 6',
]
print(type(date_str))

<class 'list'>


In [7]:
# Convert the "year, month, day" strings into a DatetimeIndex.
# The format is spelled out explicitly so the month/day order is fixed by the
# code rather than by pandas' format inference.
idx = pd.to_datetime(date_str, format='%Y, %m, %d')
idx

DatetimeIndex(['2018-01-01', '2018-01-04', '2018-01-05', '2018-01-06'], dtype='datetime64[ns]', freq=None)

In [8]:
# Series: four standard-normal draws labelled by the DatetimeIndex `idx`.
s = pd.Series(
    data=np.random.randn(4),
    index=idx,
)
s

2018-01-01    0.952687
2018-01-04   -0.012788
2018-01-05    0.559820
2018-01-06   -0.067473
dtype: float64

In [9]:
# Every calendar day from the start date through the end date (both included).
pd.date_range(start='2022-4-2', end='2022-4-30')

DatetimeIndex(['2022-04-02', '2022-04-03', '2022-04-04', '2022-04-05',
               '2022-04-06', '2022-04-07', '2022-04-08', '2022-04-09',
               '2022-04-10', '2022-04-11', '2022-04-12', '2022-04-13',
               '2022-04-14', '2022-04-15', '2022-04-16', '2022-04-17',
               '2022-04-18', '2022-04-19', '2022-04-20', '2022-04-21',
               '2022-04-22', '2022-04-23', '2022-04-24', '2022-04-25',
               '2022-04-26', '2022-04-27', '2022-04-28', '2022-04-29',
               '2022-04-30'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Thirty consecutive dates counted from the start date.
pd.date_range('2022-10-1', periods=30)

DatetimeIndex(['2022-10-01', '2022-10-02', '2022-10-03', '2022-10-04',
               '2022-10-05', '2022-10-06', '2022-10-07', '2022-10-08',
               '2022-10-09', '2022-10-10', '2022-10-11', '2022-10-12',
               '2022-10-13', '2022-10-14', '2022-10-15', '2022-10-16',
               '2022-10-17', '2022-10-18', '2022-10-19', '2022-10-20',
               '2022-10-21', '2022-10-22', '2022-10-23', '2022-10-24',
               '2022-10-25', '2022-10-26', '2022-10-27', '2022-10-28',
               '2022-10-29', '2022-10-30'],
              dtype='datetime64[ns]', freq='D')

### freq 매개변수로 특정한 날짜만 생성되도록 설정할 수 있다.
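- s : second, 초 (구버전 표기는 S)
- T : minute, 분 (pandas 2.2 이상에서는 min 사용 권장)
- H : hour, 시간 (pandas 2.2 이상에서는 h 사용 권장)
- D : day, 일
- B : business day, 주말이 아닌 평일
- W : 주(일요일)
- W-MON : 주(월요일)
- M : 각 달의 마지막 날 (pandas 2.2 이상에서는 ME 사용 권장)
- MS : 각 달의 첫날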
- https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects

In [12]:
# Weekly dates inside the range; the plain 'W' alias anchors on Sundays.
pd.date_range(start='2022-10-1', end='2022-12-31', freq='W')

DatetimeIndex(['2022-10-02', '2022-10-09', '2022-10-16', '2022-10-23',
               '2022-10-30', '2022-11-06', '2022-11-13', '2022-11-20',
               '2022-11-27', '2022-12-04', '2022-12-11', '2022-12-18',
               '2022-12-25'],
              dtype='datetime64[ns]', freq='W-SUN')

In [13]:
# Weekly dates inside the range, anchored on Mondays.
pd.date_range(start='2022-10-1', end='2022-12-31', freq='W-MON')

DatetimeIndex(['2022-10-03', '2022-10-10', '2022-10-17', '2022-10-24',
               '2022-10-31', '2022-11-07', '2022-11-14', '2022-11-21',
               '2022-11-28', '2022-12-05', '2022-12-12', '2022-12-19',
               '2022-12-26'],
              dtype='datetime64[ns]', freq='W-MON')

### shift

In [14]:
# Four random values indexed by the month-end dates of January-April 2018.
# A MonthEnd offset object is passed instead of the 'M' string alias: the
# alias is deprecated since pandas 2.2 (renamed to 'ME'), while the offset
# object is accepted by both older and newer pandas releases.
ts = pd.Series(
    np.random.randn(4),
    index=pd.date_range('2018-1-1', periods=4, freq=pd.offsets.MonthEnd()),
)
ts

2018-01-31   -0.025459
2018-02-28    0.095811
2018-03-31   -2.761430
2018-04-30   -1.181555
Freq: M, dtype: float64

In [15]:
# Slide the values one position later; the index is unchanged, so the first
# entry becomes NaN.
ts.shift(periods=1)

2018-01-31         NaN
2018-02-28   -0.025459
2018-03-31    0.095811
2018-04-30   -2.761430
Freq: M, dtype: float64

In [16]:
# Slide the values one position earlier; the index is unchanged, so the last
# entry becomes NaN.
ts.shift(periods=-1)

2018-01-31    0.095811
2018-02-28   -2.761430
2018-03-31   -1.181555
2018-04-30         NaN
Freq: M, dtype: float64

In [17]:
# With `freq` given, the index labels move (here: to the next month end) and
# the values stay attached to them, so no NaN is introduced.
# A MonthEnd offset object replaces the 'M' string alias, which is deprecated
# since pandas 2.2 (renamed to 'ME'); the object works on old and new versions.
ts.shift(1, freq=pd.offsets.MonthEnd())

2018-02-28   -0.025459
2018-03-31    0.095811
2018-04-30   -2.761430
2018-05-31   -1.181555
Freq: M, dtype: float64

In [18]:
# Move every index label forward by one weekly step (the 'W' alias anchors on
# Sundays); the values travel with their labels.
ts.shift(periods=1, freq='W')

2018-02-04   -0.025459
2018-03-04    0.095811
2018-04-01   -2.761430
2018-05-06   -1.181555
dtype: float64

### resample : 날짜나 시간 간격을 재조정하는 기능
- up-sample(업 샘플링): 데이터 양이 증가
- down-sample(다운 샘플링): 데이터 양이 감소

In [21]:
# One hundred standard-normal samples on consecutive days starting 2018-01-01;
# show the first twenty rows.
ts = pd.Series(
    data=np.random.randn(100),
    index=pd.date_range(start='2018-1-1', periods=100, freq='D'),
)
ts.head(n=20)

2018-01-01   -0.712935
2018-01-02   -1.189685
2018-01-03    0.810552
2018-01-04   -0.481846
2018-01-05    0.530084
2018-01-06    0.536961
2018-01-07    0.279107
2018-01-08    0.184247
2018-01-09   -0.530118
2018-01-10    1.408875
2018-01-11   -0.148644
2018-01-12   -0.437299
2018-01-13   -0.108087
2018-01-14   -1.094325
2018-01-15   -0.243785
2018-01-16    0.876134
2018-01-17    1.146971
2018-01-18   -0.292071
2018-01-19   -0.446235
2018-01-20   -0.940291
Freq: D, dtype: float64

In [22]:
# When down-sampling, the original observations are grouped into the coarser
# (weekly) bins; each group is then reduced to its mean.
ts.resample(rule='W').mean()

2018-01-07   -0.032537
2018-01-14   -0.103622
2018-01-21    0.266333
2018-01-28    0.230895
2018-02-04   -0.240593
2018-02-11    0.025681
2018-02-18   -0.588539
2018-02-25    0.184186
2018-03-04   -0.232847
2018-03-11   -0.413841
2018-03-18    0.033501
2018-03-25    0.119243
2018-04-01    0.431587
2018-04-08   -0.393350
2018-04-15   -0.079980
Freq: W-SUN, dtype: float64

In [23]:
# Down-sample to month-end bins and keep the first observation of each month.
# A MonthEnd offset object replaces the 'M' string alias, which is deprecated
# since pandas 2.2 (renamed to 'ME'); the object works on old and new versions.
ts.resample(pd.offsets.MonthEnd()).first()

2018-01-31   -0.712935
2018-02-28   -1.956087
2018-03-31    0.518397
2018-04-30    1.646365
Freq: M, dtype: float64