# 10. 시계열
> ## 시간대 다루기

In [1]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
from pandas.tseries.offsets import Hour, Minute, Day, MonthEnd

In [2]:
import pytz

# Peek at the last five names in pytz's list of commonly used time zones.
pytz.common_timezones[-5:]

['US/Eastern', 'US/Hawaii', 'US/Mountain', 'US/Pacific', 'UTC']

- pytz 라이브러리는 전 세계의 시간대 정보를 모아둠

In [3]:
# Look up the time-zone object for US Eastern time by name and display it.
tz = pytz.timezone('US/Eastern')
tz

<DstTzInfo 'US/Eastern' LMT-1 day, 19:04:00 STD>

___
## 1. 지역화와 변환

In [4]:
# Six consecutive daily timestamps starting 2012-03-09 09:30, with no time
# zone attached, used as the index of a series of random values.
rng = pd.date_range(start='3/9/2012 9:30', periods=6, freq='D')
ts = pd.Series(np.random.randn(rng.size), index=rng)
ts

2012-03-09 09:30:00    0.536305
2012-03-10 09:30:00   -0.469671
2012-03-11 09:30:00    1.108272
2012-03-12 09:30:00    0.750225
2012-03-13 09:30:00   -0.428140
2012-03-14 09:30:00   -1.668063
Freq: D, dtype: float64

In [5]:
# The index was built without a tz argument, so its tz attribute is None.
print(ts.index.tz)

None


In [6]:
# Passing tz='UTC' makes date_range build a time-zone-aware index directly.
pd.date_range('3/9/2012 9:30', periods = 10, freq = 'D', tz = 'UTC')

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00', '2012-03-16 09:30:00+00:00',
               '2012-03-17 09:30:00+00:00', '2012-03-18 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [8]:
# Attach UTC to the naive series: the wall-clock times stay the same and
# simply gain a +00:00 offset.
ts_utc = ts.tz_localize('UTC')
ts_utc

2012-03-09 09:30:00+00:00    0.536305
2012-03-10 09:30:00+00:00   -0.469671
2012-03-11 09:30:00+00:00    1.108272
2012-03-12 09:30:00+00:00    0.750225
2012-03-13 09:30:00+00:00   -0.428140
2012-03-14 09:30:00+00:00   -1.668063
Freq: D, dtype: float64

In [9]:
# The index dtype now records the zone (datetime64[ns, UTC]).
ts_utc.index

DatetimeIndex(['2012-03-09 09:30:00+00:00', '2012-03-10 09:30:00+00:00',
               '2012-03-11 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

- 시간대 정보가 없는(naive) 시계열에 시간대를 부여하는 지역화는 tz_localize 메서드 사용

In [11]:
# View the UTC-localized index in US Eastern time; the displayed offset
# switches from -05:00 to -04:00 starting with the 2012-03-11 entry.
ts_utc.tz_convert('US/Eastern').index

DatetimeIndex(['2012-03-09 04:30:00-05:00', '2012-03-10 04:30:00-05:00',
               '2012-03-11 05:30:00-04:00', '2012-03-12 05:30:00-04:00',
               '2012-03-13 05:30:00-04:00', '2012-03-14 05:30:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', freq='D')

In [13]:
# Treat the naive times as US Eastern wall-clock times, then show the
# same instants expressed in UTC.
ts_eastern = ts.tz_localize('US/Eastern')
ts_eastern.tz_convert('UTC').index

DatetimeIndex(['2012-03-09 14:30:00+00:00', '2012-03-10 14:30:00+00:00',
               '2012-03-11 13:30:00+00:00', '2012-03-12 13:30:00+00:00',
               '2012-03-13 13:30:00+00:00', '2012-03-14 13:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

- 이미 지역화된 시계열을 다른 시간대로 변환할 때는 tz_convert 메서드 사용
___
## 2. 시간대 고려해 Timestamp 객체 다루기

In [15]:
# Start from a naive timestamp, declare it to be UTC, then display the same
# instant in US Eastern time.
stamp = pd.Timestamp('2011/03/12 04:00')
stamp_utc = stamp.tz_localize(tz='utc')
stamp_utc.tz_convert(tz='US/Eastern')

Timestamp('2011-03-11 23:00:00-0500', tz='US/Eastern')

- UTC 기준 04:00이 US/Eastern(UTC-5) 시간대로 변환되어 전날 23:00으로 표시됨

In [16]:
# Create a timestamp that is already aware of the Moscow time zone.
naive_moscow_time = pd.Timestamp('2011/03/12 4:00')
stamp_moscow = naive_moscow_time.tz_localize('Europe/Moscow')
stamp_moscow

Timestamp('2011-03-12 04:00:00+0300', tz='Europe/Moscow')

- 객체 생성시 시간대 부여 가능

In [17]:
# .value is the integer nanosecond count since the Unix epoch, in UTC.
stamp_utc.value

1299902400000000000

In [18]:
# Converting to another zone leaves .value unchanged: it is the same
# instant, only displayed differently.
stamp_utc.tz_convert('US/Eastern').value

1299902400000000000

- 시간대를 고려한 Timestamp 객체는 내부적으로 유닉스 에포크(1970-01-01 UTC)부터의 나노초 값을 UTC 기준으로 저장
- 시간대 변환 과정에서 값 변경 없음
___
## 3. 다른 시간대 간의 연산

In [20]:
# Ten business-day timestamps (weekends skipped) starting 2012-03-07 09:30,
# used as the index of a series of random values.
rng = pd.date_range(start='3/7/2012 9:30', periods=10, freq='B')
ts = pd.Series(np.random.randn(rng.size), index=rng)
ts

2012-03-07 09:30:00   -0.063209
2012-03-08 09:30:00   -0.721197
2012-03-09 09:30:00    0.132337
2012-03-12 09:30:00   -0.478517
2012-03-13 09:30:00    0.935449
2012-03-14 09:30:00    0.052066
2012-03-15 09:30:00    0.299733
2012-03-16 09:30:00   -0.121924
2012-03-19 09:30:00   -0.276504
2012-03-20 09:30:00   -1.468149
Freq: B, dtype: float64

In [21]:
# Localize the first seven points to London time, then view all but the
# first two of those points in Moscow time.
ts1 = ts.iloc[:7].tz_localize('Europe/London')
ts2 = ts1.iloc[2:].tz_convert('Europe/Moscow')
# Adding series whose indexes carry different zones matches rows by instant;
# rows present in only one operand come out as NaN.
result = ts1.add(ts2)
result

2012-03-07 09:30:00+00:00         NaN
2012-03-08 09:30:00+00:00         NaN
2012-03-09 09:30:00+00:00    0.264674
2012-03-12 09:30:00+00:00   -0.957035
2012-03-13 09:30:00+00:00    1.870898
2012-03-14 09:30:00+00:00    0.104133
2012-03-15 09:30:00+00:00    0.599467
Freq: B, dtype: float64

In [22]:
# The index of the summed series is expressed in UTC.
result.index

DatetimeIndex(['2012-03-07 09:30:00+00:00', '2012-03-08 09:30:00+00:00',
               '2012-03-09 09:30:00+00:00', '2012-03-12 09:30:00+00:00',
               '2012-03-13 09:30:00+00:00', '2012-03-14 09:30:00+00:00',
               '2012-03-15 09:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='B')

- 시간대가 서로 다른 두 시계열을 연산하면 결과의 인덱스는 UTC로 반환됨