# 10. 시계열
> ## 시계열 기초

In [4]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse

In [5]:
# Seed NumPy's global RNG so the random draws that follow are identical on
# every Restart & Run All; without a seed each run shows different numbers.
np.random.seed(42)

# Six irregularly spaced days in January 2011, used as the Series index.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]
ts = Series(np.random.randn(6), index=dates)
ts

2011-01-02   -1.380778
2011-01-05   -2.739160
2011-01-07    2.676942
2011-01-08    0.028508
2011-01-10   -0.750029
2011-01-12    0.297393
dtype: float64

In [8]:
# The datetime objects passed as the index are stored as a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

- ts는 DatetimeIndex를 색인으로 갖는 Series임 (예전 pandas의 TimeSeries 클래스는 Series로 통합되어 더 이상 존재하지 않음)
___
## 1. 인덱싱, 선택, 부분 선택

In [11]:
# Positional lookup on the index: the third label, returned as a Timestamp.
stamp = ts.index[2]
stamp

Timestamp('2011-01-07 00:00:00')

In [12]:
# Label-based lookup using the Timestamp taken from the index.
ts.loc[stamp]

2.676941566223484

- Series와 동일한 방식으로 인덱싱

In [13]:
# Show the full series again as a reference for the string-based lookups.
ts

2011-01-02   -1.380778
2011-01-05   -2.739160
2011-01-07    2.676942
2011-01-08    0.028508
2011-01-10   -0.750029
2011-01-12    0.297393
dtype: float64

In [14]:
# A date string is parsed and matched against the DatetimeIndex labels.
ts.loc['1/10/2011']

-0.7500292467600435

In [16]:
# The compact YYYYMMDD spelling resolves to the same label as '1/10/2011'.
ts.loc['20110110']

-0.7500292467600435

- 문자열로 조회할 때는 날짜로 해석 가능한 형식이면 어떤 것이든 사용할 수 있음 (예: '1/10/2011', '20110110')

In [17]:
# 1000 consecutive daily timestamps beginning on 2000-01-01.
daily_index = pd.date_range('1/1/2000', periods=1000)
longer_ts = Series(np.random.randn(1000), index=daily_index)
longer_ts.head()

2000-01-01   -0.920198
2000-01-02   -0.866173
2000-01-03    0.261634
2000-01-04   -3.129879
2000-01-05   -0.287225
Freq: D, dtype: float64

In [18]:
# A year-only string selects every row that falls in that year.
len(longer_ts.loc['2001'])

365

In [19]:
# A year/month string selects every row that falls in that month.
len(longer_ts.loc['2002/05'])

31

- 긴 시계열에서는 연도('2001')나 연-월('2002/05')만 지정해 해당 기간의 데이터를 한 번에 선택할 수 있음

In [21]:
# Open-ended label slice: everything from 2011-01-07 onward (start inclusive).
ts.loc[datetime(2011, 1, 7):]

2011-01-07    2.676942
2011-01-08    0.028508
2011-01-10   -0.750029
2011-01-12    0.297393
dtype: float64

In [23]:
# Slice bounds may be dates that are not in the index; rows between them are returned.
ts.loc['1/6/2011':'1/11/2011']

2011-01-07    2.676942
2011-01-08    0.028508
2011-01-10   -0.750029
dtype: float64

In [25]:
# Drop every entry after 2011-01-09; the cutoff date itself need not be in the index.
ts.truncate(after = '1/9/2011')

2011-01-02   -1.380778
2011-01-05   -2.739160
2011-01-07    2.676942
2011-01-08    0.028508
dtype: float64

- Series와 같은 방식으로 데이터 슬라이싱 가능
- truncate 메서드 사용 가능

In [27]:
# 100 weekly timestamps that fall on Wednesdays, starting on or after 2000-01-01.
dates = pd.date_range('1/1/2000', freq='W-WED', periods=100)
state_names = ['Colorado', 'Texas', 'New York', 'Ohio']
long_df = DataFrame(np.random.randn(100, 4), columns=state_names, index=dates)
long_df.head()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.587254,0.500988,0.489314,-0.691617
2000-01-12,-0.096754,0.820145,-0.642268,0.943192
2000-01-19,-0.777072,-0.300341,0.916848,0.040605
2000-01-26,-1.200133,1.299586,0.104342,0.940238
2000-02-02,0.071979,-0.702436,-0.22206,0.819407


In [29]:
# Partial string indexing on DataFrame rows via .loc: selects every row in May 2001.
long_df.loc['5/2001']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-0.529705,-0.225986,0.000718,-0.074503
2001-05-09,0.008921,1.416912,0.336776,-0.757383
2001-05-16,1.578911,1.015375,-0.801904,-0.269758
2001-05-23,1.013891,1.091795,0.234371,-0.733766
2001-05-30,0.125493,-0.538154,0.145959,1.087095


- DataFrame에서도 동일하게 적용
___
## 2. 중복된 색인을 갖는 시계열

In [31]:
# Build an index in which 2000-01-02 occurs three times.
repeated_days = ['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000']
dates = pd.DatetimeIndex(repeated_days)
dup_ts = Series(np.arange(5), index=dates)
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [32]:
# Inspect the index; the repeated 2000-01-02 labels are kept as-is.
dup_ts.index

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', '2000-01-02',
               '2000-01-03'],
              dtype='datetime64[ns]', freq=None)

In [36]:
# False here because 2000-01-02 appears more than once in the index.
dup_ts.index.is_unique

False

In [37]:
# A timestamp that occurs only once yields a scalar.
dup_ts.loc['1/3/2000']

4

In [38]:
# A duplicated timestamp yields a Series containing every matching row.
dup_ts.loc['2000/1/2']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [40]:
# Group on the (only) index level to count how many entries share each timestamp.
dup_ts.groupby(level=0).count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64