# Working with Time Series

## Dates and Times in Python


### Native Python dates and times: ``datetime`` and ``dateutil``

In [2]:
from datetime import datetime

In [4]:
# Build a naive datetime for 11 May 2022; the time of day defaults to midnight.
someday = datetime(year=2022, month=5, day=11)

In [6]:
# Render the datetime as an ISO 8601 string.
someday.isoformat()

'2022-05-11T00:00:00'

In [7]:
# Format the datetime as year-month using strftime directives.
someday.strftime('%Y-%m')

'2022-05'

In [8]:
# %A gives the full weekday name.
someday.strftime('%A')

'Wednesday'

In [9]:
# weekday() counts from Monday = 0, so Wednesday is 2.
someday.weekday()

2

In [31]:
# A plain datetime cannot be shifted by a bare integer. The error is the point
# of this cell, so catch and print it instead of halting a top-to-bottom run.
try:
    someday + 10
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unsupported operand type(s) for +: 'datetime.datetime' and 'int'

In [10]:
# A date held as a plain string rather than a datetime object.
date_str = '2022-05-21'

In [12]:
# Strings have no datetime methods. The error is the point of this cell, so
# catch and print it instead of halting a top-to-bottom run.
try:
    date_str.isoformat()
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: 'str' object has no attribute 'isoformat'

In [14]:
from dateutil.parser import parse

In [15]:
# parse() turns the date string into a datetime; this rebinds `someday`.
someday = parse(date_str)

In [16]:
# 2022-05-21 is a Saturday, which is weekday 5 (Monday = 0).
someday.weekday()

5

In [None]:
# Documentation references

[strftime section](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior) 

[datetime documentation](https://docs.python.org/3/library/datetime.html)

[dateutil's online documentation](https://dateutil.readthedocs.io/)


In [18]:
# Several date strings to convert in one go.
date_str_list = [
    '2022-05-01',
    '2022-06-10',
    '2022-07-03',
]

In [None]:
# Does NOT work: parse() takes a single string, not a list of strings.
# The failure is the point of this cell, so catch and print the error
# instead of halting a top-to-bottom run.
try:
    parse(date_str_list)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

In [None]:
# This is how it should have been done: parse each string one at a time.
for data in date_str_list:
    parsed = parse(data)
    print(parsed)

### Typed arrays of times: NumPy's ``datetime64``

기존의 파이썬 datetime 을 보강하기 위해,  date 의 array 도 처리할 수 있게 numpy 에서 64-bit 로 처리하도록 라이브러리를 강화했음.

In [23]:
import numpy as np

In [27]:
# Zero-dimensional datetime64 array with the day unit spelled out explicitly.
any_date = np.array('2022-05-11', dtype='datetime64[D]')

In [29]:
# Integers are added in the array's own unit, here days.
any_date + 10

numpy.datetime64('2022-05-21')

In [30]:
# Subtraction works the same way: 35 days earlier.
any_date - 35

numpy.datetime64('2022-04-06')

In [32]:
# The same integer arithmetic fails on a plain datetime, which is why NumPy's
# datetime64 is more convenient. Catch and print the error so the notebook
# still runs top to bottom.
try:
    someday - 10
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: unsupported operand type(s) for -: 'datetime.datetime' and 'int'

In [33]:
# Broadcasting: adding 0..9 yields ten consecutive days starting at any_date.
any_date + np.arange(10)

array(['2022-05-11', '2022-05-12', '2022-05-13', '2022-05-14',
       '2022-05-15', '2022-05-16', '2022-05-17', '2022-05-18',
       '2022-05-19', '2022-05-20'], dtype='datetime64[D]')

|Code    | Meaning     | Time span (relative) | Time span (absolute)   |
|--------|-------------|----------------------|------------------------|
| ``Y``  | Year	       | ± 9.2e18 years       | [9.2e18 BC, 9.2e18 AD] |
| ``M``  | Month       | ± 7.6e17 years       | [7.6e17 BC, 7.6e17 AD] |
| ``W``  | Week	       | ± 1.7e17 years       | [1.7e17 BC, 1.7e17 AD] |
| ``D``  | Day         | ± 2.5e16 years       | [2.5e16 BC, 2.5e16 AD] |
| ``h``  | Hour        | ± 1.0e15 years       | [1.0e15 BC, 1.0e15 AD] |
| ``m``  | Minute      | ± 1.7e13 years       | [1.7e13 BC, 1.7e13 AD] |
| ``s``  | Second      | ± 2.9e11 years       | [2.9e11 BC, 2.9e11 AD] |
| ``ms`` | Millisecond | ± 2.9e8 years        | [ 2.9e8 BC, 2.9e8 AD]  |
| ``us`` | Microsecond | ± 2.9e5 years        | [290301 BC, 294241 AD] |
| ``ns`` | Nanosecond  | ± 292 years          | [ 1678 AD, 2262 AD]    |
| ``ps`` | Picosecond  | ± 106 days           | [ 1969 AD, 1970 AD]    |
| ``fs`` | Femtosecond | ± 2.6 hours          | [ 1969 AD, 1970 AD]    |
| ``as`` | Attosecond  | ± 9.2 seconds        | [ 1969 AD, 1970 AD]    |

### Dates and times in pandas: best of both worlds



In [34]:
import pandas as pd

In [35]:
# Irregularly spaced dates, kept as plain strings for now.
dates = [
    '2022-01-04',
    '2022-01-07',
    '2022-01-08',
    '2022-01-22',
]

In [36]:
dates

['2022-01-04', '2022-01-07', '2022-01-08', '2022-01-22']

In [39]:
# The input is a list of strings, but to_datetime converts it to a
# DatetimeIndex backed by NumPy's datetime64 dtype.
pd.to_datetime(dates)

DatetimeIndex(['2022-01-04', '2022-01-07', '2022-01-08', '2022-01-22'], dtype='datetime64[ns]', freq=None)

In [40]:
# Store the converted DatetimeIndex under its own name.
dates1 = pd.to_datetime(dates)

In [41]:
dates1

DatetimeIndex(['2022-01-04', '2022-01-07', '2022-01-08', '2022-01-22'], dtype='datetime64[ns]', freq=None)

In [42]:
# Ten day-long offsets: 0 days through 9 days.
pd.to_timedelta(np.arange(10), unit='D')

TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days', '5 days',
                '6 days', '7 days', '8 days', '9 days'],
               dtype='timedelta64[ns]', freq=None)

In [43]:
# Shift any_date by 0..9 days; the result is a DatetimeIndex.
any_date + pd.to_timedelta(np.arange(10), unit='D')

DatetimeIndex(['2022-05-11', '2022-05-12', '2022-05-13', '2022-05-14',
               '2022-05-15', '2022-05-16', '2022-05-17', '2022-05-18',
               '2022-05-19', '2022-05-20'],
              dtype='datetime64[ns]', freq=None)

In [44]:
# Same idea with week-long steps.
any_date + pd.to_timedelta(np.arange(10), unit='W')

DatetimeIndex(['2022-05-11', '2022-05-18', '2022-05-25', '2022-06-01',
               '2022-06-08', '2022-06-15', '2022-06-22', '2022-06-29',
               '2022-07-06', '2022-07-13'],
              dtype='datetime64[ns]', freq=None)

In [45]:
# 'M' (month) is no longer accepted as a timedelta unit because a month is not
# a fixed-length duration. Catch and print the error so the notebook still
# runs top to bottom.
try:
    any_date + pd.to_timedelta(np.arange(10), 'M')
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Units 'M', 'Y', and 'y' are no longer supported, as they do not represent unambiguous timedelta values durations.

In [47]:
# 'YS' is not a valid timedelta unit, so this raises too. Catch and print the
# error so the notebook still runs top to bottom.
try:
    any_date + pd.to_timedelta(np.arange(10), 'YS')
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: invalid unit abbreviation: YS

In [48]:
# Hour-long steps. Lowercase 'h' is used because the uppercase 'H' unit is
# deprecated in recent pandas releases.
any_date + pd.to_timedelta(np.arange(10), unit='h')

DatetimeIndex(['2022-05-11 00:00:00', '2022-05-11 01:00:00',
               '2022-05-11 02:00:00', '2022-05-11 03:00:00',
               '2022-05-11 04:00:00', '2022-05-11 05:00:00',
               '2022-05-11 06:00:00', '2022-05-11 07:00:00',
               '2022-05-11 08:00:00', '2022-05-11 09:00:00'],
              dtype='datetime64[ns]', freq=None)

## Pandas Time Series: Indexing by Time



In [49]:
dates

['2022-01-04', '2022-01-07', '2022-01-08', '2022-01-22']

In [51]:
# Build a DatetimeIndex directly from the list of date strings.
date_index = pd.DatetimeIndex(dates)

In [56]:
# Same result as pd.to_datetime(dates)

In [52]:
date_index

DatetimeIndex(['2022-01-04', '2022-01-07', '2022-01-08', '2022-01-22'], dtype='datetime64[ns]', freq=None)

In [54]:
# A Series of four values, indexed by the dates in date_index.
df = pd.Series(
    data=[20000, 35000, 18000, 22000],
    index=date_index,
)

In [55]:
df

2022-01-04    20000
2022-01-07    35000
2022-01-08    18000
2022-01-22    22000
dtype: int64

### Regular sequences: ``pd.date_range()``



In [57]:
# Given a start date and an end date, pd.date_range fills in the dates in between.

In [58]:
# Every calendar day from the start date through the end date, inclusive.
pd.date_range(start='2022-05-04', end='2022-06-21')

DatetimeIndex(['2022-05-04', '2022-05-05', '2022-05-06', '2022-05-07',
               '2022-05-08', '2022-05-09', '2022-05-10', '2022-05-11',
               '2022-05-12', '2022-05-13', '2022-05-14', '2022-05-15',
               '2022-05-16', '2022-05-17', '2022-05-18', '2022-05-19',
               '2022-05-20', '2022-05-21', '2022-05-22', '2022-05-23',
               '2022-05-24', '2022-05-25', '2022-05-26', '2022-05-27',
               '2022-05-28', '2022-05-29', '2022-05-30', '2022-05-31',
               '2022-06-01', '2022-06-02', '2022-06-03', '2022-06-04',
               '2022-06-05', '2022-06-06', '2022-06-07', '2022-06-08',
               '2022-06-09', '2022-06-10', '2022-06-11', '2022-06-12',
               '2022-06-13', '2022-06-14', '2022-06-15', '2022-06-16',
               '2022-06-17', '2022-06-18', '2022-06-19', '2022-06-20',
               '2022-06-21'],
              dtype='datetime64[ns]', freq='D')

In [59]:
# freq sets the step between entries; 'h' means hourly. Lowercase is used
# because the uppercase 'H' alias is deprecated in recent pandas releases.
pd.date_range('2022-05-04', '2022-06-21', freq='h')

DatetimeIndex(['2022-05-04 00:00:00', '2022-05-04 01:00:00',
               '2022-05-04 02:00:00', '2022-05-04 03:00:00',
               '2022-05-04 04:00:00', '2022-05-04 05:00:00',
               '2022-05-04 06:00:00', '2022-05-04 07:00:00',
               '2022-05-04 08:00:00', '2022-05-04 09:00:00',
               ...
               '2022-06-20 15:00:00', '2022-06-20 16:00:00',
               '2022-06-20 17:00:00', '2022-06-20 18:00:00',
               '2022-06-20 19:00:00', '2022-06-20 20:00:00',
               '2022-06-20 21:00:00', '2022-06-20 22:00:00',
               '2022-06-20 23:00:00', '2022-06-21 00:00:00'],
              dtype='datetime64[ns]', length=1153, freq='H')

In [60]:
# Weekly dates inside the window; plain 'W' anchors on Sundays.
pd.date_range(start='2022-05-04', end='2022-06-21', freq='W')

DatetimeIndex(['2022-05-08', '2022-05-15', '2022-05-22', '2022-05-29',
               '2022-06-05', '2022-06-12', '2022-06-19'],
              dtype='datetime64[ns]', freq='W-SUN')

In [61]:
# Weekly dates anchored on Wednesdays.
pd.date_range(start='2022-05-04', end='2022-06-21', freq='W-WED')

DatetimeIndex(['2022-05-04', '2022-05-11', '2022-05-18', '2022-05-25',
               '2022-06-01', '2022-06-08', '2022-06-15'],
              dtype='datetime64[ns]', freq='W-WED')

## Frequencies and Offsets

| Code   | Description         | Code   | Description          |
|--------|---------------------|--------|----------------------|
| ``D``  | Calendar day        | ``B``  | Business day         |
| ``W``  | Weekly              |        |                      |
| ``M``  | Month end           | ``BM`` | Business month end   |
| ``Q``  | Quarter end         | ``BQ`` | Business quarter end |
| ``A``  | Year end            | ``BA`` | Business year end    |
| ``H``  | Hours               | ``BH`` | Business hours       |
| ``T``  | Minutes             |        |                      |
| ``S``  | Seconds             |        |                      |
| ``L``  | Milliseconds        |        |                      |
| ``U``  | Microseconds        |        |                      |
| ``N``  | Nanoseconds         |        |                      |

In [62]:
# Ten steps of 2 hours 30 minutes, starting from a zero timedelta.
# 'h' and 'min' are used because the 'H' and 'T' aliases are deprecated in
# recent pandas releases.
pd.timedelta_range(0, periods=10, freq='2h30min')

TimedeltaIndex(['0 days 00:00:00', '0 days 02:30:00', '0 days 05:00:00',
                '0 days 07:30:00', '0 days 10:00:00', '0 days 12:30:00',
                '0 days 15:00:00', '0 days 17:30:00', '0 days 20:00:00',
                '0 days 22:30:00'],
               dtype='timedelta64[ns]', freq='150T')