# CH.11_Time_Series

## 0. Import Packages

In [32]:
from IPython.core.display import display, HTML
display(HTML("<style> .container{width:100% !important;}</style>"))

from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
from pandas.tseries.offsets import Day, MonthEnd, Hour, Minute
import pytz

## 1. Date and Time Data Types and Tools

In [3]:
now = datetime.now()
now

datetime.datetime(2020, 9, 25, 11, 4, 25, 334118)

In [4]:
now.year, now.month, now.day

(2020, 9, 25)

In [7]:
delta = datetime(2019, 2, 12, 12) - datetime(2019, 2, 10, 11)
print(delta)
print(delta.days)
print(delta.seconds)

2 days, 1:00:00
2
3600


In [8]:
start = datetime(2019,2,12)
print(start + timedelta(2))
print(start - 2*timedelta(2))

2019-02-14 00:00:00
2019-02-08 00:00:00


In [9]:
stamp = datetime(2019,2,12)
print(stamp)
print(str(stamp))
print(stamp.strftime('%Y-%m-%d'))

2019-02-12 00:00:00
2019-02-12 00:00:00
2019-02-12


In [10]:
value = '2019-02-12'
datetime.strptime(value, '%Y-%m-%d')

datetime.datetime(2019, 2, 12, 0, 0)

In [12]:
print(parse('2019-02-12'))
print(parse('Jan 31, 1997, 10:45 PM'))
print(parse('12/2/2019', dayfirst = True))

2019-02-12 00:00:00
1997-01-31 22:45:00
2019-02-12 00:00:00


In [14]:
idx = pd.to_datetime(['2/12/2019', '1/12/2019', '', None])

In [15]:
print(idx)
print(idx[2])

DatetimeIndex(['2019-02-12', '2019-01-12', 'NaT', 'NaT'], dtype='datetime64[ns]', freq=None)
NaT


## 2. Time Series Basics

In [19]:
dates = [datetime(2019,2,12), datetime(2019,2,15), datetime(2019,2,1), 
         datetime(2019,2,2), datetime(2019,2,19), datetime(2019,2,20)]
ts = Series(np.random.permutation(6), index = dates)

print(ts)
print(type(ts))
print(ts.index)

2019-02-12    3
2019-02-15    2
2019-02-01    1
2019-02-02    4
2019-02-19    5
2019-02-20    0
dtype: int32
<class 'pandas.core.series.Series'>
DatetimeIndex(['2019-02-12', '2019-02-15', '2019-02-01', '2019-02-02',
               '2019-02-19', '2019-02-20'],
              dtype='datetime64[ns]', freq=None)


In [20]:
ts+ts

2019-02-12     6
2019-02-15     4
2019-02-01     2
2019-02-02     8
2019-02-19    10
2019-02-20     0
dtype: int32

In [21]:
stamp = ts.index[2]
print(stamp)
print(ts[stamp])
print(ts['2/01/2019'])
print(ts['20190201'])

2019-02-01 00:00:00
1
2019-02-01    1
dtype: int32
2019-02-01    1
dtype: int32


In [22]:
longer_ts = Series(np.random.permutation(1000), 
                   index = pd.date_range('2/1/2019', periods = 1000))

longer_ts.head()

2019-02-01    555
2019-02-02    656
2019-02-03    324
2019-02-04    632
2019-02-05     22
Freq: D, dtype: int32

In [23]:
longer_ts['2020']

2020-01-01    899
2020-01-02      1
2020-01-03    142
2020-01-04      4
2020-01-05    877
             ... 
2020-12-27    677
2020-12-28    452
2020-12-29    688
2020-12-30    484
2020-12-31    981
Freq: D, Length: 366, dtype: int32

In [24]:
ts[datetime(2019,2,1):]

2019-02-01    1
2019-02-02    4
2019-02-19    5
2019-02-20    0
dtype: int32

## 3. Date Ranges, Frequencies, and Shifting

In [25]:
dates = [datetime(2019,2,12), datetime(2019,2,15), datetime(2019,2,1), 
         datetime(2019,2,2), datetime(2019,2,19), datetime(2019,2,20)]
ts = Series(np.random.permutation(6), index = dates)

In [26]:
ts.resample('D').mean()

2019-02-01    1.0
2019-02-02    0.0
2019-02-03    NaN
2019-02-04    NaN
2019-02-05    NaN
2019-02-06    NaN
2019-02-07    NaN
2019-02-08    NaN
2019-02-09    NaN
2019-02-10    NaN
2019-02-11    NaN
2019-02-12    2.0
2019-02-13    NaN
2019-02-14    NaN
2019-02-15    3.0
2019-02-16    NaN
2019-02-17    NaN
2019-02-18    NaN
2019-02-19    4.0
2019-02-20    5.0
Freq: D, dtype: float64

In [27]:
# date range
index = pd.date_range('2/1/2019', '2/28/2019')
index

DatetimeIndex(['2019-02-01', '2019-02-02', '2019-02-03', '2019-02-04',
               '2019-02-05', '2019-02-06', '2019-02-07', '2019-02-08',
               '2019-02-09', '2019-02-10', '2019-02-11', '2019-02-12',
               '2019-02-13', '2019-02-14', '2019-02-15', '2019-02-16',
               '2019-02-17', '2019-02-18', '2019-02-19', '2019-02-20',
               '2019-02-21', '2019-02-22', '2019-02-23', '2019-02-24',
               '2019-02-25', '2019-02-26', '2019-02-27', '2019-02-28'],
              dtype='datetime64[ns]', freq='D')

In [28]:
pd.date_range(start = '2/1/2019', periods = 20)

DatetimeIndex(['2019-02-01', '2019-02-02', '2019-02-03', '2019-02-04',
               '2019-02-05', '2019-02-06', '2019-02-07', '2019-02-08',
               '2019-02-09', '2019-02-10', '2019-02-11', '2019-02-12',
               '2019-02-13', '2019-02-14', '2019-02-15', '2019-02-16',
               '2019-02-17', '2019-02-18', '2019-02-19', '2019-02-20'],
              dtype='datetime64[ns]', freq='D')

In [29]:
pd.date_range(end = '2/20/2012', periods = 20)

DatetimeIndex(['2012-02-01', '2012-02-02', '2012-02-03', '2012-02-04',
               '2012-02-05', '2012-02-06', '2012-02-07', '2012-02-08',
               '2012-02-09', '2012-02-10', '2012-02-11', '2012-02-12',
               '2012-02-13', '2012-02-14', '2012-02-15', '2012-02-16',
               '2012-02-17', '2012-02-18', '2012-02-19', '2012-02-20'],
              dtype='datetime64[ns]', freq='D')

In [30]:
pd.date_range('1/1/2019', '3/31/2019', freq = 'BM')

DatetimeIndex(['2019-01-31', '2019-02-28', '2019-03-29'], dtype='datetime64[ns]', freq='BM')

In [31]:
pd.date_range('2/1/2019 12:59:59', periods = 5, normalize = True)

DatetimeIndex(['2019-02-01', '2019-02-02', '2019-02-03', '2019-02-04',
               '2019-02-05'],
              dtype='datetime64[ns]', freq='D')

In [33]:
# frequency
hour = Hour()
hour

<Hour>

In [34]:
four_hours = Hour(4)
four_hours

<4 * Hours>

In [35]:
pd.date_range('2/1/2019', '2/3/2019 23:59', freq = '4h')

DatetimeIndex(['2019-02-01 00:00:00', '2019-02-01 04:00:00',
               '2019-02-01 08:00:00', '2019-02-01 12:00:00',
               '2019-02-01 16:00:00', '2019-02-01 20:00:00',
               '2019-02-02 00:00:00', '2019-02-02 04:00:00',
               '2019-02-02 08:00:00', '2019-02-02 12:00:00',
               '2019-02-02 16:00:00', '2019-02-02 20:00:00',
               '2019-02-03 00:00:00', '2019-02-03 04:00:00',
               '2019-02-03 08:00:00', '2019-02-03 12:00:00',
               '2019-02-03 16:00:00', '2019-02-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [36]:
pd.date_range('2/1/2019', '2/3/2019 23:59', freq = four_hours)

DatetimeIndex(['2019-02-01 00:00:00', '2019-02-01 04:00:00',
               '2019-02-01 08:00:00', '2019-02-01 12:00:00',
               '2019-02-01 16:00:00', '2019-02-01 20:00:00',
               '2019-02-02 00:00:00', '2019-02-02 04:00:00',
               '2019-02-02 08:00:00', '2019-02-02 12:00:00',
               '2019-02-02 16:00:00', '2019-02-02 20:00:00',
               '2019-02-03 00:00:00', '2019-02-03 04:00:00',
               '2019-02-03 08:00:00', '2019-02-03 12:00:00',
               '2019-02-03 16:00:00', '2019-02-03 20:00:00'],
              dtype='datetime64[ns]', freq='4H')

In [37]:
rng = pd.date_range('1/1/2019', '12/31/2019', freq = 'WOM-3FRI')
list(rng)

[Timestamp('2019-01-18 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-02-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-03-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-04-19 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-05-17 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-06-21 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-07-19 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-08-16 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-09-20 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-10-18 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-11-15 00:00:00', freq='WOM-3FRI'),
 Timestamp('2019-12-20 00:00:00', freq='WOM-3FRI')]

In [41]:
# shifting
ts = Series(np.random.permutation(4), 
           index = pd.date_range('2/1/2019', periods = 4, freq = 'D'))

ts

2019-02-01    0
2019-02-02    3
2019-02-03    1
2019-02-04    2
Freq: D, dtype: int32

In [49]:
print("[1]", ts.shift(1))
print("[2]", ts.shift(-2))
print("[3]", ts.shift(2, freq = 'D'))

[1] 2019-02-01    NaN
2019-02-02    0.0
2019-02-03    3.0
2019-02-04    1.0
Freq: D, dtype: float64
[2] 2019-02-01    1.0
2019-02-02    2.0
2019-02-03    NaN
2019-02-04    NaN
Freq: D, dtype: float64
[3] 2019-02-03    0
2019-02-04    3
2019-02-05    1
2019-02-06    2
Freq: D, dtype: int32


In [52]:
now = datetime(2019,2,12)
print(now + 3 * Day())
print(now + MonthEnd())

2019-02-15 00:00:00
2019-02-28 00:00:00


In [53]:
offset = MonthEnd()

print(offset)
print(offset.rollforward(now))
print(offset.rollback(now))

<MonthEnd>
2019-02-28 00:00:00
2019-01-31 00:00:00


In [54]:
ts = Series(np.random.permutation(20), 
            index = pd.date_range('2/12/2019', periods = 20, freq = '4d'))

ts

2019-02-12     0
2019-02-16    19
2019-02-20    18
2019-02-24     1
2019-02-28     6
2019-03-04    10
2019-03-08     4
2019-03-12     2
2019-03-16     7
2019-03-20     8
2019-03-24    12
2019-03-28    17
2019-04-01    16
2019-04-05     3
2019-04-09    13
2019-04-13    15
2019-04-17    14
2019-04-21     5
2019-04-25    11
2019-04-29     9
Freq: 4D, dtype: int32

## 4. Time Zone Handling

In [55]:
pytz.common_timezones

['Africa/Abidjan', 'Africa/Accra', 'Africa/Addis_Ababa', 'Africa/Algiers', 'Africa/Asmara', 'Africa/Bamako', 'Africa/Bangui', 'Africa/Banjul', 'Africa/Bissau', 'Africa/Blantyre', 'Africa/Brazzaville', 'Africa/Bujumbura', 'Africa/Cairo', 'Africa/Casablanca', 'Africa/Ceuta', 'Africa/Conakry', 'Africa/Dakar', 'Africa/Dar_es_Salaam', 'Africa/Djibouti', 'Africa/Douala', 'Africa/El_Aaiun', 'Africa/Freetown', 'Africa/Gaborone', 'Africa/Harare', 'Africa/Johannesburg', 'Africa/Juba', 'Africa/Kampala', 'Africa/Khartoum', 'Africa/Kigali', 'Africa/Kinshasa', 'Africa/Lagos', 'Africa/Libreville', 'Africa/Lome', 'Africa/Luanda', 'Africa/Lubumbashi', 'Africa/Lusaka', 'Africa/Malabo', 'Africa/Maputo', 'Africa/Maseru', 'Africa/Mbabane', 'Africa/Mogadishu', 'Africa/Monrovia', 'Africa/Nairobi', 'Africa/Ndjamena', 'Africa/Niamey', 'Africa/Nouakchott', 'Africa/Ouagadougou', 'Africa/Porto-Novo', 'Africa/Sao_Tome', 'Africa/Tripoli', 'Africa/Tunis', 'Africa/Windhoek', 'America/Adak', 'America/Anchorage', 'Amer

In [56]:
rng = pd.date_range('2/13/2019 10:30', periods = 6, freq = 'D')
ts = Series(np.random.permutation(len(rng)), index = rng)

ts

2019-02-13 10:30:00    3
2019-02-14 10:30:00    1
2019-02-15 10:30:00    4
2019-02-16 10:30:00    2
2019-02-17 10:30:00    0
2019-02-18 10:30:00    5
Freq: D, dtype: int32

In [58]:
pd.date_range('2/13/2019 10:30', periods = 6, freq = 'D', tz = 'UTC')

DatetimeIndex(['2019-02-13 10:30:00+00:00', '2019-02-14 10:30:00+00:00',
               '2019-02-15 10:30:00+00:00', '2019-02-16 10:30:00+00:00',
               '2019-02-17 10:30:00+00:00', '2019-02-18 10:30:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')

In [59]:
ts_utc = ts.tz_localize('UTC')  # Localize
ts_utc

2019-02-13 10:30:00+00:00    3
2019-02-14 10:30:00+00:00    1
2019-02-15 10:30:00+00:00    4
2019-02-16 10:30:00+00:00    2
2019-02-17 10:30:00+00:00    0
2019-02-18 10:30:00+00:00    5
Freq: D, dtype: int32

In [60]:
ts_utc.tz_convert('US/Eastern')

2019-02-13 05:30:00-05:00    3
2019-02-14 05:30:00-05:00    1
2019-02-15 05:30:00-05:00    4
2019-02-16 05:30:00-05:00    2
2019-02-17 05:30:00-05:00    0
2019-02-18 05:30:00-05:00    5
Freq: D, dtype: int32

In [61]:
stamp = pd.Timestamp('2019-02-15 11:12')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('US/Eastern')

Timestamp('2019-02-15 06:12:00-0500', tz='US/Eastern')

In [63]:
stamp_moscow = pd.Timestamp('2019-02-13', tz = 'Europe/Moscow')
stamp_moscow

Timestamp('2019-02-13 00:00:00+0300', tz='Europe/Moscow')

In [64]:
stamp = pd.Timestamp('2018-03-11 01:30', tz = 'US/Eastern')
stamp + Hour()

Timestamp('2018-03-11 03:30:00-0400', tz='US/Eastern')

In [66]:
stamp = pd.Timestamp('2018-11-04 00:30', tz = 'US/Eastern')
print(stamp + Hour())
print(stamp + 2*Hour())

2018-11-04 01:30:00-04:00
2018-11-04 01:30:00-05:00


In [68]:
rng = pd.date_range('2/13/2019 9:30', periods = 10, freq = 'B')
ts = Series(np.random.randn(len(rng)), index = rng)

ts

2019-02-13 09:30:00    0.541856
2019-02-14 09:30:00    0.578441
2019-02-15 09:30:00   -0.624586
2019-02-18 09:30:00    1.301772
2019-02-19 09:30:00   -0.038993
2019-02-20 09:30:00   -0.434665
2019-02-21 09:30:00    1.932265
2019-02-22 09:30:00    0.579780
2019-02-25 09:30:00   -0.342864
2019-02-26 09:30:00   -0.299932
Freq: B, dtype: float64

In [70]:
ts1 = ts[:7].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')

print(ts1)
print(ts2)

2019-02-13 09:30:00+00:00    0.541856
2019-02-14 09:30:00+00:00    0.578441
2019-02-15 09:30:00+00:00   -0.624586
2019-02-18 09:30:00+00:00    1.301772
2019-02-19 09:30:00+00:00   -0.038993
2019-02-20 09:30:00+00:00   -0.434665
2019-02-21 09:30:00+00:00    1.932265
Freq: B, dtype: float64
2019-02-15 12:30:00+03:00   -0.624586
2019-02-18 12:30:00+03:00    1.301772
2019-02-19 12:30:00+03:00   -0.038993
2019-02-20 12:30:00+03:00   -0.434665
2019-02-21 12:30:00+03:00    1.932265
Freq: B, dtype: float64
