# Time Series - Lesson 1

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Seed NumPy's global random generator so the random examples are repeatable.
np.random.seed(12345)
# Print arrays with 4 digits of precision and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

## Date and Time Data Types and Tools

In [None]:
from datetime import datetime, date

# Snapshot of the current local date and time.
now = datetime.now()
print(now)

# Calendar fields are plain attributes of the datetime object.
print('{0.year} {0.month} {0.day}'.format(now))

# Calendar date only, without a time-of-day component.
print(date.today())

- datetime(year, month, day, hour=0, minute=0, second=0, microsecond=0)

In [None]:
datetime(2019, 1, 31, 8, 30)

In [None]:
# Subtracting one datetime from another yields a timedelta.
delta = (
    datetime(year=2019, month=1, day=31, hour=8, minute=30)
    - datetime(year=2019, month=1, day=1, hour=8, minute=20)
)
# A timedelta stores whole days and the leftover seconds separately.
print('{0.days} {0.seconds}'.format(delta))
delta

- timedelta(days=0, seconds=0, microseconds=0, milliseconds=0, minutes=0, hours=0, weeks=0)

In [None]:
from datetime import timedelta

In [None]:
# Positional order is (days, seconds, microseconds); keywords make that explicit.
timedelta(days=12, seconds=30, microseconds=10)

In [None]:
# Adding a timedelta shifts a datetime; timedeltas can be scaled by integers.
start = datetime(year=2019, month=1, day=1)
start + timedelta(days=12, seconds=30), start + timedelta(days=12) * 2

### Converting Between String and Datetime

In [None]:
# Formatting: datetime -> string

stamp = datetime(2019, 1, 15)

print(stamp)  # print() applies str() to its argument
print(
    stamp.strftime('%Y-%m-%d'),       # 4-digit year
    stamp.strftime('%y-%m-%d'),       # 2-digit year
    stamp.strftime('%A, %B %d, %Y'),  # full weekday name
    stamp.strftime('%a, %B %d, %Y'),  # abbreviated weekday name
    sep='\n',
)

In [None]:
# parse

value = '2019-01-15'
datetime.strptime(value, '%Y-%m-%d')

In [None]:
# Parse several month/day/year strings with one explicit format.
datestrs = ['1/15/2019', '1/30/2019']
[datetime.strptime(datestr, '%m/%d/%Y') for datestr in datestrs]

In [None]:
value = 'Tuesday, January 15, 2019'
datetime.strptime(value, '%A, %B %d, %Y')

- The `dateutil.parser` module provides a generic date/time string parser that can handle most common date and time formats.

In [None]:
from dateutil.parser import parse
parse('2019-01-31')

In [None]:
parse('Jan 31, 2019 10:45 AM')

In [None]:
parse('31/1/2019', dayfirst=True)

### pandas `to_datetime` function

In [None]:
datestrs = ['2019-01-06 12:00:00', '2019-02-06 00:00:00']
pd.to_datetime(datestrs)

## Time Series Operations

In [None]:
np.random.seed(12345)

# Irregularly spaced daily observations: 5 and 6 January 2019 are absent.
dates = [datetime(*ymd) for ymd in [(2018, 12, 31),
                                    (2019, 1, 1), (2019, 1, 2),
                                    (2019, 1, 3), (2019, 1, 4),
                                    (2019, 1, 7), (2019, 1, 8)]]

# Random integers in [1, 100) indexed by the dates above.
ts = pd.Series(data=np.random.randint(1, 100, size=7), index=dates)
ts

In [None]:
ts.index

In [None]:
ts[::2]

In [None]:
ts + ts[::2]

In [None]:
stamp = ts.index[0]
stamp

### Indexing, Selection, Subsetting

In [None]:
ts

In [None]:
stamp = ts.index[1]
ts[stamp]

In [None]:
ts['1/7/2019'], ts['20190107']

In [None]:
ts['2018']

In [None]:
ts['2019']

In [None]:
np.random.seed(12345)

# 1000 consecutive daily observations starting on 1 January 2018.
longer_ts = pd.Series(
    data=np.random.randint(1, 100, size=1000),
    index=pd.date_range(start='1/1/2018', periods=1000),
)
longer_ts.head()

In [None]:
longer_ts.tail()

In [None]:
longer_ts['2019']

In [None]:
longer_ts['2019-09']

In [None]:
ts

In [None]:
ts[datetime(2019, 1, 3):]

In [None]:
ts['1/3/2019':'1/10/2019']

In [None]:
ts['1/3/2019':'1/7/2019']

In [None]:
ts.truncate(after='1/3/2019')

In [None]:
ts

#### Frequency offsets
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases

In [None]:
# DataFrame example: 100 weekly dates anchored on Tuesdays.
# The anchor is written in upper case ('W-TUE'), which is the documented
# spelling of the anchored weekly alias.

dates = pd.date_range('1/1/2019', periods=100, freq='W-TUE')
dates[:20]

In [None]:
np.random.seed(12345)

# 100 x 4 table of random integers in [1, 100), one column per state,
# indexed by `dates`.
long_df = pd.DataFrame(
    data=np.random.randint(1, 100, size=(100, 4)),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
long_df.head()

In [None]:
long_df.loc['3-2019']

In [None]:
long_df.loc['3-2019':'5-2019']

In [None]:
# A plain 'W' frequency is anchored on Sundays by default.

np.random.seed(12345)

dates = pd.date_range(start='1/1/2019', periods=100, freq='W')
long_df = pd.DataFrame(
    data=np.random.randint(1, 100, size=(100, 4)),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)

# Partial-string selection: the rows that fall in November 2019.
long_df.loc['11-2019']

## Date Ranges, Frequencies, and Shifting

In [None]:
ts

In [None]:
# Resample at higher or lower frequencies

resampler = ts.resample('D')
resampler

In [None]:
resampler.sum()

In [None]:
resampler = ts.resample('3D')
resampler.sum()

In [None]:
# Month-end bins. The offset object is used instead of the string alias
# because the alias spelling depends on the pandas version
# ('M' before 2.2, 'ME' from 2.2 on); the object works on both.
resampler = ts.resample(pd.offsets.MonthEnd())
resampler.sum()

### Generating Date Ranges
 - pandas.date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None)

In [None]:
# default frequency - Daily

index = pd.date_range('2019-11-01', '2019-12-01')
index

In [None]:
# Business days only

pd.date_range('2019-11-01', '2019-12-01', freq='B')

In [None]:
pd.date_range(start='2019-11-01', periods=20)

In [None]:
pd.date_range(start='2019-11-01', periods=20, freq='B')

In [None]:
pd.date_range(end='2019-11-01', periods=20)

In [None]:
# Month end frequency
# (string alias: 'ME' in pandas >= 2.2, 'M' in earlier releases; the offset
# object below is accepted by both)

pd.date_range('2019-01-01', freq=pd.offsets.MonthEnd(), periods=12)

In [None]:
# Month Start frequency

pd.date_range('2019-01-01', freq='MS', periods=12)

In [None]:
# Semi-Month frequency (15th and end of Month)
# (string alias: 'SME' in pandas >= 2.2, 'SM' in earlier releases; the offset
# object below is accepted by both)

pd.date_range('2019-01-01', freq=pd.offsets.SemiMonthEnd(), periods=12)

In [None]:
# Hourly frequency
# (lowercase 'h'; the uppercase 'H' alias is deprecated in pandas >= 2.2)

pd.date_range('2019-01-01', freq='h', periods=12)

In [None]:
pd.date_range('2019-11-01 12:56:31', periods=5)

In [None]:
# Hourly steps keep the start's minutes and seconds.
# (lowercase 'h'; the uppercase 'H' alias is deprecated in pandas >= 2.2)
pd.date_range('2019-11-01 12:56:31', freq='h', periods=5)

In [None]:
# Normalize start/end dates to midnight before generating date range

pd.date_range('2019-11-01 12:56:31', periods=5, normalize=True)

In [None]:
# With normalize=True the start is moved to midnight before the hourly steps
# are generated.
# (lowercase 'h'; the uppercase 'H' alias is deprecated in pandas >= 2.2)
pd.date_range('2019-11-01 12:56:31', periods=5, freq='h', normalize=True)

In [None]:
pd.date_range('2019-01-01', '2019-01-02 23:59', freq='4h')

In [None]:
pd.date_range('2019-01-01', periods=10, freq='2h30min')

#### Week of month dates

In [None]:
# 3rd Tuesday

pd.date_range('2019-01-01', '2020-01-01', freq='WOM-3TUE')

### Shifting (Leading and Lagging) Data

In [None]:
np.random.seed(12345)

# Six month-start observations, January through June 2019.
ts2 = pd.Series(
    data=np.random.randint(1, 100, size=6),
    index=pd.date_range(start='1/1/2019', periods=6, freq='MS'),
)
ts2

In [None]:
ts2.shift(3)

In [None]:
ts2.shift(-3)

In [None]:
ts2

In [None]:
ts2.shift(3, freq='MS')

In [None]:
ts2.shift(-3, freq='MS')

In [None]:
ts2

In [None]:
ts2.shift(3, freq='D')

In [None]:
ts2.shift(-3, freq='D')

In [None]:
ts2

In [None]:
# 90 minutes
# ('min' is the minute alias; the older 'T' spelling is deprecated in pandas >= 2.2)

ts2.shift(1, freq='90min')

In [None]:
# Two steps of 90 minutes each, i.e. the index moves forward by 3 hours.
# ('min' is the minute alias; the older 'T' spelling is deprecated in pandas >= 2.2)
ts2.shift(2, freq='90min')

#### Shifting dates with offsets

In [None]:
from pandas.tseries.offsets import Day, MonthEnd

In [None]:
now = datetime(2019, 1, 15)
now + 3 * Day()

In [None]:
now + MonthEnd(), now + MonthEnd(2)

In [None]:
offset = MonthEnd()
offset.rollforward(now), offset.rollback(now)

In [None]:
np.random.seed(12345)

# Twenty observations spaced 4 days apart, starting on 15 January 2019.
# The day alias is written in upper case ('4D'), matching the 'D' and '3D'
# frequencies used elsewhere in this notebook.
ts3 = pd.Series(np.random.randint(1, 100, 20),
               index=pd.date_range('1/15/2019', periods=20, freq='4D'))
ts3

In [None]:
# Group the observations by the date `offset.rollforward` maps each index
# entry to, then show every group with its key.
for rolled_date, members in ts3.groupby(offset.rollforward):
    print("\nKey: ", rolled_date)
    print(members)


In [None]:
ts3.groupby(offset.rollforward).sum()

In [None]:
# Month-end totals. The offset object replaces the string alias because the
# alias spelling depends on the pandas version ('M' before 2.2, 'ME' from 2.2 on).
ts3.resample(pd.offsets.MonthEnd()).sum()

### Open-High-Low-Close (OHLC) resampling

In [None]:
# Twelve one-minute steps.
# ('min' is the minute alias; the older 'T' spelling is deprecated in pandas >= 2.2)
pd.date_range('2019-01-01', periods=12, freq='min')

In [None]:
np.random.seed(12345)

# Twelve one-minute observations with random integer values in [10, 50).
# ('min' is the minute alias; the older 'T' spelling is deprecated in pandas >= 2.2)
rng = pd.date_range('2019-01-01', periods=12, freq='min')
ts4 = pd.Series(np.random.randint(10, 50, 12), index=rng)
ts4

In [None]:
ts4.resample('5min').sum()

In [None]:
ts4.resample('5min').ohlc()

### Upsampling and Interpolation

In [None]:
np.random.seed(12345)

# Two weekly rows (weeks anchored on Monday), one column per state,
# filled with random integers in [20, 50).
frame = pd.DataFrame(
    data=np.random.randint(20, 50, size=(2, 4)),
    index=pd.date_range(start='1/1/2019', periods=2, freq='W-MON'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)
frame

In [None]:
df_daily = frame.resample('D').asfreq()
df_daily

In [None]:
frame.resample('D').ffill()

In [None]:
frame.resample('D').ffill(limit=2)

In [None]:
frame

In [None]:
frame.resample('W-THU').asfreq()

In [None]:
frame.resample('W-THU').ffill()