# Time Series

# Dan Wang

## 1. Date and Time Data Types and Tools

In [None]:
from datetime import datetime

# Capture the current date and time and print its calendar fields.
now = datetime.now()
print(now.year, now.month, now.day)
#now

In [None]:
# Subtracting one datetime from another yields a timedelta.
delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
print(delta)

In [None]:
# A timedelta keeps whole days and the leftover seconds as separate fields.
print(delta.days)
print (delta.seconds)

In [None]:
from datetime import timedelta
start = datetime(2011, 1, 7)
# timedelta's first positional argument is a number of days: 4 days later.
start + timedelta(4)

In [None]:
# timedelta objects can be scaled by an integer: 24 days earlier.
start - 2*timedelta(12)

Type Description  
date: Store calendar date (year, month, day) using the Gregorian calendar.  
time: Store time of day as hours, minutes, seconds, and microseconds  
datetime: Stores both date and time  
timedelta: Represents the difference between two datetime values (as days, seconds, and microseconds)

### 1.1 Converting between string and datetime

In [None]:
# str() on a datetime gives its default string representation.
stamp = datetime(2011, 1, 3)
str(stamp)

In [None]:
# Format with an explicit spec. NOTE: '%y' is the 2-digit year, so the
# century is dropped here; use '%Y' when a 4-digit year is wanted.
stamp.strftime('%y-%m-%d')

Type Description  
%Y 4-digit year   
%y 2-digit year  
%m 2-digit month [01, 12]  
%d 2-digit day [01, 31]  
%H Hour (24-hour clock) [00, 23]  
%I Hour (12-hour clock) [01, 12]  
%M 2-digit minute [00, 59]  
%S Second [00, 61] (seconds 60, 61 account for leap seconds)  
%w Weekday as integer [0 (Sunday), 6]  
%U Week number of the year [00, 53]. Sunday is considered the first day of the week, and days before the first
Sunday of the year are “week 0”.  
%W Week number of the year [00, 53]. Monday is considered the first day of the week, and days before the first
Monday of the year are “week 0”.  
%z UTC time zone offset as +HHMM or -HHMM, empty if time zone naive  
%F Shortcut for %Y-%m-%d, for example 2012-04-18  
%D Shortcut for %m/%d/%y, for example 04/18/12

In [None]:
# Parse a string into a datetime using a format spec that mirrors its layout
# (4-digit year and month run together, then '/', then the day).
value = '201101/03'
datetime.strptime(value, '%Y%m/%d')

In [None]:
# Parse several month/day/year strings with one shared format spec.
datestrs = ['7/6/2011', '8/6/2011']
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]

It can be a bit annoying to have to write a format spec each time, especially for common date formats. In this case, you can use the *parser.parse* method in the third-party *dateutil* package:

pandas is generally oriented toward working with arrays of dates, whether used as an axis index or a column in a DataFrame. 

In [None]:
import pandas as pd
# Convert a whole list of date strings at once; no format spec is supplied.
datestrs = ['7/6/2011', '8/6/2011']
pd.to_datetime(datestrs)

In [None]:
# Append a missing entry (None) to see how it is represented in the result.
idx = pd.to_datetime(datestrs + [None])
idx

In [None]:
# Element-wise check for which positions of idx are missing.
pd.isnull(idx)

### 1.2 Time Series Basics

The most basic kind of time series object in pandas is a Series indexed by timestamps, which is often represented external to pandas as Python strings or *datetime* objects.

In [None]:
import numpy as np
import pandas as pd

from datetime import datetime
from pandas import Series, DataFrame

# Six irregularly spaced days in January 2011 used as observation dates.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

# One random value per date, indexed by those dates.
ts = Series(np.random.randn(len(dates)), index=dates)
ts

In [None]:
# Inspect the container type of the time series.
type(ts)

In [None]:
# Inspect the index that was built from the list of datetime objects.
ts.index

In [None]:
# First label of the index (kept in `stamp` for later cells).
stamp = ts.index[0]
# First value of the series by position. Use .iloc explicitly: plain integer
# keys on a Series whose index is not integer-typed are deprecated as
# positional lookups in recent pandas, while .iloc is always positional.
ts.iloc[0]

In [None]:
# Add every second observation (ts[::2]) to the full series.
# NOTE(review): pandas arithmetic between Series matches on index labels, so
# dates missing from ts[::2] are expected to come out as NaN — confirm output.
ts[::2] + ts

### 1.3 Indexing, Selection, Subsetting

In [None]:
# Take the third label of the index ...
stamp = ts.index[2]
stamp

In [None]:
# ... and use that label to look up the corresponding value.
ts[stamp]

As a convenience, you can also pass a string that is interpretable as a date.

In [None]:
# Look up an observation with a month/day/year date string.
ts['1/10/2011']

In [None]:
# The same date written in ISO year-month-day form.
ts['2011-01-10']

For longer time series, a year or only a year and month can be passed to easily select slices of data.

In [None]:
# 1000 random observations on a date range starting 2000-01-01
# (no freq is given, so date_range's default frequency applies).
longer_ts = Series(np.random.randn(1000),
                   index=pd.date_range('1/1/2000', periods=1000))
longer_ts.head() 

In [None]:
# A bare year string selects the slice of data for that year.
longer_ts['2001']

In [None]:
# A year-month string narrows the selection to a single month.
longer_ts['2001-05']  

In [None]:
# Slice from a datetime onward.
# NOTE(review): `ts` is presumably still the irregular January 2011 series
# from section 1.2 — confirm against cell execution order.
ts[datetime(2011, 1, 7):]

In [None]:
# Range slicing with date strings as the endpoints.
ts['1/6/2011':'1/10/2011']

In [None]:
# Drop the observations after 2011-01-09.
ts.truncate(after='1/9/2011')

In [None]:
# 100 dates at 'W-WED' frequency used as the row index of a 100 x 4 frame of
# random values, one column per state.
dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
long_df = DataFrame(np.random.randn(100, 4),
                    index=dates,
                    columns=['Colorado', 'Texas', 'New York', 'Ohio'])

# Row selection by partial date string (month-year) through .loc.
long_df.loc['5-2001']

### 1.4 Time Series with Duplicate Indices

In [None]:
# Index in which 2000-01-02 appears three times, bracketed by two unique dates.
dates = pd.DatetimeIndex(['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000'])

# Values 0..4 so each row can be told apart despite the repeated labels.
dup_ts = Series(np.arange(len(dates)), index=dates)
dup_ts

In [None]:
# Check whether every label in the index is distinct.
dup_ts.index.is_unique

In [None]:
# NOTE: despite its name, new_ts holds the de-duplicated *index*
# (drop_duplicates is called on dup_ts.index), not a time series.
new_ts = dup_ts.index.drop_duplicates()
new_ts

In [None]:
# Look up a date that occurs once in the index.
dup_ts['1/3/2000']

In [None]:
# Look up a date that occurs three times in the index.
dup_ts['1/2/2000']

In [None]:
# Group rows by their index label (level 0) and count the rows per label.
grouped = dup_ts.groupby(level=0)
grouped.count()

## 2. Date Range, Frequencies, and Shifting

In [None]:
ts

Converting it to be fixed daily frequency can be accomplished by calling *resample*. 

In [None]:
# resample('D') on its own only builds a lazy Resampler object; .asfreq()
# materialises the series on a fixed daily grid without aggregating, leaving
# days that have no observation as missing values.
ts.resample('D').asfreq()

### 2.1 Generating Date Ranges

In [None]:
# Date range given an explicit start and end.
index = pd.date_range('4/1/2012', '6/1/2012')
index

In [None]:
# Only a start plus the number of periods to generate.
pd.date_range(start='4/1/2012', periods=20)

In [None]:
# Only an end plus the number of periods to generate.
pd.date_range(end='6/1/2012', periods=20)

In [None]:
# Last business day of each month in 2000. The offset object is passed
# instead of the 'BM' string alias because that alias is deprecated in recent
# pandas releases, whereas BMonthEnd() is accepted by every version.
pd.date_range('1/1/2000', '12/31/2000', freq=pd.offsets.BMonthEnd())

In [None]:
# Start from a timestamp that carries a time of day (12:56:31), not just a
# date, and generate five periods from it.
pd.date_range('5/2/2012 12:56:31', periods=5)

### 2.2 Frequencies and Date Offsets

In [None]:
from pandas.tseries.offsets import Hour, Minute
# An offset object for the Hour frequency, built with no arguments.
hour = Hour()
hour

In [None]:
# Passing an integer creates a multiple of the base offset.
four_hours = Hour(4)
four_hours

In most applications, you would never need to explicitly create one of these objects;
instead, you would use a string alias like 'h' or '4h'. Putting an integer before the base frequency
creates a multiple:

In [None]:
# Timestamps every four hours between the two endpoints, using the '4h'
# string alias rather than an explicit Hour(4) object.
dr_tmp = pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')
dr_tmp

In [None]:
# Offset objects can be added to a timestamp taken from the index.
dr_tmp[0] + Hour(2)

In [None]:
# A frequency string may combine several units ('1h30min').
pd.date_range('1/1/2000', periods=10, freq='1h30min')

### 2.3 Week of Month Dates

In [None]:
# 'WOM-3FRI' is a week-of-month frequency: the third Friday of each month.
rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')
# Materialise as a plain list so each timestamp is shown individually.
list(rng)

Traders of US equity options will recognize these dates as the standard dates of monthly expiry. 

### 2.4 Shifting (Leading and Lagging) Data

In [None]:
# Four random values stamped on the last business day of Jan-Apr 2000.
# The BMonthEnd() offset object replaces the 'BM' string alias, which is
# deprecated in recent pandas releases; the object form works on all versions.
ts = Series(np.random.randn(4),
            index=pd.date_range('1/1/2000', periods=4,
                                freq=pd.offsets.BMonthEnd()))
ts

In [None]:
# Shift the values two steps forward (lag), leaving the index unchanged.
ts.shift(2)

In [None]:
# A negative count shifts the values in the opposite direction (lead).
ts.shift(-2)

In [None]:
# Period-over-period relative change: current value divided by the previous
# value, minus one.
ts / ts.shift(1) - 1

In [None]:
# With a freq, shift moves the timestamps (by two month-ends here) instead of
# the values. The MonthEnd() offset object replaces the 'M' string alias,
# which is deprecated as an offset alias in recent pandas releases.
ts.shift(2, freq=pd.offsets.MonthEnd())

### 2.5 Shifting Dates with Offsets

In [None]:
from pandas.tseries.offsets import Day, BMonthEnd, MonthEnd
# Rebind `now` to a fixed date so the results below are reproducible.
now = datetime(2011, 11, 17)
# Offsets can be scaled by an integer and added to a datetime.
now + 3*Day()

In [None]:
# Adding an anchored offset moves the date to the next business month end.
now + BMonthEnd()

In [None]:
# MonthEnd(2) applies the month-end offset twice.
now + MonthEnd(2)

In [None]:
# rollforward moves the date to the offset's anchor at or after it.
offset = MonthEnd()
offset.rollforward(now)


In [None]:
# rollback moves the date to the offset's anchor at or before it.
offset.rollback(now)

In [None]:
# 20 random observations spaced four days apart starting 2000-01-15.
ts = Series(np.random.randn(20),
          index=pd.date_range('1/15/2000', periods=20, freq='4d'))
# NOTE: only the last expression of a notebook cell is echoed, so this bare
# `ts` produces no visible output.
ts
# Group by the month end each timestamp rolls forward to, then average.
# `offset` is the MonthEnd() instance created in an earlier cell.
ts.groupby(offset.rollforward).mean()

In [None]:
# Monthly mean via resample, labelled by month end. The MonthEnd() offset
# object replaces the 'M' string alias, which is deprecated as an offset
# alias in recent pandas releases.
ts.resample(pd.offsets.MonthEnd()).mean()

## 3. Time Zone Handling

In [None]:
import pytz
# Peek at the last five names in pytz's list of common time zones.
pytz.common_timezones[-5:]

In [None]:
# Look up a time zone object by name.
tz = pytz.timezone('US/Eastern')
tz

### 3.1 Localization and Conversion

In [None]:
# Six daily timestamps at 09:30 created without a tz argument; printing
# index.tz shows what time zone (if any) the index carries.
rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')
ts = Series(np.random.randn(len(rng)), index=rng)
print(ts.index.tz)
ts

In [None]:
# A date range can instead be created with a time zone attached.
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

In [None]:
# Attach UTC to the existing series.
ts_utc = ts.tz_localize('UTC')
ts_utc

In [None]:
# Convert the UTC-localized series to US/Eastern.
ts_utc.tz_convert('US/Eastern')

### 3.2 Operations with Time Zone

In [None]:
# The same localize-then-convert steps applied to a single Timestamp.
stamp = pd.Timestamp('2011-03-12 04:00')
stamp_utc = stamp.tz_localize('utc')
stamp_utc.tz_convert('US/Eastern')

## 4. Periods and Period Arithmetic

In [None]:
# An annual period for 2007 with the year ending in December.
# NOTE(review): recent pandas releases deprecate the 'A-' annual aliases in
# favour of 'Y-' — confirm which pandas version this notebook targets.
p = pd.Period(2007, freq='A-DEC')
p

In [None]:
# Adding an integer shifts the period by that many units of its frequency.
p+5

In [None]:
# Difference between two periods of the same frequency.
pd.Period('2014', freq='A-DEC') - p

In [None]:
# A range of monthly periods between the two dates.
rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')
rng

In [None]:
# The period range can serve as the index of a Series.
Series(np.random.randn(6), index=rng)

### 4.1 Period Frequency Conversion

In [None]:
# Annual period with the year ending in October, converted to the monthly
# period at its start ...
p = pd.Period('2007', freq='A-OCT')
p.asfreq('M', how='start')

In [None]:
# ... and at its end.
p.asfreq('M', how='end')

In [None]:
# Same conversions for a year ending in June; `how` is passed positionally.
p = pd.Period('2007', freq='A-JUN')
p.asfreq('M', 'start')

In [None]:
p.asfreq('M', 'end')

In [None]:
# Going the other way: find the June-ending annual period that contains
# the month 2007-08.
p = pd.Period('2007-08', 'M')
p.asfreq('A-JUN')

In [None]:
# A Series indexed by annual periods 2006 through 2009.
# NOTE: this rebinds `rng` and `ts` used by earlier cells.
rng = pd.period_range('2006', '2009', freq='A-DEC')
ts = Series(np.random.randn(len(rng)), index=rng)
ts

In [None]:
# Convert the whole index to monthly periods, anchored at each year's start.
ts.asfreq('M', how='start')

In [None]:
# Convert to business-day periods, anchored at each year's end.
ts.asfreq('B', how='end')

### 4.2 Quarterly Period Frequencies

In [None]:
# Fourth quarter of 2012 under a quarterly frequency whose year ends in
# January ('Q-JAN').
p = pd.Period('2012Q4', freq='Q-JAN')
p

In [None]:
# First day of that quarter.
p.asfreq('D', 'start')

In [None]:
# Last day of that quarter.
p.asfreq('D', 'end')

## 5. Resampling and Frequency Conversion

Resampling refers to the process of converting a time series from one frequency to another. Aggregating higher frequency data to lower frequency is called downsampling, while converting lower frequency to higher frequency is called upsampling.

In [None]:
# 100 daily observations starting 2000-01-01 ...
rng = pd.date_range('1/1/2000', periods=100, freq='D')
ts = Series(np.random.randn(len(rng)), index=rng)
# ... downsampled to one mean per calendar month. The MonthEnd() offset
# object replaces the 'M' string alias, which is deprecated as an offset
# alias in recent pandas releases; the object form works on all versions.
ts.resample(pd.offsets.MonthEnd()).mean()

### 5.1 Downsampling

In [None]:
# Twelve one-minute timestamps carrying the values 0..11. 'min' is used
# instead of the 'T' alias: 'T' is deprecated in recent pandas releases, and
# 'min' matches the '5min' rule used by the resample calls that follow.
rng = pd.date_range('1/1/2000', periods=12, freq='min')
ts = Series(np.arange(12), index=rng)
ts

In [None]:
# Sum the one-minute data into five-minute bins using the default settings.
ts.resample('5min').sum()

In [None]:
# Same bins, but closed on their right edge instead of the left.
ts.resample('5min', closed='right').sum() #by default closed = 'left'

In [None]:
# Left-closed bins, each labelled with its right edge.
ts.resample('5min',  closed='left', label='right').sum()

### 5.2 Open-High-Low-Close Resampling

In [None]:
# Compute open/high/low/close values for each five-minute bin.
ts.resample('5min').ohlc()

### 5.3 Resampling with GroupBy

In [None]:
# 100 daily observations carrying the values 0..99.
rng = pd.date_range('1/1/2000', periods=100, freq='D')
ts = Series(np.arange(100), index=rng)
# Group by the month attribute of each index timestamp, then average.
ts.groupby(lambda x: x.month).mean()

In [None]:
# Mean per day of week. Group on the index's vectorised `weekday` values:
# on an individual Timestamp `weekday` is a method, so a key function that
# returns `x.weekday` without calling it yields bound-method objects rather
# than day numbers when it is applied element by element.
ts.groupby(ts.index.weekday).mean()

### 5.4 Upsampling and Interpolation

In [None]:
# Two rows of random values at 'W-WED' frequency, one column per state.
frame = DataFrame(np.random.randn(2, 4),
                  index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
                  columns=['Colorado', 'Texas', 'New York', 'Ohio'])
# The frame has only two rows, so this slice shows all of it.
frame[:5]

In [None]:
# Upsample to daily frequency, filling the new rows backward.
frame.resample('D').bfill()

In [None]:
# Upsample to daily frequency, filling forward but with a fill limit of 2.
frame.resample('D').ffill(limit=2)