 # <center>Pandas Time and Date Series</center>

In [719]:
import pandas as pd
import numpy as np


In [None]:
rng = pd.date_range('1/1/2011', periods=52, freq='H')

In [None]:
rng[:5]

In [None]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts[:5]

In [None]:
converted = ts.asfreq('45Min', method='pad')
converted.head()

In [None]:
# Daily means
ts.resample('D').mean()

### Time Stamps vs Time Spans

In [None]:
pd.Timestamp('2015-05-01')

In [None]:
pd.Period('2011-01')

In [None]:
dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'), pd.Timestamp('2012-05-03')]
dates

In [None]:
ts = pd.Series(np.random.randn(3), dates)
type (ts.index)
ts.index


In [None]:
ts

### Converting to Timestamps

In [None]:
pd.to_datetime(pd.Series(['Jul 31, 2009', '2010-01-10', None]))

In [None]:
pd.to_datetime(['2005/11/23', '2010.12.31'])

In [None]:
pd.to_datetime(['04-01-2012 10:00'], dayfirst=True)

In [None]:
pd.to_datetime(['14-01-2012', '01-14-2012'], dayfirst=True)

In [None]:
pd.to_datetime('2010/11/12')

In [None]:
pd.Timestamp('2010/11/12')

In [None]:
df = pd.DataFrame({'year': [2015, 2016],
                   'month': [2, 3],
                   'day': [4, 5],
                   'hour': [2, 3]})

In [None]:
pd.to_datetime(df)

In [None]:
pd.to_datetime(df[['year', 'month', 'day']])

In [None]:
# pd.to_datetime(['2009/07/31', 'asd'], errors='raise')

In [None]:
pd.to_datetime(['2009/07/31', 'asd'], errors='ignore')

### Epoch Timestamps

In [None]:
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit='s')

In [None]:
 pd.to_datetime([1349720105100, 1349720105200, 1349720105300,
   ....:                 1349720105400, 1349720105500 ], unit='ms')

### From Timestamps to Epoch

In [None]:
stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D')
stamps

In [None]:
stamps.view('int64')

In [None]:
pd.Timedelta(1, unit='s')

### Using the Orgin Parameter

In [None]:
pd.to_datetime([1, 2, 3, 10], unit='D', origin=pd.Timestamp('1960-01-01'))

The default is set at origin='unix', which defaults to 1970-01-01 00:00:00. Commonly called ‘unix epoch’ or POSIX time.

In [None]:
pd.to_datetime([1, 2, 3], unit='D')

### Generating Ranges of Timestamps

In [None]:
dates = [pd.datetime(2012, 5, 1), pd.datetime(2012, 5, 2), pd.datetime(2012, 5, 3)]
dates

In [None]:
index = pd.DatetimeIndex(dates)
index

In [None]:
index = pd.Index(dates)
index

In [None]:
index = pd.date_range('2000-1-1', periods=1000, freq='M')
index

In [None]:
index = pd.bdate_range('2012-1-1', periods=250)
index

In [None]:
start = pd.datetime(2011, 1, 1)
end = pd.datetime(2012, 1, 1)
rng = pd.date_range(start, end)
rng

In [None]:
rng = pd.bdate_range(start, end)
rng

In [None]:
pd.date_range(start, end, freq='BM') #BM stands for the end of every month

In [None]:
pd.date_range(start, end, freq='W')

In [None]:
pd.bdate_range(end=end, periods=20)

In [None]:
pd.bdate_range(start=start, periods=20)

### Timestamp Limitations

In [None]:
pd.Timestamp.min

In [None]:
pd.Timestamp.max

### Indexing

In [None]:
rng = pd.date_range(start, end, freq='BM')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts.index

In [None]:
ts[:5].index

In [None]:
ts[::2].index

### Partial String Indexing

In [None]:
ts['1/31/2011']

In [None]:
ts[pd.datetime(2011, 12, 25):]

In [None]:
ts['10/31/2011':'12/31/2011']

In [None]:
ts['2011']

In [None]:
ts['2011-6']

In [None]:
dft = pd.DataFrame(np.random.randn(100000,1), columns=['A'], index=pd.date_range('20130101',periods=100000,freq='T'))
dft

In [None]:
dft['2013']

In [None]:
dft['2013-1':'2013-2']

In [None]:
dft['2013-1':'2013-2-28']

In [None]:
dft['2013-1':'2013-2-28 00:00:00']

In [None]:
dft['2013-1-15':'2013-1-15 12:30:00']

In [None]:
# String slicing on MultiIndex 
dft2 = pd.DataFrame(np.random.randn(20, 1),columns=['A'], 
                    index=pd.MultiIndex.from_product([pd.date_range('20130101', 
                    periods=10, freq='12H'),['a', 'b']]))
dft2

In [555]:
dft2.loc['2013-01-05']

KeyError: 'the label [2013-01-05] is not in the [index]'

In [556]:
idx = pd.IndexSlice
idx

<pandas.core.indexing._IndexSlice at 0x109662ba8>

In [557]:
dft2 = dft2.swaplevel(0, 1).sort_index()
dft2

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,-1.523056
2013-01-01 00:00:00,b,0.06553
2013-01-01 12:00:00,a,1.383377
2013-01-01 12:00:00,b,-1.142751
2013-01-02 00:00:00,a,-0.358363
2013-01-02 00:00:00,b,-0.427507
2013-01-02 12:00:00,a,-0.589666
2013-01-02 12:00:00,b,-0.790588
2013-01-03 00:00:00,a,0.603286
2013-01-03 00:00:00,b,-1.804797


In [558]:
dft2.loc[idx[:, '2013-01-05'], :]

KeyError: '2013-01-05'

## Slice vs Exact Match

In [559]:
series_minute = pd.Series([1, 2, 3],pd.DatetimeIndex(['2011-12-31 23:59:00',
                                                      '2012-01-01 00:00:00', 
                                                      '2012-01-01 00:02:00']))
series_minute

2011-12-31 23:59:00    1
2012-01-01 00:00:00    2
2012-01-01 00:02:00    3
dtype: int64

In [560]:
series_minute.index.resolution

'minute'

In [561]:
series_minute['2011-12-31 23']

2011-12-31 23:59:00    1
dtype: int64

In [562]:
series_minute['2011-12-31 23:59']

1

In [563]:
series_minute['2011-12-31 23:59:00']

1

In [564]:
series_second = pd.Series([1, 2, 3],pd.DatetimeIndex(['2011-12-31 23:59:59',
                                                      '2012-01-01 00:00:00',
                                                      '2012-01-01 00:00:01']))
series_second.index.resolution

'second'

In [565]:
series_second['2011-12-31 23:59']

2011-12-31 23:59:59    1
dtype: int64

In [566]:
dft_minute = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=series_minute.index)
dft_minute

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4
2012-01-01 00:00:00,2,5
2012-01-01 00:02:00,3,6


In [567]:
dft_minute.loc['2011-12-31 23']

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4


In [568]:
series_monthly = pd.Series([1, 2, 3],pd.DatetimeIndex(['2011-12','2012-01','2012-02']))
series_monthly

2011-12-01    1
2012-01-01    2
2012-02-01    3
dtype: int64

In [569]:
series_monthly.index.resolution

'day'

In [570]:
series_monthly['2011-12']

2011-12-01    1
dtype: int64

### Exact Indexing

In [571]:
dft[pd.datetime(2013, 1, 1):pd.datetime(2013,2,28)]

NameError: name 'dft' is not defined

In [None]:
dft[pd.datetime(2013, 1, 1, 10, 12, 0):pd.datetime(2013, 2, 28, 10, 12, 0)]

## Truncating & Fancy Indexing

In [None]:
ts.truncate(before='10/31/2011', after='12/31/2011')

In [None]:
ts[[0, 2, 6]].index

### Time/Date Components

There are several time/date properties that one can access from Timestamp or a collection of timestamps like a DateTimeIndex.

Source: https://pandas.pydata.org/pandas-docs/stable/timeseries.html

## DateOffset Objects

In [None]:
from pandas.tseries.offsets import *

d = pd.datetime(2008, 8, 18, 9, 0)
d + DateOffset(months = 4, days=5)

In [None]:
class BDay(DateOffset):
    """DateOffset increments between business days"""
    def apply(self, other):
        ...

In [None]:
d - 5*BDay()

In [None]:
d + BMonthEnd()

In [None]:
d

In [None]:
offset = BMonthEnd()

In [None]:
offset.rollforward(d)

In [None]:
day = Day()y

In [None]:
day.apply(pd.Timestamp('2014-01-01 09:00'))

In [None]:
day = Day(normalize=True)

In [None]:
day.apply(pd.Timestamp('2014-01-01 09:00'))

In [None]:
hour = Hour()

In [None]:
hour.apply(pd.Timestamp('2014-01-01 22:00'))

In [None]:
hour = Hour(normalize=True)

In [None]:
hour.apply(pd.Timestamp('2014-01-01 22:00'))

In [None]:
hour.apply(pd.Timestamp('2014-01-01 23:00'))

### Parametric Offsets

In [None]:
d = pd.datetime(2008, 8, 18, 9, 0)

In [None]:
d

In [None]:
d + Week()

In [None]:
d + Week(weekday=4)

In [None]:
(d + Week(weekday=4)).weekday()

In [None]:
d - Week()

In [None]:
d + Week(normatlize=True)

In [None]:
d - Week(normalize=True)

In [None]:
d + YearEnd()

In [None]:
d + YearEnd(month=6)

### Using Offsets with Series / DatetimeIndex

In [None]:
rng = pd.date_range('2012-01-01','2012-01-03')

In [None]:
s = pd.Series(rng)

In [None]:
rng

In [None]:
rng + DateOffset(month=2)

In [None]:
s + DateOffset(months=2)

In [None]:
s - DateOffset(months=2)

In [None]:
s = Day(2)

In [None]:
#td = s - pd.Series(pd.date_range('2011-12-29', '2011-12-31'))\
#td

In [None]:
#td + Minute(15)

In [None]:
rng + BQuarterEnd()

### Custom Business Days

In [None]:
from pandas.tseries.offsets import CustomBusinessDay

In [None]:
weekmask_egypt = 'Sun Mon Tue Wed Thu'

In [None]:
holidays = ['2012-05-01', pd.datetime(2013, 5, 1), np.datetime64('2014-05-01')]

In [None]:
bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt)
bday_egypt

In [None]:
dt = pd.datetime(2013, 4, 30)

dt+2*bday_egypt

In [None]:
dts = pd.date_range(dt, periods=5, freg=bday_egypt)

In [None]:
pd.Series(dts.weekday, dts).map(pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split()))

Holiday calendars can be used to provide the list of holidays. See the holiday calendar section for more information.

In [None]:
from pandas.tseries.holiday import USFederalHolidayCalendar
bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())

#Priday before MLK Day
dt = pd.datetime(2014, 1,7)

#Tuesday after MLK Day (Monday is skipped because its a holiday)
dt + bday_us

In [None]:
from pandas.tseries.offsets import CustomBusinessMonthBegin
bmth_us = CustomBusinessMonthBegin(calendar = USFederalHolidayCalendar())

In [None]:
dt = pd.datetime(2013, 12, 17)
dt

In [None]:
dt + bmth_us

In [None]:
pd.DatetimeIndex(start='20100101',end='20120101',freq=bmth_us)

### Business Hours

In [None]:
bh = BusinessHour()
bh

In [None]:
pd.Timestamp('2014-08-01 10:00').weekday()

In [None]:
pd.Timestamp('2014-08-01 10:00:00') + bh

In [None]:
pd.Timestamp('2014-08-01 08:00:00') + bh

In [None]:
pd.Timestamp('2014-08-01 16:00:00') + bh

In [None]:
pd.Timestamp('2014-08-01 9:30:00') + bh

In [None]:
pd.Timestamp('2014-08-01 10:00:00') + BusinessHour(3)

In [None]:
pd.Timestamp('2014-08-01 10:00:00') + BusinessHour(-3)

In [None]:
bh = BusinessHour(start='11:00', end='20:00')
bh

In [None]:
pd.Timestamp('2014-08-01 13:00') + bh

In [None]:
pd.Timestamp('2014-08-01 09:00') + bh

In [None]:
pd.Timestamp('2014-08-01 18:00') + bh

In [None]:
bh = BusinessHour(start='17:00', end='09:00')
bh

In [None]:
pd.Timestamp('2014-08-01 17:00') + bh

In [None]:
pd.Timestamp('2014-08-01 23:00') + bh

In [None]:
pd.Timestamp('2014-08-02 04:00') + bh

In [None]:
pd.Timestamp('2014-08-04 04:00') + bh

Applying BusinessHour.rollforward and rollback to out of business hours results in the next business hour start or previous day’s end. Different from other offsets, BusinessHour.rollforward may output different results from apply by definition.

This is because one day’s business hour end is equal to next day’s business hour start. For example, under the default business hours (9:00 - 17:00), there is no gap (0 minutes) between 2014-08-01 17:00 and 2014-08-04 09:00.

In [None]:
# This adjusts a Timestamp to business hour edge
BusinessHour().rollback(pd.Timestamp ('2014-08-02 15:00'))

In [None]:
BusinessHour().rollforward(pd.Timestamp('2014-08-02 15:00'))

In [None]:
# It is the same as BusinessHour().apply(pd.Timestamp('2014-08-01 17:00')).
# And it is the same as BusinessHour().apply(pd.Timestamp('2014-08-04 09:00'))
BusinessHour().apply(pd.Timestamp('2014-08-02 15:00'))

In [None]:
BusinessHour().rollforward(pd.Timestamp('2014-08-02'))

In [None]:
BusinessHour().apply(pd.Timestamp('2014-08-02'))

### Custom Business Hour (0.18.1)

In [None]:
from pandas.tseries.holiday import USFederalHolidayCalendar

In [None]:
bhour_us = CustomBusinessHour(calendar=USFederalHolidayCalendar())

In [None]:
# Friday before MLK DAY
dt = pd.datetime(2014, 1, 17, 15)

In [None]:
dt+ bhour_us

In [None]:
# Tuesday after MLK Day (Monday is skipped because it's a holiday)
dt + bhour_us * 2

In [None]:
bhour_mon = CustomBusinessHour(start='10:00', weekmask='Tue Wed Thu Fri')

In [None]:
# Monday is skipped because it's a holiday, business hour starts from 10:00
dt + bhour_mon * 2

### Combining Aliases

In [None]:
start = '2011-01-03'

In [None]:
pd.date_range(start, periods=5, freq='B')

In [None]:
#pd.date_range(start, periods=5, freq=BDay())

In [None]:
pd.date_range(start, periods=10, frew='2h20min')

In [None]:
pd.date_range(start, periods=10, freq='1D10U')

### Anchoresd Offset Semantics

In [None]:
pd.Timestamp('2014-01-02') + MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-02') + MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-02') - MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-02') - MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-02') + MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-02') - MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-01') + MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-31') + MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-01') - MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-31') - MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-01') + MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-31') - MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-02') + MonthBegin(n=0)

In [None]:
pd.Timestamp('2014-01-02') + MonthEnd(n=0)

In [None]:
pd.Timestamp('2014-01-01') + MonthBegin(n=0)

In [None]:
pd.Timestamp('2014-01-31') + MonthEnd(n=0)

### Holidays / Holiday Calendars

In [None]:
from pandas.tseries.holiday import Holiday, USMemorialDay, \
 AbstractHolidayCalendar, nearest_workday, MO

In [None]:
class ExampleCalendar(AbstractHolidayCalendar):
    rules = [
        USMemorialDay, 
        Holiday('July 4th', month=7, day=4, observance=nearest_workday),
        Holiday('Columbus Day', month=10, day=1,
                offset=DateOffset(weekday=MO(2))), #same as 2*Week(weekday=2)
    ]

In [None]:
cal = ExampleCalendar()

In [None]:
cal.holidays(pd.datetime(2012, 1, 1), pd.datetime(2012, 12, 31))

In [None]:
from pandas.tseries.offsets import CDay
pd.DatetimeIndex(start='7/1/2012', end='7/10/2012', 
                freq =CDay(calendar=cal)).to_pydatetime()

In [None]:
offset = CustomBusinessDay(calendar=cal)

In [None]:
pd.datetime(2012,5,25) + offset

In [None]:
pd.datetime(2012, 7, 3) + offset

In [None]:
pd.datetime(2012, 7, 3) + 2*offset

In [None]:
pd.datetime(2012, 7, 6) + offset

In [None]:
AbstractHolidayCalendar.start_date

In [None]:
AbstractHolidayCalendar.end_date

These dates can be overwritten by setting the attributes as datetime/Timestamp/string.

In [None]:
AbstractHolidayCalendar.start_date = pd.datetime(2012, 1, 1)

In [None]:
AbstractHolidayCalendar.end_date = pd.datetime(2012, 12, 31)

In [None]:
cal.holidays()

In [None]:
from pandas.tseries.holiday import get_calendar, HolidayCalendarFactory,\
USLaborDay

In [None]:
cal=get_calendar('ExampleCalendar')

In [None]:
cal.rules

In [None]:
new_cal = HolidayCalendarFactory('NewExampleCalendar', cal, USLaborDay)
new_cal.rules

## Time Series related instance methods
### Shifting/Laggihg

In [None]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [None]:
ts = ts[:5]
ts

In [None]:
ts.shift(1)

In [None]:
#ts.shif(5,freq=offset.BDay())

In [None]:
ts.shift(5, freq='BM')

In [None]:
ts.tshift(5, freq='D')

### Frequency Conversion

In [None]:
#dr = pd.date_range('1/1/2010', periods=3, freq=3 * offsets.BDay())

In [None]:
#ts = pd.Series(np.random.randn(3), index=dr)

In [None]:
#ts.asfreq(BDay())

In [None]:
#ts.asfreq(BDay(), method='pad')

## Resampling
### Basic

In [None]:
rng = pd.date_range('1/1/2012', periods=100, freq='S')

In [None]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts.head()

In [None]:
ts.resample('5Min').sum()

In [None]:
ts.resample('5Min').mean()

In [None]:
ts.resample('5Min').ohlc()

In [572]:
ts.resample('5Min').max()

2012-03-06 00:00:00   -1.390783
2012-03-06 00:05:00         NaN
2012-03-06 00:10:00         NaN
2012-03-06 00:15:00         NaN
2012-03-06 00:20:00         NaN
2012-03-06 00:25:00         NaN
2012-03-06 00:30:00         NaN
2012-03-06 00:35:00         NaN
2012-03-06 00:40:00         NaN
2012-03-06 00:45:00         NaN
2012-03-06 00:50:00         NaN
2012-03-06 00:55:00         NaN
2012-03-06 01:00:00         NaN
2012-03-06 01:05:00         NaN
2012-03-06 01:10:00         NaN
2012-03-06 01:15:00         NaN
2012-03-06 01:20:00         NaN
2012-03-06 01:25:00         NaN
2012-03-06 01:30:00         NaN
2012-03-06 01:35:00         NaN
2012-03-06 01:40:00         NaN
2012-03-06 01:45:00         NaN
2012-03-06 01:50:00         NaN
2012-03-06 01:55:00         NaN
2012-03-06 02:00:00         NaN
2012-03-06 02:05:00         NaN
2012-03-06 02:10:00         NaN
2012-03-06 02:15:00         NaN
2012-03-06 02:20:00         NaN
2012-03-06 02:25:00         NaN
                         ...   
2012-03-

In [573]:
ts.resample('5Min', closed='right').mean()

2012-03-05 23:55:00   -1.390783
2012-03-06 00:00:00         NaN
2012-03-06 00:05:00         NaN
2012-03-06 00:10:00         NaN
2012-03-06 00:15:00         NaN
2012-03-06 00:20:00         NaN
2012-03-06 00:25:00         NaN
2012-03-06 00:30:00         NaN
2012-03-06 00:35:00         NaN
2012-03-06 00:40:00         NaN
2012-03-06 00:45:00         NaN
2012-03-06 00:50:00         NaN
2012-03-06 00:55:00         NaN
2012-03-06 01:00:00         NaN
2012-03-06 01:05:00         NaN
2012-03-06 01:10:00         NaN
2012-03-06 01:15:00         NaN
2012-03-06 01:20:00         NaN
2012-03-06 01:25:00         NaN
2012-03-06 01:30:00         NaN
2012-03-06 01:35:00         NaN
2012-03-06 01:40:00         NaN
2012-03-06 01:45:00         NaN
2012-03-06 01:50:00         NaN
2012-03-06 01:55:00         NaN
2012-03-06 02:00:00         NaN
2012-03-06 02:05:00         NaN
2012-03-06 02:10:00         NaN
2012-03-06 02:15:00         NaN
2012-03-06 02:20:00         NaN
                         ...   
2012-03-

In [574]:
ts.resample('5Min', closed='left').mean()


2012-03-06 00:00:00   -1.390783
2012-03-06 00:05:00         NaN
2012-03-06 00:10:00         NaN
2012-03-06 00:15:00         NaN
2012-03-06 00:20:00         NaN
2012-03-06 00:25:00         NaN
2012-03-06 00:30:00         NaN
2012-03-06 00:35:00         NaN
2012-03-06 00:40:00         NaN
2012-03-06 00:45:00         NaN
2012-03-06 00:50:00         NaN
2012-03-06 00:55:00         NaN
2012-03-06 01:00:00         NaN
2012-03-06 01:05:00         NaN
2012-03-06 01:10:00         NaN
2012-03-06 01:15:00         NaN
2012-03-06 01:20:00         NaN
2012-03-06 01:25:00         NaN
2012-03-06 01:30:00         NaN
2012-03-06 01:35:00         NaN
2012-03-06 01:40:00         NaN
2012-03-06 01:45:00         NaN
2012-03-06 01:50:00         NaN
2012-03-06 01:55:00         NaN
2012-03-06 02:00:00         NaN
2012-03-06 02:05:00         NaN
2012-03-06 02:10:00         NaN
2012-03-06 02:15:00         NaN
2012-03-06 02:20:00         NaN
2012-03-06 02:25:00         NaN
                         ...   
2012-03-

In [575]:
ts.resample('5Min').mean() # by default label='right'

2012-03-06 00:00:00   -1.390783
2012-03-06 00:05:00         NaN
2012-03-06 00:10:00         NaN
2012-03-06 00:15:00         NaN
2012-03-06 00:20:00         NaN
2012-03-06 00:25:00         NaN
2012-03-06 00:30:00         NaN
2012-03-06 00:35:00         NaN
2012-03-06 00:40:00         NaN
2012-03-06 00:45:00         NaN
2012-03-06 00:50:00         NaN
2012-03-06 00:55:00         NaN
2012-03-06 01:00:00         NaN
2012-03-06 01:05:00         NaN
2012-03-06 01:10:00         NaN
2012-03-06 01:15:00         NaN
2012-03-06 01:20:00         NaN
2012-03-06 01:25:00         NaN
2012-03-06 01:30:00         NaN
2012-03-06 01:35:00         NaN
2012-03-06 01:40:00         NaN
2012-03-06 01:45:00         NaN
2012-03-06 01:50:00         NaN
2012-03-06 01:55:00         NaN
2012-03-06 02:00:00         NaN
2012-03-06 02:05:00         NaN
2012-03-06 02:10:00         NaN
2012-03-06 02:15:00         NaN
2012-03-06 02:20:00         NaN
2012-03-06 02:25:00         NaN
                         ...   
2012-03-

In [576]:
ts.resample('5Min', label='left').mean()

2012-03-06 00:00:00   -1.390783
2012-03-06 00:05:00         NaN
2012-03-06 00:10:00         NaN
2012-03-06 00:15:00         NaN
2012-03-06 00:20:00         NaN
2012-03-06 00:25:00         NaN
2012-03-06 00:30:00         NaN
2012-03-06 00:35:00         NaN
2012-03-06 00:40:00         NaN
2012-03-06 00:45:00         NaN
2012-03-06 00:50:00         NaN
2012-03-06 00:55:00         NaN
2012-03-06 01:00:00         NaN
2012-03-06 01:05:00         NaN
2012-03-06 01:10:00         NaN
2012-03-06 01:15:00         NaN
2012-03-06 01:20:00         NaN
2012-03-06 01:25:00         NaN
2012-03-06 01:30:00         NaN
2012-03-06 01:35:00         NaN
2012-03-06 01:40:00         NaN
2012-03-06 01:45:00         NaN
2012-03-06 01:50:00         NaN
2012-03-06 01:55:00         NaN
2012-03-06 02:00:00         NaN
2012-03-06 02:05:00         NaN
2012-03-06 02:10:00         NaN
2012-03-06 02:15:00         NaN
2012-03-06 02:20:00         NaN
2012-03-06 02:25:00         NaN
                         ...   
2012-03-

In [577]:
ts.resample('5Min', label='left', loffset='1s').mean()

2012-03-06 00:00:01   -1.390783
2012-03-06 00:05:01         NaN
2012-03-06 00:10:01         NaN
2012-03-06 00:15:01         NaN
2012-03-06 00:20:01         NaN
2012-03-06 00:25:01         NaN
2012-03-06 00:30:01         NaN
2012-03-06 00:35:01         NaN
2012-03-06 00:40:01         NaN
2012-03-06 00:45:01         NaN
2012-03-06 00:50:01         NaN
2012-03-06 00:55:01         NaN
2012-03-06 01:00:01         NaN
2012-03-06 01:05:01         NaN
2012-03-06 01:10:01         NaN
2012-03-06 01:15:01         NaN
2012-03-06 01:20:01         NaN
2012-03-06 01:25:01         NaN
2012-03-06 01:30:01         NaN
2012-03-06 01:35:01         NaN
2012-03-06 01:40:01         NaN
2012-03-06 01:45:01         NaN
2012-03-06 01:50:01         NaN
2012-03-06 01:55:01         NaN
2012-03-06 02:00:01         NaN
2012-03-06 02:05:01         NaN
2012-03-06 02:10:01         NaN
2012-03-06 02:15:01         NaN
2012-03-06 02:20:01         NaN
2012-03-06 02:25:01         NaN
                         ...   
2012-03-

### Up Sampling

In [578]:
ts[:2].resample('250L').asfreq()

2012-03-06 00:00:00.000   -1.390783
2012-03-06 00:00:00.250         NaN
2012-03-06 00:00:00.500         NaN
2012-03-06 00:00:00.750         NaN
2012-03-06 00:00:01.000         NaN
2012-03-06 00:00:01.250         NaN
2012-03-06 00:00:01.500         NaN
2012-03-06 00:00:01.750         NaN
2012-03-06 00:00:02.000         NaN
2012-03-06 00:00:02.250         NaN
2012-03-06 00:00:02.500         NaN
2012-03-06 00:00:02.750         NaN
2012-03-06 00:00:03.000         NaN
2012-03-06 00:00:03.250         NaN
2012-03-06 00:00:03.500         NaN
2012-03-06 00:00:03.750         NaN
2012-03-06 00:00:04.000         NaN
2012-03-06 00:00:04.250         NaN
2012-03-06 00:00:04.500         NaN
2012-03-06 00:00:04.750         NaN
2012-03-06 00:00:05.000         NaN
2012-03-06 00:00:05.250         NaN
2012-03-06 00:00:05.500         NaN
2012-03-06 00:00:05.750         NaN
2012-03-06 00:00:06.000         NaN
2012-03-06 00:00:06.250         NaN
2012-03-06 00:00:06.500         NaN
2012-03-06 00:00:06.750     

In [579]:
ts[:2].resample('250L').ffill()

2012-03-06 00:00:00.000   -1.390783
2012-03-06 00:00:00.250   -1.390783
2012-03-06 00:00:00.500   -1.390783
2012-03-06 00:00:00.750   -1.390783
2012-03-06 00:00:01.000   -1.390783
2012-03-06 00:00:01.250   -1.390783
2012-03-06 00:00:01.500   -1.390783
2012-03-06 00:00:01.750   -1.390783
2012-03-06 00:00:02.000   -1.390783
2012-03-06 00:00:02.250   -1.390783
2012-03-06 00:00:02.500   -1.390783
2012-03-06 00:00:02.750   -1.390783
2012-03-06 00:00:03.000   -1.390783
2012-03-06 00:00:03.250   -1.390783
2012-03-06 00:00:03.500   -1.390783
2012-03-06 00:00:03.750   -1.390783
2012-03-06 00:00:04.000   -1.390783
2012-03-06 00:00:04.250   -1.390783
2012-03-06 00:00:04.500   -1.390783
2012-03-06 00:00:04.750   -1.390783
2012-03-06 00:00:05.000   -1.390783
2012-03-06 00:00:05.250   -1.390783
2012-03-06 00:00:05.500   -1.390783
2012-03-06 00:00:05.750   -1.390783
2012-03-06 00:00:06.000   -1.390783
2012-03-06 00:00:06.250   -1.390783
2012-03-06 00:00:06.500   -1.390783
2012-03-06 00:00:06.750   -1

In [580]:
ts[:2].resample('250L').ffill(limit=2)

2012-03-06 00:00:00.000   -1.390783
2012-03-06 00:00:00.250   -1.390783
2012-03-06 00:00:00.500   -1.390783
2012-03-06 00:00:00.750         NaN
2012-03-06 00:00:01.000         NaN
2012-03-06 00:00:01.250         NaN
2012-03-06 00:00:01.500         NaN
2012-03-06 00:00:01.750         NaN
2012-03-06 00:00:02.000         NaN
2012-03-06 00:00:02.250         NaN
2012-03-06 00:00:02.500         NaN
2012-03-06 00:00:02.750         NaN
2012-03-06 00:00:03.000         NaN
2012-03-06 00:00:03.250         NaN
2012-03-06 00:00:03.500         NaN
2012-03-06 00:00:03.750         NaN
2012-03-06 00:00:04.000         NaN
2012-03-06 00:00:04.250         NaN
2012-03-06 00:00:04.500         NaN
2012-03-06 00:00:04.750         NaN
2012-03-06 00:00:05.000         NaN
2012-03-06 00:00:05.250         NaN
2012-03-06 00:00:05.500         NaN
2012-03-06 00:00:05.750         NaN
2012-03-06 00:00:06.000         NaN
2012-03-06 00:00:06.250         NaN
2012-03-06 00:00:06.500         NaN
2012-03-06 00:00:06.750     

### Sparse Resampling

In [581]:
rng = pd.date_range('2014-1-1', periods=100, freq='D') + pd.Timedelta('1s')
ts = pd.Series(range(100), index=rng)
ts.head()

2014-01-01 00:00:01    0
2014-01-02 00:00:01    1
2014-01-03 00:00:01    2
2014-01-04 00:00:01    3
2014-01-05 00:00:01    4
Freq: D, dtype: int64

In [582]:
ts.resample('3T').sum()

2014-01-01 00:00:00     0.0
2014-01-01 00:03:00     NaN
2014-01-01 00:06:00     NaN
2014-01-01 00:09:00     NaN
2014-01-01 00:12:00     NaN
2014-01-01 00:15:00     NaN
2014-01-01 00:18:00     NaN
2014-01-01 00:21:00     NaN
2014-01-01 00:24:00     NaN
2014-01-01 00:27:00     NaN
2014-01-01 00:30:00     NaN
2014-01-01 00:33:00     NaN
2014-01-01 00:36:00     NaN
2014-01-01 00:39:00     NaN
2014-01-01 00:42:00     NaN
2014-01-01 00:45:00     NaN
2014-01-01 00:48:00     NaN
2014-01-01 00:51:00     NaN
2014-01-01 00:54:00     NaN
2014-01-01 00:57:00     NaN
2014-01-01 01:00:00     NaN
2014-01-01 01:03:00     NaN
2014-01-01 01:06:00     NaN
2014-01-01 01:09:00     NaN
2014-01-01 01:12:00     NaN
2014-01-01 01:15:00     NaN
2014-01-01 01:18:00     NaN
2014-01-01 01:21:00     NaN
2014-01-01 01:24:00     NaN
2014-01-01 01:27:00     NaN
                       ... 
2014-04-09 22:33:00     NaN
2014-04-09 22:36:00     NaN
2014-04-09 22:39:00     NaN
2014-04-09 22:42:00     NaN
2014-04-09 22:45:00 

In [583]:
from functools import partial
from pandas.tseries.frequencies import to_offset

In [584]:
def round(t, freq):
    freq = to_offset(freq)
    return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value)

In [585]:
ts.groupby(partial(round, freq='3T')).sum()

2014-01-01     0
2014-01-02     1
2014-01-03     2
2014-01-04     3
2014-01-05     4
2014-01-06     5
2014-01-07     6
2014-01-08     7
2014-01-09     8
2014-01-10     9
2014-01-11    10
2014-01-12    11
2014-01-13    12
2014-01-14    13
2014-01-15    14
2014-01-16    15
2014-01-17    16
2014-01-18    17
2014-01-19    18
2014-01-20    19
2014-01-21    20
2014-01-22    21
2014-01-23    22
2014-01-24    23
2014-01-25    24
2014-01-26    25
2014-01-27    26
2014-01-28    27
2014-01-29    28
2014-01-30    29
              ..
2014-03-12    70
2014-03-13    71
2014-03-14    72
2014-03-15    73
2014-03-16    74
2014-03-17    75
2014-03-18    76
2014-03-19    77
2014-03-20    78
2014-03-21    79
2014-03-22    80
2014-03-23    81
2014-03-24    82
2014-03-25    83
2014-03-26    84
2014-03-27    85
2014-03-28    86
2014-03-29    87
2014-03-30    88
2014-03-31    89
2014-04-01    90
2014-04-02    91
2014-04-03    92
2014-04-04    93
2014-04-05    94
2014-04-06    95
2014-04-07    96
2014-04-08    

### Aggregation

In [586]:
df = pd.DataFrame(np.random.randn(1000,3), index=pd.date_range('1/1/2012',
                                                          freq='S',
                                                          periods=1000),
                 columns=['A','B','C'])

In [587]:
ts.groupby(partial(round, freq='3T')).sum()

2014-01-01     0
2014-01-02     1
2014-01-03     2
2014-01-04     3
2014-01-05     4
2014-01-06     5
2014-01-07     6
2014-01-08     7
2014-01-09     8
2014-01-10     9
2014-01-11    10
2014-01-12    11
2014-01-13    12
2014-01-14    13
2014-01-15    14
2014-01-16    15
2014-01-17    16
2014-01-18    17
2014-01-19    18
2014-01-20    19
2014-01-21    20
2014-01-22    21
2014-01-23    22
2014-01-24    23
2014-01-25    24
2014-01-26    25
2014-01-27    26
2014-01-28    27
2014-01-29    28
2014-01-30    29
              ..
2014-03-12    70
2014-03-13    71
2014-03-14    72
2014-03-15    73
2014-03-16    74
2014-03-17    75
2014-03-18    76
2014-03-19    77
2014-03-20    78
2014-03-21    79
2014-03-22    80
2014-03-23    81
2014-03-24    82
2014-03-25    83
2014-03-26    84
2014-03-27    85
2014-03-28    86
2014-03-29    87
2014-03-30    88
2014-03-31    89
2014-04-01    90
2014-04-02    91
2014-04-03    92
2014-04-04    93
2014-04-05    94
2014-04-06    95
2014-04-07    96
2014-04-08    

### Aggregation

In [588]:
df = pd. DataFrame(np.random.randn(1000,3),
                   index = pd.date_range('1/1/2012', freq='S',periods=1000),
                   columns = ['A','B','C'])

In [589]:
r = df.resample('3T')

In [590]:
r.mean()

Unnamed: 0,A,B,C
2012-01-01 00:00:00,0.013883,-0.087869,-0.024
2012-01-01 00:03:00,-0.101475,0.014091,0.06457
2012-01-01 00:06:00,-0.067507,0.101602,-0.059258
2012-01-01 00:09:00,-0.03765,0.012024,-0.117446
2012-01-01 00:12:00,0.186183,0.097987,-0.00201
2012-01-01 00:15:00,0.107917,0.174091,-0.010921


In [591]:
r['A'].mean()

2012-01-01 00:00:00    0.013883
2012-01-01 00:03:00   -0.101475
2012-01-01 00:06:00   -0.067507
2012-01-01 00:09:00   -0.037650
2012-01-01 00:12:00    0.186183
2012-01-01 00:15:00    0.107917
Freq: 3T, Name: A, dtype: float64

In [592]:
r[['A','B']].mean()

Unnamed: 0,A,B
2012-01-01 00:00:00,0.013883,-0.087869
2012-01-01 00:03:00,-0.101475,0.014091
2012-01-01 00:06:00,-0.067507,0.101602
2012-01-01 00:09:00,-0.03765,0.012024
2012-01-01 00:12:00,0.186183,0.097987
2012-01-01 00:15:00,0.107917,0.174091


In [593]:
r['A'].agg([np.sum, np.mean, np.std])

Unnamed: 0,sum,mean,std
2012-01-01 00:00:00,2.498895,0.013883,1.024056
2012-01-01 00:03:00,-18.265549,-0.101475,1.014878
2012-01-01 00:06:00,-12.151274,-0.067507,1.035389
2012-01-01 00:09:00,-6.777071,-0.03765,0.981985
2012-01-01 00:12:00,33.513016,0.186183,0.948671
2012-01-01 00:15:00,10.791668,0.107917,1.013665


In [594]:
r.agg([np.sum,np.mean])

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,sum,mean,sum,mean,sum,mean
2012-01-01 00:00:00,2.498895,0.013883,-15.816348,-0.087869,-4.319996,-0.024
2012-01-01 00:03:00,-18.265549,-0.101475,2.536441,0.014091,11.622597,0.06457
2012-01-01 00:06:00,-12.151274,-0.067507,18.288335,0.101602,-10.66635,-0.059258
2012-01-01 00:09:00,-6.777071,-0.03765,2.164277,0.012024,-21.140297,-0.117446
2012-01-01 00:12:00,33.513016,0.186183,17.63771,0.097987,-0.361881,-0.00201
2012-01-01 00:15:00,10.791668,0.107917,17.409064,0.174091,-1.092116,-0.010921


In [595]:
r.agg({'A': np.sum,
      'B': lambda x: np.std(x, ddof=1)})

Unnamed: 0,A,B
2012-01-01 00:00:00,2.498895,0.894135
2012-01-01 00:03:00,-18.265549,1.038884
2012-01-01 00:06:00,-12.151274,0.876696
2012-01-01 00:09:00,-6.777071,0.947267
2012-01-01 00:12:00,33.513016,1.116959
2012-01-01 00:15:00,10.791668,0.914109


In [596]:
r.agg({'A': 'sum', 'B':'std'})

Unnamed: 0,A,B
2012-01-01 00:00:00,2.498895,0.894135
2012-01-01 00:03:00,-18.265549,1.038884
2012-01-01 00:06:00,-12.151274,0.876696
2012-01-01 00:09:00,-6.777071,0.947267
2012-01-01 00:12:00,33.513016,1.116959
2012-01-01 00:15:00,10.791668,0.914109


In [597]:
r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] })

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,sum,std,mean,std
2012-01-01 00:00:00,2.498895,1.024056,-0.087869,0.894135
2012-01-01 00:03:00,-18.265549,1.014878,0.014091,1.038884
2012-01-01 00:06:00,-12.151274,1.035389,0.101602,0.876696
2012-01-01 00:09:00,-6.777071,0.981985,0.012024,0.947267
2012-01-01 00:12:00,33.513016,0.948671,0.097987,1.116959
2012-01-01 00:15:00,10.791668,1.013665,0.174091,0.914109


In [598]:
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
                   'a': np.arange(5)},
                  index=pd.MultiIndex.from_arrays([
                      [1,2,3,4,5],
                      pd.date_range('2015-01-01', freq='W', periods=5)],
                      names=['v','d']))

In [599]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,date
v,d,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2015-01-04,0,2015-01-04
2,2015-01-11,1,2015-01-11
3,2015-01-18,2,2015-01-18
4,2015-01-25,3,2015-01-25
5,2015-02-01,4,2015-02-01


In [600]:
df.resample('M', on='date').sum()

Unnamed: 0_level_0,a
date,Unnamed: 1_level_1
2015-01-31,6
2015-02-28,4


In [601]:
df.resample('M',level='d').sum()

Unnamed: 0_level_0,a
d,Unnamed: 1_level_1
2015-01-31,6
2015-02-28,4


## Time Span Representation
### Period

In [602]:
p = pd.Period('2012', freq='A-DEC')

In [603]:
p + 1

Period('2013', 'A-DEC')

In [604]:
p - 3

Period('2009', 'A-DEC')

In [605]:
p = pd.Period('2012-01', freq='2M')

In [606]:
p + 2

Period('2012-05', '2M')

In [607]:
p - 1

Period('2011-11', '2M')

In [608]:
p = pd.Period('2012-01',freq='2M')
p

Period('2012-01', '2M')

In [609]:
p+2

Period('2012-05', '2M')

In [610]:
p-1

Period('2011-11', '2M')

In [611]:
p = pd.Period('2012-01', freq='3M')

In [612]:
p = pd.Period('2014-07-01 09:00', freq='H')

In [613]:
p + Hour(2)

Period('2014-07-01 11:00', 'H')

In [614]:
p + pd.Timedelta(minutes=120)

Period('2014-07-01 11:00', 'H')

In [615]:
p + np.timedelta64(7200, 's')

Period('2014-07-01 11:00', 'H')

In [616]:
p = pd.Period('2014-07', freq='M')
p

Period('2014-07', 'M')

In [617]:
p + MonthEnd(3)

Period('2014-10', 'M')

In [618]:
#p + MonthBegin(3)

In [619]:
pd.Period('2012', freq='A-DEC') - pd.Period('2002', freq='A-DEC')

10

### PeriodIndex and Period_Range

In [620]:
prng = pd.period_range('1/1/2011', '1/1/2012', freq='M')
prng

PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M')

In [621]:
pd.PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M')

PeriodIndex(['2011-01', '2011-02', '2011-03'], dtype='period[M]', freq='M')

In [622]:
pd.PeriodIndex(start='2014-01', freq='3M', periods=4)

PeriodIndex(['2014-01', '2014-04', '2014-07', '2014-10'], dtype='period[3M]', freq='3M')

In [623]:
ps = pd.Series(np.random.randn(len(prng)), prng)
ps.head

<bound method NDFrame.head of 2011-01    1.110790
2011-02   -1.284593
2011-03    0.562710
2011-04   -1.390612
2011-05    0.862287
2011-06   -0.815714
2011-07    0.368174
2011-08    0.384632
2011-09    1.223115
2011-10   -1.952491
2011-11   -0.201312
2011-12   -1.215201
2012-01    0.285208
Freq: M, dtype: float64>

In [624]:
idx = pd.period_range('2014-07-01 09:00', periods=5, freq='H')
idx

PeriodIndex(['2014-07-01 09:00', '2014-07-01 10:00', '2014-07-01 11:00',
             '2014-07-01 12:00', '2014-07-01 13:00'],
            dtype='period[H]', freq='H')

In [625]:
idx + Hour(2)

PeriodIndex(['2014-07-01 11:00', '2014-07-01 12:00', '2014-07-01 13:00',
             '2014-07-01 14:00', '2014-07-01 15:00'],
            dtype='period[H]', freq='H')

In [626]:
idx = pd.period_range('2014-07', periods=5, freq='M')
idx

PeriodIndex(['2014-07', '2014-08', '2014-09', '2014-10', '2014-11'], dtype='period[M]', freq='M')

In [627]:
idx + MonthEnd(3)

PeriodIndex(['2014-10', '2014-11', '2014-12', '2015-01', '2015-02'], dtype='period[M]', freq='M')

### Period Dtypes

In [628]:
pi = pd.period_range('2016-01-01', periods=3, freq='M')
pi

PeriodIndex(['2016-01', '2016-02', '2016-03'], dtype='period[M]', freq='M')

In [629]:
pi.dtype

period[M]

In [630]:
# change monthly freq to daily freq
pi.astype('period[D]')

PeriodIndex(['2016-01-31', '2016-02-29', '2016-03-31'], dtype='period[D]', freq='D')

In [631]:
# convert to DatetimeIndex
pi.astype('datetime64[ns]')

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01'], dtype='datetime64[ns]', freq='MS')

### PeriodIndex Partial String Indexing

In [632]:
ps['2011-01']

1.1107896629969602

In [633]:
ps[pd.datetime(2011, 12, 25):]

2011-12   -1.215201
2012-01    0.285208
Freq: M, dtype: float64

In [634]:
ps['10/31/2011':'12/31/2011']

2011-10   -1.952491
2011-11   -0.201312
2011-12   -1.215201
Freq: M, dtype: float64

In [635]:
ps['2011']

2011-01    1.110790
2011-02   -1.284593
2011-03    0.562710
2011-04   -1.390612
2011-05    0.862287
2011-06   -0.815714
2011-07    0.368174
2011-08    0.384632
2011-09    1.223115
2011-10   -1.952491
2011-11   -0.201312
2011-12   -1.215201
Freq: M, dtype: float64

In [636]:
dfp = pd.DataFrame(np.random.randn(600,1),columns=['A'],
                   index=pd.period_range('2013-01-01 9:00', periods=600, freq='T'))

In [637]:
dfp

Unnamed: 0,A
2013-01-01 09:00,-0.303461
2013-01-01 09:01,1.182376
2013-01-01 09:02,-0.015624
2013-01-01 09:03,1.504825
2013-01-01 09:04,-2.602536
2013-01-01 09:05,-0.786233
2013-01-01 09:06,0.343672
2013-01-01 09:07,0.083490
2013-01-01 09:08,1.486885
2013-01-01 09:09,1.256721


In [638]:
dfp['2013-01-01 10H']

Unnamed: 0,A
2013-01-01 10:00,-0.254723
2013-01-01 10:01,-0.361387
2013-01-01 10:02,1.536073
2013-01-01 10:03,0.277516
2013-01-01 10:04,-0.96963
2013-01-01 10:05,2.022136
2013-01-01 10:06,-0.222557
2013-01-01 10:07,2.003114
2013-01-01 10:08,-1.591905
2013-01-01 10:09,-1.07908


In [639]:
dfp['2013-01-01 10H':'2013-01-01 11H']

Unnamed: 0,A
2013-01-01 10:00,-0.254723
2013-01-01 10:01,-0.361387
2013-01-01 10:02,1.536073
2013-01-01 10:03,0.277516
2013-01-01 10:04,-0.969630
2013-01-01 10:05,2.022136
2013-01-01 10:06,-0.222557
2013-01-01 10:07,2.003114
2013-01-01 10:08,-1.591905
2013-01-01 10:09,-1.079080


### Frequency Conversion and Resampling with PeriodIndex

In [640]:
p = pd.Period('2011', freq='A-DEC')
p

Period('2011', 'A-DEC')

In [641]:
p.asfreq('M', how='start')

Period('2011-01', 'M')

In [642]:
p.asfreq('M', how='end')

Period('2011-12', 'M')

In [643]:
p.asfreq('M', 's')

Period('2011-01', 'M')

In [644]:
p.asfreq('M', 'e')

Period('2011-12', 'M')

In [645]:
p = pd.Period('2011-12', freq='M')

In [646]:
p.asfreq('A-NOV')

Period('2012', 'A-NOV')

In [647]:
p = pd.Period('2012Q1', freq='Q-DEC')

In [648]:
p.asfreq('D', 's')

Period('2012-01-01', 'D')

In [649]:
p.asfreq('D', 'e')

Period('2012-03-31', 'D')

In [650]:
p = pd.Period('2011Q4', freq='Q-MAR')

In [651]:
p.asfreq('D', 's')

Period('2011-01-01', 'D')

In [652]:
p.asfreq('D', 'e')

Period('2011-03-31', 'D')

### Converting between Reprsentations

In [653]:
rng = pd.date_range('1/1/2012', periods=5, freq='M')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-01-31    1.339985
2012-02-29    0.430788
2012-03-31   -0.057099
2012-04-30    0.024807
2012-05-31   -0.137830
Freq: M, dtype: float64

In [654]:
ps = ts.to_period()
ps

2012-01    1.339985
2012-02    0.430788
2012-03   -0.057099
2012-04    0.024807
2012-05   -0.137830
Freq: M, dtype: float64

In [655]:
ps.to_timestamp()

2012-01-01    1.339985
2012-02-01    0.430788
2012-03-01   -0.057099
2012-04-01    0.024807
2012-05-01   -0.137830
Freq: MS, dtype: float64

In [656]:
#Remember that ‘s’ and ‘e’ can be used to return the timestamps at the start or end of the period:
ps.to_timestamp('D', how='s')

2012-01-01    1.339985
2012-02-01    0.430788
2012-03-01   -0.057099
2012-04-01    0.024807
2012-05-01   -0.137830
Freq: MS, dtype: float64

In [657]:
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
ts = pd.Series(np.random.randn(len(prng)), prng)
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
ts.head()

1990-03-01 09:00    0.062509
1990-06-01 09:00    0.299053
1990-09-01 09:00   -0.606674
1990-12-01 09:00   -1.597373
1991-03-01 09:00    0.661737
Freq: H, dtype: float64

### Representing out-of-bounds spans

In [658]:
span = pd.period_range('1215-01-01', '1381-01-01', freq='D')
span

PeriodIndex(['1215-01-01', '1215-01-02', '1215-01-03', '1215-01-04',
             '1215-01-05', '1215-01-06', '1215-01-07', '1215-01-08',
             '1215-01-09', '1215-01-10',
             ...
             '1380-12-23', '1380-12-24', '1380-12-25', '1380-12-26',
             '1380-12-27', '1380-12-28', '1380-12-29', '1380-12-30',
             '1380-12-31', '1381-01-01'],
            dtype='period[D]', length=60632, freq='D')

In [659]:
s = pd.Series([20121231, 20141130, 99991231])
s

0    20121231
1    20141130
2    99991231
dtype: int64

In [660]:
# To convert from a int64 based YYYYMMDD representation.
def conv(x):
    return pd.Period(year = x // 10000, month = x//100 % 100, day = x%100, freq='D')

In [661]:
s.apply(conv)

0   2012-12-31
1   2014-11-30
2   9999-12-31
dtype: object

In [662]:
s.apply(conv)[2]

Period('9999-12-31', 'D')

In [663]:
# These can easily be converted to a PeriodIndex
span = pd.PeriodIndex(s.apply(conv))
span

PeriodIndex(['2012-12-31', '2014-11-30', '9999-12-31'], dtype='period[D]', freq='D')


## Time Zone Handling
### Working with Time Zones

In [664]:
rng = pd.date_range('3/6/2012 00:00', periods=15, freq='D')

In [665]:
rng.tz is None

True

In [666]:
rng_pytz = pd.date_range('3/6/2012 00:00', periods=10, freq='D',tz='Europe/London')

In [667]:
rng_pytz.tz

<DstTzInfo 'Europe/London' LMT-1 day, 23:59:00 STD>

In [668]:
import dateutil

In [669]:
rng_utc = pd.date_range('3/6/2012 00:00', periods=10, freq='D', tz=dateutil.tz.tzutc())

In [670]:
rng_utc.tz

tzutc()

In [671]:
import pytz

In [672]:
tz_pytz = pytz.timezone('Europe/London')

In [673]:
rng_pytz = pd.date_range('3/6/2012 00:00', periods=10, freq='D', tz=tz_pytz)

In [674]:
rng_pytz.tz == tz_pytz

True

In [675]:
tz_dateutil = dateutil.tz.gettz('Europe/London')

In [676]:
rng_dateutil = pd.date_range('3/6/2012 00:00', periods=10, freq='D', 
                           tz=tz_dateutil)

In [677]:
rng_dateutil.tz == tz_dateutil

True

Timestamps, like Python’s datetime.datetime object can be either time zone naive or time zone aware. Naive time series and DatetimeIndex objects can be localized using tz_localize:

In [678]:
ts = pd.Series(np.random.randn(len(rng)), rng)
ts

2012-03-06    0.823346
2012-03-07   -0.869983
2012-03-08   -0.323601
2012-03-09   -0.757136
2012-03-10   -0.174542
2012-03-11   -0.239936
2012-03-12    1.108951
2012-03-13    0.733135
2012-03-14    0.439944
2012-03-15    1.614645
2012-03-16    0.821074
2012-03-17   -0.738779
2012-03-18   -0.343959
2012-03-19    0.620515
2012-03-20   -0.804303
Freq: D, dtype: float64

In [679]:
ts_utc = ts.tz_localize('UTC')

In [680]:
ts_utc

2012-03-06 00:00:00+00:00    0.823346
2012-03-07 00:00:00+00:00   -0.869983
2012-03-08 00:00:00+00:00   -0.323601
2012-03-09 00:00:00+00:00   -0.757136
2012-03-10 00:00:00+00:00   -0.174542
2012-03-11 00:00:00+00:00   -0.239936
2012-03-12 00:00:00+00:00    1.108951
2012-03-13 00:00:00+00:00    0.733135
2012-03-14 00:00:00+00:00    0.439944
2012-03-15 00:00:00+00:00    1.614645
2012-03-16 00:00:00+00:00    0.821074
2012-03-17 00:00:00+00:00   -0.738779
2012-03-18 00:00:00+00:00   -0.343959
2012-03-19 00:00:00+00:00    0.620515
2012-03-20 00:00:00+00:00   -0.804303
Freq: D, dtype: float64

In [681]:
ts_utc.tz_convert('US/Eastern')

2012-03-05 19:00:00-05:00    0.823346
2012-03-06 19:00:00-05:00   -0.869983
2012-03-07 19:00:00-05:00   -0.323601
2012-03-08 19:00:00-05:00   -0.757136
2012-03-09 19:00:00-05:00   -0.174542
2012-03-10 19:00:00-05:00   -0.239936
2012-03-11 20:00:00-04:00    1.108951
2012-03-12 20:00:00-04:00    0.733135
2012-03-13 20:00:00-04:00    0.439944
2012-03-14 20:00:00-04:00    1.614645
2012-03-15 20:00:00-04:00    0.821074
2012-03-16 20:00:00-04:00   -0.738779
2012-03-17 20:00:00-04:00   -0.343959
2012-03-18 20:00:00-04:00    0.620515
2012-03-19 20:00:00-04:00   -0.804303
Freq: D, dtype: float64

In [682]:
rng_eastern = rng_utc.tz_convert('US/Eastern')
rng_eastern

DatetimeIndex(['2012-03-05', '2012-03-06', '2012-03-07', '2012-03-08',
               '2012-03-09', '2012-03-10', '2012-03-11', '2012-03-12',
               '2012-03-13', '2012-03-14'],
              dtype='datetime64[ns, US/Eastern]', freq='D')

In [683]:
rng_berlin = rng_utc.tz_convert('Europe/Berlin')
rng_berlin

DatetimeIndex(['2012-03-06', '2012-03-07', '2012-03-08', '2012-03-09',
               '2012-03-10', '2012-03-11', '2012-03-12', '2012-03-13',
               '2012-03-14', '2012-03-15'],
              dtype='datetime64[ns, Europe/Berlin]', freq='D')

In [684]:
rng_eastern[5]

Timestamp('2012-03-10 19:00:00-0500', tz='US/Eastern', freq='D')

In [685]:
rng_berlin[5]

Timestamp('2012-03-11 01:00:00+0100', tz='Europe/Berlin', freq='D')

In [686]:
rng_eastern[5] == rng_berlin[5]

True

In [687]:
rng_eastern[5]

Timestamp('2012-03-10 19:00:00-0500', tz='US/Eastern', freq='D')

In [688]:
rng_berlin[5]

Timestamp('2012-03-11 01:00:00+0100', tz='Europe/Berlin', freq='D')

In [689]:
rng_eastern[5].tz_convert('Europe/Berlin')

Timestamp('2012-03-11 01:00:00+0100', tz='Europe/Berlin')

In [690]:
rng[5]

Timestamp('2012-03-11 00:00:00', freq='D')

In [691]:
rng[5].tz_localize('Asia/Shanghai')

Timestamp('2012-03-11 00:00:00+0800', tz='Asia/Shanghai')

In [692]:
eastern = ts_utc.tz_convert('US/Eastern')
eastern

2012-03-05 19:00:00-05:00    0.823346
2012-03-06 19:00:00-05:00   -0.869983
2012-03-07 19:00:00-05:00   -0.323601
2012-03-08 19:00:00-05:00   -0.757136
2012-03-09 19:00:00-05:00   -0.174542
2012-03-10 19:00:00-05:00   -0.239936
2012-03-11 20:00:00-04:00    1.108951
2012-03-12 20:00:00-04:00    0.733135
2012-03-13 20:00:00-04:00    0.439944
2012-03-14 20:00:00-04:00    1.614645
2012-03-15 20:00:00-04:00    0.821074
2012-03-16 20:00:00-04:00   -0.738779
2012-03-17 20:00:00-04:00   -0.343959
2012-03-18 20:00:00-04:00    0.620515
2012-03-19 20:00:00-04:00   -0.804303
Freq: D, dtype: float64

In [693]:
berlin = ts_utc.tz_convert('Europe/Berlin')
berlin

2012-03-06 01:00:00+01:00    0.823346
2012-03-07 01:00:00+01:00   -0.869983
2012-03-08 01:00:00+01:00   -0.323601
2012-03-09 01:00:00+01:00   -0.757136
2012-03-10 01:00:00+01:00   -0.174542
2012-03-11 01:00:00+01:00   -0.239936
2012-03-12 01:00:00+01:00    1.108951
2012-03-13 01:00:00+01:00    0.733135
2012-03-14 01:00:00+01:00    0.439944
2012-03-15 01:00:00+01:00    1.614645
2012-03-16 01:00:00+01:00    0.821074
2012-03-17 01:00:00+01:00   -0.738779
2012-03-18 01:00:00+01:00   -0.343959
2012-03-19 01:00:00+01:00    0.620515
2012-03-20 01:00:00+01:00   -0.804303
Freq: D, dtype: float64

In [694]:
result = eastern + berlin
result

2012-03-06 00:00:00+00:00    1.646693
2012-03-07 00:00:00+00:00   -1.739966
2012-03-08 00:00:00+00:00   -0.647202
2012-03-09 00:00:00+00:00   -1.514271
2012-03-10 00:00:00+00:00   -0.349083
2012-03-11 00:00:00+00:00   -0.479872
2012-03-12 00:00:00+00:00    2.217903
2012-03-13 00:00:00+00:00    1.466269
2012-03-14 00:00:00+00:00    0.879887
2012-03-15 00:00:00+00:00    3.229291
2012-03-16 00:00:00+00:00    1.642148
2012-03-17 00:00:00+00:00   -1.477558
2012-03-18 00:00:00+00:00   -0.687919
2012-03-19 00:00:00+00:00    1.241029
2012-03-20 00:00:00+00:00   -1.608607
Freq: D, dtype: float64

To remove timezone from tz-aware DatetimeIndex, use tz_localize(None) or tz_convert(None). tz_localize(None) will remove timezone holding local time representations. tz_convert(None) will remove timezone after converting to UTC time.



In [695]:
didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern')
didx

DatetimeIndex(['2014-08-01 09:00:00-04:00', '2014-08-01 10:00:00-04:00',
               '2014-08-01 11:00:00-04:00', '2014-08-01 12:00:00-04:00',
               '2014-08-01 13:00:00-04:00', '2014-08-01 14:00:00-04:00',
               '2014-08-01 15:00:00-04:00', '2014-08-01 16:00:00-04:00',
               '2014-08-01 17:00:00-04:00', '2014-08-01 18:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', freq='H')

In [696]:
didx.tz_localize(None)

DatetimeIndex(['2014-08-01 09:00:00', '2014-08-01 10:00:00',
               '2014-08-01 11:00:00', '2014-08-01 12:00:00',
               '2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00'],
              dtype='datetime64[ns]', freq='H')

In [697]:
didx.tz_convert(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

In [698]:
# tz_convert(None) is identical with tz_convert('UTC').tz_localize(None)
didx.tz_convert('UCT').tz_localize(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

### Ambiguous Times when Localizing

In some cases, localize cannot determine the DST and non-DST hours when there are duplicates. This often happens when reading files or database records that simply duplicate the hours. Passing ambiguous='infer' (infer_dst argument in prior releases) into tz_localize will attempt to determine the right offset. Below the top example will fail as it contains ambiguous times and the bottom will infer the right offset.

In [699]:
rng_hourly = pd.DatetimeIndex(['11/06/2011 00:00', '11/06/2011 01:00',
                                    '11/06/2011 01:00', '11/06/2011 02:00',
                                    '11/06/2011 03:00'])
     

In [700]:
# Error is normal for commented code
#rng_hourly.tz_localize('US/Eastern')

In [701]:
rng_hourly_eastern = rng_hourly.tz_localize('US/Eastern', ambiguous='infer')


In [702]:
rng_hourly_eastern.tolist()

[Timestamp('2011-11-06 00:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 02:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 03:00:00-0500', tz='US/Eastern')]

In [703]:
rng_hourly_dst = np.array([1, 1, 0, 0, 0])

In [704]:
rng_hourly.tz_localize('US/Eastern', ambiguous=rng_hourly_dst).tolist()

[Timestamp('2011-11-06 00:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 02:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 03:00:00-0500', tz='US/Eastern')]

In [705]:
rng_hourly.tz_localize('US/Eastern', ambiguous='NaT').tolist()

[Timestamp('2011-11-06 00:00:00-0400', tz='US/Eastern'),
 NaT,
 NaT,
 Timestamp('2011-11-06 02:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 03:00:00-0500', tz='US/Eastern')]

In [706]:
didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern')
didx

DatetimeIndex(['2014-08-01 09:00:00-04:00', '2014-08-01 10:00:00-04:00',
               '2014-08-01 11:00:00-04:00', '2014-08-01 12:00:00-04:00',
               '2014-08-01 13:00:00-04:00', '2014-08-01 14:00:00-04:00',
               '2014-08-01 15:00:00-04:00', '2014-08-01 16:00:00-04:00',
               '2014-08-01 17:00:00-04:00', '2014-08-01 18:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', freq='H')

In [707]:
didx.tz_localize(None)

DatetimeIndex(['2014-08-01 09:00:00', '2014-08-01 10:00:00',
               '2014-08-01 11:00:00', '2014-08-01 12:00:00',
               '2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00'],
              dtype='datetime64[ns]', freq='H')

In [708]:
didx.tz_convert(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

In [709]:
# tz_convert(None) is identical with tz_convert('UTC').tz_localize(None)
didx.tz_convert('UCT').tz_localize(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

### TZ Aware DTypes

Series/DatetimeIndex with a timezone naive value are represented with a dtype of datetime64[ns].

In [710]:
s_naive = pd.Series(pd.date_range('20130101',periods=3))
s_naive

0   2013-01-01
1   2013-01-02
2   2013-01-03
dtype: datetime64[ns]

Series/DatetimeIndex with a timezone aware value are represented with a dtype of datetime64[ns, tz].

In [711]:
s_aware = pd.Series(pd.date_range('20130101',periods=3,tz='US/Eastern'))
s_aware

0   2013-01-01 00:00:00-05:00
1   2013-01-02 00:00:00-05:00
2   2013-01-03 00:00:00-05:00
dtype: datetime64[ns, US/Eastern]

Both of these Series can be manipulated via the .dt accessor, see here.

For example, to localize and convert a naive stamp to timezone aware.

In [712]:
# localize and convert a naive timezone
s_naive.astype('datetime64[ns, US/Eastern]')

0   2012-12-31 19:00:00-05:00
1   2013-01-01 19:00:00-05:00
2   2013-01-02 19:00:00-05:00
dtype: datetime64[ns, US/Eastern]

In [713]:
# make an aware tz naive
s_aware.astype('datetime64[ns]')

0   2013-01-01 05:00:00
1   2013-01-02 05:00:00
2   2013-01-03 05:00:00
dtype: datetime64[ns]

In [714]:
# convert to a new timezone
s_aware.astype('datetime64[ns, CET]')

0   2013-01-01 06:00:00+01:00
1   2013-01-02 06:00:00+01:00
2   2013-01-03 06:00:00+01:00
dtype: datetime64[ns, CET]

Note Using the .values accessor on a Series, returns an numpy array of the data. These values are converted to UTC, as numpy does not currently support timezones (even though it is printing in the local timezone!).

In [715]:
s_naive.values

array(['2013-01-01T00:00:00.000000000', '2013-01-02T00:00:00.000000000',
       '2013-01-03T00:00:00.000000000'], dtype='datetime64[ns]')

In [716]:
s_aware.values

array(['2013-01-01T05:00:00.000000000', '2013-01-02T05:00:00.000000000',
       '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')

In [717]:
pd.Series(s_aware.values)

0   2013-01-01 05:00:00
1   2013-01-02 05:00:00
2   2013-01-03 05:00:00
dtype: datetime64[ns]

In [718]:
pd.Series(s_aware.values).dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

0   2013-01-01 00:00:00-05:00
1   2013-01-02 00:00:00-05:00
2   2013-01-03 00:00:00-05:00
dtype: datetime64[ns, US/Eastern]

Fin, Source: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects