 # <center>Pandas Time and Date Series</center>

In [720]:
import pandas as pd
import numpy as np


In [721]:
rng = pd.date_range('1/1/2011', periods=52, freq='H')

In [722]:
rng[:5]

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00'],
              dtype='datetime64[ns]', freq='H')

In [723]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts[:5]

2011-01-01 00:00:00   -1.373137
2011-01-01 01:00:00   -0.190374
2011-01-01 02:00:00   -0.724424
2011-01-01 03:00:00    2.504553
2011-01-01 04:00:00    0.543506
Freq: H, dtype: float64

In [724]:
converted = ts.asfreq('45Min', method='pad')
converted.head()

2011-01-01 00:00:00   -1.373137
2011-01-01 00:45:00   -1.373137
2011-01-01 01:30:00   -0.190374
2011-01-01 02:15:00   -0.724424
2011-01-01 03:00:00    2.504553
Freq: 45T, dtype: float64

In [725]:
# Daily means
ts.resample('D').mean()

2011-01-01    0.014475
2011-01-02    0.000427
2011-01-03   -0.854685
Freq: D, dtype: float64

### Time Stamps vs Time Spans

In [726]:
pd.Timestamp('2015-05-01')

Timestamp('2015-05-01 00:00:00')

In [727]:
pd.Period('2011-01')

Period('2011-01', 'M')

In [728]:
dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'), pd.Timestamp('2012-05-03')]
dates

[Timestamp('2012-05-01 00:00:00'),
 Timestamp('2012-05-02 00:00:00'),
 Timestamp('2012-05-03 00:00:00')]

In [729]:
ts = pd.Series(np.random.randn(3), dates)
type (ts.index)
ts.index


DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [730]:
ts

2012-05-01    0.713736
2012-05-02    0.768792
2012-05-03   -1.064923
dtype: float64

### Converting to Timestamps

In [731]:
pd.to_datetime(pd.Series(['Jul 31, 2009', '2010-01-10', None]))

0   2009-07-31
1   2010-01-10
2          NaT
dtype: datetime64[ns]

In [732]:
pd.to_datetime(['2005/11/23', '2010.12.31'])

DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)

In [733]:
pd.to_datetime(['04-01-2012 10:00'], dayfirst=True)

DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)

In [734]:
pd.to_datetime(['14-01-2012', '01-14-2012'], dayfirst=True)

DatetimeIndex(['2012-01-14', '2012-01-14'], dtype='datetime64[ns]', freq=None)

In [735]:
pd.to_datetime('2010/11/12')

Timestamp('2010-11-12 00:00:00')

In [736]:
pd.Timestamp('2010/11/12')

Timestamp('2010-11-12 00:00:00')

In [737]:
df = pd.DataFrame({'year': [2015, 2016],
                   'month': [2, 3],
                   'day': [4, 5],
                   'hour': [2, 3]})

In [738]:
pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

In [739]:
pd.to_datetime(df[['year', 'month', 'day']])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

In [740]:
# pd.to_datetime(['2009/07/31', 'asd'], errors='raise')

In [741]:
pd.to_datetime(['2009/07/31', 'asd'], errors='ignore')

array(['2009/07/31', 'asd'], dtype=object)

### Epoch Timestamps

In [742]:
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit='s')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [743]:
 pd.to_datetime([1349720105100, 1349720105200, 1349720105300,
   ....:                 1349720105400, 1349720105500 ], unit='ms')

DatetimeIndex(['2012-10-08 18:15:05.100000', '2012-10-08 18:15:05.200000',
               '2012-10-08 18:15:05.300000', '2012-10-08 18:15:05.400000',
               '2012-10-08 18:15:05.500000'],
              dtype='datetime64[ns]', freq=None)

### From Timestamps to Epoch

In [744]:
stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D')
stamps

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05'],
              dtype='datetime64[ns]', freq='D')

In [745]:
stamps.view('int64')

array([1349720105000000000, 1349806505000000000, 1349892905000000000,
       1349979305000000000])

In [746]:
pd.Timedelta(1, unit='s')

Timedelta('0 days 00:00:01')

### Using the Orgin Parameter

In [747]:
pd.to_datetime([1, 2, 3, 10], unit='D', origin=pd.Timestamp('1960-01-01'))

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04', '1960-01-11'], dtype='datetime64[ns]', freq=None)

The default is set at origin='unix', which defaults to 1970-01-01 00:00:00. Commonly called ‘unix epoch’ or POSIX time.

In [748]:
pd.to_datetime([1, 2, 3], unit='D')

DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)

### Generating Ranges of Timestamps

In [749]:
dates = [pd.datetime(2012, 5, 1), pd.datetime(2012, 5, 2), pd.datetime(2012, 5, 3)]
dates

[datetime.datetime(2012, 5, 1, 0, 0),
 datetime.datetime(2012, 5, 2, 0, 0),
 datetime.datetime(2012, 5, 3, 0, 0)]

In [750]:
index = pd.DatetimeIndex(dates)
index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [751]:
index = pd.Index(dates)
index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

In [752]:
index = pd.date_range('2000-1-1', periods=1000, freq='M')
index

DatetimeIndex(['2000-01-31', '2000-02-29', '2000-03-31', '2000-04-30',
               '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31',
               '2000-09-30', '2000-10-31',
               ...
               '2082-07-31', '2082-08-31', '2082-09-30', '2082-10-31',
               '2082-11-30', '2082-12-31', '2083-01-31', '2083-02-28',
               '2083-03-31', '2083-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

In [753]:
index = pd.bdate_range('2012-1-1', periods=250)
index

DatetimeIndex(['2012-01-02', '2012-01-03', '2012-01-04', '2012-01-05',
               '2012-01-06', '2012-01-09', '2012-01-10', '2012-01-11',
               '2012-01-12', '2012-01-13',
               ...
               '2012-12-03', '2012-12-04', '2012-12-05', '2012-12-06',
               '2012-12-07', '2012-12-10', '2012-12-11', '2012-12-12',
               '2012-12-13', '2012-12-14'],
              dtype='datetime64[ns]', length=250, freq='B')

In [754]:
start = pd.datetime(2011, 1, 1)
end = pd.datetime(2012, 1, 1)
rng = pd.date_range(start, end)
rng

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',
               '2011-12-31', '2012-01-01'],
              dtype='datetime64[ns]', length=366, freq='D')

In [755]:
rng = pd.bdate_range(start, end)
rng

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14',
               ...
               '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',
               '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',
               '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', length=260, freq='B')

In [756]:
pd.date_range(start, end, freq='BM') #BM stands for the end of every month

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [757]:
pd.date_range(start, end, freq='W')

DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',
               '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',
               '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',
               '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',
               '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',
               '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',
               '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',
               '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',
               '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',
               '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',
               '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',
               '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',
               '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',
               '2012-01-01'],
              dtype='datetime64[ns]', freq='W-S

In [758]:
pd.bdate_range(end=end, periods=20)

DatetimeIndex(['2011-12-05', '2011-12-06', '2011-12-07', '2011-12-08',
               '2011-12-09', '2011-12-12', '2011-12-13', '2011-12-14',
               '2011-12-15', '2011-12-16', '2011-12-19', '2011-12-20',
               '2011-12-21', '2011-12-22', '2011-12-23', '2011-12-26',
               '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30'],
              dtype='datetime64[ns]', freq='B')

In [759]:
pd.bdate_range(start=start, periods=20)

DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',
               '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',
               '2011-01-13', '2011-01-14', '2011-01-17', '2011-01-18',
               '2011-01-19', '2011-01-20', '2011-01-21', '2011-01-24',
               '2011-01-25', '2011-01-26', '2011-01-27', '2011-01-28'],
              dtype='datetime64[ns]', freq='B')

### Timestamp Limitations

In [760]:
pd.Timestamp.min

Timestamp('1677-09-21 00:12:43.145225')

In [761]:
pd.Timestamp.max

Timestamp('2262-04-11 23:47:16.854775807')

### Indexing

In [762]:
rng = pd.date_range(start, end, freq='BM')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts.index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31', '2011-06-30', '2011-07-29', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-30'],
              dtype='datetime64[ns]', freq='BM')

In [763]:
ts[:5].index

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-29',
               '2011-05-31'],
              dtype='datetime64[ns]', freq='BM')

In [764]:
ts[::2].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-29',
               '2011-09-30', '2011-11-30'],
              dtype='datetime64[ns]', freq='2BM')

### Partial String Indexing

In [765]:
ts['1/31/2011']

-1.7399244432129515

In [766]:
ts[pd.datetime(2011, 12, 25):]

2011-12-30   -1.012448
Freq: BM, dtype: float64

In [767]:
ts['10/31/2011':'12/31/2011']

2011-10-31   -0.234299
2011-11-30    0.318409
2011-12-30   -1.012448
Freq: BM, dtype: float64

In [768]:
ts['2011']

2011-01-31   -1.739924
2011-02-28   -0.253613
2011-03-31   -0.599665
2011-04-29    0.410505
2011-05-31   -1.570554
2011-06-30   -1.643063
2011-07-29    0.362864
2011-08-31   -0.786339
2011-09-30   -2.346068
2011-10-31   -0.234299
2011-11-30    0.318409
2011-12-30   -1.012448
Freq: BM, dtype: float64

In [769]:
ts['2011-6']

2011-06-30   -1.643063
Freq: BM, dtype: float64

In [770]:
dft = pd.DataFrame(np.random.randn(100000,1), columns=['A'], index=pd.date_range('20130101',periods=100000,freq='T'))
dft

Unnamed: 0,A
2013-01-01 00:00:00,-1.009164
2013-01-01 00:01:00,-0.834403
2013-01-01 00:02:00,0.199435
2013-01-01 00:03:00,-0.934874
2013-01-01 00:04:00,0.595367
2013-01-01 00:05:00,0.222170
2013-01-01 00:06:00,0.946015
2013-01-01 00:07:00,-0.022721
2013-01-01 00:08:00,-0.397902
2013-01-01 00:09:00,1.647524


In [771]:
dft['2013']

Unnamed: 0,A
2013-01-01 00:00:00,-1.009164
2013-01-01 00:01:00,-0.834403
2013-01-01 00:02:00,0.199435
2013-01-01 00:03:00,-0.934874
2013-01-01 00:04:00,0.595367
2013-01-01 00:05:00,0.222170
2013-01-01 00:06:00,0.946015
2013-01-01 00:07:00,-0.022721
2013-01-01 00:08:00,-0.397902
2013-01-01 00:09:00,1.647524


In [772]:
dft['2013-1':'2013-2']

Unnamed: 0,A
2013-01-01 00:00:00,-1.009164
2013-01-01 00:01:00,-0.834403
2013-01-01 00:02:00,0.199435
2013-01-01 00:03:00,-0.934874
2013-01-01 00:04:00,0.595367
2013-01-01 00:05:00,0.222170
2013-01-01 00:06:00,0.946015
2013-01-01 00:07:00,-0.022721
2013-01-01 00:08:00,-0.397902
2013-01-01 00:09:00,1.647524


In [773]:
dft['2013-1':'2013-2-28']

Unnamed: 0,A
2013-01-01 00:00:00,-1.009164
2013-01-01 00:01:00,-0.834403
2013-01-01 00:02:00,0.199435
2013-01-01 00:03:00,-0.934874
2013-01-01 00:04:00,0.595367
2013-01-01 00:05:00,0.222170
2013-01-01 00:06:00,0.946015
2013-01-01 00:07:00,-0.022721
2013-01-01 00:08:00,-0.397902
2013-01-01 00:09:00,1.647524


In [774]:
dft['2013-1':'2013-2-28 00:00:00']

Unnamed: 0,A
2013-01-01 00:00:00,-1.009164
2013-01-01 00:01:00,-0.834403
2013-01-01 00:02:00,0.199435
2013-01-01 00:03:00,-0.934874
2013-01-01 00:04:00,0.595367
2013-01-01 00:05:00,0.222170
2013-01-01 00:06:00,0.946015
2013-01-01 00:07:00,-0.022721
2013-01-01 00:08:00,-0.397902
2013-01-01 00:09:00,1.647524


In [775]:
dft['2013-1-15':'2013-1-15 12:30:00']

Unnamed: 0,A
2013-01-15 00:00:00,-0.949961
2013-01-15 00:01:00,0.252519
2013-01-15 00:02:00,0.311516
2013-01-15 00:03:00,-0.879424
2013-01-15 00:04:00,-0.256118
2013-01-15 00:05:00,0.268150
2013-01-15 00:06:00,-1.435577
2013-01-15 00:07:00,0.122282
2013-01-15 00:08:00,0.122822
2013-01-15 00:09:00,0.942391


In [776]:
# String slicing on MultiIndex 
dft2 = pd.DataFrame(np.random.randn(20, 1),columns=['A'], 
                    index=pd.MultiIndex.from_product([pd.date_range('20130101', 
                    periods=10, freq='12H'),['a', 'b']]))
dft2

Unnamed: 0,Unnamed: 1,A
2013-01-01 00:00:00,a,-0.79653
2013-01-01 00:00:00,b,-1.54079
2013-01-01 12:00:00,a,-0.72571
2013-01-01 12:00:00,b,-0.388563
2013-01-02 00:00:00,a,0.632665
2013-01-02 00:00:00,b,1.296748
2013-01-02 12:00:00,a,0.867938
2013-01-02 12:00:00,b,0.182011
2013-01-03 00:00:00,a,-0.259817
2013-01-03 00:00:00,b,0.018705


In [777]:
dft2.loc['2013-01-05']

Unnamed: 0,Unnamed: 1,A
2013-01-05 00:00:00,a,-0.634383
2013-01-05 00:00:00,b,0.177247
2013-01-05 12:00:00,a,-0.562308
2013-01-05 12:00:00,b,0.179306


In [778]:
idx = pd.IndexSlice
idx

<pandas.core.indexing._IndexSlice at 0x109662ba8>

In [779]:
dft2 = dft2.swaplevel(0, 1).sort_index()
dft2

Unnamed: 0,Unnamed: 1,A
a,2013-01-01 00:00:00,-0.79653
a,2013-01-01 12:00:00,-0.72571
a,2013-01-02 00:00:00,0.632665
a,2013-01-02 12:00:00,0.867938
a,2013-01-03 00:00:00,-0.259817
a,2013-01-03 12:00:00,-0.773704
a,2013-01-04 00:00:00,0.444237
a,2013-01-04 12:00:00,1.762325
a,2013-01-05 00:00:00,-0.634383
a,2013-01-05 12:00:00,-0.562308


In [780]:
dft2.loc[idx[:, '2013-01-05'], :]

Unnamed: 0,Unnamed: 1,A
a,2013-01-05 00:00:00,-0.634383
a,2013-01-05 12:00:00,-0.562308
b,2013-01-05 00:00:00,0.177247
b,2013-01-05 12:00:00,0.179306


## Slice vs Exact Match

In [781]:
series_minute = pd.Series([1, 2, 3],pd.DatetimeIndex(['2011-12-31 23:59:00',
                                                      '2012-01-01 00:00:00', 
                                                      '2012-01-01 00:02:00']))
series_minute

2011-12-31 23:59:00    1
2012-01-01 00:00:00    2
2012-01-01 00:02:00    3
dtype: int64

In [782]:
series_minute.index.resolution

'minute'

In [783]:
series_minute['2011-12-31 23']

2011-12-31 23:59:00    1
dtype: int64

In [784]:
series_minute['2011-12-31 23:59']

1

In [785]:
series_minute['2011-12-31 23:59:00']

1

In [786]:
series_second = pd.Series([1, 2, 3],pd.DatetimeIndex(['2011-12-31 23:59:59',
                                                      '2012-01-01 00:00:00',
                                                      '2012-01-01 00:00:01']))
series_second.index.resolution

'second'

In [787]:
series_second['2011-12-31 23:59']

2011-12-31 23:59:59    1
dtype: int64

In [788]:
dft_minute = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=series_minute.index)
dft_minute

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4
2012-01-01 00:00:00,2,5
2012-01-01 00:02:00,3,6


In [789]:
dft_minute.loc['2011-12-31 23']

Unnamed: 0,a,b
2011-12-31 23:59:00,1,4


In [790]:
series_monthly = pd.Series([1, 2, 3],pd.DatetimeIndex(['2011-12','2012-01','2012-02']))
series_monthly

2011-12-01    1
2012-01-01    2
2012-02-01    3
dtype: int64

In [791]:
series_monthly.index.resolution

'day'

In [792]:
series_monthly['2011-12']

2011-12-01    1
dtype: int64

### Exact Indexing

In [793]:
dft[pd.datetime(2013, 1, 1):pd.datetime(2013,2,28)]

Unnamed: 0,A
2013-01-01 00:00:00,-1.009164
2013-01-01 00:01:00,-0.834403
2013-01-01 00:02:00,0.199435
2013-01-01 00:03:00,-0.934874
2013-01-01 00:04:00,0.595367
2013-01-01 00:05:00,0.222170
2013-01-01 00:06:00,0.946015
2013-01-01 00:07:00,-0.022721
2013-01-01 00:08:00,-0.397902
2013-01-01 00:09:00,1.647524


In [794]:
dft[pd.datetime(2013, 1, 1, 10, 12, 0):pd.datetime(2013, 2, 28, 10, 12, 0)]

Unnamed: 0,A
2013-01-01 10:12:00,0.538316
2013-01-01 10:13:00,1.867577
2013-01-01 10:14:00,0.720156
2013-01-01 10:15:00,0.036419
2013-01-01 10:16:00,-0.722363
2013-01-01 10:17:00,-0.193135
2013-01-01 10:18:00,-0.093464
2013-01-01 10:19:00,0.422281
2013-01-01 10:20:00,0.789161
2013-01-01 10:21:00,-0.872336


## Truncating & Fancy Indexing

In [795]:
ts.truncate(before='10/31/2011', after='12/31/2011')

2011-10-31   -0.234299
2011-11-30    0.318409
2011-12-30   -1.012448
Freq: BM, dtype: float64

In [796]:
ts[[0, 2, 6]].index

DatetimeIndex(['2011-01-31', '2011-03-31', '2011-07-29'], dtype='datetime64[ns]', freq=None)

### Time/Date Components

There are several time/date properties that one can access from Timestamp or a collection of timestamps like a DateTimeIndex.

Source: https://pandas.pydata.org/pandas-docs/stable/timeseries.html

## DateOffset Objects

In [797]:
from pandas.tseries.offsets import *

d = pd.datetime(2008, 8, 18, 9, 0)
d + DateOffset(months = 4, days=5)

Timestamp('2008-12-23 09:00:00')

In [798]:
class BDay(DateOffset):
    """DateOffset increments between business days"""
    def apply(self, other):
        ...

In [799]:
d - 5*BDay()

In [800]:
d + BMonthEnd()

Timestamp('2008-08-29 09:00:00')

In [801]:
d

datetime.datetime(2008, 8, 18, 9, 0)

In [802]:
offset = BMonthEnd()

In [803]:
offset.rollforward(d)

Timestamp('2008-08-29 09:00:00')

In [804]:
day = Day()y

SyntaxError: invalid syntax (<ipython-input-804-bcafff832ac5>, line 1)

In [None]:
day.apply(pd.Timestamp('2014-01-01 09:00'))

In [None]:
day = Day(normalize=True)

In [None]:
day.apply(pd.Timestamp('2014-01-01 09:00'))

In [None]:
hour = Hour()

In [None]:
hour.apply(pd.Timestamp('2014-01-01 22:00'))

In [None]:
hour = Hour(normalize=True)

In [None]:
hour.apply(pd.Timestamp('2014-01-01 22:00'))

In [None]:
hour.apply(pd.Timestamp('2014-01-01 23:00'))

### Parametric Offsets

In [None]:
d = pd.datetime(2008, 8, 18, 9, 0)

In [None]:
d

In [None]:
d + Week()

In [None]:
d + Week(weekday=4)

In [None]:
(d + Week(weekday=4)).weekday()

In [None]:
d - Week()

In [None]:
d + Week(normatlize=True)

In [None]:
d - Week(normalize=True)

In [None]:
d + YearEnd()

In [None]:
d + YearEnd(month=6)

### Using Offsets with Series / DatetimeIndex

In [None]:
rng = pd.date_range('2012-01-01','2012-01-03')

In [None]:
s = pd.Series(rng)

In [None]:
rng

In [None]:
rng + DateOffset(month=2)

In [None]:
s + DateOffset(months=2)

In [None]:
s - DateOffset(months=2)

In [None]:
s = Day(2)

In [None]:
#td = s - pd.Series(pd.date_range('2011-12-29', '2011-12-31'))\
#td

In [None]:
#td + Minute(15)

In [None]:
rng + BQuarterEnd()

### Custom Business Days

In [None]:
from pandas.tseries.offsets import CustomBusinessDay

In [None]:
weekmask_egypt = 'Sun Mon Tue Wed Thu'

In [None]:
holidays = ['2012-05-01', pd.datetime(2013, 5, 1), np.datetime64('2014-05-01')]

In [None]:
bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt)
bday_egypt

In [None]:
dt = pd.datetime(2013, 4, 30)

dt+2*bday_egypt

In [None]:
dts = pd.date_range(dt, periods=5, freg=bday_egypt)

In [None]:
pd.Series(dts.weekday, dts).map(pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split()))

Holiday calendars can be used to provide the list of holidays. See the holiday calendar section for more information.

In [None]:
from pandas.tseries.holiday import USFederalHolidayCalendar
bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar())

#Priday before MLK Day
dt = pd.datetime(2014, 1,7)

#Tuesday after MLK Day (Monday is skipped because its a holiday)
dt + bday_us

In [None]:
from pandas.tseries.offsets import CustomBusinessMonthBegin
bmth_us = CustomBusinessMonthBegin(calendar = USFederalHolidayCalendar())

In [None]:
dt = pd.datetime(2013, 12, 17)
dt

In [None]:
dt + bmth_us

In [None]:
pd.DatetimeIndex(start='20100101',end='20120101',freq=bmth_us)

### Business Hours

In [None]:
bh = BusinessHour()
bh

In [None]:
pd.Timestamp('2014-08-01 10:00').weekday()

In [None]:
pd.Timestamp('2014-08-01 10:00:00') + bh

In [None]:
pd.Timestamp('2014-08-01 08:00:00') + bh

In [None]:
pd.Timestamp('2014-08-01 16:00:00') + bh

In [None]:
pd.Timestamp('2014-08-01 9:30:00') + bh

In [None]:
pd.Timestamp('2014-08-01 10:00:00') + BusinessHour(3)

In [None]:
pd.Timestamp('2014-08-01 10:00:00') + BusinessHour(-3)

In [None]:
bh = BusinessHour(start='11:00', end='20:00')
bh

In [None]:
pd.Timestamp('2014-08-01 13:00') + bh

In [None]:
pd.Timestamp('2014-08-01 09:00') + bh

In [None]:
pd.Timestamp('2014-08-01 18:00') + bh

In [None]:
bh = BusinessHour(start='17:00', end='09:00')
bh

In [None]:
pd.Timestamp('2014-08-01 17:00') + bh

In [None]:
pd.Timestamp('2014-08-01 23:00') + bh

In [None]:
pd.Timestamp('2014-08-02 04:00') + bh

In [None]:
pd.Timestamp('2014-08-04 04:00') + bh

Applying BusinessHour.rollforward and rollback to out of business hours results in the next business hour start or previous day’s end. Different from other offsets, BusinessHour.rollforward may output different results from apply by definition.

This is because one day’s business hour end is equal to next day’s business hour start. For example, under the default business hours (9:00 - 17:00), there is no gap (0 minutes) between 2014-08-01 17:00 and 2014-08-04 09:00.

In [None]:
# This adjusts a Timestamp to business hour edge
BusinessHour().rollback(pd.Timestamp ('2014-08-02 15:00'))

In [None]:
BusinessHour().rollforward(pd.Timestamp('2014-08-02 15:00'))

In [None]:
# It is the same as BusinessHour().apply(pd.Timestamp('2014-08-01 17:00')).
# And it is the same as BusinessHour().apply(pd.Timestamp('2014-08-04 09:00'))
BusinessHour().apply(pd.Timestamp('2014-08-02 15:00'))

In [None]:
BusinessHour().rollforward(pd.Timestamp('2014-08-02'))

In [None]:
BusinessHour().apply(pd.Timestamp('2014-08-02'))

### Custom Business Hour (0.18.1)

In [None]:
from pandas.tseries.holiday import USFederalHolidayCalendar

In [None]:
bhour_us = CustomBusinessHour(calendar=USFederalHolidayCalendar())

In [None]:
# Friday before MLK DAY
dt = pd.datetime(2014, 1, 17, 15)

In [None]:
dt+ bhour_us

In [None]:
# Tuesday after MLK Day (Monday is skipped because it's a holiday)
dt + bhour_us * 2

In [None]:
bhour_mon = CustomBusinessHour(start='10:00', weekmask='Tue Wed Thu Fri')

In [None]:
# Monday is skipped because it's a holiday, business hour starts from 10:00
dt + bhour_mon * 2

### Combining Aliases

In [None]:
start = '2011-01-03'

In [None]:
pd.date_range(start, periods=5, freq='B')

In [None]:
#pd.date_range(start, periods=5, freq=BDay())

In [None]:
pd.date_range(start, periods=10, frew='2h20min')

In [None]:
pd.date_range(start, periods=10, freq='1D10U')

### Anchoresd Offset Semantics

In [None]:
pd.Timestamp('2014-01-02') + MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-02') + MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-02') - MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-02') - MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-02') + MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-02') - MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-01') + MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-31') + MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-01') - MonthBegin(n=1)

In [None]:
pd.Timestamp('2014-01-31') - MonthEnd(n=1)

In [None]:
pd.Timestamp('2014-01-01') + MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-31') - MonthBegin(n=4)

In [None]:
pd.Timestamp('2014-01-02') + MonthBegin(n=0)

In [None]:
pd.Timestamp('2014-01-02') + MonthEnd(n=0)

In [None]:
pd.Timestamp('2014-01-01') + MonthBegin(n=0)

In [None]:
pd.Timestamp('2014-01-31') + MonthEnd(n=0)

### Holidays / Holiday Calendars

In [None]:
from pandas.tseries.holiday import Holiday, USMemorialDay, \
 AbstractHolidayCalendar, nearest_workday, MO

In [None]:
class ExampleCalendar(AbstractHolidayCalendar):
    rules = [
        USMemorialDay, 
        Holiday('July 4th', month=7, day=4, observance=nearest_workday),
        Holiday('Columbus Day', month=10, day=1,
                offset=DateOffset(weekday=MO(2))), #same as 2*Week(weekday=2)
    ]

In [None]:
cal = ExampleCalendar()

In [None]:
cal.holidays(pd.datetime(2012, 1, 1), pd.datetime(2012, 12, 31))

In [None]:
from pandas.tseries.offsets import CDay
pd.DatetimeIndex(start='7/1/2012', end='7/10/2012', 
                freq =CDay(calendar=cal)).to_pydatetime()

In [None]:
offset = CustomBusinessDay(calendar=cal)

In [None]:
pd.datetime(2012,5,25) + offset

In [None]:
pd.datetime(2012, 7, 3) + offset

In [None]:
pd.datetime(2012, 7, 3) + 2*offset

In [None]:
pd.datetime(2012, 7, 6) + offset

In [None]:
AbstractHolidayCalendar.start_date

In [None]:
AbstractHolidayCalendar.end_date

These dates can be overwritten by setting the attributes as datetime/Timestamp/string.

In [None]:
AbstractHolidayCalendar.start_date = pd.datetime(2012, 1, 1)

In [None]:
AbstractHolidayCalendar.end_date = pd.datetime(2012, 12, 31)

In [None]:
cal.holidays()

In [None]:
from pandas.tseries.holiday import get_calendar, HolidayCalendarFactory,\
USLaborDay

In [None]:
cal=get_calendar('ExampleCalendar')

In [None]:
cal.rules

In [None]:
new_cal = HolidayCalendarFactory('NewExampleCalendar', cal, USLaborDay)
new_cal.rules

## Time Series related instance methods
### Shifting/Laggihg

In [None]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [None]:
ts = ts[:5]
ts

In [None]:
ts.shift(1)

In [None]:
#ts.shif(5,freq=offset.BDay())

In [None]:
ts.shift(5, freq='BM')

In [None]:
ts.tshift(5, freq='D')

### Frequency Conversion

In [None]:
#dr = pd.date_range('1/1/2010', periods=3, freq=3 * offsets.BDay())

In [None]:
#ts = pd.Series(np.random.randn(3), index=dr)

In [None]:
#ts.asfreq(BDay())

In [None]:
#ts.asfreq(BDay(), method='pad')

## Resampling
### Basic

In [None]:
rng = pd.date_range('1/1/2012', periods=100, freq='S')

In [None]:
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
ts.head()

In [None]:
ts.resample('5Min').sum()

In [None]:
ts.resample('5Min').mean()

In [None]:
ts.resample('5Min').ohlc()

In [None]:
ts.resample('5Min').max()

In [None]:
ts.resample('5Min', closed='right').mean()

In [None]:
ts.resample('5Min', closed='left').mean()


In [None]:
ts.resample('5Min').mean() # by default label='right'

In [None]:
ts.resample('5Min', label='left').mean()

In [None]:
ts.resample('5Min', label='left', loffset='1s').mean()

### Up Sampling

In [None]:
ts[:2].resample('250L').asfreq()

In [None]:
ts[:2].resample('250L').ffill()

In [None]:
ts[:2].resample('250L').ffill(limit=2)

### Sparse Resampling

In [None]:
rng = pd.date_range('2014-1-1', periods=100, freq='D') + pd.Timedelta('1s')
ts = pd.Series(range(100), index=rng)
ts.head()

In [None]:
ts.resample('3T').sum()

In [None]:
from functools import partial
from pandas.tseries.frequencies import to_offset

In [None]:
def round(t, freq):
    freq = to_offset(freq)
    return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value)

In [None]:
ts.groupby(partial(round, freq='3T')).sum()

### Aggregation

In [None]:
df = pd.DataFrame(np.random.randn(1000,3), index=pd.date_range('1/1/2012',
                                                          freq='S',
                                                          periods=1000),
                 columns=['A','B','C'])

In [None]:
ts.groupby(partial(round, freq='3T')).sum()

### Aggregation

In [None]:
df = pd. DataFrame(np.random.randn(1000,3),
                   index = pd.date_range('1/1/2012', freq='S',periods=1000),
                   columns = ['A','B','C'])

In [None]:
r = df.resample('3T')

In [None]:
r.mean()

In [805]:
r['A'].mean()

2012-01-01 00:00:00    0.013883
2012-01-01 00:03:00   -0.101475
2012-01-01 00:06:00   -0.067507
2012-01-01 00:09:00   -0.037650
2012-01-01 00:12:00    0.186183
2012-01-01 00:15:00    0.107917
Freq: 3T, Name: A, dtype: float64

In [806]:
r[['A','B']].mean()

Unnamed: 0,A,B
2012-01-01 00:00:00,0.013883,-0.087869
2012-01-01 00:03:00,-0.101475,0.014091
2012-01-01 00:06:00,-0.067507,0.101602
2012-01-01 00:09:00,-0.03765,0.012024
2012-01-01 00:12:00,0.186183,0.097987
2012-01-01 00:15:00,0.107917,0.174091


In [807]:
r['A'].agg([np.sum, np.mean, np.std])

Unnamed: 0,sum,mean,std
2012-01-01 00:00:00,2.498895,0.013883,1.024056
2012-01-01 00:03:00,-18.265549,-0.101475,1.014878
2012-01-01 00:06:00,-12.151274,-0.067507,1.035389
2012-01-01 00:09:00,-6.777071,-0.03765,0.981985
2012-01-01 00:12:00,33.513016,0.186183,0.948671
2012-01-01 00:15:00,10.791668,0.107917,1.013665


In [808]:
r.agg([np.sum,np.mean])

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,sum,mean,sum,mean,sum,mean
2012-01-01 00:00:00,2.498895,0.013883,-15.816348,-0.087869,-4.319996,-0.024
2012-01-01 00:03:00,-18.265549,-0.101475,2.536441,0.014091,11.622597,0.06457
2012-01-01 00:06:00,-12.151274,-0.067507,18.288335,0.101602,-10.66635,-0.059258
2012-01-01 00:09:00,-6.777071,-0.03765,2.164277,0.012024,-21.140297,-0.117446
2012-01-01 00:12:00,33.513016,0.186183,17.63771,0.097987,-0.361881,-0.00201
2012-01-01 00:15:00,10.791668,0.107917,17.409064,0.174091,-1.092116,-0.010921


In [809]:
r.agg({'A': np.sum,
      'B': lambda x: np.std(x, ddof=1)})

Unnamed: 0,A,B
2012-01-01 00:00:00,2.498895,0.894135
2012-01-01 00:03:00,-18.265549,1.038884
2012-01-01 00:06:00,-12.151274,0.876696
2012-01-01 00:09:00,-6.777071,0.947267
2012-01-01 00:12:00,33.513016,1.116959
2012-01-01 00:15:00,10.791668,0.914109


In [810]:
r.agg({'A': 'sum', 'B':'std'})

Unnamed: 0,A,B
2012-01-01 00:00:00,2.498895,0.894135
2012-01-01 00:03:00,-18.265549,1.038884
2012-01-01 00:06:00,-12.151274,0.876696
2012-01-01 00:09:00,-6.777071,0.947267
2012-01-01 00:12:00,33.513016,1.116959
2012-01-01 00:15:00,10.791668,0.914109


In [811]:
r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] })

Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,sum,std,mean,std
2012-01-01 00:00:00,2.498895,1.024056,-0.087869,0.894135
2012-01-01 00:03:00,-18.265549,1.014878,0.014091,1.038884
2012-01-01 00:06:00,-12.151274,1.035389,0.101602,0.876696
2012-01-01 00:09:00,-6.777071,0.981985,0.012024,0.947267
2012-01-01 00:12:00,33.513016,0.948671,0.097987,1.116959
2012-01-01 00:15:00,10.791668,1.013665,0.174091,0.914109


In [812]:
df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5),
                   'a': np.arange(5)},
                  index=pd.MultiIndex.from_arrays([
                      [1,2,3,4,5],
                      pd.date_range('2015-01-01', freq='W', periods=5)],
                      names=['v','d']))

In [813]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,date
v,d,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2015-01-04,0,2015-01-04
2,2015-01-11,1,2015-01-11
3,2015-01-18,2,2015-01-18
4,2015-01-25,3,2015-01-25
5,2015-02-01,4,2015-02-01


In [814]:
df.resample('M', on='date').sum()

Unnamed: 0_level_0,a
date,Unnamed: 1_level_1
2015-01-31,6
2015-02-28,4


In [815]:
df.resample('M',level='d').sum()

Unnamed: 0_level_0,a
d,Unnamed: 1_level_1
2015-01-31,6
2015-02-28,4


## Time Span Representation
### Period

In [816]:
p = pd.Period('2012', freq='A-DEC')

In [817]:
p + 1

Period('2013', 'A-DEC')

In [818]:
p - 3

Period('2009', 'A-DEC')

In [819]:
p = pd.Period('2012-01', freq='2M')

In [820]:
p + 2

Period('2012-05', '2M')

In [821]:
p - 1

Period('2011-11', '2M')

In [822]:
p = pd.Period('2012-01',freq='2M')
p

Period('2012-01', '2M')

In [823]:
p+2

Period('2012-05', '2M')

In [824]:
p-1

Period('2011-11', '2M')

In [825]:
p = pd.Period('2012-01', freq='3M')

In [826]:
p = pd.Period('2014-07-01 09:00', freq='H')

In [827]:
p + Hour(2)

Period('2014-07-01 11:00', 'H')

In [828]:
p + pd.Timedelta(minutes=120)

Period('2014-07-01 11:00', 'H')

In [829]:
p + np.timedelta64(7200, 's')

Period('2014-07-01 11:00', 'H')

In [830]:
p = pd.Period('2014-07', freq='M')
p

Period('2014-07', 'M')

In [831]:
p + MonthEnd(3)

Period('2014-10', 'M')

In [832]:
#p + MonthBegin(3)

In [833]:
pd.Period('2012', freq='A-DEC') - pd.Period('2002', freq='A-DEC')

10

### PeriodIndex and Period_Range

In [834]:
prng = pd.period_range('1/1/2011', '1/1/2012', freq='M')
prng

PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',
             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',
             '2012-01'],
            dtype='period[M]', freq='M')

In [835]:
pd.PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M')

PeriodIndex(['2011-01', '2011-02', '2011-03'], dtype='period[M]', freq='M')

In [836]:
pd.PeriodIndex(start='2014-01', freq='3M', periods=4)

PeriodIndex(['2014-01', '2014-04', '2014-07', '2014-10'], dtype='period[3M]', freq='3M')

In [837]:
ps = pd.Series(np.random.randn(len(prng)), prng)
ps.head

<bound method NDFrame.head of 2011-01   -0.902203
2011-02   -1.305497
2011-03   -2.442517
2011-04   -0.817470
2011-05    0.576337
2011-06    0.957857
2011-07    0.352634
2011-08   -0.536263
2011-09    0.543004
2011-10   -0.103223
2011-11   -0.197910
2011-12   -2.183855
2012-01   -0.216848
Freq: M, dtype: float64>

In [838]:
idx = pd.period_range('2014-07-01 09:00', periods=5, freq='H')
idx

PeriodIndex(['2014-07-01 09:00', '2014-07-01 10:00', '2014-07-01 11:00',
             '2014-07-01 12:00', '2014-07-01 13:00'],
            dtype='period[H]', freq='H')

In [839]:
idx + Hour(2)

PeriodIndex(['2014-07-01 11:00', '2014-07-01 12:00', '2014-07-01 13:00',
             '2014-07-01 14:00', '2014-07-01 15:00'],
            dtype='period[H]', freq='H')

In [840]:
idx = pd.period_range('2014-07', periods=5, freq='M')
idx

PeriodIndex(['2014-07', '2014-08', '2014-09', '2014-10', '2014-11'], dtype='period[M]', freq='M')

In [841]:
idx + MonthEnd(3)

PeriodIndex(['2014-10', '2014-11', '2014-12', '2015-01', '2015-02'], dtype='period[M]', freq='M')

### Period Dtypes

In [842]:
pi = pd.period_range('2016-01-01', periods=3, freq='M')
pi

PeriodIndex(['2016-01', '2016-02', '2016-03'], dtype='period[M]', freq='M')

In [843]:
pi.dtype

period[M]

In [844]:
# change monthly freq to daily freq
pi.astype('period[D]')

PeriodIndex(['2016-01-31', '2016-02-29', '2016-03-31'], dtype='period[D]', freq='D')

In [845]:
# convert to DatetimeIndex
pi.astype('datetime64[ns]')

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01'], dtype='datetime64[ns]', freq='MS')

### PeriodIndex Partial String Indexing

In [846]:
ps['2011-01']

-0.90220272805157786

In [847]:
ps[pd.datetime(2011, 12, 25):]

2011-12   -2.183855
2012-01   -0.216848
Freq: M, dtype: float64

In [848]:
ps['10/31/2011':'12/31/2011']

2011-10   -0.103223
2011-11   -0.197910
2011-12   -2.183855
Freq: M, dtype: float64

In [849]:
ps['2011']

2011-01   -0.902203
2011-02   -1.305497
2011-03   -2.442517
2011-04   -0.817470
2011-05    0.576337
2011-06    0.957857
2011-07    0.352634
2011-08   -0.536263
2011-09    0.543004
2011-10   -0.103223
2011-11   -0.197910
2011-12   -2.183855
Freq: M, dtype: float64

In [850]:
dfp = pd.DataFrame(np.random.randn(600,1),columns=['A'],
                   index=pd.period_range('2013-01-01 9:00', periods=600, freq='T'))

In [851]:
dfp

Unnamed: 0,A
2013-01-01 09:00,-1.405098
2013-01-01 09:01,0.074392
2013-01-01 09:02,-0.495124
2013-01-01 09:03,-0.423149
2013-01-01 09:04,-1.636965
2013-01-01 09:05,0.428533
2013-01-01 09:06,0.044393
2013-01-01 09:07,-0.891318
2013-01-01 09:08,0.198487
2013-01-01 09:09,-0.691969


In [852]:
dfp['2013-01-01 10H']

Unnamed: 0,A
2013-01-01 10:00,-1.249845
2013-01-01 10:01,-0.628125
2013-01-01 10:02,1.373584
2013-01-01 10:03,0.053493
2013-01-01 10:04,0.769746
2013-01-01 10:05,-0.070654
2013-01-01 10:06,-0.024811
2013-01-01 10:07,-0.543695
2013-01-01 10:08,0.443849
2013-01-01 10:09,-0.211832


In [853]:
dfp['2013-01-01 10H':'2013-01-01 11H']

Unnamed: 0,A
2013-01-01 10:00,-1.249845
2013-01-01 10:01,-0.628125
2013-01-01 10:02,1.373584
2013-01-01 10:03,0.053493
2013-01-01 10:04,0.769746
2013-01-01 10:05,-0.070654
2013-01-01 10:06,-0.024811
2013-01-01 10:07,-0.543695
2013-01-01 10:08,0.443849
2013-01-01 10:09,-0.211832


### Frequency Conversion and Resampling with PeriodIndex

In [854]:
p = pd.Period('2011', freq='A-DEC')
p

Period('2011', 'A-DEC')

In [855]:
p.asfreq('M', how='start')

Period('2011-01', 'M')

In [856]:
p.asfreq('M', how='end')

Period('2011-12', 'M')

In [857]:
p.asfreq('M', 's')

Period('2011-01', 'M')

In [858]:
p.asfreq('M', 'e')

Period('2011-12', 'M')

In [859]:
p = pd.Period('2011-12', freq='M')

In [860]:
p.asfreq('A-NOV')

Period('2012', 'A-NOV')

In [861]:
p = pd.Period('2012Q1', freq='Q-DEC')

In [862]:
p.asfreq('D', 's')

Period('2012-01-01', 'D')

In [863]:
p.asfreq('D', 'e')

Period('2012-03-31', 'D')

In [864]:
p = pd.Period('2011Q4', freq='Q-MAR')

In [865]:
p.asfreq('D', 's')

Period('2011-01-01', 'D')

In [866]:
p.asfreq('D', 'e')

Period('2011-03-31', 'D')

### Converting between Reprsentations

In [867]:
rng = pd.date_range('1/1/2012', periods=5, freq='M')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

2012-01-31   -0.624194
2012-02-29    1.058641
2012-03-31   -0.840506
2012-04-30   -0.674434
2012-05-31    0.281963
Freq: M, dtype: float64

In [868]:
ps = ts.to_period()
ps

2012-01   -0.624194
2012-02    1.058641
2012-03   -0.840506
2012-04   -0.674434
2012-05    0.281963
Freq: M, dtype: float64

In [869]:
ps.to_timestamp()

2012-01-01   -0.624194
2012-02-01    1.058641
2012-03-01   -0.840506
2012-04-01   -0.674434
2012-05-01    0.281963
Freq: MS, dtype: float64

In [870]:
#Remember that ‘s’ and ‘e’ can be used to return the timestamps at the start or end of the period:
ps.to_timestamp('D', how='s')

2012-01-01   -0.624194
2012-02-01    1.058641
2012-03-01   -0.840506
2012-04-01   -0.674434
2012-05-01    0.281963
Freq: MS, dtype: float64

In [871]:
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
ts = pd.Series(np.random.randn(len(prng)), prng)
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('H', 's') + 9
ts.head()

1990-03-01 09:00    0.232139
1990-06-01 09:00   -0.446019
1990-09-01 09:00    0.880813
1990-12-01 09:00   -0.216962
1991-03-01 09:00   -0.244724
Freq: H, dtype: float64

### Representing out-of-bounds spans

In [872]:
span = pd.period_range('1215-01-01', '1381-01-01', freq='D')
span

PeriodIndex(['1215-01-01', '1215-01-02', '1215-01-03', '1215-01-04',
             '1215-01-05', '1215-01-06', '1215-01-07', '1215-01-08',
             '1215-01-09', '1215-01-10',
             ...
             '1380-12-23', '1380-12-24', '1380-12-25', '1380-12-26',
             '1380-12-27', '1380-12-28', '1380-12-29', '1380-12-30',
             '1380-12-31', '1381-01-01'],
            dtype='period[D]', length=60632, freq='D')

In [873]:
s = pd.Series([20121231, 20141130, 99991231])
s

0    20121231
1    20141130
2    99991231
dtype: int64

In [874]:
# To convert from a int64 based YYYYMMDD representation.
def conv(x):
    return pd.Period(year = x // 10000, month = x//100 % 100, day = x%100, freq='D')

In [875]:
s.apply(conv)

0   2012-12-31
1   2014-11-30
2   9999-12-31
dtype: object

In [876]:
s.apply(conv)[2]

Period('9999-12-31', 'D')

In [877]:
# These can easily be converted to a PeriodIndex
span = pd.PeriodIndex(s.apply(conv))
span

PeriodIndex(['2012-12-31', '2014-11-30', '9999-12-31'], dtype='period[D]', freq='D')


## Time Zone Handling
### Working with Time Zones

In [878]:
rng = pd.date_range('3/6/2012 00:00', periods=15, freq='D')

In [879]:
rng.tz is None

True

In [880]:
rng_pytz = pd.date_range('3/6/2012 00:00', periods=10, freq='D',tz='Europe/London')

In [881]:
rng_pytz.tz

<DstTzInfo 'Europe/London' LMT-1 day, 23:59:00 STD>

In [882]:
import dateutil

In [883]:
rng_utc = pd.date_range('3/6/2012 00:00', periods=10, freq='D', tz=dateutil.tz.tzutc())

In [884]:
rng_utc.tz

tzutc()

In [885]:
import pytz

In [886]:
tz_pytz = pytz.timezone('Europe/London')

In [887]:
rng_pytz = pd.date_range('3/6/2012 00:00', periods=10, freq='D', tz=tz_pytz)

In [888]:
rng_pytz.tz == tz_pytz

True

In [889]:
tz_dateutil = dateutil.tz.gettz('Europe/London')

In [890]:
rng_dateutil = pd.date_range('3/6/2012 00:00', periods=10, freq='D', 
                           tz=tz_dateutil)

In [891]:
rng_dateutil.tz == tz_dateutil

True

Timestamps, like Python’s datetime.datetime object can be either time zone naive or time zone aware. Naive time series and DatetimeIndex objects can be localized using tz_localize:

In [892]:
ts = pd.Series(np.random.randn(len(rng)), rng)
ts

2012-03-06    1.764582
2012-03-07    0.090501
2012-03-08    2.407271
2012-03-09    0.202708
2012-03-10    2.361733
2012-03-11    0.301664
2012-03-12    0.036943
2012-03-13    0.587466
2012-03-14    1.063091
2012-03-15    0.761768
2012-03-16    0.743895
2012-03-17    0.943634
2012-03-18    0.474290
2012-03-19   -1.289041
2012-03-20   -0.948882
Freq: D, dtype: float64

In [893]:
ts_utc = ts.tz_localize('UTC')

In [894]:
ts_utc

2012-03-06 00:00:00+00:00    1.764582
2012-03-07 00:00:00+00:00    0.090501
2012-03-08 00:00:00+00:00    2.407271
2012-03-09 00:00:00+00:00    0.202708
2012-03-10 00:00:00+00:00    2.361733
2012-03-11 00:00:00+00:00    0.301664
2012-03-12 00:00:00+00:00    0.036943
2012-03-13 00:00:00+00:00    0.587466
2012-03-14 00:00:00+00:00    1.063091
2012-03-15 00:00:00+00:00    0.761768
2012-03-16 00:00:00+00:00    0.743895
2012-03-17 00:00:00+00:00    0.943634
2012-03-18 00:00:00+00:00    0.474290
2012-03-19 00:00:00+00:00   -1.289041
2012-03-20 00:00:00+00:00   -0.948882
Freq: D, dtype: float64

In [895]:
ts_utc.tz_convert('US/Eastern')

2012-03-05 19:00:00-05:00    1.764582
2012-03-06 19:00:00-05:00    0.090501
2012-03-07 19:00:00-05:00    2.407271
2012-03-08 19:00:00-05:00    0.202708
2012-03-09 19:00:00-05:00    2.361733
2012-03-10 19:00:00-05:00    0.301664
2012-03-11 20:00:00-04:00    0.036943
2012-03-12 20:00:00-04:00    0.587466
2012-03-13 20:00:00-04:00    1.063091
2012-03-14 20:00:00-04:00    0.761768
2012-03-15 20:00:00-04:00    0.743895
2012-03-16 20:00:00-04:00    0.943634
2012-03-17 20:00:00-04:00    0.474290
2012-03-18 20:00:00-04:00   -1.289041
2012-03-19 20:00:00-04:00   -0.948882
Freq: D, dtype: float64

In [896]:
rng_eastern = rng_utc.tz_convert('US/Eastern')
rng_eastern

DatetimeIndex(['2012-03-05', '2012-03-06', '2012-03-07', '2012-03-08',
               '2012-03-09', '2012-03-10', '2012-03-11', '2012-03-12',
               '2012-03-13', '2012-03-14'],
              dtype='datetime64[ns, US/Eastern]', freq='D')

In [897]:
rng_berlin = rng_utc.tz_convert('Europe/Berlin')
rng_berlin

DatetimeIndex(['2012-03-06', '2012-03-07', '2012-03-08', '2012-03-09',
               '2012-03-10', '2012-03-11', '2012-03-12', '2012-03-13',
               '2012-03-14', '2012-03-15'],
              dtype='datetime64[ns, Europe/Berlin]', freq='D')

In [898]:
rng_eastern[5]

Timestamp('2012-03-10 19:00:00-0500', tz='US/Eastern', freq='D')

In [899]:
rng_berlin[5]

Timestamp('2012-03-11 01:00:00+0100', tz='Europe/Berlin', freq='D')

In [900]:
rng_eastern[5] == rng_berlin[5]

True

In [901]:
rng_eastern[5]

Timestamp('2012-03-10 19:00:00-0500', tz='US/Eastern', freq='D')

In [902]:
rng_berlin[5]

Timestamp('2012-03-11 01:00:00+0100', tz='Europe/Berlin', freq='D')

In [903]:
rng_eastern[5].tz_convert('Europe/Berlin')

Timestamp('2012-03-11 01:00:00+0100', tz='Europe/Berlin')

In [904]:
rng[5]

Timestamp('2012-03-11 00:00:00', freq='D')

In [905]:
rng[5].tz_localize('Asia/Shanghai')

Timestamp('2012-03-11 00:00:00+0800', tz='Asia/Shanghai')

In [906]:
eastern = ts_utc.tz_convert('US/Eastern')
eastern

2012-03-05 19:00:00-05:00    1.764582
2012-03-06 19:00:00-05:00    0.090501
2012-03-07 19:00:00-05:00    2.407271
2012-03-08 19:00:00-05:00    0.202708
2012-03-09 19:00:00-05:00    2.361733
2012-03-10 19:00:00-05:00    0.301664
2012-03-11 20:00:00-04:00    0.036943
2012-03-12 20:00:00-04:00    0.587466
2012-03-13 20:00:00-04:00    1.063091
2012-03-14 20:00:00-04:00    0.761768
2012-03-15 20:00:00-04:00    0.743895
2012-03-16 20:00:00-04:00    0.943634
2012-03-17 20:00:00-04:00    0.474290
2012-03-18 20:00:00-04:00   -1.289041
2012-03-19 20:00:00-04:00   -0.948882
Freq: D, dtype: float64

In [907]:
berlin = ts_utc.tz_convert('Europe/Berlin')
berlin

2012-03-06 01:00:00+01:00    1.764582
2012-03-07 01:00:00+01:00    0.090501
2012-03-08 01:00:00+01:00    2.407271
2012-03-09 01:00:00+01:00    0.202708
2012-03-10 01:00:00+01:00    2.361733
2012-03-11 01:00:00+01:00    0.301664
2012-03-12 01:00:00+01:00    0.036943
2012-03-13 01:00:00+01:00    0.587466
2012-03-14 01:00:00+01:00    1.063091
2012-03-15 01:00:00+01:00    0.761768
2012-03-16 01:00:00+01:00    0.743895
2012-03-17 01:00:00+01:00    0.943634
2012-03-18 01:00:00+01:00    0.474290
2012-03-19 01:00:00+01:00   -1.289041
2012-03-20 01:00:00+01:00   -0.948882
Freq: D, dtype: float64

In [908]:
result = eastern + berlin
result

2012-03-06 00:00:00+00:00    3.529163
2012-03-07 00:00:00+00:00    0.181001
2012-03-08 00:00:00+00:00    4.814542
2012-03-09 00:00:00+00:00    0.405416
2012-03-10 00:00:00+00:00    4.723466
2012-03-11 00:00:00+00:00    0.603328
2012-03-12 00:00:00+00:00    0.073885
2012-03-13 00:00:00+00:00    1.174931
2012-03-14 00:00:00+00:00    2.126183
2012-03-15 00:00:00+00:00    1.523537
2012-03-16 00:00:00+00:00    1.487789
2012-03-17 00:00:00+00:00    1.887267
2012-03-18 00:00:00+00:00    0.948580
2012-03-19 00:00:00+00:00   -2.578081
2012-03-20 00:00:00+00:00   -1.897765
Freq: D, dtype: float64

To remove timezone from tz-aware DatetimeIndex, use tz_localize(None) or tz_convert(None). tz_localize(None) will remove timezone holding local time representations. tz_convert(None) will remove timezone after converting to UTC time.



In [909]:
didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern')
didx

DatetimeIndex(['2014-08-01 09:00:00-04:00', '2014-08-01 10:00:00-04:00',
               '2014-08-01 11:00:00-04:00', '2014-08-01 12:00:00-04:00',
               '2014-08-01 13:00:00-04:00', '2014-08-01 14:00:00-04:00',
               '2014-08-01 15:00:00-04:00', '2014-08-01 16:00:00-04:00',
               '2014-08-01 17:00:00-04:00', '2014-08-01 18:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', freq='H')

In [910]:
didx.tz_localize(None)

DatetimeIndex(['2014-08-01 09:00:00', '2014-08-01 10:00:00',
               '2014-08-01 11:00:00', '2014-08-01 12:00:00',
               '2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00'],
              dtype='datetime64[ns]', freq='H')

In [911]:
didx.tz_convert(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

In [912]:
# tz_convert(None) is identical with tz_convert('UTC').tz_localize(None)
didx.tz_convert('UCT').tz_localize(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

### Ambiguous Times when Localizing

In some cases, localize cannot determine the DST and non-DST hours when there are duplicates. This often happens when reading files or database records that simply duplicate the hours. Passing ambiguous='infer' (infer_dst argument in prior releases) into tz_localize will attempt to determine the right offset. Below the top example will fail as it contains ambiguous times and the bottom will infer the right offset.

In [913]:
rng_hourly = pd.DatetimeIndex(['11/06/2011 00:00', '11/06/2011 01:00',
                                    '11/06/2011 01:00', '11/06/2011 02:00',
                                    '11/06/2011 03:00'])
     

In [914]:
# Error is normal for commented code
#rng_hourly.tz_localize('US/Eastern')

In [915]:
rng_hourly_eastern = rng_hourly.tz_localize('US/Eastern', ambiguous='infer')


In [916]:
rng_hourly_eastern.tolist()

[Timestamp('2011-11-06 00:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 02:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 03:00:00-0500', tz='US/Eastern')]

In [917]:
rng_hourly_dst = np.array([1, 1, 0, 0, 0])

In [918]:
rng_hourly.tz_localize('US/Eastern', ambiguous=rng_hourly_dst).tolist()

[Timestamp('2011-11-06 00:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0400', tz='US/Eastern'),
 Timestamp('2011-11-06 01:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 02:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 03:00:00-0500', tz='US/Eastern')]

In [919]:
rng_hourly.tz_localize('US/Eastern', ambiguous='NaT').tolist()

[Timestamp('2011-11-06 00:00:00-0400', tz='US/Eastern'),
 NaT,
 NaT,
 Timestamp('2011-11-06 02:00:00-0500', tz='US/Eastern'),
 Timestamp('2011-11-06 03:00:00-0500', tz='US/Eastern')]

In [920]:
didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern')
didx

DatetimeIndex(['2014-08-01 09:00:00-04:00', '2014-08-01 10:00:00-04:00',
               '2014-08-01 11:00:00-04:00', '2014-08-01 12:00:00-04:00',
               '2014-08-01 13:00:00-04:00', '2014-08-01 14:00:00-04:00',
               '2014-08-01 15:00:00-04:00', '2014-08-01 16:00:00-04:00',
               '2014-08-01 17:00:00-04:00', '2014-08-01 18:00:00-04:00'],
              dtype='datetime64[ns, US/Eastern]', freq='H')

In [921]:
didx.tz_localize(None)

DatetimeIndex(['2014-08-01 09:00:00', '2014-08-01 10:00:00',
               '2014-08-01 11:00:00', '2014-08-01 12:00:00',
               '2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00'],
              dtype='datetime64[ns]', freq='H')

In [922]:
didx.tz_convert(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

In [923]:
# tz_convert(None) is identical with tz_convert('UTC').tz_localize(None)
didx.tz_convert('UCT').tz_localize(None)

DatetimeIndex(['2014-08-01 13:00:00', '2014-08-01 14:00:00',
               '2014-08-01 15:00:00', '2014-08-01 16:00:00',
               '2014-08-01 17:00:00', '2014-08-01 18:00:00',
               '2014-08-01 19:00:00', '2014-08-01 20:00:00',
               '2014-08-01 21:00:00', '2014-08-01 22:00:00'],
              dtype='datetime64[ns]', freq='H')

### TZ Aware DTypes

Series/DatetimeIndex with a timezone naive value are represented with a dtype of datetime64[ns].

In [924]:
s_naive = pd.Series(pd.date_range('20130101',periods=3))
s_naive

0   2013-01-01
1   2013-01-02
2   2013-01-03
dtype: datetime64[ns]

Series/DatetimeIndex with a timezone aware value are represented with a dtype of datetime64[ns, tz].

In [925]:
s_aware = pd.Series(pd.date_range('20130101',periods=3,tz='US/Eastern'))
s_aware

0   2013-01-01 00:00:00-05:00
1   2013-01-02 00:00:00-05:00
2   2013-01-03 00:00:00-05:00
dtype: datetime64[ns, US/Eastern]

Both of these Series can be manipulated via the .dt accessor, see here.

For example, to localize and convert a naive stamp to timezone aware.

In [926]:
# localize and convert a naive timezone
s_naive.astype('datetime64[ns, US/Eastern]')

0   2012-12-31 19:00:00-05:00
1   2013-01-01 19:00:00-05:00
2   2013-01-02 19:00:00-05:00
dtype: datetime64[ns, US/Eastern]

In [927]:
# make an aware tz naive
s_aware.astype('datetime64[ns]')

0   2013-01-01 05:00:00
1   2013-01-02 05:00:00
2   2013-01-03 05:00:00
dtype: datetime64[ns]

In [928]:
# convert to a new timezone
s_aware.astype('datetime64[ns, CET]')

0   2013-01-01 06:00:00+01:00
1   2013-01-02 06:00:00+01:00
2   2013-01-03 06:00:00+01:00
dtype: datetime64[ns, CET]

Note Using the .values accessor on a Series, returns an numpy array of the data. These values are converted to UTC, as numpy does not currently support timezones (even though it is printing in the local timezone!).

In [929]:
s_naive.values

array(['2013-01-01T00:00:00.000000000', '2013-01-02T00:00:00.000000000',
       '2013-01-03T00:00:00.000000000'], dtype='datetime64[ns]')

In [930]:
s_aware.values

array(['2013-01-01T05:00:00.000000000', '2013-01-02T05:00:00.000000000',
       '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')

In [931]:
pd.Series(s_aware.values)

0   2013-01-01 05:00:00
1   2013-01-02 05:00:00
2   2013-01-03 05:00:00
dtype: datetime64[ns]

In [932]:
pd.Series(s_aware.values).dt.tz_localize('UTC').dt.tz_convert('US/Eastern')

0   2013-01-01 00:00:00-05:00
1   2013-01-02 00:00:00-05:00
2   2013-01-03 00:00:00-05:00
dtype: datetime64[ns, US/Eastern]

Fin, Source: https://pandas.pydata.org/pandas-docs/stable/timeseries.html#dateoffset-objects