## Agenda
* Used extensively in finacial data analysis space
* Generate fixed frequency dates & time spans
* Time series to a particular frequency
* Non-standard time series

In [2]:
import pandas as pd

#### Create a range of dates
* In below example we had created 72 periods from 1/1/2011 on frequency = hourly basis

In [17]:
rng = pd.date_range('1/1/2011', periods=72, freq='H')

In [18]:
rng[:10]

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 05:00:00',
               '2011-01-01 06:00:00', '2011-01-01 07:00:00',
               '2011-01-01 08:00:00', '2011-01-01 09:00:00'],
              dtype='datetime64[ns]', freq='H')

In [19]:
import numpy as np

In [20]:
ts=pd.Series(data=np.arange(72),index=rng)

In [21]:
ts

2011-01-01 00:00:00     0
2011-01-01 01:00:00     1
2011-01-01 02:00:00     2
2011-01-01 03:00:00     3
2011-01-01 04:00:00     4
2011-01-01 05:00:00     5
2011-01-01 06:00:00     6
2011-01-01 07:00:00     7
2011-01-01 08:00:00     8
2011-01-01 09:00:00     9
2011-01-01 10:00:00    10
2011-01-01 11:00:00    11
2011-01-01 12:00:00    12
2011-01-01 13:00:00    13
2011-01-01 14:00:00    14
2011-01-01 15:00:00    15
2011-01-01 16:00:00    16
2011-01-01 17:00:00    17
2011-01-01 18:00:00    18
2011-01-01 19:00:00    19
2011-01-01 20:00:00    20
2011-01-01 21:00:00    21
2011-01-01 22:00:00    22
2011-01-01 23:00:00    23
2011-01-02 00:00:00    24
2011-01-02 01:00:00    25
2011-01-02 02:00:00    26
2011-01-02 03:00:00    27
2011-01-02 04:00:00    28
2011-01-02 05:00:00    29
                       ..
2011-01-02 18:00:00    42
2011-01-02 19:00:00    43
2011-01-02 20:00:00    44
2011-01-02 21:00:00    45
2011-01-02 22:00:00    46
2011-01-02 23:00:00    47
2011-01-03 00:00:00    48
2011-01-03 0

#### asfreq
* DataFrame.asfreq(freq, method=None, how=None, normalize=False, fill_value=None)
* Convert TimeSeries to specified frequency.
* Optionally provide filling method to pad/backfill missing values

#### Parameters
* freq : DateOffset object, or string

* method : {‘backfill’/’bfill’, ‘pad’/’ffill’}, default None

* Method to use for filling holes in reindexed Series (note this does not fill NaNs that already were present):

        * ‘pad’ / ‘ffill’: propagate last valid observation forward to next valid
        * ‘backfill’ / ‘bfill’: use NEXT valid observation to fill

In [22]:
ts.asfreq('45min',method='pad')

2011-01-01 00:00:00     0
2011-01-01 00:45:00     0
2011-01-01 01:30:00     1
2011-01-01 02:15:00     2
2011-01-01 03:00:00     3
2011-01-01 03:45:00     3
2011-01-01 04:30:00     4
2011-01-01 05:15:00     5
2011-01-01 06:00:00     6
2011-01-01 06:45:00     6
2011-01-01 07:30:00     7
2011-01-01 08:15:00     8
2011-01-01 09:00:00     9
2011-01-01 09:45:00     9
2011-01-01 10:30:00    10
2011-01-01 11:15:00    11
2011-01-01 12:00:00    12
2011-01-01 12:45:00    12
2011-01-01 13:30:00    13
2011-01-01 14:15:00    14
2011-01-01 15:00:00    15
2011-01-01 15:45:00    15
2011-01-01 16:30:00    16
2011-01-01 17:15:00    17
2011-01-01 18:00:00    18
2011-01-01 18:45:00    18
2011-01-01 19:30:00    19
2011-01-01 20:15:00    20
2011-01-01 21:00:00    21
2011-01-01 21:45:00    21
                       ..
2011-01-03 00:45:00    48
2011-01-03 01:30:00    49
2011-01-03 02:15:00    50
2011-01-03 03:00:00    51
2011-01-03 03:45:00    51
2011-01-03 04:30:00    52
2011-01-03 05:15:00    53
2011-01-03 0

*  Downsample the series into 1 Day bins and sum the values of the timestamps falling into a bin.

In [25]:
d = ts.resample('1D')
d

DatetimeIndexResampler [freq=<Day>, axis=0, closed=left, label=left, convention=start, base=0]

In [26]:
d.sum()

2011-01-01     276
2011-01-02     852
2011-01-03    1428
Freq: D, dtype: int32

### Timestamps vs. Time Spans

In [27]:
from datetime import datetime
pd.Timestamp(datetime(2015,11,11))

Timestamp('2015-11-11 00:00:00')

In [28]:
pd.Timestamp(2015,11,11)

Timestamp('2015-11-11 00:00:00')

In [29]:
pd.Timestamp('2015.11.30')

Timestamp('2015-11-30 00:00:00')

### Period
* many time, its more natural to associate things with span rather than time stamp

In [30]:
pd.Period('2011-01')

Period('2011-01', 'M')

In [31]:
pd.Period('2012-05', freq='D')

Period('2012-05-01', 'D')

### Timestamp as index

In [32]:
dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'), pd.Timestamp('2012-05-03')]

In [33]:
dates

[Timestamp('2012-05-01 00:00:00'),
 Timestamp('2012-05-02 00:00:00'),
 Timestamp('2012-05-03 00:00:00')]

In [35]:
ts = pd.Series(np.random.randn(3), dates)
ts

2012-05-01    0.679588
2012-05-02    0.751919
2012-05-03   -0.910344
dtype: float64

In [37]:
ts.index

DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)

### Period as index

In [43]:
periods = [pd.Period('2012-01'), pd.Period('2012-02'), pd.Period('2012-03')]
periods

[Period('2012-01', 'M'), Period('2012-02', 'M'), Period('2012-03', 'M')]

In [47]:
ts = pd.Series(np.random.randn(3), periods)
ts

2012-01   -0.197938
2012-02    0.658357
2012-03    1.046222
Freq: M, dtype: float64

### Converting to Timestamps

#### Providing a Format Argument
* In addition to the required datetime string, a format argument can be passed to ensure specific parsing. This could also potentially speed up the conversion considerably.

In [48]:
pd.to_datetime(pd.Series(['Jul 31, 2009', 'Nov 22 1985', '2005/11/22']))

0   2009-07-31
1   1985-11-22
2   2005-11-22
dtype: datetime64[ns]

In [49]:
pd.to_datetime('2010/11/12', format='%Y/%m/%d')

Timestamp('2010-11-12 00:00:00')

In [50]:
pd.to_datetime('12-11-2010 00:00', format='%d-%m-%Y %H:%M')

Timestamp('2010-11-12 00:00:00')

## Generating Ranges of Timestamps

#### Genrating date between ranges

In [51]:
start = datetime(2011,1,1)
end = datetime(2012,1,1)

In [52]:
pd.date_range(start,end, freq='M')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31'],
              dtype='datetime64[ns]', freq='M')

#### Generating date of periods

In [53]:
pd.date_range(start, periods=1000, freq='M')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',
               '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',
               '2094-03-31', '2094-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

In [54]:
s=datetime(2018,1,1)
e=datetime(2018,12,30)

In [57]:
pd.bdate_range(s,e)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10',
               '2018-01-11', '2018-01-12',
               ...
               '2018-12-17', '2018-12-18', '2018-12-19', '2018-12-20',
               '2018-12-21', '2018-12-24', '2018-12-25', '2018-12-26',
               '2018-12-27', '2018-12-28'],
              dtype='datetime64[ns]', length=260, freq='B')

#### Custom Frequency Dates

In [58]:
weekmask = 'Mon Wed Fri'

In [63]:
holidays = [datetime(2018, 1, 5), datetime(2018, 3, 14)]

In [65]:
pd.bdate_range(start=s,end=e,weekmask=weekmask,holidays=holidays)

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-08', '2018-01-09', '2018-01-10',
               '2018-01-11', '2018-01-12',
               ...
               '2018-12-17', '2018-12-18', '2018-12-19', '2018-12-20',
               '2018-12-21', '2018-12-24', '2018-12-25', '2018-12-26',
               '2018-12-27', '2018-12-28'],
              dtype='datetime64[ns]', length=260, freq='B')

In [66]:
from pandas.tseries.holiday import USFederalHolidayCalendar

In [67]:
s=USFederalHolidayCalendar()

In [69]:
s.holidays()

DatetimeIndex(['1970-01-01', '1970-02-16', '1970-05-25', '1970-07-03',
               '1970-09-07', '1970-10-12', '1970-11-11', '1970-11-26',
               '1970-12-25', '1971-01-01',
               ...
               '2030-01-01', '2030-01-21', '2030-02-18', '2030-05-27',
               '2030-07-04', '2030-09-02', '2030-10-14', '2030-11-11',
               '2030-11-28', '2030-12-25'],
              dtype='datetime64[ns]', length=594, freq=None)

### Aggregation

In [80]:
df=pd.DataFrame(np.random.randn(1000,3),columns=['A','B','C'],index=pd.date_range('1/1/2012',periods=1000,freq='h'))

In [81]:
df.head()

Unnamed: 0,A,B,C
2012-01-01 00:00:00,-0.326725,-0.415778,-0.18663
2012-01-01 01:00:00,-0.295156,-0.094985,1.223674
2012-01-01 02:00:00,-0.886081,0.468568,0.341677
2012-01-01 03:00:00,0.779058,-0.5806,-1.55089
2012-01-01 04:00:00,-0.069026,-0.968917,-3.178399


In [83]:
df.resample('3D').mean()

Unnamed: 0,A,B,C
2012-01-01,0.015487,0.063134,-0.031526
2012-01-04,-0.035064,-0.084913,0.062766
2012-01-07,0.002723,-0.050548,-0.164737
2012-01-10,0.04647,-0.081576,0.20962
2012-01-13,-0.189809,0.024394,-0.124559
2012-01-16,-0.002961,0.153965,-0.052396
2012-01-19,0.026364,0.06282,0.133791
2012-01-22,0.022151,0.013832,0.104441
2012-01-25,-0.031745,-0.182263,-0.054309
2012-01-28,-0.006089,0.185475,-0.174273


In [95]:
r=df.resample('3D')

In [96]:
r.agg([np.sum,np.mean])

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,sum,mean,sum,mean,sum,mean
2012-01-01,1.115037,0.015487,4.545678,0.063134,-2.269864,-0.031526
2012-01-04,-2.524636,-0.035064,-6.113717,-0.084913,4.51915,0.062766
2012-01-07,0.196064,0.002723,-3.639486,-0.050548,-11.86109,-0.164737
2012-01-10,3.345868,0.04647,-5.873474,-0.081576,15.092643,0.20962
2012-01-13,-13.666271,-0.189809,1.756354,0.024394,-8.968249,-0.124559
2012-01-16,-0.213214,-0.002961,11.085483,0.153965,-3.772497,-0.052396
2012-01-19,1.898177,0.026364,4.523011,0.06282,9.632935,0.133791
2012-01-22,1.59484,0.022151,0.995875,0.013832,7.519746,0.104441
2012-01-25,-2.285675,-0.031745,-13.12291,-0.182263,-3.910255,-0.054309
2012-01-28,-0.438403,-0.006089,13.3542,0.185475,-12.54765,-0.174273
