In [7]:
import datetime

import numpy as np
import pandas as pd

# Pandas Dataframe Resampling

## time series / date functionality

Experimenting with dates and time series in preparation for resampling.

Parsing time series from different formats

In [8]:
# The same calendar day given three ways - a US-style date string, a NumPy
# datetime64 and a standard-library datetime - parses to identical timestamps.
# (`datetime` is imported in the notebook's top import cell.)
dti = pd.to_datetime(
    ["1/1/2018", np.datetime64("2018-01-01"), datetime.datetime(2018, 1, 1)]
)
dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

Generate sequences of fixed-frequency dates and time spans

In [19]:
# Three consecutive hourly timestamps starting at midnight on 2018-01-01.
# Lowercase 'h' is the hour alias; newer pandas releases deprecate uppercase 'H'.
dti = pd.date_range('2018-01-01', periods=3, freq='h')
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq='H')

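Manipulating and converting datetimes with time zone information. (To go back to a tz-naive index, use `tz_localize(None)`, which drops the zone but keeps the local wall-clock times, or `tz_convert(None)`, which converts to UTC and then drops the zone.)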

In [33]:
# Attach UTC only while the index is still tz-naive, so re-running this cell
# does not try to localize an index that already carries a time zone.
# `is None` is the identity test for "no tz set"; `== None` would defer to the
# tz object's own equality logic.
if dti.tz is None:
    dti = dti.tz_localize("UTC")
dti

DatetimeIndex(['2018-01-01 00:00:00+00:00', '2018-01-01 01:00:00+00:00',
               '2018-01-01 02:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [34]:
# Express the same instants in US Pacific time; `dti` itself is not reassigned.
dti.tz_convert(tz="US/Pacific")

DatetimeIndex(['2017-12-31 16:00:00-08:00', '2017-12-31 17:00:00-08:00',
               '2017-12-31 18:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq=None)

## Resampling example
Resampling or converting a time series to a particular frequency

In [42]:
# Build a five-point hourly time series whose values are simply 0..4.
# Lowercase 'h' is the hour alias; newer pandas releases deprecate uppercase 'H'.
idx = pd.date_range('2010-01-01', periods=5, freq='h')
ts = pd.Series(range(len(idx)), index=idx)
ts

2010-01-01 00:00:00    0
2010-01-01 01:00:00    1
2010-01-01 02:00:00    2
2010-01-01 03:00:00    3
2010-01-01 04:00:00    4
Freq: H, dtype: int64

In [43]:
# Downsample to 2-hour bins, taking the mean of the values in each bin as that
# bin's value. Lowercase 'h' is the hour alias; newer pandas releases deprecate
# the uppercase 'H' spelling.
ts.resample("2h").mean()

2010-01-01 00:00:00    0.5
2010-01-01 02:00:00    2.5
2010-01-01 04:00:00    4.0
Freq: 2H, dtype: float64

## Another example
Resample by summing the values that fall into each bucketed time period

In [45]:
# Nine one-minute timestamps carrying the values 0..8.
# 'min' is the minute alias; newer pandas releases deprecate 'T'.
idx = pd.date_range('1/1/2000', periods=9, freq='min')
# Index the series by `idx`, the range built on the line above. The name
# `index` is not defined by any visible cell, so using it raises NameError
# on a fresh kernel.
ts = pd.Series(range(len(idx)), index=idx)
ts

2000-01-01 00:00:00    0
2000-01-01 00:01:00    1
2000-01-01 00:02:00    2
2000-01-01 00:03:00    3
2000-01-01 00:04:00    4
2000-01-01 00:05:00    5
2000-01-01 00:06:00    6
2000-01-01 00:07:00    7
2000-01-01 00:08:00    8
Freq: T, dtype: int64

Resample into 3-minute buckets and sum the values in each bucket.

In [46]:
# Group the one-minute samples into 3-minute bins and total each bin.
# 'min' is the minute alias; newer pandas releases deprecate 'T'.
ts.resample('3min').sum()

2000-01-01 00:00:00     3
2000-01-01 00:03:00    12
2000-01-01 00:06:00    21
Freq: 3T, dtype: int64

## Time arithmetic

In [47]:
# Anchor date for the arithmetic below; the cell shows its weekday name.
friday = pd.Timestamp("2018-01-05")
friday.day_name()

'Friday'

Add 1 day

In [48]:
# A Timedelta shifts a Timestamp by a fixed duration: Friday plus one day.
saturday = friday + pd.Timedelta("1 day")
saturday.day_name()

'Saturday'

Add 2 business days (business days run Monday to Friday, so the weekend is skipped)

In [50]:
# A business-day offset skips the weekend: two business days after Friday
# is Tuesday.
tuesday = friday + pd.offsets.BDay(n=2)
tuesday.day_name()

'Tuesday'

Date arithmetic

In [51]:
# Subtracting one Timestamp from another yields a Timedelta (the elapsed time
# between the two dates computed in the cells above).
tuesday - saturday

Timedelta('3 days 00:00:00')

Null date values

In [57]:
# Building a Timestamp from NaT gives back NaT, the null value for datetimes.
pd.Timestamp(pd.NaT)

NaT