In [8]:
import pandas as pd
import numpy as np

# Time in Pandas
https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

4 kinds in Pandas:
- Date times: 

    Creation: `to_datetime`, `date_range`

    Scalar (single value): `Timestamp`

- Time deltas:

    Creation: `to_timedelta`, `timedelta_range`

    Scalar (single value): `Timedelta`

- Time spans:

    Creation: `Period`, `Period_range`

    Scalar (single value): `Period`

- Date offsets:

    Creation: `DateOffset`

    Scalar: `DateOffset`


## Frequency

*S after a freq represents start. B in front of a freq means business setting*. 

B: business day

C: custom business day

D: calendar day

W: weekly

M: month **end**

SM: semi-month end

BM: business month end

MS: month **start**

Q: quarter

BQ: business quarter end

Y: year end

H: hour

T: minute

S: second

In [19]:
pd.date_range('2022-01-01', periods=7, freq='AS')

DatetimeIndex(['2022-01-01', '2023-01-01', '2024-01-01', '2025-01-01',
               '2026-01-01', '2027-01-01', '2028-01-01'],
              dtype='datetime64[ns]', freq='AS-JAN')

**Custom Frequency**

When customizing frequency, one have to use `bdate_range` rather than `date_range`.

In [72]:
weekmask = 'Mon Wed Fri'
holidays = ['2022-01-05']
pd.bdate_range('2022-01-01', periods=10, freq='C', weekmask=weekmask, holidays=holidays)

DatetimeIndex(['2022-01-03', '2022-01-07', '2022-01-10', '2022-01-12',
               '2022-01-14', '2022-01-17', '2022-01-19', '2022-01-21',
               '2022-01-24', '2022-01-26'],
              dtype='datetime64[ns]', freq='C')

## Time Zone


In [35]:
time = pd.date_range('2022-01-01', periods=7, freq='D', tz='UTC')
# time = time.tz_localize('UTC')
print(time)
time = time.tz_convert('US/Pacific')
print(time)

DatetimeIndex(['2022-01-01 00:00:00+00:00', '2022-01-02 00:00:00+00:00',
               '2022-01-03 00:00:00+00:00', '2022-01-04 00:00:00+00:00',
               '2022-01-05 00:00:00+00:00', '2022-01-06 00:00:00+00:00',
               '2022-01-07 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', freq='D')
DatetimeIndex(['2021-12-31 16:00:00-08:00', '2022-01-01 16:00:00-08:00',
               '2022-01-02 16:00:00-08:00', '2022-01-03 16:00:00-08:00',
               '2022-01-04 16:00:00-08:00', '2022-01-05 16:00:00-08:00',
               '2022-01-06 16:00:00-08:00'],
              dtype='datetime64[ns, US/Pacific]', freq='D')


## Resample
Only apply to `pd.Series`.

In [37]:
# time.resample('2D').mean() <- this does not work. 
ts = pd.Series(range(len(time)), index=time)
ts.resample('2D').mean()

2021-12-31 00:00:00-08:00    0.5
2022-01-02 00:00:00-08:00    2.5
2022-01-04 00:00:00-08:00    4.5
2022-01-06 00:00:00-08:00    6.0
Freq: 2D, dtype: float64

## Offsets
A easier way to increment certain time period than `pd.Timedelta`.

In [49]:
tp = pd.Timestamp('2022-01-01')
print(tp.day_name, tp.day_of_week, tp.day_of_year, tp.is_leap_year, tp.is_month_start)
tp = tp + pd.Timedelta('1 day')
print(tp)

tp = tp + pd.offsets.MonthEnd(2)
print(tp)

<built-in method day_name of Timestamp object at 0x7fe6b5f32b50> 5 1 False True
2022-01-02 00:00:00
2022-02-28 00:00:00


## Periods

In [51]:
pd.period_range('2022-01-01', freq='M', periods=3)

PeriodIndex(['2022-01', '2022-02', '2022-03'], dtype='period[M]')

In [54]:
pd.Period('2022')

Period('2022', 'A-DEC')

## Timestamps
`errors` parameter can be `raise`, `ignore`, `coerce`.

In [61]:
pd.to_datetime(['2006-12-31 00:00', '2022-01-01 00:00'], format='%Y-%m-%d %H:%M')

DatetimeIndex(['2006-12-31', '2022-01-01'], dtype='datetime64[ns]', freq=None)

Generate time from data frame. 

In [64]:
df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5], "hour": [2, 3]})
pd.to_datetime(df[['year', 'month', 'day']])

0   2015-02-04
1   2016-03-05
dtype: datetime64[ns]

Generate from `unit`, which can be D, s, ms, us, ns.

> **Warning**
cannot take in tz for `to_datetime`

In [69]:
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit="s").tz_localize(
    'UTC')

DatetimeIndex(['2012-10-08 18:15:05+00:00', '2012-10-09 18:15:05+00:00',
               '2012-10-10 18:15:05+00:00', '2012-10-11 18:15:05+00:00',
               '2012-10-12 18:15:05+00:00'],
              dtype='datetime64[ns, UTC]', freq=None)

In [70]:
pd.to_datetime([1, 2, 3], unit='D', origin='1960-01-01')

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)

## Indexing

In [74]:
dft = pd.DataFrame(np.random.randn(100000, 1), columns=["A"], index=pd.date_range(
    "20130101", periods=100000, freq="T"))
dft['2013-02']
dft.loc['2013-02', ]

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,A
2013-02-01 00:00:00,-0.710944
2013-02-01 00:01:00,1.601816
2013-02-01 00:02:00,-0.429343
2013-02-01 00:03:00,0.345044
2013-02-01 00:04:00,-1.188630
...,...
2013-02-28 23:55:00,-0.473344
2013-02-28 23:56:00,1.174343
2013-02-28 23:57:00,1.089370
2013-02-28 23:58:00,0.297622


In particular, ``loc`` includes both start time/index and end time/index. 

Now, consider multi-indexing...

In [79]:
dft2 = pd.DataFrame(np.random.randn(20, 1), columns=["A"], 
                    index=pd.MultiIndex.from_product([pd.date_range("20130101", periods=10, freq="12H"), ["a", "b"]]), )

# dft2['2013-01-05'] <- this can not work. 
dft2.loc['2013-01-05']

Unnamed: 0,Unnamed: 1,A
2013-01-05 00:00:00,a,-1.105788
2013-01-05 00:00:00,b,-1.149616
2013-01-05 12:00:00,a,-0.999896
2013-01-05 12:00:00,b,-0.17416


Another way to select: `truncate`. Use `before` and `after` to truncate the unwanted dates. 

In [80]:
dft.truncate(before='2013-02-05', after='2013-02-20')

Unnamed: 0,A
2013-02-05 00:00:00,-0.009462
2013-02-05 00:01:00,-0.493820
2013-02-05 00:02:00,-1.489708
2013-02-05 00:03:00,-1.854218
2013-02-05 00:04:00,-1.689612
...,...
2013-02-19 23:56:00,1.533875
2013-02-19 23:57:00,1.176951
2013-02-19 23:58:00,1.144231
2013-02-19 23:59:00,-0.986800
