In [125]:
from pandas import DataFrame, Series
import pandas as pd
import numpy as np
import datetime

# Main classes

In [164]:
pd.Timestamp('2018-01-01')

Timestamp('2018-01-01 00:00:00')

In [165]:
p = pd.Period('2018-01-01')
p

Period('2018-01-01', 'D')

In [166]:
pd.Timedelta(days=1)

Timedelta('1 days 00:00:00')

In [167]:
pd.to_datetime('2010/11/12', format='%Y/%m/%d')

Timestamp('2010-11-12 00:00:00')

## Conversion
Conversion of float epoch times can lead to inaccurate and unexpected results. Python floats have about 15 digits precision in decimal. Rounding during conversion from float to high precision Timestamp is unavoidable. The only way to achieve exact precision is to use a fixed-width types (e.g. an int64).

In [185]:
pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s')

DatetimeIndex(['2017-03-22 15:16:45.433000088', '2017-03-22 15:16:45.433502913'], dtype='datetime64[ns]', freq=None)

In [190]:
pd.to_datetime(1490195805433502912, unit='ns')

Timestamp('2017-03-22 15:16:45.433502912')

Use origin to specify a alternative starting point. It defaults to 1970-01-01 00:00:00. Commonly called ‘unix epoch’ or POSIX time.

In [194]:
pd.to_datetime([1, 2, 3], unit='D', origin=pd.Timestamp('1960-01-01'))

DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)

In [193]:
stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D')
(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')

Int64Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')

## properties

## Erorr handling

In [172]:
pd.to_datetime(['2009/07/31', 'asd'], errors='ignore')

Index(['2009/07/31', 'asd'], dtype='object')

In [174]:
pd.to_datetime(['2009/07/31', 'asd'], errors='coerce')

DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None)

In [175]:
pd.to_datetime(['2009/07/31', 'asd'], errors='raise')

ValueError: ('Unknown string format:', 'asd')

# Create TimeSeries

## support a list of date representation

```pd.to_datetime``` support differnet time representation

In [178]:
dti = pd.to_datetime(
    [
        '1/1/2018',
        '2018-1-1',
        np.datetime64('2018-01-01'), 
        datetime.datetime(2018, 1, 1)
    ]
)
dti

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

## use date_range

For more available freq, see here []()

In [148]:
dti = pd.date_range('2018-01-01', periods=3, freq='H')
dti

DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',
               '2018-01-01 02:00:00'],
              dtype='datetime64[ns]', freq='H')

[Frequency Alias](http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases)

In [176]:
pd.to_datetime([1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit='s')

DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',
               '2012-10-10 18:15:05', '2012-10-11 18:15:05',
               '2012-10-12 18:15:05'],
              dtype='datetime64[ns]', freq=None)

In [182]:
pd.DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], freq='infer')

DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)

# Miss values

In [151]:
pd.Timestamp(pd.NaT)


pd.Timedelta(pd.NaT)


pd.Period(pd.NaT)

NaT

## Assembling Datetime from Multiple DataFrame Columns

In [169]:
df = pd.DataFrame(
    {'year': [2015, 2016],
    'month': [2, 3],
    'day': [4, 5],
    'hour': [2, 3]
    }
)


pd.to_datetime(df)

0   2015-02-04 02:00:00
1   2016-03-05 03:00:00
dtype: datetime64[ns]

* required: year, month, day
* optional: hour, minute, second, millisecond, microsecond, nanosecond

In [199]:
stamps = pd.date_range('2012-10-08 18:15:05', periods=4, freq='D') 
(stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta(1)

Int64Index([1349720105000000000, 1349806505000000000, 1349892905000000000,
            1349979305000000000],
           dtype='int64')

In [204]:
start = datetime.datetime(2011, 1, 1)

end = datetime.datetime(2012, 1, 1)

pd.date_range(start, periods=1000, freq='M')

DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31',
               ...
               '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',
               '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',
               '2094-03-31', '2094-04-30'],
              dtype='datetime64[ns]', length=1000, freq='M')

In [205]:
pd.bdate_range(start, periods=250, freq='BQS')

DatetimeIndex(['2011-01-03', '2011-04-01', '2011-07-01', '2011-10-03',
               '2012-01-02', '2012-04-02', '2012-07-02', '2012-10-01',
               '2013-01-01', '2013-04-01',
               ...
               '2071-01-01', '2071-04-01', '2071-07-01', '2071-10-01',
               '2072-01-01', '2072-04-01', '2072-07-01', '2072-10-03',
               '2073-01-02', '2073-04-03'],
              dtype='datetime64[ns]', length=250, freq='BQS-JAN')

## weekmask and holidays

In [209]:
weekmask = 'Mon Wed Fri'

start = datetime.datetime(2011, 1, 1)

end = datetime.datetime(2012, 1, 1)

holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)]

pd.bdate_range(start, end, freq='C', weekmask=weekmask, holidays=holidays)

DatetimeIndex(['2011-01-03', '2011-01-07', '2011-01-10', '2011-01-12',
               '2011-01-14', '2011-01-17', '2011-01-19', '2011-01-21',
               '2011-01-24', '2011-01-26',
               ...
               '2011-12-09', '2011-12-12', '2011-12-14', '2011-12-16',
               '2011-12-19', '2011-12-21', '2011-12-23', '2011-12-26',
               '2011-12-28', '2011-12-30'],
              dtype='datetime64[ns]', length=154, freq='C')

## Partial String Index

In [212]:
rng = pd.date_range(start, end, freq='BM')

ts = pd.Series(np.random.randn(len(rng)), index=rng)

ts

2011-01-31    1.252755
2011-02-28    1.162303
2011-03-31    0.453864
2011-04-29    1.778205
2011-05-31    0.764373
2011-06-30    0.112225
2011-07-29   -0.850050
2011-08-31    0.592459
2011-09-30   -1.435249
2011-10-31    0.909876
2011-11-30   -0.933673
2011-12-30    0.998985
Freq: BM, dtype: float64

In [213]:
ts['1/31/2011']

1.2527549891655907

In [217]:
ts[datetime.datetime(2011, 1, 31)]

1.2527549891655907

In [218]:
ts[datetime.datetime(2011, 1, 31, 0, 0, 1)]

KeyError: Timestamp('2011-01-31 00:00:01')

In [214]:
ts['2011']

2011-01-31    1.252755
2011-02-28    1.162303
2011-03-31    0.453864
2011-04-29    1.778205
2011-05-31    0.764373
2011-06-30    0.112225
2011-07-29   -0.850050
2011-08-31    0.592459
2011-09-30   -1.435249
2011-10-31    0.909876
2011-11-30   -0.933673
2011-12-30    0.998985
Freq: BM, dtype: float64

## Exact Slicing

In [220]:
dft[datetime.datetime(2013, 1, 1, 10, 12, 0):datetime.datetime(2013, 2, 28, 10, 12, 0)]

Unnamed: 0,A
2013-01-01 10:12:00,-0.219543
2013-01-01 10:13:00,0.022396
2013-01-01 10:14:00,0.892383
2013-01-01 10:15:00,-0.937760
2013-01-01 10:16:00,1.325598
2013-01-01 10:17:00,1.004643
2013-01-01 10:18:00,-1.937513
2013-01-01 10:19:00,-1.302677
2013-01-01 10:20:00,-2.383274
2013-01-01 10:21:00,-1.329756


## Truncate

In [225]:
rng2 = pd.date_range('2011-01-01', '2012-01-01', freq='W')

ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)

```truncate``` assume 0 for any unspecified date components

In [226]:
ts2.truncate(before='2011-11', after='2011-12')

2011-11-06   -0.624789
2011-11-13   -0.317121
2011-11-20   -0.775552
2011-11-27    0.238854
Freq: W-SUN, dtype: float64

In [227]:
ts2['2011-11':'2011-12']

2011-11-06   -0.624789
2011-11-13   -0.317121
2011-11-20   -0.775552
2011-11-27    0.238854
2011-12-04    2.165130
2011-12-11    0.340883
2011-12-18    1.242340
2011-12-25    2.101040
Freq: W-SUN, dtype: float64

[frequency alias](http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases)


In [None]:
One of the main uses for DatetimeIndex is as an index for pandas objects. The DatetimeIndex class contains many time series related optimizations:

A large range of dates for various offsets are pre-computed and cached under the hood in order to make generating subsequent date ranges very fast (just have to grab a slice).
Fast shifting using the shift and tshift method on pandas objects.
Unioning of overlapping DatetimeIndex objects with the same frequency is very fast (important for fast data alignment).
Quick access to date fields via properties such as year, month, etc.
Regularization functions like snap and very fast asof logic.
DatetimeIndex objects have all the basic functionality of regular Index objects, and a smorgasbord of advanced time series specific methods for easy frequency processing.

In [84]:
rng = pd.date_range(start, end, freq='BM')

ts = pd.Series(np.random.randn(len(rng)), index=rng)

ts


2011-01-31    0.351271
2011-02-28    0.084233
2011-03-31    1.085598
2011-04-29   -0.192880
2011-05-31   -2.858873
2011-06-30    0.400600
2011-07-29    1.561299
2011-08-31    0.096220
2011-09-30   -0.095683
2011-10-31    0.681281
2011-11-30    0.049072
2011-12-30   -1.981617
Freq: BM, dtype: float64

[link](http://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#time-date-components)

In [103]:
ts = pd.Timestamp('2014-01-01 09:00')

day = pd.offsets.Day()

day.apply(ts)

Timestamp('2014-01-02 09:00:00')

Timestamp('2009-06-30 09:00:00')

In [114]:
rng = pd.date_range('2012-01-01', '2012-01-03')

s = pd.Series(rng)

s

0   2012-01-01
1   2012-01-02
2   2012-01-03
dtype: datetime64[ns]

In [113]:
rng + pd.DateOffset(months=2)

DatetimeIndex(['2012-03-01', '2012-03-02', '2012-03-03'], dtype='datetime64[ns]', freq='D')