In [14]:
import pandas as pd
import numpy as np

from datetime import date

In [15]:
# Show the pandas and numpy versions this notebook was executed with.
pd.__version__,np.__version__

('0.24.2', '1.16.2')

In [16]:
# Small example table of people. Birth dates are plain ISO-formatted strings
# here; they are converted to real datetimes further down.
people = {
    'name': ['john', 'mary', 'peter', 'jeff', 'bill'],
    'date_of_birth': [
        '2000-01-01',
        '1999-12-20',
        '2000-11-01',
        '1995-02-25',
        '1992-06-30',
    ],
}
df = pd.DataFrame(people)
df

Unnamed: 0,date_of_birth,name
0,2000-01-01,john
1,1999-12-20,mary
2,2000-11-01,peter
3,1995-02-25,jeff
4,1992-06-30,bill


In [17]:
# Inspect the index of the freshly built frame (a default RangeIndex).
df.index

RangeIndex(start=0, stop=5, step=1)

## Use a date column as the index

In [18]:
# Parse the ISO date strings with an explicit format so pandas does not have
# to guess the layout of each value.
datetime_series = pd.to_datetime(df['date_of_birth'], format='%Y-%m-%d')

# Build the index from the raw values, so the resulting index is unnamed.
datetime_index = pd.DatetimeIndex(datetime_series.values)

# Chain set_index/drop into one expression instead of mutating df2 afterwards
# with inplace=True; the source frame df is left unchanged.
df2 = df.set_index(datetime_index).drop(columns='date_of_birth')

df2

Unnamed: 0,name
2000-01-01,john
1999-12-20,mary
2000-11-01,peter
1995-02-25,jeff
1992-06-30,bill


In [19]:
# Check that the index is now a DatetimeIndex rather than a RangeIndex.
df2.index

DatetimeIndex(['2000-01-01', '1999-12-20', '2000-11-01', '1995-02-25',
               '1992-06-30'],
              dtype='datetime64[ns]', freq=None)

## Fill missing periods in a datetime index (`asfreq`)

In [20]:
# Same people as before, but this time only the birth year is known, stored
# as a four-digit string.
people_by_year = {
    'name': ['john', 'mary', 'peter', 'jeff', 'bill'],
    'year_born': ['2000', '1999', '2001', '1995', '1992'],
}
df = pd.DataFrame(people_by_year)
df

Unnamed: 0,name,year_born
0,john,2000
1,mary,1999
2,peter,2001
3,jeff,1995
4,bill,1992


In [21]:
# Inspect the index of the rebuilt frame (a default RangeIndex again).
df.index

RangeIndex(start=0, stop=5, step=1)

In [22]:
# The values are bare four-digit years; state the format explicitly so they
# are parsed as years (each becomes 1 January of that year) instead of relying
# on format inference.
datetime_series = pd.to_datetime(df['year_born'], format='%Y')

# Build the index from the raw values, so the resulting index is unnamed.
datetime_index = pd.DatetimeIndex(datetime_series.values)

# Build df3 in a single chained expression rather than mutating it with
# inplace=True. Sorting by date gives a chronologically ordered index.
df3 = (
    df.set_index(datetime_index)
      .drop(columns='year_born')
      .sort_index()
)

df3

Unnamed: 0,name
1992-01-01,bill
1995-01-01,jeff
1999-01-01,mary
2000-01-01,john
2001-01-01,peter


In [23]:
# The index is a sorted DatetimeIndex, but it carries no frequency yet.
df3.index

DatetimeIndex(['1992-01-01', '1995-01-01', '1999-01-01', '2000-01-01',
               '2001-01-01'],
              dtype='datetime64[ns]', freq=None)

In [24]:
# Re-express the frame on a regular year-start ('YS') grid. Years that have no
# row in df3 are inserted with a missing value for name.
df4 = df3.asfreq(freq='YS')
df4

Unnamed: 0,name
1992-01-01,bill
1993-01-01,
1994-01-01,
1995-01-01,jeff
1996-01-01,
1997-01-01,
1998-01-01,
1999-01-01,mary
2000-01-01,john
2001-01-01,peter


In [25]:
# After asfreq the index has one entry per year and carries a frequency.
df4.index

DatetimeIndex(['1992-01-01', '1993-01-01', '1994-01-01', '1995-01-01',
               '1996-01-01', '1997-01-01', '1998-01-01', '1999-01-01',
               '2000-01-01', '2001-01-01'],
              dtype='datetime64[ns]', freq='AS-JAN')

## Create lagged columns (`shift`)

In [51]:
# Seed the generator so the random readings (and the lag columns derived from
# them) are identical on every Restart & Run All. RandomState is used because
# the numpy version shown above (1.16.x) predates np.random.default_rng.
SEED = 42
rng = np.random.RandomState(SEED)

# One reading per day for 2019-01-01 .. 2019-01-10, drawn uniformly from
# [0, 100).
df = pd.DataFrame(
    data={'reading': rng.uniform(high=100, size=10)},
    index=pd.to_datetime([date(2019, 1, d) for d in range(1, 11)]),
)
df

Unnamed: 0,reading
2019-01-01,75.350298
2019-01-02,56.180706
2019-01-03,83.672698
2019-01-04,98.63128
2019-01-05,12.410272
2019-01-06,86.110112
2019-01-07,10.827618
2019-01-08,24.023237
2019-01-09,57.848769
2019-01-10,10.808829


In [57]:
# Add one lagged copy of 'reading' per lag. shift(lag, freq='D') moves the
# timestamps forward by `lag` days, and the column assignment aligns on the
# index, so each row receives the reading from `lag` days earlier (missing
# where no such day exists).
for lag in (1, 2):
    df['reading_d_minus_{}'.format(lag)] = df['reading'].shift(lag, freq='D')

In [58]:
# Display the readings alongside their 1-day and 2-day lagged values.
df

Unnamed: 0,reading,reading_d_minus_1,reading_d_minus_2
2019-01-01,75.350298,,
2019-01-02,56.180706,75.350298,
2019-01-03,83.672698,56.180706,75.350298
2019-01-04,98.63128,83.672698,56.180706
2019-01-05,12.410272,98.63128,83.672698
2019-01-06,86.110112,12.410272,98.63128
2019-01-07,10.827618,86.110112,12.410272
2019-01-08,24.023237,10.827618,86.110112
2019-01-09,57.848769,24.023237,10.827618
2019-01-10,10.808829,57.848769,24.023237
