In [1]:
import pandas as pd 
import numpy as np 

# Parse a compact YYYYMMDD string straight into a pandas Timestamp.
dt = pd.to_datetime('20110107')

In [2]:
# Display the parsed value: the string is now a pandas Timestamp.
dt

Timestamp('2011-01-07 00:00:00')

In [3]:
# A dashed ISO-style date parses to the same Timestamp as the compact form.
pd.to_datetime('2011-01-07')

Timestamp('2011-01-07 00:00:00')

In [4]:
# Calendar fields of the timestamp, one per line
print(dt.year, dt.month, dt.day, sep='\n')

# Intraday fields, e.g. for studying price behavior at a particular
# hour, minute or second of the day
print(dt.hour, dt.minute, dt.second, sep='\n')

2011
1
7
0
0
0


In [5]:
# Day of the week as an integer, Monday=0 ... Sunday=6, so 5 and 6 are the weekend
# (useful for checking whether a date falls on a weekend).
dt.weekday()

4

In [6]:
# to_datetime also parses a time-of-day component (hours/minutes/seconds)
pd.to_datetime('20110107 12:10:30')

Timestamp('2011-01-07 12:10:30')

In [7]:
# Convert back to a string; '%Y%m%d' is 4-digit year, 2-digit month, 2-digit day
dt.strftime('%Y%m%d')

'20110107'

Check documentation for other string formatting codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [8]:
# Shift a date by adding an offset; Day() with no argument moves it forward one calendar day
dt + pd.tseries.offsets.Day()

Timestamp('2011-01-08 00:00:00')

In [9]:
# Offsets take a count: Day(2) shifts forward by two calendar days
dt + pd.tseries.offsets.Day(2)

Timestamp('2011-01-09 00:00:00')

In [10]:
# BDay() shifts by one business day, skipping the weekend:
# Friday 2011-01-07 moves to Monday 2011-01-10
dt + pd.tseries.offsets.BDay()

Timestamp('2011-01-10 00:00:00')

In [11]:
# Snap a date onto an offset: rollforward moves to the next month end,
# rollback to the previous one (a date already on a month end is left as is)
print(
    pd.tseries.offsets.MonthEnd().rollforward(dt),
    pd.tseries.offsets.MonthEnd().rollback(dt),
    sep='\n',
)

2011-01-31 00:00:00
2010-12-31 00:00:00


Check documentation for other offsets: https://pandas.pydata.org/docs/reference/offset_frequency.html

In [12]:
# Subtracting one Timestamp from another gives a Timedelta
# (e.g. to count the days until the next earnings date).
dt1, dt2 = (pd.to_datetime(text) for text in ('20110101', '20120630'))
diff = dt2 - dt1
diff

Timedelta('546 days 00:00:00')

In [13]:
# Whole-day component of the Timedelta
diff.days

546

In [14]:
# Build many timestamps in a single call instead of converting them one by one;
# handy for turning date strings exported from Excel into timestamps
date = [
    '20110102',
    '20110103',
    '20110105',
]
dt_index = pd.DatetimeIndex(data=date)
dt_index

DatetimeIndex(['2011-01-02', '2011-01-03', '2011-01-05'], dtype='datetime64[ns]', freq=None)

In [15]:
# Indexing a DatetimeIndex by position returns a single Timestamp
dt_index[0]

Timestamp('2011-01-02 00:00:00')

In [16]:
# Daily date range running from dt1 through dt2 (both endpoints included)
days = pd.date_range(start=dt1, end=dt2, freq='D')
days

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2012-06-21', '2012-06-22', '2012-06-23', '2012-06-24',
               '2012-06-25', '2012-06-26', '2012-06-27', '2012-06-28',
               '2012-06-29', '2012-06-30'],
              dtype='datetime64[ns]', length=547, freq='D')

In [17]:
# can use strings as inputs and also use monthly frequency
months = pd.date_range('20110101','20121231',freq='M')
months

  months = pd.date_range('20110101','20121231',freq='M')


DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31',
               '2012-01-31', '2012-02-29', '2012-03-31', '2012-04-30',
               '2012-05-31', '2012-06-30', '2012-07-31', '2012-08-31',
               '2012-09-30', '2012-10-31', '2012-11-30', '2012-12-31'],
              dtype='datetime64[ns]', freq='ME')

See documentation for other frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

In [18]:
## DataFrames with a pandas datetime index
univ = ['SPY', 'TLT', 'VXX', 'QQQ']

# One row per minute. Kept under its own name so the daily range stored in
# `days` by the earlier date_range cell is not overwritten by a minute-level index.
minutes = pd.date_range('20190101', '20210630', freq='min')

# Seeded generator so the simulated values are identical on every run.
rng = np.random.default_rng(42)
df = pd.DataFrame(
    rng.standard_normal((len(minutes), len(univ))),
    index=minutes,
    columns=univ,
)

# pandas truncates the display of a frame this long to its first and last rows.
df

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,0.664185,0.576571,0.448652,2.355082
2019-01-01 00:01:00,-2.504634,0.652321,-0.817453,0.587978
2019-01-01 00:02:00,1.558711,-0.357100,0.274141,0.423721
2019-01-01 00:03:00,0.495836,0.853814,0.459064,0.631602
2019-01-01 00:04:00,-1.138802,-0.171343,0.672672,-1.277929
...,...,...,...,...
2021-06-29 23:56:00,-0.959657,-0.017613,2.167485,0.123945
2021-06-29 23:57:00,0.336677,-0.784209,0.268581,0.830495
2021-06-29 23:58:00,-0.404078,-0.703314,0.216138,-0.081901
2021-06-29 23:59:00,-1.172789,-0.028967,0.792779,-0.047648


In [None]:
# Select by a full timestamp label.
# NOTE(review): with a minute-level index this should be an exact match that
# returns the single row for that minute — confirm when the cell is run.
df.loc['2019-01-01 00:00:00']

In [None]:
# Partial-string indexing: a date with no time component selects every row on that day
df.loc['2019-01-01']

In [None]:
# Monthly totals: bucket the minute rows by calendar month (labelled at month end)
# and sum each column. 'ME' replaces the alias 'M', deprecated as of pandas 2.2.
df.resample('ME').sum()

In [None]:
# High of each 5-minute bar: the maximum per column within every 5-minute bucket
df.resample('5min').max()

In [None]:
# Close of each 5-minute bar: the last observation per column in every 5-minute bucket
df.resample('5min').last()

Again, refer to the documentation on offset aliases for how to specify resampling frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases


Many other pandas time-series topics are not covered here!