In [1]:
import pandas as pd 
import numpy as np 

# Parse a compact YYYYMMDD string straight into a pandas Timestamp.
dt = pd.to_datetime('20110107')

In [2]:
# Show the parsed value: the string is now a pandas Timestamp.
dt

Timestamp('2011-01-07 00:00:00')

In [3]:
# A dash-separated date string parses to a Timestamp as well.
pd.to_datetime('2011-01-07')

Timestamp('2011-01-07 00:00:00')

In [4]:
# Calendar fields are exposed as attributes of the Timestamp.
print(dt.year, dt.month, dt.day, sep='\n')

# Intraday fields -- handy when price behavior at a particular hour, minute
# or second of the day matters.
print(dt.hour, dt.minute, dt.second, sep='\n')

2011
1
7
0
0
0


In [5]:
# useful for checking if weekend: weekday() counts Monday as 0 through
# Sunday as 6, so 5 and 6 are the weekend (2011-01-07 gives 4, a Friday)
dt.weekday()

4

In [6]:
# to_datetime handles hours/minutes/seconds as well: a time-of-day after
# the date is kept in the resulting Timestamp
pd.to_datetime('20110107 12:10:30')

Timestamp('2011-01-07 12:10:30')

In [7]:
# convert back to a string; '%Y%m%d' is 4-digit year, 2-digit month,
# 2-digit day, i.e. the same compact form dt was parsed from
dt.strftime('%Y%m%d')

'20110107'

Check documentation for other string formatting codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

In [8]:
# can shift dates: adding a Day offset (default count of 1) moves dt
# forward by one calendar day
dt + pd.tseries.offsets.Day()

Timestamp('2011-01-08 00:00:00')

In [9]:
# can shift by multiple time units: the argument is the number of days
dt + pd.tseries.offsets.Day(2)

Timestamp('2011-01-09 00:00:00')

In [10]:
# shift only by business day (skip weekends): dt is a Friday, so one
# business day later lands on Monday 2011-01-10
dt + pd.tseries.offsets.BDay()

Timestamp('2011-01-10 00:00:00')

In [11]:
# Snap dt onto a month-end date: rollforward moves to the month end at or
# after dt, rollback to the month end at or before it.
print(
    pd.tseries.offsets.MonthEnd().rollforward(dt),
    pd.tseries.offsets.MonthEnd().rollback(dt),
    sep='\n',
)

2011-01-31 00:00:00
2010-12-31 00:00:00


Check documentation for other offsets: https://pandas.pydata.org/docs/reference/offset_frequency.html

In [12]:
# Subtracting one Timestamp from another yields a Timedelta --
# e.g. for counting the days until the next earnings date.
dt1, dt2 = (pd.to_datetime(stamp) for stamp in ('20110101', '20120630'))

diff = dt2 - dt1
diff

Timedelta('546 days 00:00:00')

In [13]:
# Whole-day component of the Timedelta, as a plain integer.
diff.days

546

In [14]:
# Build many timestamps in one call by handing the whole list of strings to
# DatetimeIndex -- useful for converting Excel dates from strings to timestamps.
date = ['20110102', '20110103', '20110105']
dt_index = pd.DatetimeIndex(data=date)
dt_index

DatetimeIndex(['2011-01-02', '2011-01-03', '2011-01-05'], dtype='datetime64[ns]', freq=None)

In [15]:
# Indexing a DatetimeIndex by position returns a single Timestamp.
dt_index[0]

Timestamp('2011-01-02 00:00:00')

In [16]:
## get time range
# Every calendar day from dt1 through dt2, both endpoints included.
days = pd.date_range(start=dt1, end=dt2, freq='D')
days

DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',
               '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',
               '2011-01-09', '2011-01-10',
               ...
               '2012-06-21', '2012-06-22', '2012-06-23', '2012-06-24',
               '2012-06-25', '2012-06-26', '2012-06-27', '2012-06-28',
               '2012-06-29', '2012-06-30'],
              dtype='datetime64[ns]', length=547, freq='D')

In [17]:
# can use strings as inputs and also use monthly frequency.
# The month-end frequency is passed as an offset object instead of the 'M'
# string alias: pandas 2.2 deprecated 'M' in favor of 'ME' (which is what
# triggers a FutureWarning here), while older releases do not recognise 'ME'.
# MonthEnd() means the same thing on every version.
months = pd.date_range('20110101', '20121231', freq=pd.tseries.offsets.MonthEnd())
months

  months = pd.date_range('20110101','20121231',freq='M')


DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',
               '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',
               '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31',
               '2012-01-31', '2012-02-29', '2012-03-31', '2012-04-30',
               '2012-05-31', '2012-06-30', '2012-07-31', '2012-08-31',
               '2012-09-30', '2012-10-31', '2012-11-30', '2012-12-31'],
              dtype='datetime64[ns]', freq='ME')

See documentation for other frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases

In [19]:
## DataFrames with pandas datetime index
# Minute-by-minute standard-normal values for a small ticker universe.
# The generator is seeded so the numbers shown below are the same after
# every Restart & Run All (unseeded draws change on each run).
SEED = 0
rng = np.random.default_rng(SEED)

univ = ['SPY','TLT','VXX','QQQ']
# NOTE: `days` is rebound here -- it now holds a minute-frequency index,
# not the daily range created earlier.
days = pd.date_range('20190101','20210630',freq='min')
df = pd.DataFrame(rng.standard_normal((len(days), len(univ))), index=days, columns=univ)
df

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,-0.505279,-0.000292,0.149514,0.054235
2019-01-01 00:01:00,0.152258,0.314673,1.508726,0.634877
2019-01-01 00:02:00,0.309812,-0.435587,-1.014003,-0.246448
2019-01-01 00:03:00,0.644837,-0.556488,-1.273130,-0.054354
2019-01-01 00:04:00,1.074807,1.490826,-1.053984,0.562539
...,...,...,...,...
2021-06-29 23:56:00,1.639331,-1.083719,2.291862,0.265446
2021-06-29 23:57:00,0.866292,-1.851090,1.836498,1.084791
2021-06-29 23:58:00,0.415843,-0.183006,-0.871886,-0.837434
2021-06-29 23:59:00,0.611600,-2.540064,0.368530,-2.036690


In [20]:
# A full timestamp string picks out the one matching row, returned as a
# Series with one entry per ticker.
df.loc['2019-01-01 00:00:00']

SPY   -0.505279
TLT   -0.000292
VXX    0.149514
QQQ    0.054235
Name: 2019-01-01 00:00:00, dtype: float64

In [21]:
# A date-only string selects every minute row that falls on that day.
df.loc['2019-01-01']

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,-0.505279,-0.000292,0.149514,0.054235
2019-01-01 00:01:00,0.152258,0.314673,1.508726,0.634877
2019-01-01 00:02:00,0.309812,-0.435587,-1.014003,-0.246448
2019-01-01 00:03:00,0.644837,-0.556488,-1.273130,-0.054354
2019-01-01 00:04:00,1.074807,1.490826,-1.053984,0.562539
...,...,...,...,...
2019-01-01 23:55:00,0.473984,-1.501100,-0.094507,0.412395
2019-01-01 23:56:00,0.640821,-1.793078,-0.857134,0.355790
2019-01-01 23:57:00,1.881106,-1.065516,-1.063221,-0.926882
2019-01-01 23:58:00,-1.420806,0.279749,0.537966,-0.303405


In [22]:
# Monthly totals: group the minute rows into calendar months (labelled by
# the month-end date) and sum each column.
# The frequency is given as a MonthEnd() offset object because the 'M' string
# alias is deprecated since pandas 2.2 (renamed 'ME') and emits a
# FutureWarning, while older releases do not accept 'ME'.
df.resample(pd.tseries.offsets.MonthEnd()).sum()

  df.resample('M').sum()


Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-31,-32.226951,15.390845,-2.167869,-433.110602
2019-02-28,74.730023,-26.194914,194.859671,-39.364799
2019-03-31,226.108971,-41.1881,-255.831909,-86.049408
2019-04-30,139.098587,169.370418,55.502667,-17.711988
2019-05-31,64.28257,-5.217075,31.363402,-221.072628
2019-06-30,-230.508892,249.795314,70.081844,218.089364
2019-07-31,234.406606,-220.987741,-154.659256,-226.772572
2019-08-31,211.930801,277.127181,-293.048302,162.027486
2019-09-30,17.835717,-153.592843,-365.405633,-352.671241
2019-10-31,166.987117,33.23348,236.76139,22.827559


In [23]:
# 5 minute high: largest value of each column within every 5-minute bucket
df.resample('5min').max()

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,1.074807,1.490826,1.508726,0.634877
2019-01-01 00:05:00,1.195941,1.625372,-0.327045,0.960882
2019-01-01 00:10:00,0.686190,1.713763,0.458336,0.825226
2019-01-01 00:15:00,2.447914,0.718938,0.276785,1.463981
2019-01-01 00:20:00,1.562657,1.216774,0.895385,1.836100
...,...,...,...,...
2021-06-29 23:40:00,0.819364,-0.133401,0.937376,0.505045
2021-06-29 23:45:00,0.694201,1.383114,0.253634,0.681415
2021-06-29 23:50:00,1.011482,1.088419,0.338431,0.853486
2021-06-29 23:55:00,1.639331,-0.183006,2.291862,1.084791


In [24]:
# close: last value of each column within every 5-minute bucket
df.resample('5min').last()

Unnamed: 0,SPY,TLT,VXX,QQQ
2019-01-01 00:00:00,1.074807,1.490826,-1.053984,0.562539
2019-01-01 00:05:00,0.313593,-1.267370,-1.166587,-1.694317
2019-01-01 00:10:00,-0.637195,-1.917156,-0.165416,0.015490
2019-01-01 00:15:00,-1.323091,-0.449906,-0.629378,0.141788
2019-01-01 00:20:00,1.501666,0.782004,0.895385,1.836100
...,...,...,...,...
2021-06-29 23:40:00,0.819364,-0.607935,0.639834,0.218725
2021-06-29 23:45:00,0.694201,0.500468,-0.740575,-0.295175
2021-06-29 23:50:00,-0.350821,0.888757,0.270659,-1.773200
2021-06-29 23:55:00,0.611600,-2.540064,0.368530,-2.036690


Again, refer to documentation for how to specify the frequencies: https://pandas.pydata.org/docs/user_guide/timeseries.html#timeseries-offset-aliases


Many other time-series topics in pandas are not covered here!