In [50]:
import pandas as pd
import datetime as dt
from pandas_datareader import data

### The pandas `Timestamp` object

In [7]:
# It is pandas datetime object
pd.Timestamp("2015-03-31")
pd.Timestamp("2015/03/31")
pd.Timestamp('2021-03-08 08:32:14')
pd.Timestamp('2021-03-08 6:32:14 PM')

Timestamp('2021-03-08 18:32:14')

In [8]:
pd.Timestamp(dt.datetime(2000, 2, 3, 21, 32, 22))

Timestamp('2000-02-03 21:32:22')

### The pandas `DateTImeIndex` object

In [11]:
dates = ['2016-01-02', '2016-03-12', '2008-4-3']
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-02', '2016-03-12', '2008-04-03'], dtype='datetime64[ns]', freq=None)

In [14]:
dates = [dt.date(2015, 1, 2), dt.date(2022, 6, 1), dt.date(2010, 7, 11)]
dtIndex = pd.DatetimeIndex(dates)

In [15]:
values = [100, 200, 300]
pd.Series(data=values, index=dtIndex)

2015-01-02    100
2022-06-01    200
2010-07-11    300
dtype: int64

### The `pd.to_datetime()` method

In [16]:
pd.to_datetime('2001-04-19')

Timestamp('2001-04-19 00:00:00')

In [19]:
dates = pd.Series(['July 3rd, 1995', '10/03/1993', 'Hello', '2014-02-31'])
dates

0    July 3rd, 1995
1        10/03/1993
2             Hello
3        2014-02-31
dtype: object

In [21]:
pd.to_datetime(dates, errors='coerce')

0   1995-07-03
1   1993-10-03
2          NaT
3          NaT
dtype: datetime64[ns]

In [23]:
pd.to_datetime([12353433, 124342323, 4324234234], unit='s')

DatetimeIndex(['1970-05-23 23:30:33', '1973-12-10 03:32:03',
               '2107-01-12 00:10:34'],
              dtype='datetime64[ns]', freq=None)

### Create a range of dates with the `pd.date_range()` method

In [28]:
times = pd.date_range(start='2016-01-01', end='2016-01-10', freq='2D')
times

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [27]:
type(times)

pandas.core.indexes.datetimes.DatetimeIndex

In [33]:
# You can do weeks with a starting day
times = pd.date_range(start='2016-01-01', end='2016-01-20', freq='W-SUN')
times

DatetimeIndex(['2016-01-03', '2016-01-10', '2016-01-17'], dtype='datetime64[ns]', freq='W-SUN')

In [36]:
# periods defines the number of timestamps you want to get
# you get 25 timestamp from starting date at a specified freq interval
# freq='B' only generates business days
pd.date_range(start='2012-09-09', periods=25, freq='B')

DatetimeIndex(['2012-09-10', '2012-09-11', '2012-09-12', '2012-09-13',
               '2012-09-14', '2012-09-17', '2012-09-18', '2012-09-19',
               '2012-09-20', '2012-09-21', '2012-09-24', '2012-09-25',
               '2012-09-26', '2012-09-27', '2012-09-28', '2012-10-01',
               '2012-10-02', '2012-10-03', '2012-10-04', '2012-10-05',
               '2012-10-08', '2012-10-09', '2012-10-10', '2012-10-11',
               '2012-10-12'],
              dtype='datetime64[ns]', freq='B')

In [37]:
# Can do similar things with end param
# you will go backwards until you generate specified num of periods
pd.date_range(end='1999-12-31', periods=25, freq='B')

DatetimeIndex(['1999-11-29', '1999-11-30', '1999-12-01', '1999-12-02',
               '1999-12-03', '1999-12-06', '1999-12-07', '1999-12-08',
               '1999-12-09', '1999-12-10', '1999-12-13', '1999-12-14',
               '1999-12-15', '1999-12-16', '1999-12-17', '1999-12-20',
               '1999-12-21', '1999-12-22', '1999-12-23', '1999-12-24',
               '1999-12-27', '1999-12-28', '1999-12-29', '1999-12-30',
               '1999-12-31'],
              dtype='datetime64[ns]', freq='B')

### The `.dt` accessor

In [38]:
bunch_of_dates = pd.date_range(start='2000-01-01', end='2010-12-31', freq='24D')

In [40]:
s = pd.Series(bunch_of_dates)
s

0     2000-01-01
1     2000-01-25
2     2000-02-18
3     2000-03-13
4     2000-04-06
         ...    
163   2010-09-17
164   2010-10-11
165   2010-11-04
166   2010-11-28
167   2010-12-22
Length: 168, dtype: datetime64[ns]

In [48]:
mask = s.dt.is_quarter_start
s[mask]

0     2000-01-01
19    2001-04-01
38    2002-07-01
137   2009-01-01
dtype: datetime64[ns]

### The `pandas_datareader` package

In [54]:
stocks = data.DataReader(name='MSFT', 
                data_source='yahoo', 
                start='2010-01-01', 
                end='2020-12-31')
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863363


In [56]:
stocks.index

DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2020-12-17', '2020-12-18', '2020-12-21', '2020-12-22',
               '2020-12-23', '2020-12-24', '2020-12-28', '2020-12-29',
               '2020-12-30', '2020-12-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

### Selecting rows from a df with a `DataTimeIndex`

In [57]:
stocks = data.DataReader(name='MSFT', 
                data_source='yahoo', 
                start='2010-01-01', 
                end='2020-12-31')
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855652
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863363


In [59]:
# better to wrap date inside a TimeStamp
stocks.loc[pd.Timestamp('2010-01-04')]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.385565e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [61]:
stocks.iloc[0]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.385565e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [66]:
# Pull multiple values, you need to wrap first 
stocks.loc[[pd.Timestamp('2010-01-04'), pd.Timestamp('2010-01-05')]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855652
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863363


In [71]:
# Pull msft prices on my bday
bdays = pd.date_range(start='1989-10-21', end='2020-12-31', freq=pd.DateOffset(years=1))
mask = stocks.index.isin(bdays)
stocks[mask]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-10-21,25.540001,25.049999,25.4,25.42,50032400.0,19.879173
2011-10-21,27.190001,26.799999,27.15,27.16,76620600.0,21.774193
2013-10-21,35.200001,34.91,34.98,34.990002,27433500.0,29.716061
2014-10-21,44.98,44.189999,44.360001,44.880001,36433800.0,39.208752
2015-10-21,47.990002,47.110001,47.919998,47.200001,25144300.0,42.339947
2016-10-21,60.450001,59.490002,60.279999,59.66,80032200.0,54.98465
2019-10-21,138.5,137.009995,138.449997,138.429993,20078200.0,135.10025
2020-10-21,216.919998,213.119995,213.119995,214.800003,22724900.0,212.019821


### The `Timestamp` object attributes and methods

In [72]:
stocks = data.DataReader(name='MSFT', 
                data_source='yahoo', 
                start='2010-01-01', 
                end='2020-12-31')
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855652
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863365


In [76]:
someday = stocks.index[500]
someday

Timestamp('2011-12-27 00:00:00')

In [82]:
someday.month
someday.week
someday.is_month_start
someday.day
someday.month_name()
someday.day_name()

'Tuesday'

In [84]:
stocks.insert(0, 'Day of Week', stocks.index.day_name())

ValueError: cannot insert Day of Week, already exists

In [85]:
stocks

Unnamed: 0_level_0,Day of Week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,31.100000,30.590000,30.620001,30.950001,38409100.0,23.855652
2010-01-05,Tuesday,31.100000,30.639999,30.850000,30.959999,49749600.0,23.863365
2010-01-06,Wednesday,31.080000,30.520000,30.879999,30.770000,58182400.0,23.716919
2010-01-07,Thursday,30.700001,30.190001,30.629999,30.450001,50559700.0,23.470270
2010-01-08,Friday,30.879999,30.240000,30.280001,30.660000,51197400.0,23.632135
...,...,...,...,...,...,...,...
2020-12-24,Thursday,223.610001,221.199997,221.419998,222.750000,10550600.0,220.442566
2020-12-28,Monday,226.029999,223.020004,224.449997,224.960007,17933500.0,222.629654
2020-12-29,Tuesday,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828064
2020-12-30,Wednesday,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383636


In [87]:
stocks.insert(1, 'Is Start of Month', stocks.index.is_month_start)

ValueError: cannot insert Is Start of Month, already exists

In [88]:
stocks

Unnamed: 0_level_0,Day of Week,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,Monday,False,31.100000,30.590000,30.620001,30.950001,38409100.0,23.855652
2010-01-05,Tuesday,False,31.100000,30.639999,30.850000,30.959999,49749600.0,23.863365
2010-01-06,Wednesday,False,31.080000,30.520000,30.879999,30.770000,58182400.0,23.716919
2010-01-07,Thursday,False,30.700001,30.190001,30.629999,30.450001,50559700.0,23.470270
2010-01-08,Friday,False,30.879999,30.240000,30.280001,30.660000,51197400.0,23.632135
...,...,...,...,...,...,...,...,...
2020-12-24,Thursday,False,223.610001,221.199997,221.419998,222.750000,10550600.0,220.442566
2020-12-28,Monday,False,226.029999,223.020004,224.449997,224.960007,17933500.0,222.629654
2020-12-29,Tuesday,False,227.179993,223.580002,226.309998,224.149994,17403200.0,221.828064
2020-12-30,Wednesday,False,225.630005,221.470001,225.229996,221.679993,20272300.0,219.383636


In [90]:
stocks[stocks['Is Start of Month']]

Unnamed: 0_level_0,Day of Week,Is Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-02-01,Monday,True,28.480000,27.920000,28.389999,28.410000,85931100.0,21.897875
2010-03-01,Monday,True,29.049999,28.530001,28.770000,29.020000,43805400.0,22.472647
2010-04-01,Thursday,True,29.540001,28.620001,29.350000,29.160000,74768100.0,22.581060
2010-06-01,Tuesday,True,26.309999,25.520000,25.530001,25.889999,76152400.0,20.139286
2010-07-01,Thursday,True,23.320000,22.730000,23.090000,23.160000,92239400.0,18.015682
...,...,...,...,...,...,...,...,...
2020-06-01,Monday,True,183.000000,181.460007,182.539993,182.830002,22622400.0,180.028427
2020-07-01,Wednesday,True,206.350006,201.770004,203.139999,204.699997,32061200.0,201.563293
2020-09-01,Tuesday,True,227.449997,224.429993,225.509995,227.270004,25725500.0,224.328430
2020-10-01,Thursday,True,213.990005,211.320007,213.490005,212.460007,27158400.0,209.710114


### The `pd.DateOffset` object

In [91]:
stocks = data.DataReader(name='MSFT', 
                data_source='yahoo', 
                start='2010-01-01', 
                end='2020-12-31')
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863358


In [92]:
# Allows to subtract or add to date
stocks.index + pd.DateOffset(days=5)

DatetimeIndex(['2010-01-09', '2010-01-10', '2010-01-11', '2010-01-12',
               '2010-01-13', '2010-01-16', '2010-01-17', '2010-01-18',
               '2010-01-19', '2010-01-20',
               ...
               '2020-12-22', '2020-12-23', '2020-12-26', '2020-12-27',
               '2020-12-28', '2020-12-29', '2021-01-02', '2021-01-03',
               '2021-01-04', '2021-01-05'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

### Timeseries Offsets

In [93]:
# Adding dynamic amounts of dates, like rounding to the end of the month
stocks = data.DataReader(name='MSFT', 
                data_source='yahoo', 
                start='2010-01-01', 
                end='2020-12-31')
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855656
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863365


In [94]:
stocks.index + pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2021-01-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

In [98]:
from pandas.tseries import offsets
stocks.index + offsets.MonthEnd()

DatetimeIndex(['2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31', '2010-01-31', '2010-01-31',
               '2010-01-31', '2010-01-31',
               ...
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2020-12-31', '2020-12-31', '2020-12-31',
               '2020-12-31', '2021-01-31'],
              dtype='datetime64[ns]', name='Date', length=2769, freq=None)

### The `Timedelta` object

In [95]:
stocks = data.DataReader(name='MSFT', 
                data_source='yahoo', 
                start='2010-01-01', 
                end='2020-12-31')
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855659
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863361


In [99]:
# Timedelta represents a time span, difference between 2 times
time_a = pd.Timestamp('2020-03-31 04:34:13PM')
time_b = pd.Timestamp('2020-03-21 01:44:33PM')

time_a - time_b

Timedelta('10 days 02:49:40')

In [101]:
time_a + pd.Timedelta(days=3)

Timestamp('2020-04-03 16:34:13')

### `Timedelta` in a dataset

In [102]:
stocks = data.DataReader(name='MSFT', 
                data_source='yahoo', 
                start='2010-01-01', 
                end='2020-12-31')
stocks.head(2)

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.855656
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.863367


In [105]:
shipping = pd.read_csv('../datasets/ecommerce.csv', 
            index_col='ID', 
            parse_dates=['order_date', 'delivery_date'])
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [109]:
# Find how long it took to deliver
shipping['Delivery Time'] = shipping['delivery_date'] - shipping['order_date']
shipping

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days
...,...,...,...
990,1991-06-24,1996-02-02,1684 days
991,1991-09-09,1998-03-30,2394 days
993,1990-11-16,1998-04-27,2719 days
994,1993-06-03,1993-06-13,10 days


In [114]:
# Get orders that took more than year to deliver
mask = shipping['Delivery Time'] > '365 days'
shipping[mask]

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days
9,1990-01-25,1994-10-02,1711 days
...,...,...,...
986,1990-12-10,1992-12-16,737 days
990,1991-06-24,1996-02-02,1684 days
991,1991-09-09,1998-03-30,2394 days
993,1990-11-16,1998-04-27,2719 days
