In [14]:
#import needed libraries
import pandas as pd
import datetime as dt

# Built-in datetime methods

In [15]:
#create a date object
birthday = dt.date(1976, 4, 4)

In [16]:
#get info out of the new date object.  Attributes: year, month, day
birthday.day 

4

In [17]:
#datetime object has a timestamp with it.  argument order: Y, M, D, H, M, S
then = dt.datetime(2010,4,4,6,12,56)

In [18]:
#use dot notation + attributes (year, month, day, hour, minute, second)
then.year

2010

# pandas datetimes methods

## timestamp object: pandas version of a python datetime

In [19]:

''' you can use dates in many different formats.  Try any of the following:
  12 Oct 2013
  2013 October 12
  10/12/2018
  10-12-2018
  October 12, 2018
  6:45am 12 Oct 2018
  Oct. 12, 2018 20:33:12
'''

pd.Timestamp("Oct. 12, 2018 20:33:12")

Timestamp('2018-10-12 20:33:12')

## DateTimeIndex object

In [20]:
dates= ["6:45am 12 Oct 2018", "Oct. 13, 2018 20:33:12", "4/4/1978"]
dtIndex = pd.DatetimeIndex(dates)

In [21]:
#create a series and use the datetimeindex as the index for a series
values = [234, 456,143]
pd.Series(data = values, index = dtIndex)

2018-10-12 06:45:00    234
2018-10-13 20:33:12    456
1978-04-04 00:00:00    143
dtype: int64

## pd.to_datetime()
a method to convert an object to a datetime object

In [22]:
#convert to datetime in several different ways
pd.to_datetime("10-12-2018")
pd.to_datetime(dt.date(2015,1,1,))
pd.to_datetime(dt.datetime(2018,2,3,5,24,21))
pd.to_datetime(dates)

DatetimeIndex(['2018-10-12 06:45:00', '2018-10-13 20:33:12',
               '1978-04-04 00:00:00'],
              dtype='datetime64[ns]', freq=None)

In [23]:
times = pd.Series(dates)
times

0        6:45am 12 Oct 2018
1    Oct. 13, 2018 20:33:12
2                  4/4/1978
dtype: object

In [24]:
#convert a series of dates to a datetime object
pd.to_datetime(times)

0   2018-10-12 06:45:00
1   2018-10-13 20:33:12
2   1978-04-04 00:00:00
dtype: datetime64[ns]

In [25]:
#given invalid data
messedUpDates = pd.Series(["July 4, 1776", "10/4/1012","How are you?","2015-2-30"])
messedUpDates

0    July 4, 1776
1       10/4/1012
2    How are you?
3       2015-2-30
dtype: object

In [26]:
pd.to_datetime(messedUpDates) #this will throw an error with the strings that can't really be converted to dates

OutOfBoundsDatetime: Out of bounds nanosecond timestamp: 1012-10-04 00:00:00

In [27]:
pd.to_datetime(messedUpDates, errors="coerce") # use the errors argument to not report errors

0   1776-07-04
1          NaT
2          NaT
3          NaT
dtype: datetime64[ns]

NaT = "not a time" which is essentiall "null," but for datetimes

In [28]:
#convert UNIX times to datetimes. Use 'unit' parameter to indicate that these are seconds
pd.to_datetime([1234567890, 1087654321, 4352987654], unit="s")

DatetimeIndex(['2009-02-13 23:31:30', '2004-06-19 14:12:01',
               '2107-12-10 19:14:14'],
              dtype='datetime64[ns]', freq=None)

## .date_range() method
create ranges of dates

In [29]:
pd.date_range(start = "1-1-2015", end = "1-1-2018", freq = "m") 
#freq specifies the interval.  10D -= every 10 days.
    # freq options: Y= year (A=year end), M=month end (MS=month start), 
    # W=week (W-SUN or W-WED to start week on a specific day),
    # B = "business days", D=day, H=hour, S=seconds, 

DatetimeIndex(['2015-01-31', '2015-02-28', '2015-03-31', '2015-04-30',
               '2015-05-31', '2015-06-30', '2015-07-31', '2015-08-31',
               '2015-09-30', '2015-10-31', '2015-11-30', '2015-12-31',
               '2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31',
               '2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',
               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',
               '2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31'],
              dtype='datetime64[ns]', freq='M')

In [30]:
#create range with a specific # of values
pd.date_range(start="10-10-2018", periods=10) 


DatetimeIndex(['2018-10-10', '2018-10-11', '2018-10-12', '2018-10-13',
               '2018-10-14', '2018-10-15', '2018-10-16', '2018-10-17',
               '2018-10-18', '2018-10-19'],
              dtype='datetime64[ns]', freq='D')

In [31]:
#start with the end in mind and go backwards
pd.date_range(end = "1999-12-31", periods=20, freq="W-WED")

DatetimeIndex(['1999-08-18', '1999-08-25', '1999-09-01', '1999-09-08',
               '1999-09-15', '1999-09-22', '1999-09-29', '1999-10-06',
               '1999-10-13', '1999-10-20', '1999-10-27', '1999-11-03',
               '1999-11-10', '1999-11-17', '1999-11-24', '1999-12-01',
               '1999-12-08', '1999-12-15', '1999-12-22', '1999-12-29'],
              dtype='datetime64[ns]', freq='W-WED')

## .dt Accessor
similar to .str accessor, but for dates instead of strings.

In [32]:
lotsOfDates = pd.date_range(start = "1-1-2000", periods = 50, freq="MS")

In [33]:
datesSeries = pd.Series(lotsOfDates)
datesSeries.head()

0   2000-01-01
1   2000-02-01
2   2000-03-01
3   2000-04-01
4   2000-05-01
dtype: datetime64[ns]

In [34]:
#notice that you can't just access the parts of the date with normal python notation.  e.g.,
datesSeries.hour

AttributeError: 'Series' object has no attribute 'hour'

In [35]:
#instead, prefix it with the .dt Accessor first
datesSeries.dt.hour

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
31    0
32    0
33    0
34    0
35    0
36    0
37    0
38    0
39    0
40    0
41    0
42    0
43    0
44    0
45    0
46    0
47    0
48    0
49    0
dtype: int64

In [36]:
#find out the weekday (full name) for this info
datesSeries.dt.weekday_name

0      Saturday
1       Tuesday
2     Wednesday
3      Saturday
4        Monday
5      Thursday
6      Saturday
7       Tuesday
8        Friday
9        Sunday
10    Wednesday
11       Friday
12       Monday
13     Thursday
14     Thursday
15       Sunday
16      Tuesday
17       Friday
18       Sunday
19    Wednesday
20     Saturday
21       Monday
22     Thursday
23     Saturday
24      Tuesday
25       Friday
26       Friday
27       Monday
28    Wednesday
29     Saturday
30       Monday
31     Thursday
32       Sunday
33      Tuesday
34       Friday
35       Sunday
36    Wednesday
37     Saturday
38     Saturday
39      Tuesday
40     Thursday
41       Sunday
42      Tuesday
43       Friday
44       Monday
45    Wednesday
46     Saturday
47       Monday
48     Thursday
49       Sunday
dtype: object

In [37]:
#check if any of these dates falls on a leap year
datesSeries.dt.is_leap_year

0      True
1      True
2      True
3      True
4      True
5      True
6      True
7      True
8      True
9      True
10     True
11     True
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31    False
32    False
33    False
34    False
35    False
36    False
37    False
38    False
39    False
40    False
41    False
42    False
43    False
44    False
45    False
46    False
47    False
48     True
49     True
dtype: bool

In [38]:
#the is_ ... attibute allows you to check all sorts of relative date.  
datesSeries.dt.is_quarter_start

0      True
1     False
2     False
3      True
4     False
5     False
6      True
7     False
8     False
9      True
10    False
11    False
12     True
13    False
14    False
15     True
16    False
17    False
18     True
19    False
20    False
21     True
22    False
23    False
24     True
25    False
26    False
27     True
28    False
29    False
30     True
31    False
32    False
33     True
34    False
35    False
36     True
37    False
38    False
39     True
40    False
41    False
42     True
43    False
44    False
45     True
46    False
47    False
48     True
49    False
dtype: bool

# Work with pandas_datareader Library
great library for working with dates in financial data

In [40]:
#WARNING: the most recent version of pandas_datareader seems to break with the most recent version of pandas
## the following line is a hack that solves the problem temporarily without having to modify any of the system files.  In the future, it may not be necessary.  I got this solution from https://stackoverflow.com/questions/50394873/import-pandas-datareader-gives-importerror-cannot-import-name-is-list-like
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data

In [171]:
#query an online source for financial info!
co = "AMZN" #look up a company's stock ticker symbol at: https://www.nasdaq.com/symbol/
start = "1-1-2013"
end = "10-12-2018"

stocks = data.DataReader(name = co, data_source = "iex", start = start, end = end)
# google = data.DataReader(name = "GOOG", data_source = "google", start = "2014-04-01", end="2018-10-10")
#for some reason, google is detecting this as a script and won't let me retrieve the data.

#possible valid sources for stock data as of 2018-10-15
"iex"
"quandl"
"stooq" #limited stocks

#see https://github.com/rsvp/fecon235/issues/7 for further discussion on this issue

5y


'stooq'

In [127]:
#get info about this dataframe
stocks.values
stocks.columns
stocks.info()
stocks.axes
stocks.head()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1259 entries, 2013-10-15 to 2018-10-12
Data columns (total 5 columns):
open      1259 non-null float64
high      1259 non-null float64
low       1259 non-null float64
close     1259 non-null float64
volume    1259 non-null int64
dtypes: float64(4), int64(1)
memory usage: 59.0 KB


Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-10-15,309.87,310.79,305.26,306.4,2261554
2013-10-16,308.38,310.8,305.554,310.49,2180521
2013-10-17,307.07,311.0,305.24,310.77,2648384
2013-10-18,319.36,331.89,316.75,328.931,5969814
2013-10-21,329.89,330.0,323.8,326.44,2527617


# Selecting and extracting from a dataframe with a DateTimeIndex

In [128]:
# let specific info at specific location
stocks.loc['2018-01-03']

open         1188.30
high         1205.49
low          1188.30
close        1204.20
volume    3108793.00
Name: 2018-01-03 00:00:00, dtype: float64

In [129]:
#get range of data.  use the .loc[] or iloc[] methods.  Can get individual records or ranges.
stocks.loc['2014-04-01':'2018-10-01']

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-04-01,338.09,344.4300,338.00,342.990,3602899
2014-04-02,345.99,348.2950,340.38,341.960,4465927
2014-04-03,341.82,342.5000,328.46,333.620,6399299
2014-04-04,335.15,335.4400,315.61,323.000,12534578
2014-04-07,320.99,324.9400,313.13,317.760,7077360
2014-04-08,321.88,328.0000,318.44,327.070,6585583
2014-04-09,328.47,332.1800,322.50,331.805,5058664
2014-04-10,330.60,331.0000,316.50,317.110,6133376
2014-04-11,314.00,316.5000,309.50,311.730,7292028
2014-04-14,317.67,320.4800,311.28,315.910,4293532


In [130]:
#create a DateTimeIndex with all of my birthdays in it up to this point
birthdays  = pd.date_range(start = "1978-04-04", end = "2018-12-31", freq= pd.DateOffset(years = 1))
birthdays

DatetimeIndex(['1978-04-04', '1979-04-04', '1980-04-04', '1981-04-04',
               '1982-04-04', '1983-04-04', '1984-04-04', '1985-04-04',
               '1986-04-04', '1987-04-04', '1988-04-04', '1989-04-04',
               '1990-04-04', '1991-04-04', '1992-04-04', '1993-04-04',
               '1994-04-04', '1995-04-04', '1996-04-04', '1997-04-04',
               '1998-04-04', '1999-04-04', '2000-04-04', '2001-04-04',
               '2002-04-04', '2003-04-04', '2004-04-04', '2005-04-04',
               '2006-04-04', '2007-04-04', '2008-04-04', '2009-04-04',
               '2010-04-04', '2011-04-04', '2012-04-04', '2013-04-04',
               '2014-04-04', '2015-04-04', '2016-04-04', '2017-04-04',
               '2018-04-04'],
              dtype='datetime64[ns]', freq='<DateOffset: years=1>')

In [131]:
#see if any of the dates of the stocks is in my birthdays list
mask = stocks.index.isin(birthdays)
mask

array([False, False, False, ..., False, False, False])

In [133]:
#check if the stock market was open on any of my birthdays (warning: the stocks data only contains the past 5 years)
stocks[mask] #this originally returned nothing b/c the dates were brought in as strings

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-04-04,335.15,335.44,315.61,323.0,12534578
2016-04-04,599.0,599.5,590.55,593.19,2470825
2017-04-04,891.5,908.5384,890.28,906.83,4984656
2018-04-04,1358.24,1415.39,1352.88,1410.57,6982305


# Timestamp Object Attributes

In [134]:
#get stocks data using DataReader

someday = stocks.index[500]
type(someday)

pandas._libs.tslibs.timestamps.Timestamp

In [137]:
#for some reason, this data cane across as string data. Convert the dates to datetime first
stocks.index = pd.to_datetime(stocks.index)
someday

Timestamp('2015-10-09 00:00:00')

In [138]:
stocks.head(10)

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-10-15,309.87,310.79,305.26,306.4,2261554
2013-10-16,308.38,310.8,305.554,310.49,2180521
2013-10-17,307.07,311.0,305.24,310.77,2648384
2013-10-18,319.36,331.89,316.75,328.931,5969814
2013-10-21,329.89,330.0,323.8,326.44,2527617
2013-10-22,327.72,337.11,325.68,332.54,3942953
2013-10-23,330.84,331.72,324.0601,326.756,2818158
2013-10-24,329.63,332.6499,326.75,332.21,5884655
2013-10-25,358.6,368.4,352.62,363.39,12043903
2013-10-28,359.92,362.75,357.2,358.16,3635848


In [140]:
#insert a column of new values into the list
stocks.insert(0, "Day of the Week", stocks.index.weekday_name)

ValueError: cannot insert Day of the Week, already exists

In [141]:
stocks.head(3)

Unnamed: 0_level_0,Day of the Week,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-15,Tuesday,309.87,310.79,305.26,306.4,2261554
2013-10-16,Wednesday,308.38,310.8,305.554,310.49,2180521
2013-10-17,Thursday,307.07,311.0,305.24,310.77,2648384


In [144]:
#insert a column that indicates whether or not this is the start of the month
stocks.insert(1, "Start of month?", stocks.index.is_month_start)

In [145]:
stocks.head(3)

Unnamed: 0_level_0,Day of the Week,Start of month?,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-10-15,Tuesday,False,309.87,310.79,305.26,306.4,2261554
2013-10-16,Wednesday,False,308.38,310.8,305.554,310.49,2180521
2013-10-17,Thursday,False,307.07,311.0,305.24,310.77,2648384


In [148]:
#so let's now find the stock price at the beginning of each month
stocks[stocks['Start of month?']]

Unnamed: 0_level_0,Day of the Week,Start of month?,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2013-11-01,Friday,True,365.63,365.77,356.1,359.002,3332126
2014-04-01,Tuesday,True,338.09,344.43,338.0,342.99,3602899
2014-05-01,Thursday,True,304.13,310.48,304.0,307.89,4329167
2014-07-01,Tuesday,True,325.86,333.2,325.1,332.39,3178548
2014-08-01,Friday,True,313.69,315.83,304.588,307.06,7443091
2014-10-01,Wednesday,True,322.04,322.165,315.55,317.46,3096735
2014-12-01,Monday,True,338.12,340.64,325.93,326.0,4944861
2015-04-01,Wednesday,True,372.1,373.16,368.34,370.255,2458095
2015-05-01,Friday,True,423.82,425.64,416.0,422.87,3565824
2015-06-01,Monday,True,430.4,433.16,426.2,430.92,2253264


## .truncate() method
used for slicing operations on datetime index objects in pandas

In [152]:
stocks.truncate(before ="2014-02-13", after = "2014-10-10")

Unnamed: 0_level_0,Day of the Week,Start of month?,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-02-13,Thursday,False,347.700,357.2000,346.5000,357.20,4181445
2014-02-14,Friday,False,359.340,359.3400,353.3500,357.35,3524570
2014-02-18,Tuesday,False,355.280,355.7300,349.4500,353.65,5000493
2014-02-19,Wednesday,False,352.640,354.5400,346.1000,347.38,4175393
2014-02-20,Thursday,False,348.800,350.4600,344.3800,349.80,3496663
2014-02-21,Friday,False,352.440,354.1410,346.7500,346.76,4212144
2014-02-24,Monday,False,345.190,353.0000,343.2900,351.78,3647182
2014-02-25,Tuesday,False,353.000,361.0800,351.5800,358.32,3747076
2014-02-26,Wednesday,False,359.860,364.7500,357.1700,359.80,3622506
2014-02-27,Thursday,False,357.220,360.5900,355.5000,360.13,3105442


## DateOffset objects
ways to modify existing times

In [173]:
#pull in new data
google = data.DataReader(name="GOOG",data_source="quandl", start= dt.date(2013,1,1), end=dt.datetime.now())

In [174]:
google.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,ExDividend,SplitRatio,AdjOpen,AdjHigh,AdjLow,AdjClose,AdjVolume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2018-03-27,1063.0,1064.84,996.92,1005.1,3029471.0,0.0,1.0,1063.0,1064.84,996.92,1005.1,3029471.0
2018-03-26,1046.0,1055.63,1008.4,1053.21,2558385.0,0.0,1.0,1046.0,1055.63,1008.4,1053.21,2558385.0
2018-03-23,1047.03,1063.36,1021.22,1021.57,2113497.0,0.0,1.0,1047.03,1063.36,1021.22,1021.57,2113497.0


In [175]:
google.index

DatetimeIndex(['2018-03-27', '2018-03-26', '2018-03-23', '2018-03-22',
               '2018-03-21', '2018-03-20', '2018-03-19', '2018-03-16',
               '2018-03-15', '2018-03-14',
               ...
               '2014-04-09', '2014-04-08', '2014-04-07', '2014-04-04',
               '2014-04-03', '2014-04-02', '2014-04-01', '2014-03-31',
               '2014-03-28', '2014-03-27'],
              dtype='datetime64[ns]', name='Date', length=1007, freq=None)

In [176]:
#add 5 days to the dates on each date
google.index + pd.DateOffset(days = 5)

DatetimeIndex(['2018-04-01', '2018-03-31', '2018-03-28', '2018-03-27',
               '2018-03-26', '2018-03-25', '2018-03-24', '2018-03-21',
               '2018-03-20', '2018-03-19',
               ...
               '2014-04-14', '2014-04-13', '2014-04-12', '2014-04-09',
               '2014-04-08', '2014-04-07', '2014-04-06', '2014-04-05',
               '2014-04-02', '2014-04-01'],
              dtype='datetime64[ns]', name='Date', length=1007, freq=None)

In [181]:
#add 3 weeks to each date
google.index + pd.DateOffset(weeks = 3)

DatetimeIndex(['2018-04-17', '2018-04-16', '2018-04-13', '2018-04-12',
               '2018-04-11', '2018-04-10', '2018-04-09', '2018-04-06',
               '2018-04-05', '2018-04-04',
               ...
               '2014-04-30', '2014-04-29', '2014-04-28', '2014-04-25',
               '2014-04-24', '2014-04-23', '2014-04-22', '2014-04-21',
               '2014-04-18', '2014-04-17'],
              dtype='datetime64[ns]', name='Date', length=1007, freq=None)

In [179]:
#change the time by 3 hours (subtract)
google.index - pd.DateOffset(hours = 3)

DatetimeIndex(['2018-03-26 21:00:00', '2018-03-25 21:00:00',
               '2018-03-22 21:00:00', '2018-03-21 21:00:00',
               '2018-03-20 21:00:00', '2018-03-19 21:00:00',
               '2018-03-18 21:00:00', '2018-03-15 21:00:00',
               '2018-03-14 21:00:00', '2018-03-13 21:00:00',
               ...
               '2014-04-08 21:00:00', '2014-04-07 21:00:00',
               '2014-04-06 21:00:00', '2014-04-03 21:00:00',
               '2014-04-02 21:00:00', '2014-04-01 21:00:00',
               '2014-03-31 21:00:00', '2014-03-30 21:00:00',
               '2014-03-27 21:00:00', '2014-03-26 21:00:00'],
              dtype='datetime64[ns]', name='Date', length=1007, freq=None)

In [184]:
#change multiple aspects of the date at once
google.index + pd.DateOffset(minutes = -4, years = -2, days = -15)

DatetimeIndex(['2016-03-11 23:56:00', '2016-03-10 23:56:00',
               '2016-03-07 23:56:00', '2016-03-06 23:56:00',
               '2016-03-05 23:56:00', '2016-03-04 23:56:00',
               '2016-03-03 23:56:00', '2016-02-29 23:56:00',
               '2016-02-28 23:56:00', '2016-02-27 23:56:00',
               ...
               '2012-03-24 23:56:00', '2012-03-23 23:56:00',
               '2012-03-22 23:56:00', '2012-03-19 23:56:00',
               '2012-03-18 23:56:00', '2012-03-17 23:56:00',
               '2012-03-16 23:56:00', '2012-03-15 23:56:00',
               '2012-03-12 23:56:00', '2012-03-11 23:56:00'],
              dtype='datetime64[ns]', name='Date', length=1007, freq=None)

### pd.tseries module to make relative changes to dates

In [185]:
#round dates to the closest end of the month (which would require a different operation per date)
google.index + pd.tseries.offsets.MonthEnd()

DatetimeIndex(['2018-03-31', '2018-03-31', '2018-03-31', '2018-03-31',
               '2018-03-31', '2018-03-31', '2018-03-31', '2018-03-31',
               '2018-03-31', '2018-03-31',
               ...
               '2014-04-30', '2014-04-30', '2014-04-30', '2014-04-30',
               '2014-04-30', '2014-04-30', '2014-04-30', '2014-04-30',
               '2014-03-31', '2014-03-31'],
              dtype='datetime64[ns]', name='Date', length=1007, freq=None)

### Timedelta() to check differences between dates
Timedelta() represents a distance in time, a duration, not a specific moment in time

In [189]:
#create a few times
time1 = pd.Timestamp("1978-04-04")
time2 = pd.Timestamp(pd.datetime.now())

time2 - time1 #this is the time I've been alive!

Timedelta('14804 days 15:58:49.618001')

In [191]:
#create a TimeDelta object
duration = pd.Timedelta(days = 3, hours = 1, minutes = 14)

In [194]:
#Timedelta is very flexible
pd.Timedelta("3 days 7 hours 30 seconds")

Timedelta('3 days 07:00:30')

### Timedelta in a dataset

In [196]:
shipping = pd.read_csv('../data/pandas/ecommerce.csv', index_col="ID", parse_dates=['order_date','delivery_date'])
shipping.head(3)

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26


In [202]:
#calculate the time differences with these dates
shipping['Delivery Time'] = shipping['delivery_date'] - shipping['order_date']
shipping.head(10)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days
8,1993-06-10,1993-11-11,154 days
9,1990-01-25,1994-10-02,1711 days
10,1992-02-23,1998-12-30,2502 days
11,1996-07-12,1997-07-14,367 days
18,1995-06-18,1997-10-13,848 days


In [204]:
shipping['Twice as long'] = shipping['delivery_date']+shipping['Delivery Time']
shipping.head(10)

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Twice as long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1998-05-24,1999-02-05,257 days,1999-10-20
2,1992-04-22,1998-03-06,2144 days,2004-01-18
4,1991-02-10,1992-08-26,563 days,1994-03-12
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18
8,1993-06-10,1993-11-11,154 days,1994-04-14
9,1990-01-25,1994-10-02,1711 days,1999-06-09
10,1992-02-23,1998-12-30,2502 days,2005-11-05
11,1996-07-12,1997-07-14,367 days,1998-07-16
18,1995-06-18,1997-10-13,848 days,2000-02-08


In [208]:
#find all the times that took longer than 3 . years to deliver
shipping[shipping['Delivery Time'] > "1095 days"]

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time,Twice as long
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,1992-04-22,1998-03-06,2144 days,2004-01-18
5,1992-07-21,1997-11-20,1948 days,2003-03-22
7,1993-09-02,1998-06-10,1742 days,2003-03-18
9,1990-01-25,1994-10-02,1711 days,1999-06-09
10,1992-02-23,1998-12-30,2502 days,2005-11-05
20,1992-10-17,1998-10-06,2180 days,2004-09-24
23,1992-05-30,1999-08-15,2633 days,2006-10-30
32,1990-01-20,1998-07-24,3107 days,2007-01-25
36,1990-05-15,1994-02-14,1371 days,1997-11-16
41,1992-02-06,1996-05-10,1555 days,2000-08-12


In [209]:
#you can use .max(), min(), etc. on a TimeDelta object.  e.g., 

## find shortest delivery time
shipping['Delivery Time'].min()

Timedelta('8 days 00:00:00')

In [210]:
#longest delivery time
shipping['Delivery Time'].max()

Timedelta('3583 days 00:00:00')

In [211]:
#average delivery time
shipping['Delivery Time'].mean()

Timedelta('1217 days 22:53:53.532934')