In [1]:
import pandas as pd
import datetime as dt
from pandas_datareader import data 

# Python DateTime Module

In [2]:
# build a calendar date from its year, month and day
birthday = dt.date(1985, 9, 10)

In [3]:
# show the year, month and day stored on the date object
print(birthday.year, birthday.month, birthday.day)

1985 9 10


In [4]:
# build a datetime: a calendar date plus a time of day
# argument order is year, month, day, hour (24-hour clock), minute, second
birthtime = dt.datetime(1985, 9, 10, 17, 13, 57)

In [5]:
# show the date components first, then the time-of-day components
print(
    birthtime.year, birthtime.month, birthtime.day,
    birthtime.hour, birthtime.minute, birthtime.second,
)

1985 9 10 17 13 57


In [6]:
# str() renders a datetime as 'YYYY-MM-DD HH:MM:SS' (str() also works on a date object)
str(birthtime)

'1985-09-10 17:13:57'

# Pandas Timestamp Object

In [7]:
# create timestamps (pandas is very flexible about how the date is written)
print(pd.Timestamp('2015-01-01'))
print(pd.Timestamp('2015/01/01'))
print(pd.Timestamp('2015, 01, 01'))
print(pd.Timestamp('1/1/2015'))
print(pd.Timestamp('19/1/2015')) # 19 cannot be a month, so it is read as the day (19 January)
print(pd.Timestamp('1/19/2015')) # 19 cannot be a month, so it is read as the day (19 January)
print(pd.Timestamp('4/3/2015')) # ambiguous: the first number is taken as the month (3 April)
print(pd.Timestamp('2015-01-01 08:30:15'))
print(pd.Timestamp('2015-01-01 08:30:15 PM')) # an AM/PM suffix is honoured (printed as 20:30:15)

2015-01-01 00:00:00
2015-01-01 00:00:00
2015-01-01 00:00:00
2015-01-01 00:00:00
2015-01-19 00:00:00
2015-01-19 00:00:00
2015-04-03 00:00:00
2015-01-01 08:30:15
2015-01-01 20:30:15


# Pandas DatetimeIndex Object

In [8]:
# dates written as strings, in more than one format
dates = [
    '2016-01-02',
    '2016-04-12',
    '2009/09/07',
]

# DatetimeIndex parses every string into a datetime64 value
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [9]:
# the same idea, starting from datetime.date objects instead of strings
dates = [
    dt.date(2016, 1, 10),
    dt.date(1994, 6, 13),
    dt.date(2003, 12, 29),
]

# DatetimeIndex converts the date objects into datetime64 values
pd.DatetimeIndex(dates)

DatetimeIndex(['2016-01-10', '1994-06-13', '2003-12-29'], dtype='datetime64[ns]', freq=None)

- The primary purpose of a DatetimeIndex is to convert strings (or date objects) into pandas datetime values.
- The result can then be used as the index of a Series or DataFrame.

In [10]:
# keep the DatetimeIndex in a variable so it can label a Series
dt_index = pd.DatetimeIndex(dates)

# three values, one per date in the index
values = [100, 200, 300]
pd.Series(values, index=dt_index)

2016-01-10    100
1994-06-13    200
2003-12-29    300
dtype: int64

# Pandas to_datetime() Method

In [11]:
# convert different kinds of object to pandas datetimes:
# a string, a datetime.date, a datetime.datetime, and a list of strings
print(pd.to_datetime('2001-04-19'))
print(pd.to_datetime(dt.date(2015, 1, 1)))
print(pd.to_datetime(dt.datetime(2015, 1, 1, 14, 35, 20)))
# a list input comes back as a DatetimeIndex rather than a single Timestamp
# NOTE(review): the strings below use several different formats; newer pandas releases
# may need format='mixed' to parse them in one call — confirm against the installed version
print(pd.to_datetime(['2015-01-03', '2014/02/08', '2016', 'July 4th, 1996']))

2001-04-19 00:00:00
2015-01-01 00:00:00
2015-01-01 14:35:20
DatetimeIndex(['2015-01-03', '2014-02-08', '2016-01-01', '1996-07-04'], dtype='datetime64[ns]', freq=None)


In [12]:
# converting the values of a Series to timestamps (the result is a datetime64 Series)
# NOTE(review): the strings use several different formats; newer pandas releases
# may need format='mixed' to parse them in one call — confirm against the installed version
times = pd.Series(['2015-01-03', '2014/02/08', '2016', 'July 4th, 1996'])
pd.to_datetime(times)

0   2015-01-03
1   2014-02-08
2   2016-01-01
3   1996-07-04
dtype: datetime64[ns]

In [13]:
# converting a Series that also holds non-date values ('Hello') and an impossible date ('2015-02-40')
# NOTE(review): newer pandas releases infer one format for the whole Series, which may also
# turn the valid but differently formatted strings into NaT — confirm against the installed version
times = pd.Series(['2015-01-03', '2014/02/08', '2016', 'July 4th, 1996', 'Hello', '2015-02-40'])
pd.to_datetime(times, errors = 'coerce') # unparseable entries become NaT; the default, errors = 'raise', throws instead

0   2015-01-03
1   2014-02-08
2   2016-01-01
3   1996-07-04
4          NaT
5          NaT
dtype: datetime64[ns]

# Create Date Ranges as DatetimeIndex

In [14]:
# generate a daily index (one entry per calendar day, both end points included)
pd.date_range(
    start = '2016-01-01',
    end = '2016-01-10',
    freq = '1D'
)

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
               '2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
               '2016-01-09', '2016-01-10'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# generate an index that steps two days at a time
pd.date_range(
    start = '2016-01-01',
    end = '2016-01-10',
    freq = '2D' # every second day; the end date is left out when a step does not land on it
)

DatetimeIndex(['2016-01-01', '2016-01-03', '2016-01-05', '2016-01-07',
               '2016-01-09'],
              dtype='datetime64[ns]', freq='2D')

In [16]:
# generate a business-day index
pd.date_range(
    start = '2016-01-01',
    end = '2016-01-10',
    freq = 'B' # business days only; weekend dates are skipped
)

DatetimeIndex(['2016-01-01', '2016-01-04', '2016-01-05', '2016-01-06',
               '2016-01-07', '2016-01-08'],
              dtype='datetime64[ns]', freq='B')

In [17]:
# generate a weekly index anchored on a chosen weekday
pd.date_range(
    start = '2016-01-01',
    end = '2016-01-17',
    freq = 'W-SUN' # every Sunday (W-DAY where DAY = MON, TUE, WED, ...)
)

DatetimeIndex(['2016-01-03', '2016-01-10', '2016-01-17'], dtype='datetime64[ns]', freq='W-SUN')

In [18]:
# generate a 12-hourly index
pd.date_range(
    start = '2016-01-01',
    end = '2016-01-10',
    freq = '12H' # every 12 hours (a plain 'H' would give every hour)
)

DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 12:00:00',
               '2016-01-02 00:00:00', '2016-01-02 12:00:00',
               '2016-01-03 00:00:00', '2016-01-03 12:00:00',
               '2016-01-04 00:00:00', '2016-01-04 12:00:00',
               '2016-01-05 00:00:00', '2016-01-05 12:00:00',
               '2016-01-06 00:00:00', '2016-01-06 12:00:00',
               '2016-01-07 00:00:00', '2016-01-07 12:00:00',
               '2016-01-08 00:00:00', '2016-01-08 12:00:00',
               '2016-01-09 00:00:00', '2016-01-09 12:00:00',
               '2016-01-10 00:00:00'],
              dtype='datetime64[ns]', freq='12H')

In [19]:
# generate a month-end index
pd.date_range(
    start = '2016-01-01',
    end = '2016-12-31',
    freq = 'M' # last calendar day of each month
)

DatetimeIndex(['2016-01-31', '2016-02-29', '2016-03-31', '2016-04-30',
               '2016-05-31', '2016-06-30', '2016-07-31', '2016-08-31',
               '2016-09-30', '2016-10-31', '2016-11-30', '2016-12-31'],
              dtype='datetime64[ns]', freq='M')

In [20]:
# generate a month-start index
pd.date_range(
    start = '2016-01-01',
    end = '2016-12-31',
    freq = 'MS' # first calendar day of each month
)

DatetimeIndex(['2016-01-01', '2016-02-01', '2016-03-01', '2016-04-01',
               '2016-05-01', '2016-06-01', '2016-07-01', '2016-08-01',
               '2016-09-01', '2016-10-01', '2016-11-01', '2016-12-01'],
              dtype='datetime64[ns]', freq='MS')

In [21]:
# generate a year-end index
pd.date_range(
    start = '2016-01-01',
    end = '2022-12-31',
    freq = 'A' # last day of each year, 31 December (use AS for year start)
)

DatetimeIndex(['2016-12-31', '2017-12-31', '2018-12-31', '2019-12-31',
               '2020-12-31', '2021-12-31', '2022-12-31'],
              dtype='datetime64[ns]', freq='A-DEC')

- The `periods` argument can be used in place of either the `start` or the `end` argument.
- Pandas will either:
    - start at the `start` date and generate the specified number of periods, or
    - end at the `end` date and count back the specified number of periods.

# The .dt Accessor
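- Similar to the .str accessor
- Used on a Series of datetime values to reach attributes such as day and month, and methods such as day_name() and month_name() (these methods replace the older weekday_name attribute)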

In [22]:
# every second day of 2016 ...
dates = pd.date_range('2016-01-01', '2016-12-31', freq='2D')

# ... wrapped in a Series so that the .dt accessor is available
dates_series = pd.Series(dates)
dates_series

0     2016-01-01
1     2016-01-03
2     2016-01-05
3     2016-01-07
4     2016-01-09
         ...    
178   2016-12-22
179   2016-12-24
180   2016-12-26
181   2016-12-28
182   2016-12-30
Length: 183, dtype: datetime64[ns]

In [23]:
# day of the month for every date in the Series (returned as integers)
dates_series.dt.day

0       1
1       3
2       5
3       7
4       9
       ..
178    22
179    24
180    26
181    28
182    30
Length: 183, dtype: int64

In [24]:
# month name for every date in the Series (month_name is a method, so it needs parentheses)
dates_series.dt.month_name()

0       January
1       January
2       January
3       January
4       January
         ...   
178    December
179    December
180    December
181    December
182    December
Length: 183, dtype: object

# Import New Data for Remaining Lessons

In [76]:
# download MSFT price data for 2010-2020 through pandas-datareader's 'yahoo' source
# NOTE(review): this presumably needs network access and a working Yahoo endpoint — confirm before re-running
stocks = data.DataReader(
    name = 'MSFT',
    data_source = 'yahoo',
    start = '2010-01-01',
    end = '2020-12-31'
)

# preview the first five rows
stocks.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.749811
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.757488
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.611685
2010-01-07,30.700001,30.190001,30.629999,30.450001,50559700.0,23.366129
2010-01-08,30.879999,30.24,30.280001,30.66,51197400.0,23.527271


In [29]:
# attribute review: raw values, column labels, row labels, then both sets of labels together
print(stocks.values) # the underlying data as an array, without labels
print('\n')
print(stocks.columns) # column labels
print('\n')
print(stocks.index) # row labels (a DatetimeIndex for this frame)
print('\n')
print(stocks.axes) # the row labels and the column labels together

[[3.11000004e+01 3.05900002e+01 3.06200008e+01 3.09500008e+01
  3.84091000e+07 2.37498169e+01]
 [3.11000004e+01 3.06399994e+01 3.08500004e+01 3.09599991e+01
  4.97496000e+07 2.37574806e+01]
 [3.10799999e+01 3.05200005e+01 3.08799992e+01 3.07700005e+01
  5.81824000e+07 2.36116867e+01]
 ...
 [2.27179993e+02 2.23580002e+02 2.26309998e+02 2.24149994e+02
  1.74032000e+07 2.20843826e+02]
 [2.25630005e+02 2.21470001e+02 2.25229996e+02 2.21679993e+02
  2.02723000e+07 2.18410248e+02]
 [2.23000000e+02 2.19679993e+02 2.21699997e+02 2.22419998e+02
  2.09421000e+07 2.19139343e+02]]


Index(['High', 'Low', 'Open', 'Close', 'Volume', 'Adj Close'], dtype='object')


DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2020-12-17', '2020-12-18', '2020-12-21', '2020-12-22',
               '2020-12-23', '2020-12-24', '2020-12-28', '2020-12-

# Selecting Rows w/ DatetimeIndex

In [35]:
# selecting a row with an index label
# NOTE: a cell only displays its last expression, so the result of the string
# lookup on the next line is computed but never shown
stocks.loc['2010-01-04']
stocks.loc[pd.Timestamp('2010-01-04')] # same row; passing an explicit Timestamp is the recommended form

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.374982e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [36]:
# selecting a single row by integer position (0 is the first row); the row comes back as a Series
stocks.iloc[0]

High         3.110000e+01
Low          3.059000e+01
Open         3.062000e+01
Close        3.095000e+01
Volume       3.840910e+07
Adj Close    2.374982e+01
Name: 2010-01-04 00:00:00, dtype: float64

In [39]:
# selecting several rows by passing a list of index labels; the result is a DataFrame
stocks.loc[[pd.Timestamp('2010-01-04'), pd.Timestamp('2010-01-05')]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.749817
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.757481


In [46]:
# selecting several rows by passing a list of integer positions; the result is a DataFrame
stocks.iloc[[0, 1, 2, 3, 4]]

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.749817
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.757481
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.611687
2010-01-07,30.700001,30.190001,30.629999,30.450001,50559700.0,23.366138
2010-01-08,30.879999,30.24,30.280001,30.66,51197400.0,23.527283


In [50]:
# selecting a range of rows with a label slice
stocks.loc['2010-01-04':'2010-01-08'] # label slices include the end point

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.749817
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.757481
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.611687
2010-01-07,30.700001,30.190001,30.629999,30.450001,50559700.0,23.366138
2010-01-08,30.879999,30.24,30.280001,30.66,51197400.0,23.527283


In [59]:
# selecting a range of rows with a position slice
stocks.iloc[0:5] # position slices exclude the end point, so this returns positions 0-4

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,31.1,30.59,30.620001,30.950001,38409100.0,23.749817
2010-01-05,31.1,30.639999,30.85,30.959999,49749600.0,23.757481
2010-01-06,31.08,30.52,30.879999,30.77,58182400.0,23.611687
2010-01-07,30.700001,30.190001,30.629999,30.450001,50559700.0,23.366138
2010-01-08,30.879999,30.24,30.280001,30.66,51197400.0,23.527283


# Inserting DateTime Objects

In [77]:
# insert the weekday name of each index date as the first column
# DataFrame.insert raises ValueError when the column already exists, so the
# call is guarded to keep this cell safe to run more than once.
if 'Day of Week' not in stocks.columns:
    stocks.insert(0, 'Day of Week', stocks.index.day_name())
stocks.head()

Unnamed: 0_level_0,Day of Week,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,Monday,31.1,30.59,30.620001,30.950001,38409100.0,23.749811
2010-01-05,Tuesday,31.1,30.639999,30.85,30.959999,49749600.0,23.757488
2010-01-06,Wednesday,31.08,30.52,30.879999,30.77,58182400.0,23.611685
2010-01-07,Thursday,30.700001,30.190001,30.629999,30.450001,50559700.0,23.366129
2010-01-08,Friday,30.879999,30.24,30.280001,30.66,51197400.0,23.527271


In [78]:
# insert a boolean flag marking index dates that fall on the first day of a month
# DataFrame.insert raises ValueError when the column already exists, so the
# call is guarded to keep this cell safe to run more than once.
if 'Start of Month' not in stocks.columns:
    stocks.insert(1, 'Start of Month', stocks.index.is_month_start)
stocks.head()

Unnamed: 0_level_0,Day of Week,Start of Month,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,Monday,False,31.1,30.59,30.620001,30.950001,38409100.0,23.749811
2010-01-05,Tuesday,False,31.1,30.639999,30.85,30.959999,49749600.0,23.757488
2010-01-06,Wednesday,False,31.08,30.52,30.879999,30.77,58182400.0,23.611685
2010-01-07,Thursday,False,30.700001,30.190001,30.629999,30.450001,50559700.0,23.366129
2010-01-08,Friday,False,30.879999,30.24,30.280001,30.66,51197400.0,23.527271


# Time Delta

In [79]:
# time difference: subtracting one Timestamp from another gives a Timedelta
time_a = pd.Timestamp('2020-03-31 04:35:16PM')
time_b = pd.Timestamp('2022-03-31 07:47:23PM')

# later minus earlier, so the resulting Timedelta is positive
time_b - time_a

Timedelta('730 days 03:12:07')

In [85]:
# load the e-commerce orders, indexed by ID, with both date columns parsed as datetimes
shipping = pd.read_csv(
    'data/ecommerce.csv',
    index_col='ID',
    parse_dates=['order_date', 'delivery_date'],
)
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [89]:
# add the delivery duration: subtracting one datetime column from another yields a timedelta column
shipping['delivery_time'] = shipping['delivery_date'] - shipping['order_date']
shipping.head()

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [92]:
# filtering on delivery time: keep orders that took longer than 365 days
# The threshold is an explicit Timedelta rather than the bare string '365 days',
# so its type is visible and the comparison does not rely on implicit string parsing.
long_shipping = shipping['delivery_time'] > pd.Timedelta(days = 365)

# show the matching orders, shortest delivery time first
shipping[long_shipping].sort_values(by = 'delivery_time')

Unnamed: 0_level_0,order_date,delivery_date,delivery_time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
11,1996-07-12,1997-07-14,367 days
457,1991-06-17,1992-06-18,367 days
76,1997-05-26,1998-06-05,375 days
445,1993-02-11,1994-02-24,378 days
326,1998-05-12,1999-05-29,382 days
...,...,...,...
331,1990-09-18,1999-12-19,3379 days
130,1990-04-02,1999-08-16,3423 days
904,1990-02-13,1999-11-15,3562 days
314,1990-03-07,1999-12-25,3580 days


In [94]:
# shortest and longest delivery time (min and max work on timedelta values as well)
print(shipping['delivery_time'].min())
print(shipping['delivery_time'].max())

8 days 00:00:00
3583 days 00:00:00
