# Chapter 11: Working with dates and times

## 11.1 Introducing the Timestamp object

### 11.1.1 How Python works with datetimes

In [1]:
import datetime as dt
import pandas as pd

In [2]:
# The two lines below are equivalent
birthday = dt.date(1991, 4, 12)
birthday = dt.date(year=1991, month=4, day=12)
birthday

datetime.date(1991, 4, 12)

In [3]:
birthday.year

1991

In [4]:
birthday.month

4

In [5]:
birthday.day

12

In [6]:
# a date object is immutable
# birthday.month = 10 # AttributeError

In [7]:
# The two lines below are equivalent
alarm_clock = dt.time(6, 43, 25)
alarm_clock = dt.time(hour=6, minute=43, second=25)
alarm_clock

datetime.time(6, 43, 25)

In [8]:
dt.time() # midnight: 0 hours, 0 minutes, and 0 seconds

datetime.time(0, 0)

In [9]:
dt.time(hour=9, second=42) # 9:00:42 a.m

datetime.time(9, 0, 42)

In [10]:
# The time constructor uses a 24-hour clock
dt.time(hour=19, minute=43, second=22)

datetime.time(19, 43, 22)

In [11]:
alarm_clock.hour

6

In [12]:
alarm_clock.minute

43

In [13]:
alarm_clock.second

25

In [14]:
# The two lines below are equivalent
moon_landing = dt.datetime(1969, 7, 20, 22, 56, 20)
moon_landing = dt.datetime(
    year=1969,
    month=7,
    day=20,
    hour=22,
    minute=56,
    second=20
)
moon_landing

datetime.datetime(1969, 7, 20, 22, 56, 20)

In [15]:
# The year, month, and day parameters are required.
dt.datetime(2020, 1, 1)

datetime.datetime(2020, 1, 1, 0, 0)

In [16]:
dt.timedelta(
    weeks=8,
    days=6,
    hours=3,
    minutes=58,
    seconds=12
)

datetime.timedelta(days=62, seconds=14292)

### 11.1.2 How pandas works with datetimes

In [17]:
# The two lines below are equivalent
pd.Timestamp(1991, 4, 12)
pd.Timestamp(year=1991, month=4, day=12)

Timestamp('1991-04-12 00:00:00')

In [18]:
(pd.Timestamp(year=1991, month=4, day=12)
     == dt.datetime(year=1991, month=4, day=12))

True

In [19]:
(pd.Timestamp(year=1991, month=4, day=12, minute=2)
    == dt.datetime(year=1991, month=4, day=12, minute=1))

False

In [20]:
pd.Timestamp("2015-03-31")

Timestamp('2015-03-31 00:00:00')

In [21]:
pd.Timestamp("2015/03/31")

Timestamp('2015-03-31 00:00:00')

In [22]:
pd.Timestamp("03/31/2015")

Timestamp('2015-03-31 00:00:00')

In [23]:
# include the time in a variety of written formats:
pd.Timestamp("2021-03-08 08:35:15")

Timestamp('2021-03-08 08:35:15')

In [24]:
pd.Timestamp("2021-03-08 6:13:29 PM")

Timestamp('2021-03-08 18:13:29')

In [25]:
# the Timestamp constructor accepts Python's native date, time and datetime objects:
pd.Timestamp(dt.datetime(2000, 2, 3, 21, 35, 22))

Timestamp('2000-02-03 21:35:22')

In [26]:
my_time = pd.Timestamp(dt.datetime(2000, 2, 3, 21, 35, 22))
print(my_time.year)
print(my_time.month)
print(my_time.day)
print(my_time.hour)
print(my_time.minute)
print(my_time.second)

2000
2
3
21
35
22


## 11.2 Storing multiple timestamps in a DatetimeIndex

In [27]:
pd.Series([1, 2, 3]).index

RangeIndex(start=0, stop=3, step=1)

In [28]:
pd.Series([1, 2, 3], index=['A', 'B', 'C']).index

Index(['A', 'B', 'C'], dtype='object')

In [29]:
timestamps = [
    pd.Timestamp("2020-01-01"),
    pd.Timestamp("2020-02-01"),
    pd.Timestamp("2020-03-01"),
]

pd.Series([1, 2, 3], index=timestamps).index

DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'], dtype='datetime64[ns]', freq=None)

In [30]:
datetimes = [
    dt.datetime(2020, 1, 1),
    dt.datetime(2020, 2, 1),
    dt.datetime(2020, 3, 1),
]

pd.Series([1, 2, 3], index=datetimes).index

DatetimeIndex(['2020-01-01', '2020-02-01', '2020-03-01'], dtype='datetime64[ns]', freq=None)

In [31]:
# create a DatetimeIndex from scratch
string_dates = ["2018/01/02", "2016/04/12", "2009/09/07"]
pd.DatetimeIndex(data=string_dates)

DatetimeIndex(['2018-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [32]:
mixed_dates = [
    dt.date(2018, 1, 2),
    "2016/04/12",
    pd.Timestamp(2009, 9, 7)
]

dt_index = pd.DatetimeIndex(mixed_dates)
dt_index

DatetimeIndex(['2018-01-02', '2016-04-12', '2009-09-07'], dtype='datetime64[ns]', freq=None)

In [33]:
s = pd.Series(data=[100, 200, 300],  index=dt_index)
s

2018-01-02    100
2016-04-12    200
2009-09-07    300
dtype: int64

In [34]:
s.sort_index()

2009-09-07    300
2016-04-12    200
2018-01-02    100
dtype: int64

In [35]:
morning = pd.Timestamp("2020-01-01 11:22:22 AM")
evening = pd.Timestamp("2020-01-01 11:22:22 PM")

morning < evening

True

## 11.3 Converting coolumn or index values to datetimes

In [36]:
disney = pd.read_csv("data/ch11/disney.csv")
disney.head()

Unnamed: 0,Date,High,Low,Open,Close
0,1962-01-02,0.096026,0.092908,0.092908,0.092908
1,1962-01-03,0.094467,0.092908,0.092908,0.094155
2,1962-01-04,0.094467,0.093532,0.094155,0.094155
3,1962-01-05,0.094779,0.093844,0.094155,0.094467
4,1962-01-08,0.095714,0.092285,0.094467,0.094155


In [37]:
disney.dtypes

Date      object
High     float64
Low      float64
Open     float64
Close    float64
dtype: object

In [38]:
disney = pd.read_csv("data/ch11/disney.csv", parse_dates=["Date"])

In [39]:
string_dates = ["2015-01-01", "2016-02-02", "2017-03-03"]
dt_index = pd.to_datetime(string_dates)
dt_index

DatetimeIndex(['2015-01-01', '2016-02-02', '2017-03-03'], dtype='datetime64[ns]', freq=None)

In [40]:
pd.to_datetime(disney["Date"]).head()

0   1962-01-02
1   1962-01-03
2   1962-01-04
3   1962-01-05
4   1962-01-08
Name: Date, dtype: datetime64[ns]

In [41]:
disney["Date"] = pd.to_datetime(disney["Date"])

In [42]:
disney.dtypes

Date     datetime64[ns]
High            float64
Low             float64
Open            float64
Close           float64
dtype: object

## 11.4 Using the DatetimeProperties object

In [43]:
disney["Date"].dt

<pandas.core.indexes.accessors.DatetimeProperties object at 0x000002059E329850>

In [44]:
disney["Date"].head(3)

0   1962-01-02
1   1962-01-03
2   1962-01-04
Name: Date, dtype: datetime64[ns]

In [45]:
disney["Date"].dt.day.head(3)

0    2
1    3
2    4
Name: Date, dtype: int32

In [46]:
disney["Date"].dt.month.head(3)

0    1
1    1
2    1
Name: Date, dtype: int32

In [47]:
disney["Date"].dt.year.head(3)

0    1962
1    1962
2    1962
Name: Date, dtype: int32

In [48]:
disney["Date"].dt.dayofweek.head()

0    1
1    2
2    3
3    4
4    0
Name: Date, dtype: int32

In [49]:
disney["Date"].dt.day_name().head()

0      Tuesday
1    Wednesday
2     Thursday
3       Friday
4       Monday
Name: Date, dtype: object

In [50]:
disney["Day of Week"] = disney["Date"].dt.day_name()

In [51]:
group = disney.groupby("Day of Week")

In [52]:
group.mean(numeric_only=True)

Unnamed: 0_level_0,High,Low,Open,Close
Day of Week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Friday,23.767304,23.318898,23.552872,23.554498
Monday,23.377271,22.930606,23.161392,23.162543
Thursday,23.770234,23.288687,23.534561,23.540359
Tuesday,23.791234,23.335267,23.571755,23.562907
Wednesday,23.842743,23.355419,23.605618,23.609873


In [53]:
disney["Date"].dt.month_name().head()

0    January
1    January
2    January
3    January
4    January
Name: Date, dtype: object

In [54]:
disney["Date"].dt.is_quarter_start.tail()

14722    False
14723    False
14724    False
14725     True
14726    False
Name: Date, dtype: bool

In [55]:
disney[disney["Date"].dt.is_quarter_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
189,1962-10-01,0.064849,0.062355,0.063913,0.062355,Monday
314,1963-04-01,0.087989,0.086704,0.087025,0.086704,Monday
377,1963-07-01,0.096338,0.095053,0.096338,0.095696,Monday
441,1963-10-01,0.110467,0.107898,0.107898,0.110467,Tuesday
565,1964-04-01,0.116248,0.112394,0.112394,0.116248,Wednesday


In [56]:
disney[disney["Date"].dt.is_quarter_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
251,1962-12-31,0.074501,0.07129,0.074501,0.072253,Monday
440,1963-09-30,0.109825,0.105972,0.108541,0.107577,Monday
502,1963-12-31,0.101476,0.09698,0.097622,0.101476,Tuesday
564,1964-03-31,0.115605,0.112394,0.114963,0.112394,Tuesday
628,1964-06-30,0.101476,0.100191,0.101476,0.100834,Tuesday


In [57]:
disney[disney["Date"].dt.is_month_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
22,1962-02-01,0.096338,0.093532,0.093532,0.094779,Thursday
41,1962-03-01,0.095714,0.093532,0.093532,0.095714,Thursday
83,1962-05-01,0.087296,0.085426,0.085738,0.086673,Tuesday
105,1962-06-01,0.079814,0.077943,0.079814,0.079814,Friday
147,1962-08-01,0.06859,0.068278,0.06859,0.06859,Wednesday


In [58]:
disney[disney["Date"].dt.is_month_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
21,1962-01-31,0.093844,0.092908,0.093532,0.093532,Wednesday
40,1962-02-28,0.094779,0.09322,0.094155,0.09322,Wednesday
82,1962-04-30,0.087608,0.085738,0.087608,0.085738,Monday
104,1962-05-31,0.082308,0.079814,0.079814,0.079814,Thursday
146,1962-07-31,0.069214,0.068278,0.068278,0.06859,Tuesday


In [59]:
disney[disney["Date"].dt.is_year_start].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week


In [60]:
disney[disney["Date"].dt.is_year_end].head()

Unnamed: 0,Date,High,Low,Open,Close,Day of Week
251,1962-12-31,0.074501,0.07129,0.074501,0.072253,Monday
502,1963-12-31,0.101476,0.09698,0.097622,0.101476,Tuesday
755,1964-12-31,0.117853,0.11689,0.11689,0.11689,Thursday
1007,1965-12-31,0.154141,0.150929,0.153498,0.152214,Friday
1736,1968-12-31,0.439301,0.431594,0.434163,0.436732,Tuesday


## 11.5 Adding and subtracting durations of time

In [61]:
pd.DateOffset(years=3, months=4, days=5)

<DateOffset: days=5, months=4, years=3>

In [62]:
disney["Date"].head()

0   1962-01-02
1   1962-01-03
2   1962-01-04
3   1962-01-05
4   1962-01-08
Name: Date, dtype: datetime64[ns]

In [63]:
# add five days to each date in the Date column:
(disney["Date"] + pd.DateOffset(days=5)).head()

0   1962-01-07
1   1962-01-08
2   1962-01-09
3   1962-01-10
4   1962-01-13
Name: Date, dtype: datetime64[ns]

In [64]:
# subtract three days
(disney["Date"] - pd.DateOffset(days=3)).head()

0   1961-12-30
1   1961-12-31
2   1962-01-01
3   1962-01-02
4   1962-01-05
Name: Date, dtype: datetime64[ns]

In [65]:
(disney["Date"] + pd.DateOffset(days=10, hours=6)).head()

0   1962-01-12 06:00:00
1   1962-01-13 06:00:00
2   1962-01-14 06:00:00
3   1962-01-15 06:00:00
4   1962-01-18 06:00:00
Name: Date, dtype: datetime64[ns]

In [66]:
(
    disney["Date"]
    - pd.DateOffset(
        years=1, months=3, days=10, hours=6, minutes=3
    )
).head()

0   1960-09-21 17:57:00
1   1960-09-22 17:57:00
2   1960-09-23 17:57:00
3   1960-09-24 17:57:00
4   1960-09-27 17:57:00
Name: Date, dtype: datetime64[ns]

## 11.6 Date offsets

In [67]:
disney["Date"].tail()

14722   2020-06-26
14723   2020-06-29
14724   2020-06-30
14725   2020-07-01
14726   2020-07-02
Name: Date, dtype: datetime64[ns]

In [68]:
(disney["Date"] + pd.offsets.MonthEnd()).tail()

14722   2020-06-30
14723   2020-06-30
14724   2020-07-31
14725   2020-07-31
14726   2020-07-31
Name: Date, dtype: datetime64[ns]

In [69]:
(disney["Date"] - pd.offsets.MonthEnd()).tail()

14722   2020-05-31
14723   2020-05-31
14724   2020-05-31
14725   2020-06-30
14726   2020-06-30
Name: Date, dtype: datetime64[ns]

In [70]:
(disney["Date"] + pd.offsets.MonthBegin()).tail()

14722   2020-07-01
14723   2020-07-01
14724   2020-07-01
14725   2020-08-01
14726   2020-08-01
Name: Date, dtype: datetime64[ns]

In [71]:
(disney["Date"] - pd.offsets.MonthBegin()).tail()

14722   2020-06-01
14723   2020-06-01
14724   2020-06-01
14725   2020-06-01
14726   2020-07-01
Name: Date, dtype: datetime64[ns]

In [72]:
# example for BMonthEnd (Business Month End)
may_dates = ["2020-05-28", "2020-05-29", "2020-05-30"]
end_of_may = pd.Series(pd.to_datetime(may_dates))
end_of_may

0   2020-05-28
1   2020-05-29
2   2020-05-30
dtype: datetime64[ns]

In [73]:
end_of_may + pd.offsets.MonthEnd()

0   2020-05-31
1   2020-05-31
2   2020-05-31
dtype: datetime64[ns]

In [74]:
end_of_may + pd.offsets.BMonthEnd()

0   2020-05-29
1   2020-06-30
2   2020-06-30
dtype: datetime64[ns]

## 11.7 The Timedelta object

In [75]:
duration = pd.Timedelta(
    days=8,
    hours=7,
    minutes=6,
    seconds=5
)

duration

Timedelta('8 days 07:06:05')

In [76]:
duration = pd.to_timedelta("3 hours, 5 minutes, 12 seconds")
duration

Timedelta('0 days 03:05:12')

In [77]:
pd.to_timedelta(5, unit="hour")

Timedelta('0 days 05:00:00')

In [78]:
pd.to_timedelta([5, 10, 15], unit="day")

TimedeltaIndex(['5 days', '10 days', '15 days'], dtype='timedelta64[ns]', freq=None)

In [79]:
# Usually, Timedelta obejcts are derived rather than created from scratch.
pd.Timestamp("1999-02-05") - pd.Timestamp("1998-05-24")

Timedelta('257 days 00:00:00')

In [80]:
deliveries = pd.read_csv("data/ch11/deliveries.csv")
deliveries.head()

Unnamed: 0,order_date,delivery_date
0,5/24/98,2/5/99
1,4/22/92,3/6/98
2,2/10/91,8/26/92
3,7/21/92,11/20/97
4,9/2/93,6/10/98


In [81]:
deliveries.dtypes

order_date       object
delivery_date    object
dtype: object

In [82]:
deliveries["order_date"] = pd.to_datetime(
    deliveries["order_date"]
)

deliveries["delivery_date"] = pd.to_datetime(
    deliveries["delivery_date"]
)

  deliveries["order_date"] = pd.to_datetime(
  deliveries["delivery_date"] = pd.to_datetime(


In [83]:
# a more scalable solution is to iterate over the column names with a for loop.
for column in ["order_date", "delivery_date"]:
    deliveries[column] = pd.to_datetime(deliveries[column])

In [84]:
deliveries.dtypes

order_date       datetime64[ns]
delivery_date    datetime64[ns]
dtype: object

In [85]:
deliveries.head()

Unnamed: 0,order_date,delivery_date
0,1998-05-24,1999-02-05
1,1992-04-22,1998-03-06
2,1991-02-10,1992-08-26
3,1992-07-21,1997-11-20
4,1993-09-02,1998-06-10


In [86]:
(deliveries["delivery_date"] - deliveries["order_date"]).head()

0    257 days
1   2144 days
2    563 days
3   1948 days
4   1742 days
dtype: timedelta64[ns]

In [87]:
deliveries["duration"] = (
    deliveries["delivery_date"] - deliveries["order_date"]
)

deliveries.head()

Unnamed: 0,order_date,delivery_date,duration
0,1998-05-24,1999-02-05,257 days
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days
3,1992-07-21,1997-11-20,1948 days
4,1993-09-02,1998-06-10,1742 days


In [88]:
deliveries.dtypes

order_date        datetime64[ns]
delivery_date     datetime64[ns]
duration         timedelta64[ns]
dtype: object

In [89]:
(deliveries["delivery_date"] - deliveries["duration"]).head()

0   1998-05-24
1   1992-04-22
2   1991-02-10
3   1992-07-21
4   1993-09-02
dtype: datetime64[ns]

In [90]:
(deliveries["delivery_date"] + deliveries["duration"]).head()

0   1999-10-20
1   2004-01-18
2   1994-03-12
3   2003-03-22
4   2003-03-18
dtype: datetime64[ns]

In [91]:
deliveries.sort_values("duration")

Unnamed: 0,order_date,delivery_date,duration
454,1990-05-24,1990-06-01,8 days
294,1994-08-11,1994-08-20,9 days
10,1998-05-10,1998-05-19,9 days
499,1993-06-03,1993-06-13,10 days
143,1997-09-20,1997-10-06,16 days
...,...,...,...
152,1990-09-18,1999-12-19,3379 days
62,1990-04-02,1999-08-16,3423 days
458,1990-02-13,1999-11-15,3562 days
145,1990-03-07,1999-12-25,3580 days


In [92]:
# Mathematical methods are also available on Timedelta Series.
deliveries["duration"].max()

Timedelta('3583 days 00:00:00')

In [93]:
deliveries["duration"].min()

Timedelta('8 days 00:00:00')

In [94]:
deliveries["duration"].mean()

Timedelta('1217 days 22:53:53.532934128')

In [95]:
# Filter the DataFrame for packages that took more than a year to deliver.
# The two lines below are equivalent
(deliveries["duration"] > pd.Timedelta(days=365)).head()
(deliveries["duration"] > "365 days").head()

0    False
1     True
2     True
3     True
4     True
Name: duration, dtype: bool

In [96]:
deliveries[deliveries["duration"] > "365 days"].head()

Unnamed: 0,order_date,delivery_date,duration
1,1992-04-22,1998-03-06,2144 days
2,1991-02-10,1992-08-26,563 days
3,1992-07-21,1997-11-20,1948 days
4,1993-09-02,1998-06-10,1742 days
6,1990-01-25,1994-10-02,1711 days


In [97]:
# get as granular as needed whth the comparison duration.
long_time = (
    deliveries["duration"] > "2000 days, 8 hours, 4 minutes"
)

deliveries[long_time].head()

Unnamed: 0,order_date,delivery_date,duration
1,1992-04-22,1998-03-06,2144 days
7,1992-02-23,1998-12-30,2502 days
11,1992-10-17,1998-10-06,2180 days
12,1992-05-30,1999-08-15,2633 days
15,1990-01-20,1998-07-24,3107 days


## 11.8 Coding challenge

### 11.8.1 Problems

In [98]:
citi_bike = pd.read_csv("data/ch11/citibike.csv")
citi_bike.head()

Unnamed: 0,start_time,stop_time
0,2020-06-01 00:00:03.3720,2020-06-01 00:17:46.2080
1,2020-06-01 00:00:03.5530,2020-06-01 01:03:33.9360
2,2020-06-01 00:00:09.6140,2020-06-01 00:17:06.8330
3,2020-06-01 00:00:12.1780,2020-06-01 00:03:58.8640
4,2020-06-01 00:00:21.2550,2020-06-01 00:24:18.9650


In [99]:
citi_bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1882273 entries, 0 to 1882272
Data columns (total 2 columns):
 #   Column      Dtype 
---  ------      ----- 
 0   start_time  object
 1   stop_time   object
dtypes: object(2)
memory usage: 28.7+ MB


### 11.8.2 Solutions

In [100]:
# 1. Convert the start_time and stop_time columns to store datetime (Timestamp) values instead of strings.
for column in ["start_time", "stop_time"]:
    citi_bike[column] = pd.to_datetime(citi_bike[column])

In [101]:
citi_bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1882273 entries, 0 to 1882272
Data columns (total 2 columns):
 #   Column      Dtype         
---  ------      -----         
 0   start_time  datetime64[ns]
 1   stop_time   datetime64[ns]
dtypes: datetime64[ns](2)
memory usage: 28.7 MB


In [102]:
# 2.  Count the rides that oourred on each day of the week (Monday, Tuesday, and so on).
# Which weekday is the most popular for a bike ride? Use the start_time column as your starting point.
citi_bike["start_time"].dt.day_name().head()

0    Monday
1    Monday
2    Monday
3    Monday
4    Monday
Name: start_time, dtype: object

In [103]:
citi_bike["start_time"].dt.day_name().value_counts()

start_time
Tuesday      305833
Sunday       301482
Monday       292690
Saturday     285966
Friday       258479
Wednesday    222647
Thursday     215176
Name: count, dtype: int64

In [104]:
# 3. Count the rides per week for each week with the month.
# To do so, round each date in the start_time column to its previous or current Monday.
# Assume that each week starts on a Monday and ends on a Sunday.
# Thus, the first week of June would be Monday, June 1 through Sunday, June 7.
citi_bike["start_time"].dt.dayofweek.head()

0    0
1    0
2    0
3    0
4    0
Name: start_time, dtype: int32

In [105]:
days_away_from_monday = citi_bike["start_time"].dt.dayofweek

In [106]:
citi_bike["start_time"] - pd.to_timedelta(
    days_away_from_monday, unit="day"
)

0         2020-06-01 00:00:03.372
1         2020-06-01 00:00:03.553
2         2020-06-01 00:00:09.614
3         2020-06-01 00:00:12.178
4         2020-06-01 00:00:21.255
                    ...          
1882268   2020-06-29 23:59:41.116
1882269   2020-06-29 23:59:46.426
1882270   2020-06-29 23:59:47.477
1882271   2020-06-29 23:59:53.395
1882272   2020-06-29 23:59:53.901
Name: start_time, Length: 1882273, dtype: datetime64[ns]

In [107]:
dates_rounded_to_monday = citi_bike[
    "start_time"
] - pd.to_timedelta(days_away_from_monday, unit="day")

In [108]:
dates_rounded_to_monday.value_counts().head()

start_time
2020-06-08 18:30:13.661    3
2020-06-08 18:28:49.897    3
2020-06-15 20:10:51.645    3
2020-06-22 17:43:44.188    3
2020-06-01 16:27:02.575    3
Name: count, dtype: int64

In [109]:
dates_rounded_to_monday.dt.date.head()

0    2020-06-01
1    2020-06-01
2    2020-06-01
3    2020-06-01
4    2020-06-01
Name: start_time, dtype: object

In [110]:
dates_rounded_to_monday.dt.date.value_counts()

start_time
2020-06-15    481211
2020-06-08    471384
2020-06-22    465412
2020-06-01    337590
2020-06-29    126676
Name: count, dtype: int64

In [111]:
# 4. Calculate the duration of each ride, and save the results to a new duration column.
citi_bike["duration"] = (
    citi_bike["stop_time"] - citi_bike["start_time"]
)

citi_bike.head()

Unnamed: 0,start_time,stop_time,duration
0,2020-06-01 00:00:03.372,2020-06-01 00:17:46.208,0 days 00:17:42.836000
1,2020-06-01 00:00:03.553,2020-06-01 01:03:33.936,0 days 01:03:30.383000
2,2020-06-01 00:00:09.614,2020-06-01 00:17:06.833,0 days 00:16:57.219000
3,2020-06-01 00:00:12.178,2020-06-01 00:03:58.864,0 days 00:03:46.686000
4,2020-06-01 00:00:21.255,2020-06-01 00:24:18.965,0 days 00:23:57.710000


In [112]:
# 5. Find the average duration of a bike ride.
citi_bike["duration"].mean()

Timedelta('0 days 00:27:19.590506853')

In [113]:
# 6. Extract the five longest bike rides by duration from the data set.
citi_bike["duration"].sort_values(ascending=False).head()

50593    32 days 15:01:54.940000
98339    31 days 01:47:20.632000
52306    30 days 19:32:20.696000
15171    30 days 04:26:48.424000
149761   28 days 09:24:50.696000
Name: duration, dtype: timedelta64[ns]