# Working with dates and times

## Dates in python
In Python we use the `datetime` module from the standard library to handle dates and times. Plenty of functionality is already built in and ready to be used.

In [1]:
from datetime import date

In [8]:
# Two sample calendar dates that the following cells inspect and compare.
two_dates = [date(*ymd) for ymd in ((2014, 5, 13), (2025, 1, 20))]

In [9]:
two_dates

[datetime.date(2014, 5, 13), datetime.date(2025, 1, 20)]

In [10]:
two_dates[0].year

2014

In [11]:
two_dates[1].month

1

Weekdays in Python start from 0: `weekday()` returns 0 for Monday through 6 for Sunday.

In [12]:
two_dates[1].weekday()

0

### Math with python dates

In [16]:
# Subtracting one date from another yields a timedelta holding the number of days between them.
diff = two_dates[1] - two_dates[0]
diff

datetime.timedelta(days=3905)

In [17]:
min(two_dates)

datetime.date(2014, 5, 13)

In [18]:
max(two_dates)

datetime.date(2025, 1, 20)

In [19]:
from datetime import timedelta

In [20]:
td = timedelta(days=29)

In [21]:
date(1999, 1, 1) + td

datetime.date(1999, 1, 30)

In [22]:
td.days

29

In [23]:
# Dates compare chronologically, so sorted() returns them from earliest to latest.
unsorted_dates = [
    date(year, month, day)
    for year, month, day in [(2014, 5, 13), (2025, 1, 20), (2005, 1, 20), (2015, 1, 20)]
]

sorted(unsorted_dates)

[datetime.date(2005, 1, 20),
 datetime.date(2014, 5, 13),
 datetime.date(2015, 1, 20),
 datetime.date(2025, 1, 20)]

## Parsing and formatting


In [24]:
# print() shows the date's str() form.
d = date(year=2017, month=11, day=5)
print(d)

2017-11-05


The default date format in `datetime` is ISO 8601, that is, YYYY-MM-DD.

In [25]:
d.isoformat()

'2017-11-05'

If you want a date to be formatted in a different way, use `strftime()`:

In [28]:
d.strftime('%Y')

'2017'

In [29]:
d.strftime('%Y/%m/%d')

'2017/11/05'

## Adding times

In [30]:
from datetime import datetime

In [32]:
# A datetime carries a time of day (hour, minute, second) in addition to the calendar date.
dt = datetime(year=2024, month=12, day=25, hour=12, minute=34, second=56)
dt

datetime.datetime(2024, 12, 25, 12, 34, 56)

In [33]:
dt.replace(month=5)

datetime.datetime(2024, 5, 25, 12, 34, 56)

In [34]:
# Characters that are not % directives (here '=' and '-') are copied into the result as-is.
dt.strftime('%Y=%m-%d')

'2024=12-25'

In [35]:
dt.strftime('%Y=%m-%d %H:%M:%S')

'2024=12-25 12:34:56'

In [36]:
dt.isoformat()

'2024-12-25T12:34:56'

In [41]:
# strptime() goes the other way from strftime(): it parses a string into a datetime using the given format.
datetime.strptime('2024-12-25 12:34:56', '%Y-%m-%d %H:%M:%S')

datetime.datetime(2024, 12, 25, 12, 34, 56)

In [52]:
# fromtimestamp() converts a POSIX timestamp (seconds since 1970-01-01 UTC) to a datetime.
# With no tz argument the result is a naive datetime in this machine's local time,
# so the printed value depends on where the notebook is run.
timestamp = 1776765153.0
print(datetime.fromtimestamp(timestamp))

2026-04-21 11:52:33


## Working with durations

In [55]:
# Parse both endpoints with one shared format, then subtract them to get the elapsed time.
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
start, end = (
    datetime.strptime(text, TIMESTAMP_FORMAT)
    for text in ('2021-12-25 12:34:56', '2022-04-25 08:32:17')
)

duration = end - start

duration

datetime.timedelta(days=120, seconds=71841)

In [56]:
type(duration)

datetime.timedelta

In [57]:
duration.total_seconds()

10439841.0

In [58]:
from datetime import timedelta
delta1 = timedelta(seconds=1)

In [60]:
delta2 = timedelta(days=1, seconds=1)

In [61]:
start + delta1

datetime.datetime(2021, 12, 25, 12, 34, 57)

## Timezones

In [63]:
from datetime import datetime, timedelta, timezone

In [91]:
# A handful of bike trips, each stored as naive (timezone-less) start/end datetimes.
_trip_bounds = [
    ((2017, 10, 3, 19, 24, 10), (2017, 10, 3, 19, 52, 8)),
    ((2017, 10, 3, 20, 17, 6), (2017, 10, 3, 20, 23, 52)),
    ((2017, 10, 3, 20, 45, 21), (2017, 10, 3, 20, 57, 10)),
    ((2017, 10, 4, 7, 4, 57), (2017, 10, 4, 7, 13, 31)),
]
onebike_datetimes = [
    {'start': datetime(*begin), 'end': datetime(*finish)}
    for begin, finish in _trip_bounds
]

In [92]:
ET = timezone(timedelta(hours=-5))

In [93]:
dt = datetime(2021, 12, 25, 12, 34, 56,  tzinfo= ET)

In [94]:
dt

datetime.datetime(2021, 12, 25, 12, 34, 56, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=68400)))

In [95]:
# Fixed offset eight hours behind UTC, attached to the datetime at construction time.
pst = timezone(-timedelta(hours=8))

dt = datetime(year=2017, month=10, day=1, hour=15, minute=26, second=26, tzinfo=pst)

# isoformat() appends the UTC offset for timezone-aware values.
print(dt.isoformat())

2017-10-01T15:26:26-08:00


In [96]:
edt = timezone(timedelta(hours=-4))

# Attach the fixed UTC-4 offset to the first two trips only.
# replace() keeps the clock fields unchanged and just sets tzinfo.
for trip in onebike_datetimes[:2]:
    trip.update(
        start=trip['start'].replace(tzinfo=edt),
        end=trip['end'].replace(tzinfo=edt),
    )

In [97]:
onebike_datetimes

[{'start': datetime.datetime(2017, 10, 3, 19, 24, 10, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
  'end': datetime.datetime(2017, 10, 3, 19, 52, 8, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000)))},
 {'start': datetime.datetime(2017, 10, 3, 20, 17, 6, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))),
  'end': datetime.datetime(2017, 10, 3, 20, 23, 52, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000)))},
 {'start': datetime.datetime(2017, 10, 3, 20, 45, 21),
  'end': datetime.datetime(2017, 10, 3, 20, 57, 10)},
 {'start': datetime.datetime(2017, 10, 4, 7, 4, 57),
  'end': datetime.datetime(2017, 10, 4, 7, 13, 31)}]

## Timezone databases

In [98]:
from dateutil import tz

In [99]:
et = tz.gettz('America/New_York')

In [100]:
et

tzfile('/usr/share/zoneinfo/America/New_York')

In [101]:
# The time zone database file is binary, so cat prints unreadable bytes below.
!cat /usr/share/zoneinfo/America/New_York

@p��9`ˈ�p�#�p�`���u���@���U��� ���5��� ������������p��e��މpݩ�`޾kp߉d`��Mp�iF`�~/p�I(`�^p�W.��G-��7��'����������������ֶ��Ƶ���`��p�`�p��`�o�p�_y`�Oxp�?[`�/Zp�(w��<pY���X���;���:����������� ������w��p�``�pP�`@�p0�`�p	�`	���
�gp�������f��e�yH�iG�Y*�I)�9�)�")`��`�
p��`��p��`��p��` v �!��`"U��#j��$5��%J��&��'*s�'��p)
@p������9`����ˈ�p�����#�p�����`�������u�������@�������U������� �������5������� ������������������������p������e������މp����ݩ�`����޾kp����߉d`������Mp�����iF`�����~/p�����I(`�����^p�����W.������G-������7������'������������������������������������ֶ������Ƶ�������`������p�����`�����p������`�����o�p�����_y`�����Oxp�����?[`�����/Zp�����(w������<p����Y�������X�������;�������:�����������������������     ���    ���    w��    p�`    `�p    P�`    @�p    0�`    �p    	�`    	���    �����`�����e�p�������������j�p�����5�`�����S��������`�����3����������������������އ�������p�������i�������R�������K�������4������~-�������Qp�����gJ`�

In [102]:
last = datetime(2017,12,30,15, 9, 3, tzinfo=et)

In [104]:
print(last)

2017-12-30 15:09:03-05:00


In [105]:
sm = tz.gettz('Pacific/Apia')

# `local` is naive (no tzinfo). astimezone() on a naive datetime assumes the value is in
# this machine's local time zone, so the converted result depends on where this runs.
local = datetime(2017,12,30,15, 9, 3)

notlocal = local.astimezone(sm)

print(local.isoformat())
print(notlocal.isoformat())

2017-12-30T15:09:03
2017-12-31T04:09:03+14:00


## Daylight Saving Time

In [109]:
# One second before 2am on 2017-03-12, as a naive datetime (no time zone attached).
spring_ahead_159am = datetime(year=2017, month=3, day=12, hour=1, minute=59, second=59)
spring_ahead_159am.isoformat()

'2017-03-12T01:59:59'

In [111]:
spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0)

# Both datetimes are naive, so this is plain wall-clock arithmetic: 1 hour + 1 second.
(spring_ahead_3am - spring_ahead_159am).total_seconds()

3601.0

In [112]:
# The issue is that there was no 2am that day in the region concerned: it was the day when the clocks were moved forward from 2am to 3am.
# How to solve this?

In [113]:
# Two fixed offsets: UTC-5 for the time before the clock change, UTC-4 for the time after it.
EST = timezone(timedelta(hours=-5))
EDT = timezone(timedelta(hours=-4))

# replace() only attaches the offset; the clock fields stay the same.
spring_ahead_159am = spring_ahead_159am.replace(tzinfo=EST)
spring_ahead_159am.isoformat()


'2017-03-12T01:59:59-05:00'

In [114]:
spring_ahead_3am=spring_ahead_3am.replace(tzinfo=EDT)
spring_ahead_3am.isoformat()

'2017-03-12T03:00:00-04:00'

In [115]:
(spring_ahead_3am - spring_ahead_159am).total_seconds()

1.0

In [116]:
# But this approach is impractical, since it would require us to keep track of when each switching day is, and so on

In [118]:
eastern = tz.gettz('America/New_York')

spring_ahead_159am = datetime(2017, 3, 12, 1, 59, 59, tzinfo = eastern)
spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0, tzinfo = eastern)

# NOTE: both values share the same tzinfo object. In that case Python subtracts the
# wall-clock fields without applying the UTC offsets, hence 3601 s rather than 1 s.
# Converting both to UTC first (e.g. .astimezone(timezone.utc)) gives the elapsed time.
(spring_ahead_3am - spring_ahead_159am).total_seconds()

3601.0

In [119]:
# When the clocks are set back (e.g. from 3am to 2am), the same wall-clock time occurs twice; to restore the usual time delta, dateutil's tz.datetime_ambiguous() and tz.enfold() are of help

# Dates and times in Pandas

In [120]:
import pandas as pd

In [123]:
rides = pd.read_csv('../data/capital-onebike.csv')

In [124]:
rides.head()

Unnamed: 0,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,2017-10-01 15:23:25,2017-10-01 15:26:26,31038,Glebe Rd & 11th St N,31036,George Mason Dr & Wilson Blvd,W20529,Member
1,2017-10-01 15:42:57,2017-10-01 17:49:59,31036,George Mason Dr & Wilson Blvd,31036,George Mason Dr & Wilson Blvd,W20529,Casual
2,2017-10-02 06:37:10,2017-10-02 06:42:53,31036,George Mason Dr & Wilson Blvd,31037,Ballston Metro / N Stuart & 9th St N,W20529,Member
3,2017-10-02 08:56:45,2017-10-02 09:18:03,31037,Ballston Metro / N Stuart & 9th St N,31295,Potomac & M St NW,W20529,Member
4,2017-10-02 18:23:48,2017-10-02 18:45:05,31295,Potomac & M St NW,31230,Metro Center / 12th & G St NW,W20529,Member


In [125]:
rides['Start date']

0      2017-10-01 15:23:25
1      2017-10-01 15:42:57
2      2017-10-02 06:37:10
3      2017-10-02 08:56:45
4      2017-10-02 18:23:48
              ...         
285    2017-12-29 14:32:55
286    2017-12-29 15:08:26
287    2017-12-29 20:33:34
288    2017-12-30 13:51:03
289    2017-12-30 15:09:03
Name: Start date, Length: 290, dtype: object

In [126]:
rides.iloc[2]

Start date                               2017-10-02 06:37:10
End date                                 2017-10-02 06:42:53
Start station number                                   31036
Start station                  George Mason Dr & Wilson Blvd
End station number                                     31037
End station             Ballston Metro / N Stuart & 9th St N
Bike number                                           W20529
Member type                                           Member
Name: 2, dtype: object

In [127]:
rides.dtypes

Start date              object
End date                object
Start station number     int64
Start station           object
End station number       int64
End station             object
Bike number             object
Member type             object
dtype: object

In [128]:
# Re-read the file, this time asking pandas to parse the two timestamp columns as datetimes
# instead of leaving them as strings (object dtype).
rides = pd.read_csv('../data/capital-onebike.csv', parse_dates=['Start date', 'End date'])

In [129]:
rides.dtypes

Start date              datetime64[ns]
End date                datetime64[ns]
Start station number             int64
Start station                   object
End station number               int64
End station                     object
Bike number                     object
Member type                     object
dtype: object

In [130]:
# Subtracting two datetime columns element-wise gives a timedelta column.
rides['duration'] = rides['End date'] - rides['Start date']

In [131]:
rides.dtypes

Start date               datetime64[ns]
End date                 datetime64[ns]
Start station number              int64
Start station                    object
End station number                int64
End station                      object
Bike number                      object
Member type                      object
duration                timedelta64[ns]
dtype: object

In [132]:
rides.head()

Unnamed: 0,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type,duration
0,2017-10-01 15:23:25,2017-10-01 15:26:26,31038,Glebe Rd & 11th St N,31036,George Mason Dr & Wilson Blvd,W20529,Member,0 days 00:03:01
1,2017-10-01 15:42:57,2017-10-01 17:49:59,31036,George Mason Dr & Wilson Blvd,31036,George Mason Dr & Wilson Blvd,W20529,Casual,0 days 02:07:02
2,2017-10-02 06:37:10,2017-10-02 06:42:53,31036,George Mason Dr & Wilson Blvd,31037,Ballston Metro / N Stuart & 9th St N,W20529,Member,0 days 00:05:43
3,2017-10-02 08:56:45,2017-10-02 09:18:03,31037,Ballston Metro / N Stuart & 9th St N,31295,Potomac & M St NW,W20529,Member,0 days 00:21:18
4,2017-10-02 18:23:48,2017-10-02 18:45:05,31295,Potomac & M St NW,31230,Metro Center / 12th & G St NW,W20529,Member,0 days 00:21:17


In [141]:
# The .dt accessor exposes timedelta methods column-wise; total_seconds() gives each duration as a float.
rides['duration seconds'] = rides['duration'].dt.total_seconds()

In [142]:
rides.head()

Unnamed: 0,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type,duration,duration seconds
0,2017-10-01 15:23:25,2017-10-01 15:26:26,31038,Glebe Rd & 11th St N,31036,George Mason Dr & Wilson Blvd,W20529,Member,0 days 00:03:01,181.0
1,2017-10-01 15:42:57,2017-10-01 17:49:59,31036,George Mason Dr & Wilson Blvd,31036,George Mason Dr & Wilson Blvd,W20529,Casual,0 days 02:07:02,7622.0
2,2017-10-02 06:37:10,2017-10-02 06:42:53,31036,George Mason Dr & Wilson Blvd,31037,Ballston Metro / N Stuart & 9th St N,W20529,Member,0 days 00:05:43,343.0
3,2017-10-02 08:56:45,2017-10-02 09:18:03,31037,Ballston Metro / N Stuart & 9th St N,31295,Potomac & M St NW,W20529,Member,0 days 00:21:18,1278.0
4,2017-10-02 18:23:48,2017-10-02 18:45:05,31295,Potomac & M St NW,31230,Metro Center / 12th & G St NW,W20529,Member,0 days 00:21:17,1277.0


In [136]:
rides.dtypes

Start date              datetime64[ns]
End date                datetime64[ns]
Start station number             int64
Start station                   object
End station number               int64
End station                     object
Bike number                     object
Member type                     object
duration                       float64
dtype: object

In [138]:
# Mean ride length in seconds. Reads the float 'duration seconds' column so the result does
# not depend on 'duration' having been overwritten with floats in an earlier, out-of-order run.
rides['duration seconds'].mean()

np.float64(1178.9310344827586)

In [140]:
# Recompute duration as a timedelta column, then divide the total ride time by a 91-day
# timedelta: timedelta / timedelta yields a plain float (the fraction of that period spent riding).
rides['duration'] = rides['End date'] - rides['Start date']
rides.duration.sum()/timedelta(days=91)

0.04348417785917786

In [143]:
# average duration by month
# Groups rides by the calendar month of 'Start date' and averages their length in seconds.
# 'ME' (month end) is used because the older 'M' alias is deprecated as of pandas 2.2.

rides.resample('ME', on='Start date')['duration seconds'].mean()

  rides.resample('M', on = 'Start date')['duration seconds'].mean()


Start date
2017-10-31    1886.453704
2017-11-30     854.174757
2017-12-31     635.101266
Freq: ME, Name: duration seconds, dtype: float64

In [146]:
# There is no time zone associated with the datetimes in the dataframe, so a ride that
# spans a clock change can appear to end before it started: the minimum duration is negative.
rides['duration'].dt.total_seconds().min()

np.float64(-3346.0)

In [148]:
# Let's assign a time zone to the datetime columns (previewed here on the first rows only; nothing is stored yet)
rides['Start date'].head().dt.tz_localize('America/New_York')

0   2017-10-01 15:23:25-04:00
1   2017-10-01 15:42:57-04:00
2   2017-10-02 06:37:10-04:00
3   2017-10-02 08:56:45-04:00
4   2017-10-02 18:23:48-04:00
Name: Start date, dtype: datetime64[ns, America/New_York]

In [149]:
# Localizing the whole column raises: 2017-11-05 01:56:50 is ambiguous in this zone,
# and pandas cannot infer which UTC offset applies without the 'ambiguous' argument.
rides['Start date']=rides['Start date'].dt.tz_localize('America/New_York')

AmbiguousTimeError: Cannot infer dst time from 2017-11-05 01:56:50, try using the 'ambiguous' argument

In [150]:
# ambiguous='NaT' replaces ambiguous timestamps with NaT instead of raising.
# NOTE(review): 'End date' is left timezone-naive and 'duration' is not recomputed here.
rides['Start date']=rides['Start date'].dt.tz_localize('America/New_York', ambiguous='NaT')

In [151]:
rides.iloc[129]

Start date                              NaT
End date                2017-11-05 01:01:04
Start station number                  31615
Start station                 6th & H St NE
End station number                    31627
End station                   3rd & M St NE
Bike number                          W20529
Member type                          Member
duration                  -1 days +23:04:14
duration seconds                    -3346.0
Name: 129, dtype: object

In [152]:
# The column now contains a NaT, so the years come back as floats (2017.0) rather than integers.
rides['Start date'].dt.year

0      2017.0
1      2017.0
2      2017.0
3      2017.0
4      2017.0
        ...  
285    2017.0
286    2017.0
287    2017.0
288    2017.0
289    2017.0
Name: Start date, Length: 290, dtype: float64

In [154]:
rides['Start date'].dt.day_name()

0        Sunday
1        Sunday
2        Monday
3        Monday
4        Monday
         ...   
285      Friday
286      Friday
287      Friday
288    Saturday
289    Saturday
Name: Start date, Length: 290, dtype: object

In [155]:
# shift(1) moves every value down one row: row i now holds the previous ride's start, and row 0 becomes NaT.
rides['Start date'].shift(1).head(3)

0                         NaT
1   2017-10-01 15:23:25-04:00
2   2017-10-01 15:42:57-04:00
Name: Start date, dtype: datetime64[ns, America/New_York]

In [157]:
rides['Start date'].head(3)

0   2017-10-01 15:23:25-04:00
1   2017-10-01 15:42:57-04:00
2   2017-10-02 06:37:10-04:00
Name: Start date, dtype: datetime64[ns, America/New_York]