# Working with dates and times

## Dates in python
In Python, we use the `datetime` module from the standard library to handle dates and times. It already provides plenty of built-in functionality, ready to be used.

In [None]:
from datetime import date

In [None]:
two_dates = [date(2014, 5, 13), date(2025, 1, 20)]

In [None]:
two_dates

In [None]:
two_dates[0].year

In [None]:
two_dates[1].month

Weekdays in Python start from 0 (Monday is 0, Sunday is 6)

In [None]:
two_dates[1].weekday()

### Math with python dates

In [None]:
diff = two_dates[1] - two_dates[0]
diff

In [None]:
min(two_dates)

In [None]:
max(two_dates)

In [None]:
from datetime import timedelta

In [None]:
td = timedelta(days=29)

In [None]:
date(1999, 1, 1) + td

In [None]:
td.days

In [None]:
# Four dates in deliberately scrambled order
unsorted_dates = [
    date(2014, 5, 13),
    date(2025, 1, 20),
    date(2005, 1, 20),
    date(2015, 1, 20),
]

# date objects compare chronologically, so sorted() returns them oldest first
sorted(unsorted_dates)

## Parsing and formatting


In [None]:
d = date(2017, 11, 5)
print (d)

The default date format in `datetime` is ISO 8601, that is, YYYY-MM-DD

In [None]:
d.isoformat()

If you want a date to be formatted in a different way, use `strftime()`:

In [None]:
d.strftime('%Y')

In [None]:
d.strftime('%Y/%m/%d')

## Adding times

In [None]:
from datetime import datetime

In [None]:
dt = datetime(2024, 12, 25, 12, 34, 56)
dt

In [None]:
dt.replace(month=5)

In [None]:
# Date part only, with '-' between year, month and day (ISO 8601 style)
dt.strftime('%Y-%m-%d')

In [None]:
# Date and time, with '-' between the date fields and ':' between the time fields
dt.strftime('%Y-%m-%d %H:%M:%S')

In [None]:
dt.isoformat()

In [None]:
datetime.strptime('2024-12-25 12:34:56', '%Y-%m-%d %H:%M:%S')

In [None]:
# A Unix timestamp: seconds elapsed since 1970-01-01 00:00:00 UTC
timestamp = 1776765153.0
# Without a tz argument, fromtimestamp() returns a naive datetime expressed in
# the local time zone of the machine running this cell
print(datetime.fromtimestamp(timestamp))

## Working with durations

In [None]:
# Parse both endpoints with the same format string
start, end = (
    datetime.strptime(text, '%Y-%m-%d %H:%M:%S')
    for text in ('2021-12-25 12:34:56', '2022-04-25 08:32:17')
)

# Subtracting one datetime from another yields a timedelta
duration = end - start

duration

In [None]:
type(duration)

In [None]:
duration.total_seconds()

In [None]:
from datetime import timedelta
delta1 = timedelta(seconds=1)

In [None]:
delta2 = timedelta(days=1, seconds=1)

In [None]:
start + delta1

## Timezones

In [None]:
from datetime import datetime, timedelta, timezone

In [None]:
# Start and end of four bike trips, stored as naive datetimes (no tzinfo attached)
onebike_datetimes = [
    {
        'start': datetime(2017, 10, 3, 19, 24, 10),
        'end': datetime(2017, 10, 3, 19, 52, 8),
    },
    {
        'start': datetime(2017, 10, 3, 20, 17, 6),
        'end': datetime(2017, 10, 3, 20, 23, 52),
    },
    {
        'start': datetime(2017, 10, 3, 20, 45, 21),
        'end': datetime(2017, 10, 3, 20, 57, 10),
    },
    {
        'start': datetime(2017, 10, 4, 7, 4, 57),
        'end': datetime(2017, 10, 4, 7, 13, 31),
    },
]

In [None]:
ET = timezone(timedelta(hours=-5))

In [None]:
dt = datetime(2021, 12, 25, 12, 34, 56,  tzinfo= ET)

In [None]:
dt

In [None]:
pst = timezone(timedelta(hours=-8))

dt = datetime(2017, 10, 1, 15, 26, 26, tzinfo=pst)

print(dt.isoformat())

In [None]:
# Fixed UTC-4 offset
edt = timezone(timedelta(hours=-4))

# Attach the offset to the first two trips only; replace() keeps the
# wall-clock fields and just sets tzinfo
for trip in onebike_datetimes[:2]:
    trip.update({
        'start': trip['start'].replace(tzinfo=edt),
        'end': trip['end'].replace(tzinfo=edt),
    })

In [None]:
onebike_datetimes

## Timezone databases

In [None]:
from dateutil import tz

In [None]:
et = tz.gettz('America/New_York')

In [None]:
et

In [None]:
!cat /usr/share/zoneinfo/America/New_York

In [None]:
last = datetime(2017,12,30,15, 9, 3, tzinfo=et)

In [None]:
print(last)

In [None]:
# Time zone looked up by name through dateutil
sm = tz.gettz('Pacific/Apia')

# Naive datetime: no tzinfo attached
local = datetime(2017,12,30,15, 9, 3)

# astimezone() on a naive datetime presumes it is in the system's local time
# zone, so the converted value depends on the machine running this cell
notlocal = local.astimezone(sm)

# The first line has no UTC offset (naive); the second carries the offset of `sm`
print(local.isoformat())
print(notlocal.isoformat())

## Daylight Saving Time

In [None]:
spring_ahead_159am = datetime(2017, 3, 12, 1, 59, 59)
spring_ahead_159am.isoformat()

In [None]:
spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0)

(spring_ahead_3am - spring_ahead_159am).total_seconds()

In [None]:
# The issue is that there was no 2am that day in region X, because it was the day the clocks were moved forward from 2am to 3am
# How do we solve this?

In [None]:
EST = timezone(timedelta(hours=-5))
EDT = timezone(timedelta(hours=-4))

spring_ahead_159am = spring_ahead_159am.replace(tzinfo=EST)
spring_ahead_159am.isoformat()


In [None]:
spring_ahead_3am=spring_ahead_3am.replace(tzinfo=EDT)
spring_ahead_3am.isoformat()

In [None]:
(spring_ahead_3am - spring_ahead_159am).total_seconds()

In [None]:
# But this approach is impractical, since it would require us to keep track of when each clock change happens, and so on

In [None]:
# A tz database entry knows when the UTC offset changes, so no manual EST/EDT bookkeeping is needed
eastern = tz.gettz('America/New_York')

spring_ahead_159am = datetime(2017, 3, 12, 1, 59, 59, tzinfo=eastern)
spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0, tzinfo=eastern)

# Both datetimes share the same tzinfo object, and in that case Python ignores
# the tzinfo when subtracting (plain wall-clock arithmetic, 3601 s here).
# Converting both to UTC first gives the time that really elapsed.
(spring_ahead_3am.astimezone(timezone.utc)
 - spring_ahead_159am.astimezone(timezone.utc)).total_seconds()

In [None]:
# When the clocks are set back (e.g. from 3am to 2am), one wall-clock hour occurs twice; to restore the usual time delta, dateutil's tz.datetime_ambiguous() and tz.enfold() are of help

# Dates and times in Pandas

In [None]:
import pandas as pd

In [None]:
rides = pd.read_csv('../data/capital-onebike.csv')

In [None]:
rides.head()

In [None]:
rides['Start date']

In [None]:
rides.iloc[2]

In [None]:
rides.dtypes

In [None]:
rides = pd.read_csv('../data/capital-onebike.csv', parse_dates=['Start date', 'End date'])

In [None]:
rides.dtypes

In [None]:
rides['duration'] = rides['End date'] - rides['Start date']

In [None]:
rides.dtypes

In [None]:
rides.head()

In [None]:
rides['duration seconds'] = rides['duration'].dt.total_seconds()

In [None]:
rides.head()

In [None]:
rides.dtypes

In [None]:
rides.duration.mean()

In [None]:
# Duration of every ride as a timedelta column
rides['duration'] = rides['End date'] - rides['Start date']
# Dividing a timedelta by a timedelta gives a plain number: total ride time
# expressed as a fraction of a 91-day period
# NOTE(review): 91 is presumably the number of days covered by the data set - confirm
rides.duration.sum()/timedelta(days=91)

In [None]:
# Average ride duration in seconds, grouped by the month of 'Start date'
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME' - confirm against the installed pandas version

rides.resample('M', on = 'Start date')['duration seconds'].mean()

In [None]:
# There is no timezone associated to the datetimes in the dataframe, so...
rides['duration'].dt.total_seconds().min()

In [None]:
# Let's assign a time zone to the datetime columns
rides['Start date'].head().dt.tz_localize('America/New_York')

In [None]:
rides['Start date']=rides['Start date'].dt.tz_localize('America/New_York')

In [None]:
# ambiguous='NaT': wall-clock times that occur twice (when the clocks are set
# back at the end of DST) become NaT instead of raising an error
# NOTE(review): this expects 'Start date' to still be tz-naive, i.e. the plain
# tz_localize() call in the previous cell presumably failed on an ambiguous time - confirm
rides['Start date']=rides['Start date'].dt.tz_localize('America/New_York', ambiguous='NaT')

In [None]:
rides.iloc[129]

In [None]:
rides['Start date'].dt.year

In [None]:
rides['Start date'].dt.day_name()

In [None]:
rides['Start date'].shift(1).head(3)

In [None]:
rides['Start date'].head(3)