In [3]:
# Build date objects and read back their individual components
from datetime import date

two_hurricanes_dates = [
    date(year=2016, month=10, day=7),
    date(year=2017, month=6, day=21),
]

# Inspect the first date only; weekday() counts from Monday = 0
first_date = two_hurricanes_dates[0]
for component in (first_date.year, first_date.month, first_date.day, first_date.weekday()):
    print(component)

2016
10
7
4


In [7]:
# Date arithmetic: ordering, differences, and offsets
from datetime import date, timedelta

d1 = date(2017, 11, 5)
d2 = date(2017, 12, 4)
l = [d1, d2]

# Dates compare chronologically, so min() picks the earliest one
earliest = min(l)
print(earliest)

# Subtracting one date from another yields a timedelta
delta = d2 - d1
print(delta.days)

# Adding a timedelta to a date yields a new date
td = timedelta(days=29)
shifted = d1 + td
print(shifted)

2017-11-05
29
2017-12-04


In [8]:
# ISO 8601 formatting (YYYY-MM-DD)
from datetime import date

d = date(year=2017, month=11, day=5)

# Printing a date shows it in ISO 8601 form
print(d)

# isoformat() gives the same representation as a string; wrap it in a list
iso_dates = [d.isoformat()]
print(iso_dates)

2017-11-05
['2017-11-05']


In [10]:
# Any other text format: strftime
# (`date` must already be imported by an earlier cell)
d = date(year=2017, month=1, day=5)

# Year only
year_text = d.strftime('%Y')
print(year_text)

# Format: YYYY/MM/DD
slash_text = d.strftime('%Y/%m/%d')
print(slash_text)

2017
2017/01/05


In [3]:
# Dates and times together: datetime
from datetime import datetime

# Positional order: year, month, day, hour, minute, second, microsecond
dt = datetime(2017, 10, 1, 15, 23, 25, 500000)
print(dt)

# Truncate to the top of the hour by zeroing every smaller field
top_of_hour = dict(minute=0, second=0, microsecond=0)
dt_hr = dt.replace(**top_of_hour)
print(dt_hr)

2017-10-01 15:23:25.500000
2017-10-01 15:00:00


In [7]:
# Printing datetimes with strftime
# (`datetime` must already be imported by an earlier cell)
dt = datetime(year=2017, month=12, day=30, hour=15, minute=19, second=13)

# Date only, then date plus time of day
for fmt in ('%Y-%m-%d', '%Y-%m-%d %H:%M:%S'):
    print(dt.strftime(fmt))

2017-12-30
2017-12-30 15:19:13
<class 'datetime.datetime'>


In [8]:
# Parsing datetimes with strptime ("string parse time")
from datetime import datetime

# The format string has to describe the layout of the input text
raw_text = '12/30/2017 15:19:13'
layout = '%m/%d/%Y %H:%M:%S'
dt = datetime.strptime(raw_text, layout)
print(dt)

2017-12-30 15:19:13
<class 'datetime.datetime'>


In [12]:
# Parsing a Unix timestamp (seconds since the epoch) into a datetime
# Without a tz argument, fromtimestamp() returns a naive datetime in the
# machine's local time zone, so the printed value differs between machines.
ts = 1514665153.0
local_dt = datetime.fromtimestamp(ts)
print(local_dt)

2017-12-31 01:49:13


In [13]:
# Working with durations
# (`datetime` must already be imported by an earlier cell)
start = datetime(year=2017, month=10, day=8, hour=23, minute=46, second=47)
end = datetime(year=2017, month=10, day=9, hour=0, minute=10, second=57)

# Subtracting datetimes gives a timedelta; total_seconds() flattens it to a float
duration = end - start
elapsed_seconds = duration.total_seconds()
print(elapsed_seconds)

1450.0


In [18]:
# Creating timedeltas
# (`start` comes from the "Working with durations" cell)
from datetime import timedelta

delta1 = timedelta(seconds=1)
delta2 = timedelta(days=1, seconds=1)
# Negative timedeltas move backwards in time
delta3 = timedelta(weeks=-1)

# The reference moment first, then the same moment shifted by each offset
for shifted in (start, start + delta1, start + delta2, start + delta3):
    print(shifted)

2017-10-08 23:46:47
2017-10-08 23:46:48
2017-10-09 23:46:48
2017-10-01 23:46:47


In [5]:
# UTC offsets with fixed-offset time zones
from datetime import datetime, timedelta, timezone

# Two fixed offsets: five hours behind UTC, and five and a half hours ahead
ET = timezone(timedelta(hours=-5))
IST = timezone(timedelta(hours=5, minutes=30))

dt = datetime(2017, 12, 30, 15, 9, 3, tzinfo=ET)
print(dt)

# astimezone() keeps the same instant and re-expresses it in another zone
dt_ist = dt.astimezone(IST)
print(dt_ist)

# replace(tzinfo=...) only swaps the zone label: the clock reading is
# unchanged, so the result is a different instant
dt_relabelled = dt.replace(tzinfo=timezone.utc)
print(dt_relabelled)

# Converting to UTC adjusts the clock reading so the instant is preserved
dt_utc = dt.astimezone(timezone.utc)
print(dt_utc)

2017-12-30 15:09:03-05:00
2017-12-31 01:39:03+05:30
2017-12-30 15:09:03+00:00
2017-12-30 20:09:03+00:00


In [9]:
# Time zone database (via dateutil)
from datetime import datetime
from dateutil import tz

# Zone names follow the 'Continent/City' pattern; this one is Eastern time
et = tz.gettz('America/New_York')

first = datetime(2017, 10, 1, 15, 23, 25, tzinfo=et)
last = datetime(2017, 12, 30, 15, 9, 3, tzinfo=et)

# Same zone object for both, yet the UTC offset follows the date
print(last)
print(first)

2017-12-30 15:09:03-05:00
2017-10-01 15:23:25-04:00


In [16]:
# Start of Daylight Saving Time
# (`datetime` must already be imported by an earlier cell)
from datetime import timezone, timedelta

# Naive clock readings on either side of the spring-forward change
spring_ahead_159am = datetime(2017, 3, 12, 1, 59, 59)
spring_ahead_159am.isoformat()
spring_ahead_3am = datetime(2017, 3, 12, 3, 0, 0)
spring_ahead_3am.isoformat()

# Without time zones the gap is measured purely on the clock face
naive_gap = spring_ahead_3am - spring_ahead_159am
naive_gap.total_seconds()

# Fixed offsets: five hours behind UTC before the change, four hours after
EST = timezone(timedelta(hours=-5))
EDT = timezone(timedelta(hours=-4))

# Attach the offset that applies to each reading
spring_ahead_159am = spring_ahead_159am.replace(tzinfo=EST)
spring_ahead_159am.isoformat()
spring_ahead_3am = spring_ahead_3am.replace(tzinfo=EDT)
spring_ahead_3am.isoformat()

# With the offsets attached the subtraction accounts for them.
# .seconds is only the seconds component of the timedelta; it equals the
# full gap here because the gap is positive and shorter than a day.
aware_gap = spring_ahead_3am - spring_ahead_159am
aware_gap.seconds

1

In [None]:
# The spring-forward readings again, with the zone taken from the tz database
# (`datetime` must already be imported by an earlier cell)
from dateutil import tz

eastern = tz.gettz('America/New_York')

spring_ahead_159am = datetime(year=2017, month=3, day=12,
                              hour=1, minute=59, second=59, tzinfo=eastern)
spring_ahead_3am = datetime(year=2017, month=3, day=12,
                            hour=3, minute=0, second=0, tzinfo=eastern)

In [20]:
# Ending Daylight Saving Time
# Imported here so the cell does not depend on names left over from other cells
from datetime import datetime
from dateutil import tz

# Same zone name as the other cells in this notebook
# ('US/Eastern' is a legacy alias for it)
eastern = tz.gettz('America/New_York')

# 2017-11-05 01:00:00 -- this clock reading occurs twice on this date
first_1am = datetime(2017, 11, 5, 1, 0, 0, tzinfo=eastern)
tz.datetime_ambiguous(first_1am)

# 2017-11-05 01:00:00 again -- enfold() marks it as the second occurrence
second_1am = datetime(2017, 11, 5, 1, 0, 0, tzinfo=eastern)
second_1am = tz.enfold(second_1am)

# Both values share one tzinfo object, so this subtraction compares the clock
# readings only. Use total_seconds() with later-minus-earlier operands:
# .seconds is just one component of the timedelta and would report 82800
# instead of -3600 for a negative difference.
(second_1am - first_1am).total_seconds()

# Convert to UTC, where the two moments are unambiguous
first_1am = first_1am.astimezone(tz.UTC)
second_1am = second_1am.astimezone(tz.UTC)
(second_1am - first_1am).total_seconds()

3600.0

In [None]:
# A simple Pandas example
import pandas as pd

csv_path = 'capital-onebike.csv'
datetime_columns = ['Start date', 'End date']

rides = pd.read_csv(csv_path)
rides.iloc[2]   # Row at position 2, i.e. the third row (iloc is zero-based)

# Reload, asking read_csv to parse the two timestamp columns as datetimes
rides = pd.read_csv(csv_path, parse_dates=datetime_columns)

# Alternative: convert a column after loading, with an explicit format
rides['Start date'] = pd.to_datetime(rides['Start date'], format='%Y-%m-%d %H:%M:%S')

# Subtracting datetime columns gives a timedelta column.
# The columns are still time-zone naive here; they are localized in a later cell.
rides['Duration'] = rides['End date'] - rides['Start date']
print(rides['Duration'].head(5))

# Same durations expressed as seconds
(
    rides['Duration']
    .dt.total_seconds()
    .head(5)
)


In [None]:
# Summarizing data in Pandas
# (`rides` and `timedelta` come from earlier cells)
durations = rides['Duration']
member_types = rides['Member type']

# Average time out of the dock
durations.mean()

# Total time out of the dock
total_out = durations.sum()
total_out

# Share of a 91-day window spent out of the dock (a fraction, not a percentage)
total_out / timedelta(days=91)

# Number of rides per member type
member_types.value_counts()

# Share of rides per member type
member_types.value_counts() / len(rides)

# Add duration (in seconds) column
rides['Duration seconds'] = durations.dt.total_seconds()

# Group once, after the new column exists, and reuse the grouping below
by_member = rides.groupby('Member type')

# Average duration per member type
by_member['Duration seconds'].mean()

# Average duration per month, bucketed on the start date
rides.resample('M', on='Start date')['Duration seconds'].mean()

# Size per group
by_member.size()

# First ride per group
by_member.first()

In [None]:
# Timezones in Pandas
# (`rides` comes from earlier cells)
zone = 'America/New_York'

# Preview: localize just the first three start times
(
    rides['Start date']
    .head(3)
    .dt.tz_localize(zone)
)

# Handle ambiguous datetimes: ambiguous='NaT' turns them into NaT
for column in ('Start date', 'End date'):
    rides[column] = rides[column].dt.tz_localize(zone, ambiguous='NaT')

# Re-calculate duration; rows with a NaT timestamp get a NaT duration
rides['Duration'] = rides['End date'] - rides['Start date']

# Find the minimum again
rides['Duration'].dt.total_seconds().min()

first_three_starts = rides['Start date'].head(3)

# Year of first three rows
first_three_starts.dt.year

# See weekdays for first three rides
first_three_starts.dt.day_name()

# Shift the values down by one row; the first row is padded with NaT
rides['End date'].shift(1).head(3)