
# Working with date columns

## Revisiting Python dates

In [None]:
from datetime import date, time, datetime, timedelta 

# Fixed calendar values used by the examples that follow.
today = date(year=2020, month=9, day=30)
now = datetime(year=2020, month=9, day=30, hour=12, minute=33)
birthday = date(year=1989, month=3, day=21)

# Durations used in the date-arithmetic examples.
days_of_holiday = timedelta(days=14)
minutes_of_nap = timedelta(minutes=30)

In [None]:
# Adding a timedelta to a date gives a new date shifted by that duration.
today + days_of_holiday

In [None]:
# datetime + timedelta works the same way and keeps the time-of-day component.
now + minutes_of_nap

In [None]:
# Dates are ordered chronologically, so comparison operators work directly.
today > birthday

In [None]:
# Subtracting one date from another yields a timedelta (the elapsed duration).
today - birthday

In [None]:
# Format the date as text: zero-padded day / zero-padded month / two-digit year.
birthday.strftime('%d/%m/%y')

In [None]:
# Longer format: full weekday name, day of month, full month name, four-digit year.
birthday.strftime('%A, %d %B, %Y')

In [None]:
# strptime is the inverse of strftime: parse a string into a datetime using an
# explicit format (day/month/two-digit year here).
my_date = '30/09/20'
datetime.strptime(my_date, '%d/%m/%y')

## Date columns

In [None]:
# Print a summary of the DataFrame's columns and dtypes.
# NOTE(review): ledger_df (and pd, used further down) are not defined in the
# visible cells — presumably created/imported earlier in the notebook; confirm.
ledger_df.info()

In [None]:
# Pull out the first value of the Date column to inspect what object pandas stores.
ledger_df['Date'].iloc[0]

In [None]:
# Build the same calendar date once as a pandas Timestamp and once as a
# standard-library datetime, then compare the two for equality.
timestamp_date = pd.Timestamp(2020, 1, 1)
datetime_date = datetime(2020, 1, 1)

timestamp_date == datetime_date

In [None]:
# Show the two date-like columns side by side.
ledger_df[['Date', 'Deadline']]

### Converting strings to dates

In [None]:
# Parse the Deadline column into datetimes; no format is given, so pandas infers it.
# The result is only displayed here, not stored.
pd.to_datetime(ledger_df['Deadline'])

In [None]:
# Day/month-ambiguous strings: with no format given, pandas has to guess which
# field is the day and which is the month.
dates = pd.Series(['05/07/20', '05/08/20', '05/09/20'])

pd.to_datetime(dates)

In [None]:
# An explicit format removes the guesswork: day, then month, then two-digit year.
pd.to_datetime(dates, format='%d/%m/%y')

In [None]:
# Summary statistics of the parsed deadlines.
pd.to_datetime(ledger_df['Deadline']).describe()

In [None]:
# Persist the conversion by overwriting the Deadline column with the parsed values.
ledger_df['Deadline'] = pd.to_datetime(ledger_df['Deadline'])

In [None]:
# Re-check the column dtypes now that Deadline has been reassigned.
ledger_df.info()

In [None]:
# Display both date columns again after converting Deadline.
ledger_df[['Date', 'Deadline']]

## Pandas date methods

In [None]:
# The .dt accessor exposes datetime components element-wise; here, the year.
ledger_df['Deadline'].dt.year

In [None]:
# Weekday name of each deadline.
ledger_df['Deadline'].dt.day_name()

In [None]:
# Same call with a locale argument, requesting German weekday names.
# NOTE(review): presumably requires the de_DE locale to be installed on the
# host (some systems name it 'de_DE.utf8') — confirm.
ledger_df['Deadline'].dt.day_name('de_DE')

### Filtering date columns

In [None]:
# Keep rows whose Date is later than a date written as a plain string; pandas
# parses the string for the comparison.
# NOTE(review): '15/01/2020' is presumably read day-first (15 Jan 2020) because
# 15 cannot be a month — confirm with the pandas version in use.
ledger_df[ledger_df['Date'] > '15/01/2020']

In [None]:
# The same filter written with several different date-string spellings.
# Only the last expression's result is displayed by the notebook.
# NOTE(review): each string is presumably interpreted as 15 January 2020;
# format inference differs between pandas versions — confirm.
ledger_df[ledger_df['Date'] > '15th of January, 2020']
ledger_df[ledger_df['Date'] > '15/1/2020']
ledger_df[ledger_df['Date'] > '2020, Jan 15']
ledger_df[ledger_df['Date'] > '1-15-20']
ledger_df[ledger_df['Date'] > '2020/1/15']

In [None]:
# The stdlib module is imported under the alias `dt` here, so its classes are
# reached as dt.datetime / dt.timedelta.
import datetime as dt

# Bounds of the date range to keep.
start_date = dt.datetime(2020, 1, 15)
end_date = dt.datetime(2020, 1, 20)

# Widen each bound by one day so the strict > and < comparisons below also keep
# rows dated exactly on start_date or end_date.
# NOTE(review): if Date carries a time of day, this also admits timestamps from
# the day before start_date — confirm Date holds whole days only.
shift = dt.timedelta(days=1)

# Both conditions are element-wise; & combines them and needs the parentheses.
ledger_df[
    (ledger_df['Date'] > start_date - shift) & 
    (ledger_df['Date'] < end_date + shift)
]

In [None]:
# Combine three element-wise conditions on Deadline with &:
# the year is 2018 or 2019, the date falls in the fourth quarter,
# and the weekday is a Thursday.
ledger_df[
    (ledger_df['Deadline'].dt.year.isin([2018, 2019])) & 
    (ledger_df['Deadline'].dt.quarter == 4) & 
    (ledger_df['Deadline'].dt.day_name() == 'Thursday')
]

### Converting dates back to strings

In [None]:
# Render each deadline as text using a strftime format string
# (full weekday name, day, full month name, four-digit year).
ledger_df['Deadline'].dt.strftime("%A, %d %B, %Y")

## Pandas date arithmetic

### Timedeltas

In [None]:
# Display the Deadline column as a baseline for the arithmetic that follows.
ledger_df['Deadline']

In [None]:
# Adding a Timedelta shifts every value in the column, here by two days.
ledger_df['Deadline'] + pd.Timedelta(days=2)

In [None]:
# Subtracting a Timedelta moves every deadline earlier, here by four weeks.
ledger_df['Deadline'] - pd.Timedelta(weeks=4)

In [None]:
# Several units can be combined in a single Timedelta constructor call.
ledger_df['Deadline'] + pd.Timedelta(weeks=4, days=3, hours=2, minutes=1)

In [None]:
# Subtracting one datetime column from another gives a column of timedeltas.
ledger_df['Date'] - ledger_df['Deadline']

In [None]:
# First element of that difference, to inspect a single value.
(ledger_df['Date'] - ledger_df['Deadline']).iloc[0]

In [None]:
# Timedeltas can be added to each other to form a longer duration.
pd.Timedelta(days=2) + pd.Timedelta(weeks=1)

In [None]:
# Dividing by a one-hour Timedelta expresses each difference as a number of hours.
(ledger_df['Date'] - ledger_df['Deadline']) / pd.Timedelta(hours=1)

### Date offsets

In [None]:
# A DateOffset of two days; for this column it is equivalent to
#     ledger_df['Deadline'] + pd.Timedelta(days=2)
ledger_df['Deadline'] + pd.DateOffset(days=2)

In [None]:
# Unlike Timedelta, DateOffset accepts calendar units: add two years and five months.
ledger_df['Deadline'] + pd.DateOffset(years=2, months=5)

In [None]:
# Note the singular keyword: year= (not years=) replaces the year component of
# each date with 1999 instead of adding to it.
ledger_df['Deadline'] + pd.DateOffset(year=1999)

In [None]:
import pandas.tseries.offsets as offsets

In [None]:
# In an interactive session, type `offsets.` and press <TAB> to browse the
# available offset classes. The literal `offsets.<TAB>` is not valid Python and
# stops a top-to-bottom run with a SyntaxError, so list the public names instead.
available_offsets = [name for name in dir(offsets) if not name.startswith('_')]
available_offsets

In [None]:
# Fixed reference date for the offset examples; show which weekday it falls on.
reference_date = pd.Timestamp(2020, 9, 30)

reference_date.day_name()

In [None]:
# Move forward three business days and show the weekday of the result.
(reference_date + offsets.BusinessDay(3)).day_name()

In [None]:
# Subtract an Easter offset with n=3.
# NOTE(review): presumably steps back to the third Easter before the reference
# date — confirm against the displayed output.
reference_date - offsets.Easter(3)

In [None]:
# Offsets can also be added to a whole column: three business days after each deadline.
ledger_df['Deadline'] + offsets.BusinessDay(3) 

### Periods

In [None]:
# Convert each deadline to the quarterly period that contains it.
ledger_df['Deadline'].dt.to_period(freq='Q')

In [None]:
# 'Q-SEP' anchors the quarters to a year that ends in September.
ledger_df['Deadline'].dt.to_period(freq='Q-SEP')

In [None]:
# 'A' stands for annual: convert each deadline to the year period containing it.
# NOTE(review): recent pandas releases (2.2+) reportedly deprecate the 'A' alias
# in favour of 'Y' — confirm against the pandas version this notebook targets.
ledger_df['Deadline'].dt.to_period(freq='A')

In [None]:
# 'A-MAR' is an annual period anchored to a year that ends in March.
# NOTE(review): recent pandas releases (2.2+) reportedly deprecate the 'A-*'
# aliases in favour of 'Y-*' — confirm against the targeted pandas version.
ledger_df['Deadline'].dt.to_period(freq='A-MAR')

In [None]:
# Keep the September-anchored quarterly periods in a variable, then display them.
periods = ledger_df['Deadline'].dt.to_period(freq='Q-SEP')

periods

In [None]:
# Convert the periods back to timestamps. With no arguments, each period is
# represented by a single timestamp (the period's start by default).
periods.dt.to_timestamp()

## Overthinking: Timezones

In [None]:
# Attach the US/Pacific time zone to the deadlines; tz_localize labels the
# existing wall-clock values with a zone rather than shifting them.
us_pacific_deadlines = ledger_df['Deadline'].dt.tz_localize('US/Pacific')

us_pacific_deadlines

In [None]:
# tz_convert expresses the same instants in another zone (US/Eastern), so the
# displayed wall-clock times change.
us_pacific_deadlines.dt.tz_convert('US/Eastern')

In [None]:
# The same conversion, this time to Europe/Berlin.
us_pacific_deadlines.dt.tz_convert('Europe/Berlin')

In [None]:
# Passing None removes the time zone again, leaving tz-naive local times.
us_pacific_deadlines.dt.tz_localize(None)

In [None]:
# Compare a tz-aware series (Date localized to US/Pacific) with Deadline, which
# has not been given a time zone in the cells above.
# NOTE(review): pandas is expected to raise a TypeError for tz-aware vs tz-naive
# ordering comparisons — presumably this cell demonstrates that error on
# purpose; confirm.
ledger_df['Date'].dt.tz_localize('US/Pacific') > ledger_df['Deadline']