In [1]:
%%html
<style>
table {float:left}
</style>

# Python Date/Time utilities

* [dateutil parser](https://dateutil.readthedocs.io/en/stable/parser.html)

> generic date/time string parser which is able to parse most known formats to represent a date and/or time.


In [83]:
import os
import sys
import logging
import datetime
import calendar
import inspect
from typing import (
    List,
    Dict,
    Tuple,
    Any,
    Optional,
    Callable,
    Iterable,
)

import dateutil 
from dateutil import relativedelta
from dateutil.tz import tzutc
import datefinder
import holidays
import numpy as np

In [3]:
sys.path.append(os.getcwd())
sys.path.append(f"{os.getcwd()}/../lib")

In [73]:
%load_ext autoreload
%autoreload 2

from util_datetime import (
    get_datetime_components,
    parse_date_string_as_string,
    get_dates_from_string,
    parse_datetime_string,
    get_epoch_from_datetime,
    get_epoch_from_string,
    get_seconds_between_datetimes,
    get_datetime_after_duration,
    get_elapsed_time,
    get_holidays,
    get_cyclic_time_of_day,
    get_cyclic_day_of_week,
    get_cyclic_month_of_year,
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
logger = logging.getLogger(__name__)

---
# Instantiation

Creating datetime instance

In [6]:
datetime.datetime.now()

datetime.datetime(2023, 1, 11, 16, 35, 35, 49430)

In [7]:
datetime.datetime(year=2020, month=10, day=11, hour=4, minute=10, second=52, microsecond=33)

datetime.datetime(2020, 10, 11, 4, 10, 52, 33)

---
# Datatime components

In [8]:
print(inspect.getsource(get_datetime_components))

def get_datetime_components(date_time: datetime) -> dict:
    """Extract date/time components as dictionary
    Args:
        date_time: datetime instane to extract components
    """
    return {
        "year": date_time.year,
        "month": date_time.month,
        "day": date_time.day,
        "weekday": date_time.weekday(),
        "hour": date_time.hour,
        "minute": date_time.minute,
        "second": date_time.second,
        "microsecond": date_time.microsecond,
        "tzinfo": date_time.tzinfo,
    }



In [9]:
get_datetime_components(datetime.datetime.now())

{'year': 2023,
 'month': 1,
 'day': 11,
 'weekday': 2,
 'hour': 16,
 'minute': 35,
 'second': 35,
 'microsecond': 75011,
 'tzinfo': None}

---
# Parse date

In [10]:
print(inspect.getsource(parse_date_string_as_string))

def parse_date_string_as_string(date_str: str, formats=None, year_first=False) -> datetime.datetime:
    day_first_date_formats = (
        # --------------------------------------------------------------------------------
        # 21/01/01 as 21MAY2021
        # --------------------------------------------------------------------------------
        '%d/%m/%y',
        '%d-%m-%y',
        '%d %m %y',
        # --------------------------------------------------------------------------------
        # 21/01/2001 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%d/%m/%Y',
        '%d-%m-%Y',
        '%d %m %Y',
        # --------------------------------------------------------------------------------
        # 21/Jan/01 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%d/%b/%y',
        '%d-%b-%y',
        '%d %b %y',
        # ---------------------------------

In [11]:
fmt = '%d/%m/%y'
parse_date_string_as_string(date_str="21 JAN 22", formats=None, year_first=True)
# parse_date_string_as_string(date_str="8:16:00 AM", formats=None)

datetime.datetime(2021, 1, 22, 0, 0)

## datefinder

* [datefinder - extract dates from text](https://datefinder.readthedocs.io/en/latest/)

> A python module for locating dates inside text. Use this package to extract all sorts of date like strings from a document and turn them into datetime objects.

In [12]:
print(inspect.getsource(get_dates_from_string))

def get_dates_from_string(
        text: str,
        strict: bool = True,
) -> List[datetime.datetime]:
    """Extract datet from string
    Args:
        text: string to find a date
        strict: require complete date. July 2016 of Monday will not return datetimes.
    Returns: datetime
    Raises: RuntimeError is no date
    """
    dates = list(datefinder.find_dates(text, source=False, index=False, strict=strict, base_date=None))
    if len(dates) <= 0:
        raise RuntimeError(f"invalid text with no date: [{text}]")

    return dates



In [13]:
get_dates_from_string(text=" AM")

RuntimeError: invalid text with no date: [ AM]

In [14]:
get_dates_from_string(text="8:16:00 AM")

RuntimeError: invalid text with no date: [8:16:00 AM]

In [15]:
get_dates_from_string(text="21 JAN 22")

[datetime.datetime(2022, 1, 21, 0, 0)]

In [16]:
get_dates_from_string(text="2022-09-01")

[datetime.datetime(2022, 9, 1, 0, 0)]

In [17]:
get_dates_from_string(text="2022 09 01")

[datetime.datetime(2022, 9, 1, 0, 0)]

In [18]:
get_dates_from_string(text="Aug 28 1999 12:00AM")

[datetime.datetime(1999, 8, 28, 0, 0)]

In [19]:
get_dates_from_string(text="2022 09 01")

[datetime.datetime(2022, 9, 1, 0, 0)]

# Parse date/time

* [dateutil - powerful extensions to datetime](https://dateutil.readthedocs.io/en/stable/)

```
pip install python-dateutil
```

In [20]:
print(inspect.getsource(parse_datetime_string))

def parse_datetime_string(
        date_time_str: str,
        dayfirst: bool = False,
        yearfirst: bool = False,
) -> datetime.datetime:
    """
    Args:
        date_time_str: date/time string to parse
        dayfirst: regard the first digits as day e.g 01/05/09 as 01/May/2009
        yearfirst: regard the first digits as year e.g. 01/05/09 as 2001/May/09
    Returns: python datetime instance
    Raises:
        dateutil.parser.ParserError: Failure to parse a datetime string.
    """
    try:
        return dateutil.parser.parse(date_time_str, dayfirst=dayfirst, yearfirst=yearfirst)
    except dateutil.parser.ParserError as e:
        raise RuntimeError(f"parse_datetime_string() invalid date/time string [{date_time_str}]") from e
        raise RuntimeError(f"parse_datetime_string() invalid timezone in [{date_time_str}]") from e



In [21]:
parse_datetime_string("Aug 28 1999 00:01AM")

datetime.datetime(1999, 8, 28, 0, 1)

In [22]:
parse_datetime_string("2021-10-31")

datetime.datetime(2021, 10, 31, 0, 0)

In [23]:
parse_datetime_string("21-10-31", yearfirst=True) # datetime.datetime(2021, 10, 31, 0, 0)

datetime.datetime(2021, 10, 31, 0, 0)

In [24]:
parse_datetime_string("01/05/09", yearfirst=True)

datetime.datetime(2001, 5, 9, 0, 0)

In [25]:
parse_datetime_string("01/05/09", dayfirst=True)

datetime.datetime(2009, 5, 1, 0, 0)

In [26]:
parse_datetime_string("8:16:00 AM")

datetime.datetime(2023, 1, 11, 8, 16)

---

# Format string

* [strftime() and strptime() Format Codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)

> The following is a list of all the format codes that the 1989 C standard requires, and these work on all platforms with a standard C implementation.



| Directive | Meaning                                                                                                                                                                          | Example                                                                      |
|-----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------|
| %a        | Weekday as locale’s abbreviated name.                                                                                                                                            | Sun, Mon, …, Sat (en_US); So, Mo, …, Sa (de_DE)                              |
| %A        | Weekday as locale’s full name.                                                                                                                                                   | Sunday, Monday, …, Saturday (en_US); Sonntag, Montag, …, Samstag (de_DE)     |
| %w        | Weekday as a decimal number, where 0 is Sunday and 6 is Saturday.                                                                                                                | 0, 1, …, 6                                                                   |
| %d        | Day of the month as a zero-padded decimal number.                                                                                                                                | 01, 02, …, 31                                                                |
| %b        | Month as locale’s abbreviated name.                                                                                                                                              | Jan, Feb, …, Dec (en_US); Jan, Feb, …, Dez (de_DE)                           |
| %B        | Month as locale’s full name.                                                                                                                                                     | January, February, …, December (en_US); Januar, Februar, …, Dezember (de_DE) |
| %m        | Month as a zero-padded decimal number.                                                                                                                                           | 01, 02, …, 12                                                                |
| %y        | Year without century as a zero-padded decimal number.                                                                                                                            | 00, 01, …, 99                                                                |
| %Y        | Year with century as a decimal number.                                                                                                                                           | 0001, 0002, …, 2013, 2014, …, 9998, 9999                                     |
| %H        | Hour (24-hour clock) as a zero-padded decimal number.                                                                                                                            | 00, 01, …, 23                                                                |
| %I        | Hour (12-hour clock) as a zero-padded decimal number.                                                                                                                            | 01, 02, …, 12                                                                |
| %p        | Locale’s equivalent of either AM or PM.                                                                                                                                          | AM, PM (en_US); am, pm (de_DE)                                               |
| %M        | Minute as a zero-padded decimal number.                                                                                                                                          | 00, 01, …, 59                                                                |
| %S        | Second as a zero-padded decimal number.                                                                                                                                          | 00, 01, …, 59                                                                |
| %f        | Microsecond as a decimal number, zero-padded to 6 digits.                                                                                                                        | 000000, 000001, …, 999999                                                    |
| %z        | UTC offset in the form ±HHMM[SS[.ffffff]] (empty string if the object is naive).                                                                                                 | (empty), +0000, -0400, +1030, +063415, -030712.345216                        |
| %Z        | Time zone name (empty string if the object is naive).                                                                                                                            | (empty), UTC, GMT                                                            |
| %j        | Day of the year as a zero-padded decimal number.                                                                                                                                 | 001, 002, …, 366                                                             |
| %U        | Week number of the year (Sunday as the first day of the week) as a zero-padded decimal number. All days in a new year preceding the first Sunday are considered to be in week 0. | 00, 01, …, 53                                                                |
| %W        | Week number of the year (Monday as the first day of the week) as a zero-padded decimal number. All days in a new year preceding the first Monday are considered to be in week 0. | 00, 01, …, 53                                                                |
| %c        | Locale’s appropriate date and time representation.                                                                                                                               | Tue Aug 16 21:30:00 1988 (en_US); Di 16 Aug 21:30:00 1988 (de_DE)            |
| %x        | Locale’s appropriate date representation.                                                                                                                                        | 08/16/88 (None); 08/16/1988 (en_US); 16.08.1988 (de_DE)                      |
| %X        | Locale’s appropriate time representation.                                                                                                                                        | 21:30:00 (en_US); 21:30:00 (de_DE)                                           |
| %%        | A literal '%' character.                                                                                                                                                         | %                                                                            |


In [27]:
datetime.datetime.now().strftime("%Y%b%d%H%M").upper()

'2023JAN111635'

## Convert date/time string into ISO 8601 without TZ

In [28]:
def str_to_iso8601(literal) -> int:
    """Convert literal date/time string into ISO 8601 format
    e.g. "01 Mar 2018 11:00:00 GMT+1000" into yyyy-MM-dd'T'HH:mm:ssZ or yyyy-MM-dd'T'HH:mm:ss.SSSZ
    """
    literal = ' '.join(literal.split())
    dt_gmt = datetime.datetime.strptime(literal, '%d %b %Y %H:%M:%S %Z%z')

    seconds_since_epoch = int(dt_gmt.timestamp())
    dt_utc = datetime.datetime.utcfromtimestamp(seconds_since_epoch)
    return dt_utc.replace(tzinfo=None).isoformat()


print(f'{str_to_iso8601("01 Mar 2018 11:00:00 GMT+1000")}Z')

2018-03-01T01:00:00Z


---
# Epoch
## Convert date/time string into epoch

Check the result with https://www.epochconverter.com/

In [35]:
print(inspect.getsource(get_epoch_from_string))

def get_epoch_from_string(literal, format='%d %b %Y %H:%M:%S %Z%z') -> int:
    """Convert literal date/time string into epoch time in seconds
    e.g. "01 Mar 2018 11:00:00 GMT+1000" into 1519866000
    Args:
        literal: string to parse date/time
        format: date/time format of the string
    Returns: epoch (in seconds) as int
    """
    literal = ' '.join(literal.split())
    dt = datetime.datetime.strptime(literal, format)
    return int(dt.timestamp())  # sec



In [36]:
get_epoch_from_string("01 Mar 2018 11:00:00 GMT+1000")

1519866000

## Convert datetime to epoch
It looks Python date/time function may have bugs.

* [Convert python datetime to epoch with strftime](https://stackoverflow.com/questions/11743019/convert-python-datetime-to-epoch-with-strftime)

> 1st of April 2012 UTC from epoch is 1333238400 but this above returns 1333234800 which is different by 1 hour.
> ```
> >>>datetime.datetime.datetime(2012,04,01,0,0).strftime('%s')
'1333234800'
> ```

> don't use .strftime("%s"): it is not supported, it is not portable, it may silently produce a wrong result for an aware datetime object, it fails if input is in UTC (as in the question) but local timezone is not UTC – 
jfs

> I'm on Python 3.6 and datetime.datetime.datetime(2012,4,1,0,0).timestamp() does not give the correct epoch time. 


In [37]:
print(inspect.getsource(get_epoch_from_datetime))

def get_epoch_from_datetime(date_time: datetime.datetime) -> int:
    """Convert literal datetime into epoch time in seconds
    Args:
        date_time: string to parse date/time
    Returns: epoch (in seconds) as int
    """
    return calendar.timegm(date_time.timetuple())



In [38]:
get_epoch_from_datetime(datetime.datetime(2012, 4, 1, 0, 0))

1333238400

---
# Duratinon between date/time

In [39]:
print(inspect.getsource(get_seconds_between_datetimes))

def get_seconds_between_datetimes(
        from_datetime: datetime.datetime,
        to_datetime: datetime.datetime
) -> int:
    """Seconds between two datetime instances
    Args:
        from_datetime: from
        to_datetime: to
    Returns: seconds as int
    """
    start = calendar.timegm(to_datetime.timetuple())
    end = calendar.timegm(from_datetime.timetuple())

    return int((start - end) / (3600 * 24))



* [dateutil.relativedelta.relativedelta](https://dateutil.readthedocs.io/en/stable/relativedelta.html)

In [40]:
get_seconds_between_datetimes(
    from_datetime=datetime.datetime(2012, 9, 1, 0, 0),
    to_datetime=datetime.datetime(2012, 12, 1, 0, 0)
)

91

In [41]:
print(inspect.getsource(get_datetime_after_duration))

def get_datetime_after_duration(
        start_datetime: datetime.datetime,
        years: int = 0,
        months: int = 0,
        weeks: int = 0,
        days: int = 0,
        hours: int = 0,
        minutes: int = 0,
        seconds: int = 0,
        microseconds: int = 0
) -> datetime:
    """Date/time after duration from the start point
    Args:
        start_datetime: start point datetime
    Returns: datetime instance
    :param microseconds:
    :param seconds:
    :param minutes:
    :param hours:
    :param days:
    :param weeks:
    :param start_datetime:
    :param months:
    :param years:
    """
    end_datetime = start_datetime + dateutil.relativedelta.relativedelta(
        years=years,
        months=months,
        weeks=weeks,
        days=days,
        hours=hours,
        minutes=minutes,
        seconds=seconds,
        microseconds=microseconds
    )
    return end_datetime



In [42]:
get_datetime_after_duration(
    start_datetime=dateutil.parser.parse("2021-10-31"),
    years=1,
    days=1,
    hours=1
)

datetime.datetime(2022, 11, 1, 1, 0)

In [43]:
end_date = dateutil.parser.parse("2021-10-31")
start_date = end_date - dateutil.relativedelta.relativedelta(years=1)
end_date - start_date >= datetime.timedelta(days=365 - 30)

True

In [44]:
end = dateutil.parser.parse("2021-04-03")
end - dateutil.relativedelta.relativedelta(years=1) + datetime.timedelta(days=1)

datetime.datetime(2020, 4, 4, 0, 0)

## Elapsed time of function execution

In [45]:
print(inspect.getsource(get_elapsed_time))

def get_elapsed_time(func, arg):
    """
    Args:
        func: function to execute and time it
        arg: arguments to the function
    Returns: datetime.datetime.timedelta object
    """
    start = datetime.datetime.now()
    func()
    end = datetime.datetime.now()
    elapsed = end - start
    return elapsed



In [46]:
get_elapsed_time(print, "hoge")




datetime.timedelta(microseconds=171)

---
# Holidays

* [holidays](https://python-holidays.readthedocs.io/en/latest/)

> A fast, efficient Python library for generating country- and subdivision- (e.g. state or province) specific sets of government-designated holidays on the fly. It aims to make determining whether a specific date is a holiday as fast and flexible as possible.
>
> The standard way to refer to a country is by using its [ISO 3166-1 alpha-2 code](https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes) 
>
> ```pip install --upgrade holidays```


* [holidays - API Reference](https://python-holidays.readthedocs.io/en/latest/api.html)

* [Python | Holidays library](https://www.geeksforgeeks.org/python-holidays-library/)

> Python Holidays library is an efficient library for determining whether a specific date is a holiday as fast and flexible as possible. For any country, one can find whether that day is a holiday or not. Only fixed days(Public) holidays like Christmas, New Year, etc. can be detected.



In [47]:
print(inspect.getsource(get_holidays))

def get_holidays(
        country: str,
        states: List[str] = None,
        years: List[int] = None
) -> Dict[object, str]:
    """Get holidays of the country and state (optional).
    If state is not specified, the common holidays for all states will be provided.

    holidays.utils.country_holidays(
        country, subdiv=None, years=None, expand=True, observed=True
    )
    observed:
        Whether to include the dates of when public holiday are observed
        (e.g. a holiday falling on a Sunday being observed the following Monday).
        False may not work for all countries.

    Args:
        country: ISO 3166-1 alpha-2 code, e.g. AU
        states: states in the country e.g. ACT (default), NSW, NT, QLD, SA, TAS, VIC, WA in AU
        years: list of years to get holidays from. If None, empty dict is returned
    """
    assert years is not None and len(years) > 0, "years is required"

    result = dict()
    if states is not None and len(states) > 0:
        for state

In [57]:
for holiday in get_holidays(country="AU", states=["QLD", "NSW"], years=[2022]).items():
    print(holiday)

(datetime.date(2022, 9, 22), 'National Day of Mourning for Queen Elizabeth II')
(datetime.date(2022, 1, 1), "New Year's Day")
(datetime.date(2022, 1, 3), "New Year's Day (Observed)")
(datetime.date(2022, 1, 26), 'Australia Day')
(datetime.date(2022, 4, 15), 'Good Friday')
(datetime.date(2022, 4, 16), 'Easter Saturday')
(datetime.date(2022, 4, 17), 'Easter Sunday')
(datetime.date(2022, 4, 18), 'Easter Monday')
(datetime.date(2022, 4, 25), 'Anzac Day')
(datetime.date(2022, 10, 3), 'Labour Day')
(datetime.date(2022, 5, 2), 'Labour Day')
(datetime.date(2022, 8, 10), 'The Royal Queensland Show')
(datetime.date(2022, 12, 25), 'Christmas Day')
(datetime.date(2022, 12, 27), 'Christmas Day (Observed)')
(datetime.date(2022, 12, 26), 'Boxing Day')
(datetime.date(2022, 6, 13), "Queen's Birthday")
(datetime.date(2022, 8, 1), 'Bank Holiday')


In [56]:
for holiday in get_holidays(country="AU", years=[2023]).items():
    print(holiday)

(datetime.date(2023, 1, 1), "New Year's Day")
(datetime.date(2023, 1, 2), "New Year's Day (Observed)")
(datetime.date(2023, 1, 26), 'Australia Day')
(datetime.date(2023, 4, 7), 'Good Friday')
(datetime.date(2023, 4, 10), 'Easter Monday')
(datetime.date(2023, 4, 25), 'Anzac Day')
(datetime.date(2023, 12, 25), 'Christmas Day')
(datetime.date(2023, 12, 26), 'Boxing Day')


---
# Date/Time in cyclic

Hours of the day, days of the week, months in a year are all examples of features that are cyclical. 

* [What is a good way to transform Cyclic Ordinal attributes?](https://datascience.stackexchange.com/a/6335/68313)

> The most logical way to transform hour is into two variables that swing back and forth out of sync. Imagine the position of the end of the hour hand of a 24-hour clock. The x position swings back and forth out of sync with the y position. For a 24-hour clock you can accomplish this with:
> ```x=sin(2pi*hour/24), y=cos(2pi*hour/24)```

* [Feature Engineering - Handling Cyclical Features](http://blog.davidkaleko.com/feature-engineering-cyclical-features.html)

> Zero (midnight) is on the right, and the hours increase counterclockwise around the circle. In this way, 23:59 is very close to 00:00
> <img src="./image/cyclic_time.png" align="left" width=300/>

In [87]:
print(inspect.getsource(get_cyclic_time_of_day))

def get_cyclic_time_of_day(hours: int, minutes: int, seconds: int) -> Tuple[TYPE_FLOAT, TYPE_FLOAT]:
    """Encode time in day as cyclic
    https://datascience.stackexchange.com/a/6335/68313
    http://blog.davidkaleko.com/feature-engineering-cyclical-features.html

    Args:
        hours: hours in day
        minutes: minutes in the hour
        seconds: seconds in the minute
    Returns: (time in sin, time in cos)
    """
    seconds_in_day = TYPE_FLOAT(24 * 60 * 60)
    assert 0 <= hours < 24, f"invalid hours [{hours}]"
    assert 0 <= hours < 60, f"invalid minutes [{minutes}]"
    assert 0 <= hours < 60, f"invalid seconds [{seconds}]"

    seconds: TYPE_FLOAT = \
        TYPE_FLOAT(60) * TYPE_FLOAT(60) * TYPE_FLOAT(hours) + \
        TYPE_FLOAT(60) * TYPE_FLOAT(minutes) + \
        TYPE_FLOAT(seconds)

    time_in_sin: TYPE_FLOAT
    time_in_cos: TYPE_FLOAT
    time_in_sin = x = np.sin(TYPE_FLOAT(2) * TYPE_FLOAT(np.pi) * seconds / seconds_in_day)
    time_in_cos = y = np.cos(TYPE

## Month in year

In [94]:
get_cyclic_month_of_year(months=2) # 2 is Feb

(0.5000000126183913, 0.8660253964992068)

## Day of week

In [89]:
get_cyclic_day_of_week(day_of_week=1) # 1 is Tuesday

(0.7818314980415186, 0.6234897823301957)

## Hour

In [88]:
get_cyclic_time_of_day(hours=23, minutes=38, seconds=0)

(-0.09584551, 0.9953962)