In [None]:
%%html
<style>
table {float:left}
</style>

# Python Date/Time utilities

* [dateutil parser](https://dateutil.readthedocs.io/en/stable/parser.html)

> generic date/time string parser which is able to parse most known formats to represent a date and/or time.


In [118]:
import sys
import logging
import datetime
import calendar
from typing import (
    List,
    Dict,
    Tuple,
    Any,
    Optional,
    Callable
)

import dateutil 
from dateutil import relativedelta
from dateutil.tz import tzutc

import datefinder

In [37]:
logger = logging.getLogger(__name__)

---
# Instantiation

Creating datetime instance

In [8]:
datetime.datetime.now()

datetime.datetime(2023, 1, 11, 12, 42, 17, 262143)

In [9]:
datetime.datetime(year=2020, month=10, day=11, hour=4, minute=10, second=52, microsecond=33)

datetime.datetime(2020, 10, 11, 4, 10, 52, 33)

---
# Datatime components

In [34]:
def get_datetime_components(dt: datetime):
    return {
        "year": dt.year,
        "month": dt.month,
        "day": dt.day,
        "weekday": dt.weekday(),
        "hour": dt.hour,
        "minute": dt.minute,
        "second": dt.second,
        "microsecond": dt.microsecond,
        "tzinfo": dt.tzinfo,
    }

In [35]:
get_datetime_components(datetime.datetime.now())

{'year': 2023,
 'month': 1,
 'day': 11,
 'weekday': 2,
 'hour': 12,
 'minute': 46,
 'second': 29,
 'microsecond': 427627,
 'tzinfo': None}

---
# Parse date

In [101]:
def parse_date_string_as_string(date_str: str, formats=None, year_first=False) -> str:
    day_first_date_formats = (
        # --------------------------------------------------------------------------------
        # 21/01/01 as 21MAY2021
        # --------------------------------------------------------------------------------
        '%d/%m/%y',
        '%d-%m-%y',
        '%d %m %y',
        # --------------------------------------------------------------------------------
        # 21/01/2001 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%d/%m/%Y',
        '%d-%m-%Y',
        '%d %m %Y',
        # --------------------------------------------------------------------------------
        # 21/Jan/01 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%d/%b/%y',
        '%d-%b-%y',
        '%d %b %y',
        # --------------------------------------------------------------------------------
        # 21/January/01 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%d/%B/%Y',
        '%d-%B-%Y',
        '%d %B %Y',
    )
    year_first_date_formats = (
        # --------------------------------------------------------------------------------
        # 21/01/01 as 21MAY2021
        # --------------------------------------------------------------------------------
        '%y/%m/%d',
        '%y-%m-%d',
        '%y %m %d',
        # --------------------------------------------------------------------------------
        # 21/01/2001 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%Y/%m/%d',
        '%Y-%m-%d',
        '%Y %m %d',
        # --------------------------------------------------------------------------------
        # 21/Jan/01 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%y/%b/%d',
        '%y-%b-%d',
        '%y %b %d',
        # --------------------------------------------------------------------------------
        # 21/January/01 as 21JAN2021
        # --------------------------------------------------------------------------------
        '%Y/%B/%d',
        '%Y-%B-%d',
        '%Y %B %d',
    )
    date_formats = []
    if formats is None:
        # --------------------------------------------------------------------------------
        # Reverse the format string in day_first_date_formats when year first.
        # --------------------------------------------------------------------------------
        if year_first:
            date_formats = year_first_date_formats
        else:
            date_formats = day_first_date_formats

        logger.debug(date_formats)
        
    else:
        # --------------------------------------------------------------------------------
        # Do not touch given formats if it is not None
        # --------------------------------------------------------------------------------
        date_formats = formats

    parsed = []
    for fmt in date_formats:
        try:
            parsed.append(datetime.datetime.strptime(date_str, fmt)) 
        except ValueError as e:
            pass

    logger.debug(parsed)
    if len(parsed) == 0 or len(parsed) > 1:
        raise RuntimeError(f"invalid date_str [{date_str}]")

    return parsed[0]

In [105]:
fmt = '%d/%m/%y'
parse_date_string_as_string(date_str="21 JAN 22", formats=None, year_first=True)
# parse_date_string_as_string(date_str="8:16:00 AM", formats=None)

datetime.datetime(2021, 1, 22, 0, 0)

## datefinder

* [datefinder - extract dates from text](https://datefinder.readthedocs.io/en/latest/)

> A python module for locating dates inside text. Use this package to extract all sorts of date like strings from a document and turn them into datetime objects.

In [124]:
def get_dates_from_str(
    text: str,
    strict: bool = True,
) -> List[datetime.datetime]:
    """Extract datet from string
    Args:
        text: string to find a date
        strict: require complete date. July 2016 of Monday will not return datetimes.
    Returns: datetime
    Raises: RuntimeError is no date
    """
    dates = list(datefinder.find_dates(text, source=False, index=False, strict=strict, base_date=None))
    if len(dates) <= 0:
        raise RuntimeError(f"invalid text with no date: [{text}]")
        
    return dates

In [126]:
get_dates_from_str(text=" AM")

RuntimeError: invalid text with no date: [ AM]

In [123]:
get_dates_from_str(text="8:16:00 AM")

RuntimeError: invalid text with no date: [{text}]

In [127]:
get_dates_from_str(text="21 JAN 22")

[datetime.datetime(2022, 1, 21, 0, 0)]

In [128]:
get_dates_from_str(text="2022-09-01")

[datetime.datetime(2022, 9, 1, 0, 0)]

In [129]:
get_dates_from_str(text="2022 09 01")

[datetime.datetime(2022, 9, 1, 0, 0)]

In [130]:
get_dates_from_str(text="Aug 28 1999 12:00AM")

[datetime.datetime(1999, 8, 28, 0, 0)]

In [131]:
get_dates_from_str(text="2022 09 01")

[datetime.datetime(2022, 9, 1, 0, 0)]

# Parse date/time

* [dateutil - powerful extensions to datetime](https://dateutil.readthedocs.io/en/stable/)

```
pip install python-dateutil
```

In [110]:
def parse_datetime_string(
    date_time_str: str,
    dayfirst: bool = False,
    yearfirst: bool = False,
) -> datetime.datetime:
    """
    Args: 
        date_time_str: date/time string to parse
        dayfirst: regard the first digits as day e.g 01/05/09 as 01/May/2009
        yearfirst: regard the first digits as year e.g. 01/05/09 as 2001/May/09 
    Returns: python datetime instance
    Raises: 
        dateutil.parser.ParserError: Failure to parse a datetime string.
        dateutil.parser.UnknownTimezoneWarning:  timezone cannot be parsed into a tzinfo.
    """
    try:
        return dateutil.parser.parse(date_time_str, dayfirst=dayfirst, yearfirst=yearfirst)
    except dateutil.parser.ParserError as e:
        raise RuntimeError(f"parse_datetime_string() invalid date/time string [{date_time_str}]") from e
    except dateutil.parser.UnknownTimezoneWarning as e:
        raise RuntimeError(f"parse_datetime_string() invalid timezone in [{date_time_str}]") from e


In [114]:
parse_datetime_string("Aug 28 1999 00:01AM")

datetime.datetime(1999, 8, 28, 0, 1)

In [67]:
parse_datetime_string("2021-10-31")

datetime.datetime(2021, 10, 31, 0, 0)

In [68]:
parse_datetime_string("21-10-31", yearfirst=True) # datetime.datetime(2021, 10, 31, 0, 0)

datetime.datetime(2021, 10, 31, 0, 0)

In [52]:
parse_datetime_string("01/05/09", yearfirst=True)

datetime.datetime(2001, 5, 9, 0, 0)

In [53]:
parse_datetime_string("01/05/09", dayfirst=True)

datetime.datetime(2009, 5, 1, 0, 0)

In [69]:
parse_datetime_string("8:16:00 AM")

datetime.datetime(2023, 1, 11, 8, 16)

---

# Format string

* [strftime() and strptime() Format Codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes)

> The following is a list of all the format codes that the 1989 C standard requires, and these work on all platforms with a standard C implementation.



| Directive | Meaning                                                                                                                                                                          | Example                                                                      |
|-----------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------|
| %a        | Weekday as locale’s abbreviated name.                                                                                                                                            | Sun, Mon, …, Sat (en_US); So, Mo, …, Sa (de_DE)                              |
| %A        | Weekday as locale’s full name.                                                                                                                                                   | Sunday, Monday, …, Saturday (en_US); Sonntag, Montag, …, Samstag (de_DE)     |
| %w        | Weekday as a decimal number, where 0 is Sunday and 6 is Saturday.                                                                                                                | 0, 1, …, 6                                                                   |
| %d        | Day of the month as a zero-padded decimal number.                                                                                                                                | 01, 02, …, 31                                                                |
| %b        | Month as locale’s abbreviated name.                                                                                                                                              | Jan, Feb, …, Dec (en_US); Jan, Feb, …, Dez (de_DE)                           |
| %B        | Month as locale’s full name.                                                                                                                                                     | January, February, …, December (en_US); Januar, Februar, …, Dezember (de_DE) |
| %m        | Month as a zero-padded decimal number.                                                                                                                                           | 01, 02, …, 12                                                                |
| %y        | Year without century as a zero-padded decimal number.                                                                                                                            | 00, 01, …, 99                                                                |
| %Y        | Year with century as a decimal number.                                                                                                                                           | 0001, 0002, …, 2013, 2014, …, 9998, 9999                                     |
| %H        | Hour (24-hour clock) as a zero-padded decimal number.                                                                                                                            | 00, 01, …, 23                                                                |
| %I        | Hour (12-hour clock) as a zero-padded decimal number.                                                                                                                            | 01, 02, …, 12                                                                |
| %p        | Locale’s equivalent of either AM or PM.                                                                                                                                          | AM, PM (en_US); am, pm (de_DE)                                               |
| %M        | Minute as a zero-padded decimal number.                                                                                                                                          | 00, 01, …, 59                                                                |
| %S        | Second as a zero-padded decimal number.                                                                                                                                          | 00, 01, …, 59                                                                |
| %f        | Microsecond as a decimal number, zero-padded to 6 digits.                                                                                                                        | 000000, 000001, …, 999999                                                    |
| %z        | UTC offset in the form ±HHMM[SS[.ffffff]] (empty string if the object is naive).                                                                                                 | (empty), +0000, -0400, +1030, +063415, -030712.345216                        |
| %Z        | Time zone name (empty string if the object is naive).                                                                                                                            | (empty), UTC, GMT                                                            |
| %j        | Day of the year as a zero-padded decimal number.                                                                                                                                 | 001, 002, …, 366                                                             |
| %U        | Week number of the year (Sunday as the first day of the week) as a zero-padded decimal number. All days in a new year preceding the first Sunday are considered to be in week 0. | 00, 01, …, 53                                                                |
| %W        | Week number of the year (Monday as the first day of the week) as a zero-padded decimal number. All days in a new year preceding the first Monday are considered to be in week 0. | 00, 01, …, 53                                                                |
| %c        | Locale’s appropriate date and time representation.                                                                                                                               | Tue Aug 16 21:30:00 1988 (en_US); Di 16 Aug 21:30:00 1988 (de_DE)            |
| %x        | Locale’s appropriate date representation.                                                                                                                                        | 08/16/88 (None); 08/16/1988 (en_US); 16.08.1988 (de_DE)                      |
| %X        | Locale’s appropriate time representation.                                                                                                                                        | 21:30:00 (en_US); 21:30:00 (de_DE)                                           |
| %%        | A literal '%' character.                                                                                                                                                         | %                                                                            |


In [13]:
datetime.datetime.now().strftime("%Y%b%d%H%M").upper()

'2023JAN111242'

## Convert date/time string into ISO 8601 without TZ

In [14]:
def str_to_iso8601(literal) -> int:
    """Convert literal date/time string into ISO 8601 format
    e.g. "01 Mar 2018 11:00:00 GMT+1000" into yyyy-MM-dd'T'HH:mm:ssZ or yyyy-MM-dd'T'HH:mm:ss.SSSZ
    """
    literal = ' '.join(literal.split())
    dt_gmt = datetime.datetime.strptime(literal, '%d %b %Y %H:%M:%S %Z%z')

    seconds_since_epoch = int(dt_gmt.timestamp())
    dt_utc = datetime.datetime.utcfromtimestamp(seconds_since_epoch)
    return dt_utc.replace(tzinfo=None).isoformat()


print(f'{str_to_iso8601("01 Mar 2018 11:00:00 GMT+1000")}Z')

2018-03-01T01:00:00Z


---
# Epoch
## Convert date/time string into epoch

Check the result with https://www.epochconverter.com/

In [132]:
def get_epoch_from_string(literal, format='%d %b %Y %H:%M:%S %Z%z') -> int:
    """Convert literal date/time string into epoch time in seconds
    e.g. "01 Mar 2018 11:00:00 GMT+1000" into 1519866000
    Args: 
        literal: string to parse date/time
        format: date/time format of the string
    Returns: epoch (in seconds) as int
    """
    literal = ' '.join(literal.split())
    dt = datetime.datetime.strptime(literal, format)
    return int(dt.timestamp())  # sec

In [133]:
str_to_epoch("01 Mar 2018 11:00:00 GMT+1000")

1519866000

## Convert datetime to epoch
It looks Python date/time function may have bugs.

* [Convert python datetime to epoch with strftime](https://stackoverflow.com/questions/11743019/convert-python-datetime-to-epoch-with-strftime)

> 1st of April 2012 UTC from epoch is 1333238400 but this above returns 1333234800 which is different by 1 hour.
> ```
> >>>datetime.datetime.datetime(2012,04,01,0,0).strftime('%s')
'1333234800'
> ```

> don't use .strftime("%s"): it is not supported, it is not portable, it may silently produce a wrong result for an aware datetime object, it fails if input is in UTC (as in the question) but local timezone is not UTC – 
jfs

> I'm on Python 3.6 and datetime.datetime.datetime(2012,4,1,0,0).timestamp() does not give the correct epoch time. 


In [137]:
def get_epoch_from_datetime(date_time: datetime.datetime) -> int:
    """Convert literal datetime into epoch time in seconds
    Args: 
        literal: string to parse date/time
        format: date/time format of the string
    Returns: epoch (in seconds) as int
    """
    return calendar.timegm(date_time.timetuple())

In [139]:
get_epoch_from_datetime(datetime.datetime(2012, 4, 1, 0, 0))

1333238400

---
# Duratinon between date/time

In [147]:
def get_seconds_between_datetimes(from_datetime: datetime.datetime, to_datetime: datetime.datetime) -> int:
    start = calendar.timegm(to_datetime.timetuple())
    end = calendar.timegm(from_datetime.timetuple())

    return (start - end) / (3600 * 24)

* [dateutil.relativedelta.relativedelta](https://dateutil.readthedocs.io/en/stable/relativedelta.html)

In [150]:
get_seconds_between_datetimes(
    from_datetime=datetime.datetime(2012, 9, 1, 0, 0),
    to_datetime=datetime.datetime(2012, 12, 1, 0, 0)
)

91.0

In [151]:
def get_datetime_after_duration(
    start_datetime: datetime.datetime,
    years: int = 0, 
    months: int = 0, 
    weeks: int = 0, 
    days: int = 0, 
    hours: int = 0, 
    minutes: int = 0, 
    seconds: int = 0, 
    microseconds: int = 0
) -> datetime:
    """Date/time after duration from the start point
    Args:
        start_datetime: start point datetime
    Returns: datetime instance
    """
    end_datetime = start_datetime + dateutil.relativedelta.relativedelta(
        years=years,
        months=months,
        weeks=weeks,
        days=days,
        hours=hours,
        minutes=minutes,
        seconds=seconds,
        microseconds=microseconds
    )
    return end_datetime
    

In [152]:
get_datetime_after_duration(
    start_datetime=dateutil.parser.parse("2021-10-31"),
    years=1,
    days=1,
    hours=1
)

datetime.datetime(2022, 11, 1, 1, 0)

In [25]:
end_date = dateutil.parser.parse("2021-10-31")
start_date = end_date - dateutil.relativedelta.relativedelta(years=1)
end_date - start_date >= datetime.timedelta(days=365 - 30)

True

In [31]:
end = dateutil.parser.parse("2021-04-03")
end - dateutil.relativedelta.relativedelta(years=1) + datetime.timedelta(days=1)

datetime.datetime(2020, 4, 4, 0, 0)

## Elapsed time of function execution

In [32]:
def elapsed_time(func, arg):
    """
    Args:
        func: function to execute and time it
        arg: arguments to the function
    Returns: datetime.datetime.timedelta object
    """
    start = datetime.datetime.now()
    func()
    end = datetime.datetime.now()
    elapsed = end - start
    return elapsed

In [33]:
elapsed_time(print, "hoge")




datetime.timedelta(microseconds=61)