Skip to content

Commit

Permalink
Merge 8a18b45 into d713ee1
Browse files Browse the repository at this point in the history
  • Loading branch information
jpinner-lyft committed Sep 16, 2020
2 parents d713ee1 + 8a18b45 commit e73b897
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 99 deletions.
6 changes: 6 additions & 0 deletions docs/release_notes.rst
Expand Up @@ -13,6 +13,12 @@ This is a major release and contains breaking changes. Please read the notes below
This release introduces :ref:`polymorphism` support via :py:class:`DiscriminatorAttribute <pynamodb.attributes.DiscriminatorAttribute>`.
Discriminator values are written to DynamoDB and used during deserialization to instantiate the desired class.

**UTCDateTimeAttribute**

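The UTCDateTimeAttribute now strictly requires the date string format '%Y-%m-%dT%H:%M:%S.%f%z' to ensure proper ordering:
six-digit microseconds followed by a literal ``+0000`` UTC offset (for example, ``2047-01-06T08:21:30.002000+0000``).
PynamoDB has always written values in this format, but previously accepted other formats when reading.
Items written using any other format must be rewritten before upgrading; otherwise deserializing them raises a ``ValueError``.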

Other changes in this release:

* Python 2 is no longer supported. Python 3.6 or greater is now required.
Expand Down
48 changes: 18 additions & 30 deletions pynamodb/attributes.py
Expand Up @@ -11,7 +11,6 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from dateutil.parser import parse
from inspect import getfullargspec
from inspect import getmembers
from typing import Any, Callable, Dict, Generic, List, Mapping, Optional, TypeVar, Type, Union, Set, overload
Expand Down Expand Up @@ -668,16 +667,25 @@ def deserialize(self, value):
"""
Takes a UTC datetime string and returns a datetime object
"""
return self._fast_parse_utc_date_string(value)

@staticmethod
def _fast_parse_utc_date_string(date_string: str) -> datetime:
# Method to quickly parse strings formatted with '%Y-%m-%dT%H:%M:%S.%f+0000'.
# This is ~5.8x faster than using strptime and 38x faster than dateutil.parser.parse.
_int = int # Hack to prevent global lookups of int, speeds up the function ~10%
try:
return _fast_parse_utc_datestring(value)
except (ValueError, IndexError):
try:
# Attempt to parse the datetime with the datetime format used
# by default when storing UTCDateTimeAttributes. This is significantly
# faster than always going through dateutil.
return datetime.strptime(value, DATETIME_FORMAT)
except ValueError:
return parse(value)
if (len(date_string) != 31 or date_string[4] != '-' or date_string[7] != '-'
or date_string[10] != 'T' or date_string[13] != ':' or date_string[16] != ':'
or date_string[19] != '.' or date_string[26:31] != '+0000'):
raise ValueError("Datetime string '{}' does not match format '{}'".format(date_string, DATETIME_FORMAT))
return datetime(
_int(date_string[0:4]), _int(date_string[5:7]), _int(date_string[8:10]),
_int(date_string[11:13]), _int(date_string[14:16]), _int(date_string[17:19]),
_int(date_string[20:26]), timezone.utc
)
except (TypeError, ValueError):
raise ValueError("Datetime string '{}' does not match format '{}'".format(date_string, DATETIME_FORMAT))


class NullAttribute(Attribute[None]):
Expand Down Expand Up @@ -970,26 +978,6 @@ def _get_class_for_serialize(value):
return SERIALIZE_CLASS_MAP[value_type]


def _fast_parse_utc_datestring(datestring):
    """
    Quickly parse a UTC datetime string shaped like '%Y-%m-%dT%H:%M:%S.%f+0000'.

    The separators are checked at fixed positions and the numeric fields are
    sliced out directly. This is ~5.8x faster than using strptime and 38x
    faster than dateutil.parser.parse.

    :param datestring: the string to parse, e.g. '2047-01-06T08:21:30.002000+0000'.
        The fractional part may have any number of digits; it is rounded to
        whole microseconds.
    :return: a timezone-aware ``datetime`` in UTC.
    :raises ValueError: if ``datestring`` does not match the expected format.
        This includes inputs that are too short to contain every separator and
        inputs that are not strings at all.
    """
    _int = int  # Hack to prevent global lookups of int, speeds up the function ~10%
    try:
        if (datestring[4] != '-' or datestring[7] != '-' or datestring[10] != 'T' or
                datestring[13] != ':' or datestring[16] != ':' or datestring[19] != '.' or
                datestring[-5:] != '+0000'):
            # The message is attached once, in the handler below.
            raise ValueError
        return datetime(
            _int(datestring[0:4]), _int(datestring[5:7]), _int(datestring[8:10]),
            _int(datestring[11:13]), _int(datestring[14:16]), _int(datestring[17:19]),
            # Slice starts at the '.', so float() sees e.g. '.002000'.
            _int(round(float(datestring[19:-5]) * 1e6)), timezone.utc
        )
    except (TypeError, ValueError, IndexError):
        # IndexError: a string too short for the fixed-position checks above.
        # It used to escape as-is; callers are promised a ValueError instead.
        raise ValueError("Datetime string '{}' does not match format "
                         "'%Y-%m-%dT%H:%M:%S.%f+0000'".format(datestring)) from None


class ListAttribute(Generic[_T], Attribute[List[_T]]):
attr_type = LIST
element_type: Optional[Type[Attribute]] = None
Expand Down
3 changes: 0 additions & 3 deletions requirements-dev.txt
Expand Up @@ -2,9 +2,6 @@ pytest>=5
pytest-env
pytest-mock

# Due to https://github.com/boto/botocore/issues/1872. Remove after botocore fixes.
python-dateutil==2.8.0

# only used in .travis.yml
coveralls
mypy==0.770;python_version>="3.7"
Expand Down
1 change: 0 additions & 1 deletion setup.py
Expand Up @@ -3,7 +3,6 @@

install_requires = [
'botocore>=1.12.54',
'python-dateutil>=2.1,<3.0.0',
]

setup(
Expand Down
95 changes: 30 additions & 65 deletions tests/test_attributes.py
Expand Up @@ -14,10 +14,9 @@
from pynamodb.attributes import (
BinarySetAttribute, BinaryAttribute, NumberSetAttribute, NumberAttribute,
UnicodeAttribute, UnicodeSetAttribute, UTCDateTimeAttribute, BooleanAttribute, MapAttribute,
ListAttribute, JSONAttribute, TTLAttribute, _fast_parse_utc_datestring,
VersionAttribute)
ListAttribute, JSONAttribute, TTLAttribute, VersionAttribute)
from pynamodb.constants import (
DATETIME_FORMAT, DEFAULT_ENCODING, NUMBER, STRING, STRING_SET, NUMBER_SET, BINARY_SET,
DEFAULT_ENCODING, NUMBER, STRING, STRING_SET, NUMBER_SET, BINARY_SET,
BINARY, BOOLEAN,
)
from pynamodb.models import Model
Expand Down Expand Up @@ -128,87 +127,53 @@ class TestUTCDateTimeAttribute:
"""
Tests UTCDateTime attributes
"""

def setup_method(self):
    """
    Create a fresh attribute and reference datetime before every test.

    Named ``setup_method`` (pytest's native per-test hook) instead of the
    nose-style ``setup``, which pytest deprecated in 7.2 and no longer calls
    from 8.0 onwards.
    """
    self.attr = UTCDateTimeAttribute()
    # Timezone-aware UTC value with non-zero microseconds, shared by the tests.
    self.dt = datetime(2047, 1, 6, 8, 21, 30, 2000, tzinfo=timezone.utc)

def test_utc_datetime_attribute(self):
"""
UTCDateTimeAttribute.default
"""
attr = UTCDateTimeAttribute()
assert attr is not None
attr = UTCDateTimeAttribute(default=self.dt)
assert attr.attr_type == STRING
tstamp = datetime.now()
attr = UTCDateTimeAttribute(default=tstamp)
assert attr.default == tstamp

def test_utc_date_time_deserialize(self):
"""
UTCDateTimeAttribute.deserialize
"""
tstamp = datetime.now(timezone.utc)
attr = UTCDateTimeAttribute()
assert attr.deserialize(tstamp.strftime(DATETIME_FORMAT)) == tstamp
assert attr.default == self.dt

def test_dateutil_parser_fallback(self):
def test_utc_date_time_serialize(self):
"""
UTCDateTimeAttribute.deserialize
UTCDateTimeAttribute.serialize
"""
expected_value = datetime(2047, 1, 6, 8, 21, tzinfo=timezone.utc)
attr = UTCDateTimeAttribute()
assert attr.deserialize('January 6, 2047 at 8:21:00AM UTC') == expected_value
assert self.attr.serialize(self.dt) == '2047-01-06T08:21:30.002000+0000'

@patch('pynamodb.attributes.datetime')
@patch('pynamodb.attributes.parse')
def test_utc_date_time_deserialize_parse_args(self, parse_mock, datetime_mock):
def test_utc_date_time_deserialize(self):
"""
UTCDateTimeAttribute.deserialize
"""
tstamp = datetime.now(timezone.utc)
attr = UTCDateTimeAttribute()

tstamp_str = tstamp.strftime(DATETIME_FORMAT)
attr.deserialize(tstamp_str)

parse_mock.assert_not_called()
datetime_mock.strptime.assert_not_called()

def test_utc_date_time_serialize(self):
"""
UTCDateTimeAttribute.serialize
"""
tstamp = datetime.now()
attr = UTCDateTimeAttribute()
assert attr.serialize(tstamp) == tstamp.replace(tzinfo=timezone.utc).strftime(DATETIME_FORMAT)

def test__fast_parse_utc_datestring_roundtrips(self):
tstamp = datetime.now(timezone.utc)
tstamp_str = tstamp.strftime(DATETIME_FORMAT)
assert _fast_parse_utc_datestring(tstamp_str) == tstamp

def test__fast_parse_utc_datestring_no_microseconds(self):
expected_value = datetime(2047, 1, 6, 8, 21, tzinfo=timezone.utc)
assert _fast_parse_utc_datestring('2047-01-06T08:21:00.0+0000') == expected_value
assert self.attr.deserialize('2047-01-06T08:21:30.002000+0000') == self.dt

@pytest.mark.parametrize(
"invalid_string",
[
'2.47-01-06T08:21:00.0+0000',
'2047-01-06T08:21:00.+0000',
'2047-01-06T08:21:00.0',
'2047-01-06 08:21:00.0+0000',
'abcd-01-06T08:21:00.0+0000',
'2047-ab-06T08:21:00.0+0000',
'2047-01-abT08:21:00.0+0000',
'2047-01-06Tab:21:00.0+0000',
'2047-01-06T08:ab:00.0+0000',
'2047-01-06T08:ab:00.0+0000',
'2047-01-06T08:21:00.a+0000',
'2047-01-06T08:21:00.0.1+0000',
'2047-01-06T08:21:00.0+00000'
'2047-01-06T08:21:30.002000', # naive datetime
'2047-01-06T08:21:30+0000', # missing microseconds
'2047-01-06T08:21:30.001+0000', # shortened microseconds
'2047-01-06T08:21:30.002000-0000',  # "negative" utc
'2047-01-06T08:21:30.002000+0030',  # not utc
'2047-01-06 08:21:30.002000+0000', # missing separator
'2.47-01-06T08:21:30.002000+0000',
'abcd-01-06T08:21:30.002000+0000',
'2047-ab-06T08:21:30.002000+0000',
'2047-01-abT08:21:30.002000+0000',
'2047-01-06Tab:21:30.002000+0000',
'2047-01-06T08:ab:30.002000+0000',
'2047-01-06T08:21:ab.002000+0000',
'2047-01-06T08:21:30.a00000+0000',
]
)
def test__fast_parse_utc_datestring_invalid_input(self, invalid_string):
def test_utc_date_time_invalid(self, invalid_string):
with pytest.raises(ValueError, match="does not match format"):
_fast_parse_utc_datestring(invalid_string)

self.attr.deserialize(invalid_string)


class TestBinaryAttribute:
Expand Down

0 comments on commit e73b897

Please sign in to comment.