From b451a83620d12024fe3e0d78cb74d3f9a6204e49 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 1 Dec 2017 13:38:15 -0500 Subject: [PATCH 01/21] Add C implementation of date.fromisoformat --- Modules/_datetimemodule.c | 98 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index b50cddad5dd2fd..17cb2202ac1fc7 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -663,6 +663,58 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d) SET_DAY(self, d); } +/* --------------------------------------------------------------------------- + * String parsing utilities and helper functions + */ +static inline unsigned int to_int(char ptr) +{ + return (unsigned int)(ptr - '0'); +} + +static inline int is_digit(unsigned int tmp) +{ + return tmp <= 9; +} + +static inline const char* parse_digits(const char* ptr, int* var, + size_t num_digits) +{ + for (size_t i = 0; i < num_digits; ++i) { + int tmp = to_int(*(ptr++)); + if (!is_digit(tmp)) { return NULL; } + *var *= 10; + *var += (signed int)tmp; + } + + return ptr; +} + +static int parse_isoformat_date(const char *dtstr, + int* year, int *month, int* day) { + /* Parse the date components of the result of date.isoformat() + * + * Return codes: + * 0: Success + * -1: Failed to parse date component + * -2: Failed to parse dateseparator + */ + const char *p = dtstr; + p = parse_digits(p, year, 4); + if (NULL == p) { return -1; } + + if (*(p++) != '-') { return -2; } + + p = parse_digits(p, month, 2); + if (NULL == p) { return -1; } + + if (*(p++) != '-') { return -2; } + + p = parse_digits(p, day, 2); + if (p == NULL) { return -1; } + + return 0; +} + /* --------------------------------------------------------------------------- * Create various objects, mostly without range checking. 
*/ @@ -2607,6 +2659,7 @@ date_fromtimestamp(PyObject *cls, PyObject *args) return result; } + /* Return new date from proleptic Gregorian ordinal. Raises ValueError if * the ordinal is out of range. */ @@ -2633,6 +2686,47 @@ date_fromordinal(PyObject *cls, PyObject *args) return result; } +/* Return the new date from a string as generated by date.isoformat() */ +static PyObject * +date_fromisoformat(PyObject *cls, PyObject *dtstr) { + assert(dtstr != NULL); + + if (!PyUnicode_Check(dtstr)) { + PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str"); + return NULL; + } + assert(dtstr != NULL || PyUnicode_Check(dtstr)); + + Py_ssize_t len; + + const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); + + int year = 0, month = 0, day = 0; + + int rv; + if (len == 10) { + rv = parse_isoformat_date(dt_ptr, &year, &month, &day); + } else { + rv = -1; + } + + if (rv < 0) { + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", + dt_ptr); + return NULL; + } + + PyObject *result; + if ( (PyTypeObject*)cls == &PyDateTime_DateType ) { + result = new_date_ex(year, month, day, (PyTypeObject*)cls); + } else { + result = PyObject_CallFunction(cls, "iii", year, month, day); + } + + return result; +} + + /* * Date arithmetic. 
*/ @@ -2925,6 +3019,10 @@ static PyMethodDef date_methods[] = { PyDoc_STR("int -> date corresponding to a proleptic Gregorian " "ordinal.")}, + {"fromisoformat", (PyCFunction)date_fromisoformat, METH_O | + METH_CLASS, + PyDoc_STR("str -> Construct a date from the output of date.isoformat()")}, + {"today", (PyCFunction)date_today, METH_NOARGS | METH_CLASS, PyDoc_STR("Current date or datetime: same as " "self.__class__.fromtimestamp(time.time()).")}, From 4ccef0a380a839e66171ec5e4b2f0eef2cd51773 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 1 Dec 2017 13:38:36 -0500 Subject: [PATCH 02/21] Add python implementation of date.fromisoformat --- Lib/datetime.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 67d8600921c382..88c581cb012ed5 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -237,6 +237,24 @@ def _wrap_strftime(object, format, timetuple): newformat = "".join(newformat) return _time.strftime(newformat, timetuple) +# Helpers for parsing the result of isoformat() +def _parse_isoformat_date(dtstr): + # It is assumed that this function will only be called with a + # string of length exactly 10, and (though this is not used), not + year = int(dtstr[0:4]) + if dtstr[4] != '-': + raise ValueError('Invalid date separator: %s' % dtstr[4]) + + month = int(dtstr[5:7]) + + if dtstr[7] != '-': + raise ValueError('Invalid date separator') + + day = int(dtstr[8:10]) + + return (year, month, day) + + # Just raise TypeError if the arg isn't None or a string. 
def _check_tzname(name): if name is not None and not isinstance(name, str): @@ -732,6 +750,19 @@ def fromordinal(cls, n): y, m, d = _ord2ymd(n) return cls(y, m, d) + @classmethod + def fromisoformat(cls, dtstr): + """Construct a date from the output of date.isoformat().""" + if not isinstance(dtstr, str): + raise TypeError('fromisoformat: argument must be str') + + try: + assert len(dtstr) == 10 + return cls(*_parse_isoformat_date(dtstr)) + except: + raise ValueError('Invalid isoformat string: {}'.format(dtstr)) + + # Conversions to string def __repr__(self): @@ -2277,7 +2308,7 @@ def _name_from_offset(delta): _date_class, _days_before_month, _days_before_year, _days_in_month, _format_time, _is_leap, _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, - _divide_and_round) + _divide_and_round, _parse_isoformat_date) # XXX Since import * above excludes names that start with _, # docstring does not get overwritten. In the future, it may be # appropriate to maintain a single module level docstring and From 13e8e48bb438281f1fe88882b1bd4a40af318771 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 1 Dec 2017 13:38:49 -0500 Subject: [PATCH 03/21] Add tests for date.fromisoformat --- Lib/test/datetimetester.py | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index d0886c47baec17..01628286882669 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1588,6 +1588,46 @@ def test_backdoor_resistance(self): # blow up because other fields are insane. 
self.theclass(base[:2] + bytes([ord_byte]) + base[3:]) + def test_fromisoformat(self): + # Test that isoformat() is reversible + base_dates = [ + (1, 1, 1), + (1000, 2, 14), + (1900, 1, 1), + (2000, 2, 29), + (2004, 11, 12), + (2004, 4, 3), + (2017, 5, 30), + ] + + for dt_tuple in base_dates: + dt = self.theclass(*dt_tuple) + dt_rt = dt.fromisoformat(dt.isoformat()) + + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_fails(self): + # Test that fromisoformat() fails on invalid values + bad_strs = [ + '', # Empty string + '009-03-04', # Not 10 characters + '123456789', # Not a date + '200a-12-04', # Invalid character in year + '2009-1a-04', # Invalid character in month + '2009-12-0a', # Invalid character in day + '2009-01-32', # Invalid day + '2009-02-29', # Invalid leap day + ] + + for bad_str in bad_strs: + with self.assertRaises(ValueError): + self.theclass.fromisoformat(bad_str) + + bad_types = [b'2009-03-01', None] + for bad_type in bad_types: + with self.assertRaises(TypeError): + self.theclass.fromisoformat(bad_type) + ############################################################################# # datetime tests @@ -2334,6 +2374,15 @@ def newmeth(self, start): self.assertEqual(dt2.newmeth(-7), dt1.year + dt1.month + dt1.second - 7) + @unittest.skip('Not implemented yet') + def test_fromisoformat(self): + pass + + @unittest.skip('Not implemented yet') + def test_fromisoformat_fails(self): + pass + + class TestSubclassDateTime(TestDateTime): theclass = SubclassDatetime # Override tests not designed for subclass From 6fa5c557d9ff8276aa8f35fbfaf3a19e522a95a2 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 1 Dec 2017 20:43:58 -0500 Subject: [PATCH 04/21] Implement C version of datetime.fromisoformat --- Modules/_datetimemodule.c | 171 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 17cb2202ac1fc7..2e168d84606205 100644 --- a/Modules/_datetimemodule.c +++ 
b/Modules/_datetimemodule.c @@ -715,6 +715,90 @@ static int parse_isoformat_date(const char *dtstr, return 0; } +// Macro that short-circuits to timezone parsing +#define PARSE_ISOFORMAT_ADVANCE_TIME_SEP(SEP) { \ + if (*p == '\0') { return 0; } \ + switch(*(p++)) { \ + case SEP: \ + break; \ + case '-': \ + tzsign = -1; \ + case '+': \ + goto parse_timezone; \ + default: \ + return -4; \ + } \ +} + +static int parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, + int* microsecond, int *tzoffset) { + // Parse the time portion of a datetime.isoformat() string + // + // Return codes: + // 0: Success (no tzoffset) + // 1: Success (with tzoffset) + // -3: Failed to parse time component + // -4: Failed to parse time separator + // -5: Malformed timezone string + // -6: Extra information at end of string + + const char *p = dtstr; + + // Parse time - all components are optional except hour + int tzsign = 1; + p = parse_digits(p, hour, 2); + if (NULL == p) { return -3; } + + PARSE_ISOFORMAT_ADVANCE_TIME_SEP(':'); + + p = parse_digits(p, minute, 2); + if (NULL == p) { return -3; } + + PARSE_ISOFORMAT_ADVANCE_TIME_SEP(':'); + + p = parse_digits(p, second, 2); + if (NULL == p) { return -3; } + + PARSE_ISOFORMAT_ADVANCE_TIME_SEP('.'); + + p = parse_digits(p, microsecond, 3); + if (NULL == p) { return -3; } + + // Microseconds + if (*p >= '0' && *p <= '9') { + p = parse_digits(p, microsecond, 3); + if (NULL == p) { return -3; } + } else { + *microsecond *= 1000; + } + + if (*p == '\0') { return 0; } + + switch(*(p++)) { + case '-': + tzsign = -1; + case '+': + break; + default: + return -5; + } + +parse_timezone:; + int tzhours = 0, tzminutes = 0; + p = parse_digits(p, &tzhours, 2); + if (NULL == p || *(p++) != ':') { return -5; } + + p = parse_digits(p, &tzminutes, 2); + if (NULL == p) { return -5; } + + // Convert hours:minutes into seconds + *tzoffset = tzsign * ((3600 * tzhours) + (60 * tzminutes)); + + if (*p != '\0') { return -6; } + + return 1; 
+} + /* --------------------------------------------------------------------------- * Create various objects, mostly without range checking. */ @@ -4604,6 +4688,89 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) return result; } +static PyObject * +datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { + assert(dtstr != NULL); + + if (!PyUnicode_Check(dtstr)) { + PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str"); + return NULL; + } + + Py_ssize_t len; + const char * dt_ptr = PyUnicode_AsUTF8AndSize(dtstr, &len); + const char * p = dt_ptr; + + int year = 0, month = 0, day = 0; + int hour = 0, minute = 0, second = 0, microsecond = 0, tzoffset = 0; + + // date has a fixed length of 10 + int rv = parse_isoformat_date(p, &year, &month, &day); + + if (!rv && len > 10) { + // In UTF-8, the length of multi-byte characters is encoded in the MSB + if ((p[10] & 0x80) == 0) { + p += 11; + } else { + switch(p[10] & 0xf0) { + case 0xe0: + p += 13; + break; + case 0xf0: + p += 14; + break; + default: + p += 12; + break; + } + } + + rv = parse_isoformat_time(p, &hour, &minute, &second, + &microsecond, &tzoffset); + } + if (rv < 0) { + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr); + return NULL; + } + + + PyObject* tzinfo = NULL; + if (rv == 1) { + if( tzoffset == 0 ) { + tzinfo = PyDateTime_TimeZone_UTC; + } else { + PyObject* delta = new_delta(0, tzoffset, 0, 1); + tzinfo = new_timezone(delta, NULL); + Py_DECREF(delta); + } + } else { + tzinfo = Py_None; + } + Py_INCREF(tzinfo); + + PyObject* dt; + if ( (PyTypeObject*)cls == &PyDateTime_DateTimeType ) { + // Use the fast path constructor + dt = new_datetime(year, month, day, hour, minute, second, microsecond, + tzinfo, 0); + } else { + // Subclass + dt = PyObject_CallFunction(cls, "iiiiiiiO", + year, + month, + day, + hour, + minute, + second, + microsecond, + tzinfo); + } + + Py_DECREF(tzinfo); + return dt; +} + + /* * Destructor.
*/ @@ -5617,6 +5784,10 @@ static PyMethodDef datetime_methods[] = { METH_VARARGS | METH_KEYWORDS | METH_CLASS, PyDoc_STR("date, time -> datetime with same date and time fields")}, + {"fromisoformat", (PyCFunction)datetime_fromisoformat, + METH_O | METH_CLASS, + PyDoc_STR("string -> datetime from datetime.isoformat() output")}, + /* Instance methods: */ {"date", (PyCFunction)datetime_getdate, METH_NOARGS, From 0d44220dc96d1c06291c76cd6a49913868d984ee Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 1 Dec 2017 20:44:35 -0500 Subject: [PATCH 05/21] Add initial test suite for C-only datetime.fromisoformat --- Lib/test/datetimetester.py | 168 +++++++++++++++++++++++++++++++++++-- 1 file changed, 159 insertions(+), 9 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 01628286882669..436a5b661fc004 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1597,14 +1597,26 @@ def test_fromisoformat(self): (2000, 2, 29), (2004, 11, 12), (2004, 4, 3), - (2017, 5, 30), + (2017, 5, 30) ] for dt_tuple in base_dates: dt = self.theclass(*dt_tuple) - dt_rt = dt.fromisoformat(dt.isoformat()) + dt_str = dt.isoformat() + with self.subTest(dt_str=dt_str): + dt_rt = self.theclass.fromisoformat(dt.isoformat()) - self.assertEqual(dt, dt_rt) + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_subclass(self): + class DateSubclass(self.theclass): + pass + + dt = DateSubclass(2014, 12, 14) + + dt_rt = DateSubclass.fromisoformat(dt.isoformat()) + + self.assertIsInstance(dt_rt, DateSubclass) def test_fromisoformat_fails(self): # Test that fromisoformat() fails on invalid values @@ -1617,12 +1629,15 @@ def test_fromisoformat_fails(self): '2009-12-0a', # Invalid character in day '2009-01-32', # Invalid day '2009-02-29', # Invalid leap day + '20090228', # Valid ISO8601 output not from isoformat() ] for bad_str in bad_strs: with self.assertRaises(ValueError): self.theclass.fromisoformat(bad_str) + def 
test_fromisoformat_fails_typeerror(self): + # Test that fromisoformat fails when passed the wrong type bad_types = [b'2009-03-01', None] for bad_type in bad_types: with self.assertRaises(TypeError): @@ -2374,14 +2389,149 @@ def newmeth(self, start): self.assertEqual(dt2.newmeth(-7), dt1.year + dt1.month + dt1.second - 7) - @unittest.skip('Not implemented yet') def test_fromisoformat(self): - pass + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + super().test_fromisoformat() + + def test_fromisoformat_datetime(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + # Test that isoformat() is reversible + base_dates = [ + (1, 1, 1), + (1000, 2, 14), + (1900, 1, 1), + (2004, 11, 12), + (2004, 4, 3), + (2017, 5, 30) + ] + + base_times = [ + (0, 0, 0, 0), + (0, 0, 0, 241000), + (0, 0, 0, 234567), + (23, 59, 47), + (12, 30, 45, 234567) + ] + + separators = [ + ' ', 'T', '\u007f', # 1-bit widths + '\u0080', 'ʁ', # 2-bit widths + 'ᛇ', '時', # 3-bit widths + '🐍' # 4-bit widths + ] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27))] + + dts = [self.theclass(*date_tuple, *time_tuple, tzinfo=tzi) + for date_tuple in base_dates + for time_tuple in base_times + for tzi in tzinfos] + + for dt in dts: + for sep in separators: + dtstr = dt.isoformat(sep=sep) + + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_timespecs(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + datetime_bases = [ + (2009, 12, 4, 8, 17, 45, 123456), + (2009, 12, 4, 8, 17, 45, 0)] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27))] + + timespecs = ['hours', 'minutes', 'seconds', + 
'milliseconds', 'microseconds'] + + for ip, ts in enumerate(timespecs): + for tzi in tzinfos: + for dt_tuple in datetime_bases: + if ts == 'milliseconds': + new_microseconds = 1000 * (dt_tuple[6] // 1000) + dt_tuple = dt_tuple[0:6] + (new_microseconds,) + + dt = self.theclass(*(dt_tuple[0:(4 + ip)]), tzinfo=tzi) + dtstr = dt.isoformat(timespec=ts) + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + + def test_fromisoformat_fails_datetime(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + # Test that fromisoformat() fails on invalid values + bad_strs = [ + '', # Empty string + '2009.04-19T03', # Wrong first separator + '2009-04.19T03', # Wrong second separator + '2009-04-19T0a', # Invalid hours + '2009-04-19T03:1a:45', # Invalid minutes + '2009-04-19T03:15:4a', # Invalid seconds + '2009-04-19T03;15:45', # Bad first time separator + '2009-04-19T03:15;45', # Bad second time separator + '2009-04-19T03:15:4500:00', # Bad time zone separator + '2009-04-19T03:15:45.2345', # Too many digits for milliseconds + '2009-04-19T03:15:45.1234567', # Too many digits for microseconds + '2009-04-19T03:15:45.123456+24:30', # Invalid time zone offset + '2009-04-19T03:15:45.123456-24:30', # Invalid negative offset + '2009-04-10ᛇᛇᛇᛇᛇ12:15', # Too many unicode separators + '2009-04-19T1', # Incomplete hours + '2009-04-19T12:3', # Incomplete minutes + '2009-04-19T12:30:4', # Incomplete seconds + '2009-04-19T12:', # Ends with time separator + '2009-04-19T12:30:', # Ends with time separator + '2009-04-19T12:30:45.', # Ends with time separator + '2009-04-19T12:30:45.123456+', # Ends with timzone separator + '2009-04-19T12:30:45.123456-', # Ends with timzone separator + '2009-04-19T12:30:45.123456-05:00a', # Extra text + '2009-04-19T12:30:45.123-05:00a', # Extra text + '2009-04-19T12:30:45-05:00a', # Extra text + ] + + for bad_str in bad_strs: + with self.assertRaises(ValueError): 
+ self.theclass.fromisoformat(bad_str) + + def test_fromisoformat_utc(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + dt_str = '2014-04-19T13:21:13+00:00' + dt = self.theclass.fromisoformat(dt_str) + + self.assertIs(dt.tzinfo, timezone.utc) + + def test_fromisoformat_subclass(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + class DateTimeSubclass(self.theclass): + pass + + dt = DateTimeSubclass(2014, 12, 14, 9, 30, 45, 457390, + tzinfo=timezone(timedelta(hours=10, minutes=45))) + + dt_rt = DateTimeSubclass.fromisoformat(dt.isoformat()) + + self.assertEqual(dt, dt_rt) + self.assertIsInstance(dt_rt, DateTimeSubclass) - @unittest.skip('Not implemented yet') - def test_fromisoformat_fails(self): - pass - class TestSubclassDateTime(TestDateTime): theclass = SubclassDatetime From 327d0fc2bca061f596212e913671d0c653963be2 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 Dec 2017 11:58:57 +0000 Subject: [PATCH 06/21] Add C implementation of time.fromisoformat() --- Modules/_datetimemodule.c | 88 +++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 17 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 2e168d84606205..c86edfe518f17c 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -730,8 +730,9 @@ static int parse_isoformat_date(const char *dtstr, } \ } -static int parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, - int* microsecond, int *tzoffset) { +static int +parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, + int* microsecond, int *tzoffset) { // Parse the time portion of a datetime.isoformat() string // // Return codes: @@ -799,6 +800,7 @@ parse_timezone:; return 1; } + /* --------------------------------------------------------------------------- * Create various objects, mostly without range checking. 
*/ @@ -1199,6 +1201,27 @@ append_keyword_fold(PyObject *repr, int fold) return repr; } +static inline PyObject * +tzinfo_from_isoformat_results(int rv, int tzoffset) { + PyObject *tzinfo; + if (rv == 1) { + // Create a timezone from offset in seconds (0 returns UTC) + if (tzoffset == 0) { + Py_INCREF(PyDateTime_TimeZone_UTC); + return PyDateTime_TimeZone_UTC; + } + + PyObject *delta = new_delta(0, tzoffset, 0, 1); + tzinfo = new_timezone(delta, NULL); + Py_XDECREF(delta); + } else { + tzinfo = Py_None; + Py_INCREF(Py_None); + } + + return tzinfo; +} + /* --------------------------------------------------------------------------- * String format helpers. */ @@ -2779,7 +2802,6 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) { PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str"); return NULL; } - assert(dtstr != NULL || PyUnicode_Check(dtstr)); Py_ssize_t len; @@ -4154,6 +4176,45 @@ time_replace(PyDateTime_Time *self, PyObject *args, PyObject *kw) return clone; } +static PyObject * +time_fromisoformat(PyObject *cls, PyObject *tstr) { + assert(tstr != NULL); + + if (!PyUnicode_Check(tstr)) { + PyErr_SetString(PyExc_TypeError, "fromisoformat: argument must be str"); + return NULL; + } + + const char *p = PyUnicode_AsUTF8(tstr); + + int hour = 0, minute = 0, second = 0, microsecond = 0, tzoffset = 0; + int rv = parse_isoformat_time(p, &hour, &minute, &second, &microsecond, + &tzoffset); + + if (rv < 0) { + PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p); + return NULL; + } + + PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset); + + if (tzinfo == NULL) { + return NULL; + } + + PyObject *t; + if ( (PyTypeObject *)cls == &PyDateTime_TimeType ) { + t = new_time(hour, minute, second, microsecond, tzinfo, 0); + } else { + t = PyObject_CallFunction(cls, "iiiiO", + hour, minute, second, microsecond, tzinfo); + } + + Py_DECREF(tzinfo); + return t; +} + + /* Pickle support, a simple use of __reduce__.
*/ /* Let basestate be the non-tzinfo data string. @@ -4223,6 +4284,9 @@ static PyMethodDef time_methods[] = { {"replace", (PyCFunction)time_replace, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Return time with new specified fields.")}, + {"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS, + PyDoc_STR("string -> time from time.isoformat() output")}, + {"__reduce_ex__", (PyCFunction)time_reduce_ex, METH_VARARGS, PyDoc_STR("__reduce_ex__(proto) -> (cls, state)")}, @@ -4726,27 +4790,17 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { } rv = parse_isoformat_time(p, &hour, &minute, &second, - &microsecond, &tzoffset); + &microsecond, &tzoffset); } if (rv < 0) { PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr); return NULL; } - - PyObject* tzinfo = NULL; - if (rv == 1) { - if( tzoffset == 0 ) { - tzinfo = PyDateTime_TimeZone_UTC; - } else { - PyObject* delta = new_delta(0, tzoffset, 0, 1); - tzinfo = new_timezone(delta, NULL); - Py_DECREF(delta); - } - } else { - tzinfo = Py_None; + PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset); + if (tzinfo == NULL) { + return NULL; } - Py_INCREF(tzinfo); PyObject* dt; if ( (PyTypeObject*)cls == &PyDateTime_DateTimeType ) { From 52b2175918d4426ca8cbf70d80b018afdac19a58 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 Dec 2017 11:59:41 +0000 Subject: [PATCH 07/21] Add tests for time.isoformat() --- Lib/test/datetimetester.py | 118 ++++++++++++++++++++++++++++++++++++- 1 file changed, 116 insertions(+), 2 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 436a5b661fc004..abb951b2ed8dc1 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -49,7 +49,6 @@ INF = float("inf") NAN = float("nan") - ############################################################################# # module tests @@ -1638,7 +1637,9 @@ def test_fromisoformat_fails(self): def test_fromisoformat_fails_typeerror(self): # Test that fromisoformat fails
when passed the wrong type - bad_types = [b'2009-03-01', None] + import io + + bad_types = [b'2009-03-01', None, io.StringIO('2009-03-01')] for bad_type in bad_types: with self.assertRaises(TypeError): self.theclass.fromisoformat(bad_type) @@ -3254,6 +3255,119 @@ def utcoffset(self, t): t2 = t2.replace(tzinfo=Varies()) self.assertTrue(t1 < t2) # t1's offset counter still going up + def test_fromisoformat(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + time_examples = [ + (0, 0, 0, 0), + (23, 59, 59, 999999), + ] + + hh = (9, 12, 20) + mm = (5, 30) + ss = (4, 45) + usec = (0, 245000, 678901) + + time_examples += list(itertools.product(hh, mm, ss, usec)) + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27))] + + for ttup in time_examples: + for tzi in tzinfos: + t = self.theclass(*ttup, tzinfo=tzi) + tstr = t.isoformat() + + with self.subTest(tstr=tstr): + t_rt = self.theclass.fromisoformat(tstr) + self.assertEqual(t, t_rt) + + def test_fromisoformat_timespecs(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + time_bases = [ + (8, 17, 45, 123456), + (8, 17, 45, 0) + ] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27))] + + timespecs = ['hours', 'minutes', 'seconds', + 'milliseconds', 'microseconds'] + + for ip, ts in enumerate(timespecs): + for tzi in tzinfos: + for t_tuple in time_bases: + if ts == 'milliseconds': + new_microseconds = 1000 * (t_tuple[-1] // 1000) + t_tuple = t_tuple[0:-1] + (new_microseconds,) + + t = self.theclass(*(t_tuple[0:(1 + ip)]), tzinfo=tzi) + tstr = t.isoformat(timespec=ts) + with self.subTest(tstr=tstr): + t_rt = self.theclass.fromisoformat(tstr) + self.assertEqual(t, t_rt) + + def test_fromisoformat_fails(self): + if '_Pure' in 
self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + bad_strs = [ + '', # Empty string + '12:', # Ends on a separator + '12:30:', # Ends on a separator + '12:30:15.', # Ends on a separator + '1a:30:45.334034', # Invalid character in hours + '12:a0:45.334034', # Invalid character in minutes + '12:30:a5.334034', # Invalid character in seconds + '12:30:45.1234', # Too many digits for milliseconds + '12:30:45.1234567', # Too many digits for microseconds + '12:30:45.123456+24:30', # Invalid time zone offset + '12:30:45.123456-24:30', # Invalid negative offset + '12:30:45', # Uses full-width unicode colons + '12:30:45․123456', # Uses \u2024 in place of decimal point + ] + + for bad_str in bad_strs: + try: + self.theclass.fromisoformat(bad_str) + self.assertTrue(False) + except ValueError: + pass + + def test_fromisoformat_fails_typeerror(self): + # Test the fromisoformat fails when passed the wrong type + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + import io + + bad_types = [b'12:30:45', None, io.StringIO('12:30:45')] + + for bad_type in bad_types: + with self.assertRaises(TypeError): + self.theclass(bad_type) + + def test_fromisoformat_subclass(self): + if '_Pure' in self.__class__.__name__: + self.skipTest('Only run for Fast C implementation') + + class TimeSubclass(self.theclass): + pass + + tsc = TimeSubclass(12, 14, 45, 203745, tzinfo=timezone.utc) + tsc_rt = TimeSubclass.fromisoformat(tsc.isoformat()) + + self.assertEqual(tsc, tsc_rt) + self.assertIsInstance(tsc_rt, TimeSubclass) + def test_subclass_timetz(self): class C(self.theclass): From f1b78af608d1275058badf4a3eb18837148e1faa Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 Dec 2017 15:24:22 +0000 Subject: [PATCH 08/21] Add pure python implementation of time.fromisoformat() --- Lib/datetime.py | 83 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 80 insertions(+), 3 deletions(-) diff --git 
a/Lib/datetime.py b/Lib/datetime.py index 88c581cb012ed5..b5d484b58498c9 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -240,7 +240,7 @@ def _wrap_strftime(object, format, timetuple): # Helpers for parsing the result of isoformat() def _parse_isoformat_date(dtstr): # It is assumed that this function will only be called with a - # string of length exactly 10, and (though this is not used), not + # string of length exactly 10, and (though this is not used) ASCII-only year = int(dtstr[0:4]) if dtstr[4] != '-': raise ValueError('Invalid date separator: %s' % dtstr[4]) @@ -252,7 +252,72 @@ def _parse_isoformat_date(dtstr): day = int(dtstr[8:10]) - return (year, month, day) + return [year, month, day] + +def _parse_isoformat_time(tstr): + len_str = len(tstr) + if len_str < 2: + raise ValueError('Isoformat time too short') + + time_comps = [0, 0, 0, 0, None] + pos = 0 + comp = 0 + while True: + if (len_str - pos) < 2: + raise ValueError('Incomplete time component') + + time_comps[comp] = int(tstr[pos:pos+2]) + + pos += 2 + comp += 1 + next_char = tstr[pos:pos+1] + if comp > 2 or next_char in '-+': + break + + if next_char != ':': + raise ValueError('Invalid time separator') + if pos >= len_str: + break + + pos += 1 + + # Parse milli/microseconds if it exists + if pos < len_str and tstr[pos] == '.': + pos += 1 + len_remainder = len_str - pos + + # Valid isoformat strings at this point can be: + # fff (length 3) + # ffffff (length 6) + # fff+HH:MM (length 9) + # ffffff+HH:MM (length 12) + if not (12 >= len_remainder > 0) or len_remainder % 3: + raise ValueError('Invalid microseconds or offset') + + len_micro = len_remainder - (6 if len_remainder > 8 else 0) + units = (1 if len_micro == 6 else 1000) # Micro- or milli- seconds + time_comps[3] = units * int(tstr[pos:pos+len_micro]) + pos += len_micro + + # Parse tzinfo if it exists + if pos < len_str: + sep_char = tstr[pos] + + if sep_char not in '-+' or (len_str - pos) != 6 or tstr[pos+3] != ':': + raise 
ValueError('Malformed time zone string') + + hh = int(tstr[pos+1:pos+3]) + mm = int(tstr[pos+4:pos+6]) + + pos += 6 + td = (-1 if sep_char == '-' else 1) * timedelta(hours=hh, minutes=mm) + + time_comps[-1] = timezone(td) + + if pos < len_str: + raise ValueError('Invalid isoformat time') + + return time_comps # Just raise TypeError if the arg isn't None or a string. @@ -1275,6 +1340,18 @@ def isoformat(self, timespec='auto'): __str__ = isoformat + @classmethod + def fromisoformat(cls, tstr): + """Construct a time from the output of isoformat().""" + if not isinstance(tstr, str): + raise TypeError('fromisoformat: argument must be str') + + try: + return cls(*_parse_isoformat_time(time_string)) + except: + raise ValueError('Invalid isoformat string: {}'.format(tstr)) + + def strftime(self, fmt): """Format using strftime(). The date part of the timestamp passed to underlying strftime should not be used. @@ -2308,7 +2385,7 @@ def _name_from_offset(delta): _date_class, _days_before_month, _days_before_year, _days_in_month, _format_time, _is_leap, _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, - _divide_and_round, _parse_isoformat_date) + _divide_and_round, _parse_isoformat_date, _parse_isoformat_time) # XXX Since import * above excludes names that start with _, # docstring does not get overwritten. 
In the future, it may be # appropriate to maintain a single module level docstring and From aeaa9ca2a657d2dc98b0bd168b784ab0707d36f0 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 Dec 2017 15:24:58 +0000 Subject: [PATCH 09/21] Add tests for pure python time.fromisoformat() --- Lib/test/datetimetester.py | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index abb951b2ed8dc1..2eaaf9ea093ff1 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -3256,9 +3256,6 @@ def utcoffset(self, t): self.assertTrue(t1 < t2) # t1's offset counter still going up def test_fromisoformat(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - time_examples = [ (0, 0, 0, 0), (23, 59, 59, 999999), @@ -3286,9 +3283,6 @@ def test_fromisoformat(self): self.assertEqual(t, t_rt) def test_fromisoformat_timespecs(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - time_bases = [ (8, 17, 45, 123456), (8, 17, 45, 0) @@ -3316,14 +3310,14 @@ def test_fromisoformat_timespecs(self): self.assertEqual(t, t_rt) def test_fromisoformat_fails(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - bad_strs = [ '', # Empty string '12:', # Ends on a separator '12:30:', # Ends on a separator '12:30:15.', # Ends on a separator + '1', # Incomplete hours + '12:3', # Incomplete minutes + '12:30:1', # Incomplete seconds '1a:30:45.334034', # Invalid character in hours '12:a0:45.334034', # Invalid character in minutes '12:30:a5.334034', # Invalid character in seconds @@ -3333,32 +3327,27 @@ def test_fromisoformat_fails(self): '12:30:45.123456-24:30', # Invalid negative offset '12:30:45', # Uses full-width unicode colons '12:30:45․123456', # Uses \u2024 in place of decimal point + '12:30:45a', # Extra at tend of basic time + 
'12:30:45.123a', # Extra at end of millisecond time + '12:30:45.123456a', # Extra at end of microsecond time + '12:30:45.123456+12:00a', # Extra at end of full time ] for bad_str in bad_strs: - try: + with self.assertRaises(ValueError): self.theclass.fromisoformat(bad_str) - self.assertTrue(False) - except ValueError: - pass def test_fromisoformat_fails_typeerror(self): # Test the fromisoformat fails when passed the wrong type - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - import io bad_types = [b'12:30:45', None, io.StringIO('12:30:45')] for bad_type in bad_types: with self.assertRaises(TypeError): - self.theclass(bad_type) + self.theclass.fromisoformat(bad_type) def test_fromisoformat_subclass(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - class TimeSubclass(self.theclass): pass From 094ccf4368118af8e8f4867278d93ff85db3aa5f Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 Dec 2017 16:28:16 +0000 Subject: [PATCH 10/21] Add pure python implementation of datetime.fromisoformat --- Lib/datetime.py | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index b5d484b58498c9..6eb2703ad056a0 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -276,8 +276,6 @@ def _parse_isoformat_time(tstr): if next_char != ':': raise ValueError('Invalid time separator') - if pos >= len_str: - break pos += 1 @@ -285,7 +283,7 @@ def _parse_isoformat_time(tstr): if pos < len_str and tstr[pos] == '.': pos += 1 len_remainder = len_str - pos - + # Valid isoformat strings at this point can be: # fff (length 3) # ffffff (length 6) @@ -308,14 +306,11 @@ def _parse_isoformat_time(tstr): hh = int(tstr[pos+1:pos+3]) mm = int(tstr[pos+4:pos+6]) - + pos += 6 td = (-1 if sep_char == '-' else 1) * timedelta(hours=hh, minutes=mm) - - time_comps[-1] = timezone(td) - if pos < len_str: - raise 
ValueError('Invalid isoformat time') + time_comps[-1] = timezone(td) return time_comps @@ -1605,6 +1600,31 @@ def combine(cls, date, time, tzinfo=True): time.hour, time.minute, time.second, time.microsecond, tzinfo, fold=time.fold) + @classmethod + def fromisoformat(cls, dtstr): + """Construct a datetime from the output of datetime.isoformat().""" + if not isinstance(dtstr, str): + raise TypeError('fromisoformat: argument must be str') + + # Split this at the separator + dstr = dtstr[0:10] + tstr = dtstr[11:] + + try: + date_components = _parse_isoformat_date(dstr) + except ValueError: + raise ValueError('Invalid isoformat string: {}'.format(dtstr)) + + if tstr: + try: + time_components = _parse_isoformat_time(tstr) + except ValueError: + raise ValueError('Invalid isoformat string: {}'.format(dtstr)) + else: + time_components = [0, 0, 0, 0, None] + + return cls(*(date_components + time_components)) + def timetuple(self): "Return local time tuple compatible with time.localtime()." dst = self.dst() From 2a8120d21b7f872d12120563746f97deaad3bd09 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 Dec 2017 16:28:42 +0000 Subject: [PATCH 11/21] Enable tests for pure python datetime.fromisoformat --- Lib/test/datetimetester.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 2eaaf9ea093ff1..440923dd8938e5 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2390,16 +2390,7 @@ def newmeth(self, start): self.assertEqual(dt2.newmeth(-7), dt1.year + dt1.month + dt1.second - 7) - def test_fromisoformat(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - - super().test_fromisoformat() - def test_fromisoformat_datetime(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - # Test that isoformat() is reversible base_dates = [ (1, 1, 1), @@ -2444,9 +2435,6 @@ def 
test_fromisoformat_datetime(self): self.assertEqual(dt, dt_rt) def test_fromisoformat_timespecs(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - datetime_bases = [ (2009, 12, 4, 8, 17, 45, 123456), (2009, 12, 4, 8, 17, 45, 0)] @@ -2473,9 +2461,6 @@ def test_fromisoformat_timespecs(self): self.assertEqual(dt, dt_rt) def test_fromisoformat_fails_datetime(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - # Test that fromisoformat() fails on invalid values bad_strs = [ '', # Empty string @@ -2510,18 +2495,12 @@ def test_fromisoformat_fails_datetime(self): self.theclass.fromisoformat(bad_str) def test_fromisoformat_utc(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - dt_str = '2014-04-19T13:21:13+00:00' dt = self.theclass.fromisoformat(dt_str) self.assertIs(dt.tzinfo, timezone.utc) def test_fromisoformat_subclass(self): - if '_Pure' in self.__class__.__name__: - self.skipTest('Only run for Fast C implementation') - class DateTimeSubclass(self.theclass): pass From af9e6d02acb5db4112fa75bfc9450a3f521d5e65 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 4 Dec 2017 17:13:13 +0000 Subject: [PATCH 12/21] Add documentation for [date][time].fromisoformat() --- Doc/library/datetime.rst | 47 ++++++++++++++++++- Lib/datetime.py | 28 +++++------ .../2017-12-04-17-41-40.bpo-15873.-T4TRK.rst | 3 ++ 3 files changed, 63 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index dce51a16e868a4..95d85f88727f55 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -436,6 +436,21 @@ Other constructors, all class methods: d``. +.. 
classmethod:: date.fromisoformat(date_string) + + Return a :class:`date` corresponding to a *date_string* in one of the ISO 8601 + formats emitted by :meth:`date.isoformat`. Specifically, this function supports + strings in the format(s) ``YYYY-MM-DD``. + + .. caution:: + + This does not support parsing arbitrary ISO 8601 strings - it is only intended + as the inverse operation of :meth:`date.isoformat`. + + .. versionadded:: 3.7 + + + Class attributes: .. attribute:: date.min @@ -819,6 +834,20 @@ Other constructors, all class methods: Added the *tzinfo* argument. +.. classmethod:: datetime.fromisoformat(date_string) + + Return a :class:`datetime` corresponding to a *date_string* in one of the + ISO 8601 formats emitted by :meth:`datetime.isoformat`. Specifically, this function + supports strings in the format(s) ``YYYY-MM-DD[*[HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM]]``, + where ``*`` can match any single character. + + .. caution:: + + This does not support parsing arbitrary ISO 8601 strings - it is only intended + as the inverse operation of :meth:`datetime.isoformat`. + + .. versionadded:: 3.7 + .. classmethod:: datetime.strptime(date_string, format) Return a :class:`.datetime` corresponding to *date_string*, parsed according to @@ -1486,6 +1515,23 @@ In boolean contexts, a :class:`.time` object is always considered to be true. error-prone and has been removed in Python 3.5. See :issue:`13936` for full details. + +Other constructor: + +.. classmethod:: time.fromisoformat(date_string) + + Return a :class:`time` corresponding to a *time_string* in one of the ISO 8601 + formats emitted by :meth:`time.isoformat`. Specifically, this function supports + strings in the format(s) ``HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM]``. + + .. caution:: + + This does not support parsing arbitrary ISO 8601 strings - it is only intended + as the inverse operation of :meth:`time.isoformat`. + + .. versionadded:: 3.7 + + Instance methods: .. 
method:: time.replace(hour=self.hour, minute=self.minute, second=self.second, \ @@ -1587,7 +1633,6 @@ Instance methods: ``self.tzinfo.tzname(None)``, or raises an exception if the latter doesn't return ``None`` or a string object. - Example: >>> from datetime import time, tzinfo, timedelta diff --git a/Lib/datetime.py b/Lib/datetime.py index 6eb2703ad056a0..4e165111e9a2cc 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -811,16 +811,16 @@ def fromordinal(cls, n): return cls(y, m, d) @classmethod - def fromisoformat(cls, dtstr): + def fromisoformat(cls, date_string): """Construct a date from the output of date.isoformat().""" - if not isinstance(dtstr, str): + if not isinstance(date_string, str): raise TypeError('fromisoformat: argument must be str') try: - assert len(dtstr) == 10 - return cls(*_parse_isoformat_date(dtstr)) + assert len(date_string) == 10 + return cls(*_parse_isoformat_date(date_string)) except: - raise ValueError('Invalid isoformat string: {}'.format(dtstr)) + raise ValueError('Invalid isoformat string: %s' % date_string) # Conversions to string @@ -1336,15 +1336,15 @@ def isoformat(self, timespec='auto'): __str__ = isoformat @classmethod - def fromisoformat(cls, tstr): + def fromisoformat(cls, time_string): """Construct a time from the output of isoformat().""" - if not isinstance(tstr, str): + if not isinstance(time_string, str): raise TypeError('fromisoformat: argument must be str') try: return cls(*_parse_isoformat_time(time_string)) except: - raise ValueError('Invalid isoformat string: {}'.format(tstr)) + raise ValueError('Invalid isoformat string: %s' % time_string) def strftime(self, fmt): @@ -1601,25 +1601,25 @@ def combine(cls, date, time, tzinfo=True): tzinfo, fold=time.fold) @classmethod - def fromisoformat(cls, dtstr): + def fromisoformat(cls, date_string): """Construct a datetime from the output of datetime.isoformat().""" - if not isinstance(dtstr, str): + if not isinstance(date_string, str): raise TypeError('fromisoformat: 
argument must be str') # Split this at the separator - dstr = dtstr[0:10] - tstr = dtstr[11:] + dstr = date_string[0:10] + tstr = date_string[11:] try: date_components = _parse_isoformat_date(dstr) except ValueError: - raise ValueError('Invalid isoformat string: {}'.format(dtstr)) + raise ValueError('Invalid isoformat string: %s' % date_string) if tstr: try: time_components = _parse_isoformat_time(tstr) except ValueError: - raise ValueError('Invalid isoformat string: {}'.format(dtstr)) + raise ValueError('Invalid isoformat string: %s' % date_string) else: time_components = [0, 0, 0, 0, None] diff --git a/Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst b/Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst new file mode 100644 index 00000000000000..98a841597af11a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-12-04-17-41-40.bpo-15873.-T4TRK.rst @@ -0,0 +1,3 @@ +Added new alternate constructors :meth:`datetime.datetime.fromisoformat`, +:meth:`datetime.time.fromisoformat` and :meth:`datetime.date.fromisoformat` +as the inverse operation of each class's respective ``isoformat`` methods. 
From cf802aff7492781714eb2b83854dfe79ccf71561 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 5 Dec 2017 01:41:31 +0000 Subject: [PATCH 13/21] Consolidate helper functions into parse_digits --- Modules/_datetimemodule.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index c86edfe518f17c..3b62844d38bcda 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -666,22 +666,13 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d) /* --------------------------------------------------------------------------- * String parsing utilities and helper functions */ -static inline unsigned int to_int(char ptr) -{ - return (unsigned int)(ptr - '0'); -} - -static inline int is_digit(unsigned int tmp) -{ - return tmp <= 9; -} -static inline const char* parse_digits(const char* ptr, int* var, +static const char* parse_digits(const char* ptr, int* var, size_t num_digits) { for (size_t i = 0; i < num_digits; ++i) { - int tmp = to_int(*(ptr++)); - if (!is_digit(tmp)) { return NULL; } + unsigned int tmp = (unsigned int)(*(ptr++) - '0'); + if (tmp > 9) { return NULL; } *var *= 10; *var += (signed int)tmp; } From 626d239b911565825cf47a683425506c51aed731 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 5 Dec 2017 01:47:15 +0000 Subject: [PATCH 14/21] Refactor datetime.isoformat round trip tests --- Lib/test/datetimetester.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 440923dd8938e5..0a5ba55b599434 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2394,10 +2394,8 @@ def test_fromisoformat_datetime(self): # Test that isoformat() is reversible base_dates = [ (1, 1, 1), - (1000, 2, 14), (1900, 1, 1), (2004, 11, 12), - (2004, 4, 3), (2017, 5, 30) ] @@ -2405,16 +2403,10 @@ def test_fromisoformat_datetime(self): (0, 0, 0, 0), (0, 
0, 0, 241000), (0, 0, 0, 234567), - (23, 59, 47), (12, 30, 45, 234567) ] - separators = [ - ' ', 'T', '\u007f', # 1-bit widths - '\u0080', 'ʁ', # 2-bit widths - 'ᛇ', '時', # 3-bit widths - '🐍' # 4-bit widths - ] + separators = [' ', 'T'] tzinfos = [None, timezone.utc, timezone(timedelta(hours=-5)), @@ -2434,6 +2426,22 @@ def test_fromisoformat_datetime(self): dt_rt = self.theclass.fromisoformat(dtstr) self.assertEqual(dt, dt_rt) + def test_fromisoformat_separators(self): + separators = [ + ' ', 'T', '\u007f', # 1-bit widths + '\u0080', 'ʁ', # 2-bit widths + 'ᛇ', '時', # 3-bit widths + '🐍' # 4-bit widths + ] + + for sep in separators: + dt = self.theclass(2018, 1, 31, 23, 59, 47, 124789) + dtstr = dt.isoformat(sep=sep) + + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + def test_fromisoformat_timespecs(self): datetime_bases = [ (2009, 12, 4, 8, 17, 45, 123456), From 7c771e73c411635394908ebe904803415337a7ea Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 5 Dec 2017 22:57:51 +0000 Subject: [PATCH 15/21] Refactor C code for PEP 7 --- Modules/_datetimemodule.c | 69 +++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 18 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 3b62844d38bcda..61eb163f9c2e12 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -667,12 +667,15 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d) * String parsing utilities and helper functions */ -static const char* parse_digits(const char* ptr, int* var, +static const char* +parse_digits(const char* ptr, int* var, size_t num_digits) { for (size_t i = 0; i < num_digits; ++i) { unsigned int tmp = (unsigned int)(*(ptr++) - '0'); - if (tmp > 9) { return NULL; } + if (tmp > 9) { + return NULL; + } *var *= 10; *var += (signed int)tmp; } @@ -691,24 +694,36 @@ static int parse_isoformat_date(const char *dtstr, */ const char *p = dtstr; p = 
parse_digits(p, year, 4); - if (NULL == p) { return -1; } + if (NULL == p) { + return -1; + } - if (*(p++) != '-') { return -2; } + if (*(p++) != '-') { + return -2; + } p = parse_digits(p, month, 2); - if (NULL == p) { return -1; } + if (NULL == p) { + return -1; + } - if (*(p++) != '-') { return -2; } + if (*(p++) != '-') { + return -2; + } p = parse_digits(p, day, 2); - if (p == NULL) { return -1; } + if (p == NULL) { + return -1; + } return 0; } // Macro that short-circuits to timezone parsing -#define PARSE_ISOFORMAT_ADVANCE_TIME_SEP(SEP) { \ - if (*p == '\0') { return 0; } \ +#define PARSE_ISOFORMAT_ADVANCE_TIME_SEP(SEP) { \ + if (*p == '\0') { \ + return 0; \ + } \ switch(*(p++)) { \ case SEP: \ break; \ @@ -739,32 +754,44 @@ parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, // Parse time - all components are optional except hour int tzsign = 1; p = parse_digits(p, hour, 2); - if (NULL == p) { return -3; } + if (NULL == p) { + return -3; + } PARSE_ISOFORMAT_ADVANCE_TIME_SEP(':'); p = parse_digits(p, minute, 2); - if (NULL == p) { return -3; } + if (NULL == p) { + return -3; + } PARSE_ISOFORMAT_ADVANCE_TIME_SEP(':'); p = parse_digits(p, second, 2); - if (NULL == p) { return -3; } + if (NULL == p) { + return -3; + } PARSE_ISOFORMAT_ADVANCE_TIME_SEP('.'); p = parse_digits(p, microsecond, 3); - if (NULL == p) { return -3; } + if (NULL == p) { + return -3; + } // Microseconds if (*p >= '0' && *p <= '9') { p = parse_digits(p, microsecond, 3); - if (NULL == p) { return -3; } + if (NULL == p) { + return -3; + } } else { *microsecond *= 1000; } - if (*p == '\0') { return 0; } + if (*p == '\0') { + return 0; + } switch(*(p++)) { case '-': @@ -778,15 +805,21 @@ parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, parse_timezone:; int tzhours = 0, tzminutes = 0; p = parse_digits(p, &tzhours, 2); - if (NULL == p || *(p++) != ':') { return -5; } + if (NULL == p || *(p++) != ':') { + return -5; + } p = parse_digits(p, 
&tzminutes, 2); - if (NULL == p) { return -5; } + if (NULL == p) { + return -5; + } // Convert hours:minutes into seconds *tzoffset = tzsign * ((3600 * tzhours) + (60 * tzminutes)); - if (*p != '\0') { return -6; } + if (*p != '\0') { + return -6; + } return 1; } From 8fbd75224709d8f458be58615f31ea14b4f97e3e Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 6 Dec 2017 15:18:58 +0000 Subject: [PATCH 16/21] Add support for seconds in fromisoformat offsets --- Lib/datetime.py | 30 +++++++++++++++++++++++++----- Lib/test/datetimetester.py | 21 +++++++++++++++++++-- Modules/_datetimemodule.c | 12 ++++++++++-- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 4e165111e9a2cc..261f63e0e80120 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -288,11 +288,22 @@ def _parse_isoformat_time(tstr): # fff (length 3) # ffffff (length 6) # fff+HH:MM (length 9) + # fff+HH:MM:SS (length 12) # ffffff+HH:MM (length 12) - if not (12 >= len_remainder > 0) or len_remainder % 3: + # ffffff+HH:MM:SS (length 15) + if not (15 >= len_remainder > 0) or len_remainder % 3: raise ValueError('Invalid microseconds or offset') - len_micro = len_remainder - (6 if len_remainder > 8 else 0) + if len_remainder > 8: + if len_remainder == 15 or (len_remainder == 12 and + tstr[-6] == ':'): + len_tz = 9 + else: + len_tz = 6 + else: + len_tz = 0 + + len_micro = len_remainder - len_tz units = (1 if len_micro == 6 else 1000) # Micro- or milli- seconds time_comps[3] = units * int(tstr[pos:pos+len_micro]) pos += len_micro @@ -301,14 +312,23 @@ def _parse_isoformat_time(tstr): if pos < len_str: sep_char = tstr[pos] - if sep_char not in '-+' or (len_str - pos) != 6 or tstr[pos+3] != ':': + tzlen = len_str - pos + if (sep_char not in '-+' or + tzlen not in (6, 9) or tstr[pos+3] != ':'): raise ValueError('Malformed time zone string') hh = int(tstr[pos+1:pos+3]) mm = int(tstr[pos+4:pos+6]) - pos += 6 - td = (-1 if sep_char == '-' else 1) * 
timedelta(hours=hh, minutes=mm) + if tzlen == 9: + ss = int(tstr[pos+7:pos+9]) + else: + ss = 0 + + pos += tzlen + td = (-1 if sep_char == '-' else 1) * timedelta(hours=hh, + minutes=mm, + seconds=ss) time_comps[-1] = timezone(td) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 0a5ba55b599434..6f9b8aaadccc36 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -3256,7 +3256,6 @@ def test_fromisoformat(self): time_examples += list(itertools.product(hh, mm, ss, usec)) tzinfos = [None, timezone.utc, - timezone(timedelta(hours=-5)), timezone(timedelta(hours=2)), timezone(timedelta(hours=6, minutes=27))] @@ -3269,6 +3268,24 @@ def test_fromisoformat(self): t_rt = self.theclass.fromisoformat(tstr) self.assertEqual(t, t_rt) + def test_fromisoformat_timezone(self): + base_time = self.theclass(12, 30, 45, 217456) + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=0)), + timezone(timedelta(hours=-5)), + timezone(timedelta(hours=2)), + timezone(timedelta(hours=6, minutes=27)), + timezone(timedelta(hours=12, minutes=32, seconds=30))] + + for tzi in tzinfos: + t = base_time.replace(tzinfo=tzi) + tstr = t.isoformat() + + with self.subTest(tstr=tstr): + t_rt = self.theclass.fromisoformat(tstr) + assert t == t_rt + def test_fromisoformat_timespecs(self): time_bases = [ (8, 17, 45, 123456), @@ -3317,7 +3334,7 @@ def test_fromisoformat_fails(self): '12:30:45a', # Extra at tend of basic time '12:30:45.123a', # Extra at end of millisecond time '12:30:45.123456a', # Extra at end of microsecond time - '12:30:45.123456+12:00a', # Extra at end of full time + '12:30:45.123456+12:00:30a', # Extra at end of full time ] for bad_str in bad_strs: diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 61eb163f9c2e12..7c59a2e242840b 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -803,7 +803,7 @@ parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, } parse_timezone:; - 
int tzhours = 0, tzminutes = 0; + int tzhours = 0, tzminutes = 0, tzseconds = 0; p = parse_digits(p, &tzhours, 2); if (NULL == p || *(p++) != ':') { return -5; @@ -814,8 +814,16 @@ parse_timezone:; return -5; } + if (*p == ':') { + p++; + p = parse_digits(p, &tzseconds, 2); + if (NULL == p) { + return -5; + } + } + // Convert hours:minutes into seconds - *tzoffset = tzsign * ((3600 * tzhours) + (60 * tzminutes)); + *tzoffset = tzsign * ((3600 * tzhours) + (60 * tzminutes) + tzseconds); if (*p != '\0') { return -6; From 4d55e050a41b643b3c6ddf32688d3859386c29aa Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sat, 9 Dec 2017 15:14:07 +0000 Subject: [PATCH 17/21] Fix pure python implementation of isoformat() for sub-second zones See bpo-5288 --- Lib/datetime.py | 48 +++++++++++++++--------------- Lib/test/datetimetester.py | 60 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 25 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 261f63e0e80120..e2e04056a592dd 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -173,6 +173,24 @@ def _format_time(hh, mm, ss, us, timespec='auto'): else: return fmt.format(hh, mm, ss, us) +def _format_offset(off): + s = '' + if off is not None: + if off.days < 0: + sign = "-" + off = -off + else: + sign = "+" + hh, mm = divmod(off, timedelta(hours=1)) + mm, ss = divmod(mm, timedelta(minutes=1)) + s += "%s%02d:%02d" % (sign, hh, mm) + if ss or ss.microseconds: + s += ":%02d" % ss.seconds + + if ss.microseconds: + s += '.%06d' % ss.microseconds + return s + # Correctly substitute for %z and %Z escapes in strftime formats. def _wrap_strftime(object, format, timetuple): # Don't call utcoffset() or tzname() unless actually needed. 
@@ -1304,19 +1322,7 @@ def __hash__(self): def _tzstr(self, sep=":"): """Return formatted timezone offset (+xx:xx) or None.""" off = self.utcoffset() - if off is not None: - if off.days < 0: - sign = "-" - off = -off - else: - sign = "+" - hh, mm = divmod(off, timedelta(hours=1)) - mm, ss = divmod(mm, timedelta(minutes=1)) - assert 0 <= hh < 24 - off = "%s%02d%s%02d" % (sign, hh, sep, mm) - if ss: - off += ':%02d' % ss.seconds - return off + return _format_offset(off) def __repr__(self): """Convert to formal string, for repr().""" @@ -1821,18 +1827,10 @@ def isoformat(self, sep='T', timespec='auto'): self._microsecond, timespec)) off = self.utcoffset() - if off is not None: - if off.days < 0: - sign = "-" - off = -off - else: - sign = "+" - hh, mm = divmod(off, timedelta(hours=1)) - mm, ss = divmod(mm, timedelta(minutes=1)) - s += "%s%02d:%02d" % (sign, hh, mm) - if ss: - assert not ss.microseconds - s += ":%02d" % ss.seconds + tz = _format_offset(off) + if tz: + s += tz + return s def __repr__(self): diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 6f9b8aaadccc36..6d15e0d3a9eae6 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1731,6 +1731,36 @@ def test_isoformat(self): t = self.theclass(2, 3, 2, tzinfo=tz) self.assertEqual(t.isoformat(), "0002-03-02T00:00:00+00:00:16") + def test_isoformat_timezone(self): + tzoffsets = [ + ('05:00', timedelta(hours=5)), + ('02:00', timedelta(hours=2)), + ('06:27', timedelta(hours=6, minutes=27)), + ('12:32:30', timedelta(hours=12, minutes=32, seconds=30)), + ('02:04:09.123456', timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)) + ] + + tzinfos = [ + ('', None), + ('+00:00', timezone.utc), + ('+00:00', timezone(timedelta(0))), + ] + + tzinfos += [ + (prefix + expected, timezone(sign * td)) + for expected, td in tzoffsets + for prefix, sign in [('-', -1), ('+', 1)] + ] + + dt_base = self.theclass(2016, 4, 1, 12, 37, 9) + exp_base = '2016-04-01T12:37:09' + + for 
exp_tz, tzi in tzinfos: + dt = dt_base.replace(tzinfo=tzi) + exp = exp_base + exp_tz + with self.subTest(tzi=tzi): + assert dt.isoformat() == exp + def test_format(self): dt = self.theclass(2007, 9, 10, 4, 5, 1, 123) self.assertEqual(dt.__format__(''), str(dt)) @@ -2704,6 +2734,36 @@ def test_isoformat(self): self.assertEqual(t.isoformat(timespec='microseconds'), "12:34:56.000000") self.assertEqual(t.isoformat(timespec='auto'), "12:34:56") + def test_isoformat_timezone(self): + tzoffsets = [ + ('05:00', timedelta(hours=5)), + ('02:00', timedelta(hours=2)), + ('06:27', timedelta(hours=6, minutes=27)), + ('12:32:30', timedelta(hours=12, minutes=32, seconds=30)), + ('02:04:09.123456', timedelta(hours=2, minutes=4, seconds=9, microseconds=123456)) + ] + + tzinfos = [ + ('', None), + ('+00:00', timezone.utc), + ('+00:00', timezone(timedelta(0))), + ] + + tzinfos += [ + (prefix + expected, timezone(sign * td)) + for expected, td in tzoffsets + for prefix, sign in [('-', -1), ('+', 1)] + ] + + t_base = self.theclass(12, 37, 9) + exp_base = '12:37:09' + + for exp_tz, tzi in tzinfos: + t = t_base.replace(tzinfo=tzi) + exp = exp_base + exp_tz + with self.subTest(tzi=tzi): + assert t.isoformat() == exp + def test_1653736(self): # verify it doesn't accept extra keyword arguments t = self.theclass(second=1) From 5a233fb42c9c9e4b905d62a5c6f250752e74e14b Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sat, 9 Dec 2017 15:14:41 +0000 Subject: [PATCH 18/21] Add support for subsecond offsets to fromisoformat --- Doc/library/datetime.rst | 4 +- Lib/datetime.py | 109 ++++++++++---------- Lib/test/datetimetester.py | 57 ++++++++--- Modules/_datetimemodule.c | 198 ++++++++++++++++++------------------- 4 files changed, 199 insertions(+), 169 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 95d85f88727f55..4c2ffa1ebc8d90 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -838,7 +838,7 @@ Other constructors, all class methods: Return 
a :class:`datetime` corresponding to a *date_string* in one of the ISO 8601 formats emitted by :meth:`datetime.isoformat`. Specifically, this function - supports strings in the format(s) ``YYYY-MM-DD[*[HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM]]``, + supports strings in the format(s) ``YYYY-MM-DD[*HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM[:SS[.ffffff]]]``, where ``*`` can match any single character. .. caution:: @@ -1522,7 +1522,7 @@ Other constructor: Return a :class:`time` corresponding to a *time_string* in one of the ISO 8601 formats emitted by :meth:`time.isoformat`. Specifically, this function supports - strings in the format(s) ``HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM]``. + strings in the format(s) ``HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM[:SS[.ffffff]]]``. .. caution:: diff --git a/Lib/datetime.py b/Lib/datetime.py index e2e04056a592dd..f4d7ad8170b28c 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -272,83 +272,81 @@ def _parse_isoformat_date(dtstr): return [year, month, day] -def _parse_isoformat_time(tstr): +def _parse_hh_mm_ss_ff(tstr): + # Parses things of the form HH[:MM[:SS[.fff[fff]]]] len_str = len(tstr) - if len_str < 2: - raise ValueError('Isoformat time too short') - time_comps = [0, 0, 0, 0, None] + time_comps = [0, 0, 0, 0] pos = 0 - comp = 0 - while True: + for comp in range(0, 3): if (len_str - pos) < 2: raise ValueError('Incomplete time component') time_comps[comp] = int(tstr[pos:pos+2]) pos += 2 - comp += 1 next_char = tstr[pos:pos+1] - if comp > 2 or next_char in '-+': + + if not next_char or comp >= 2: break if next_char != ':': - raise ValueError('Invalid time separator') + raise ValueError('Invalid time separator: %c' % next_char) pos += 1 - # Parse milli/microseconds if it exists - if pos < len_str and tstr[pos] == '.': - pos += 1 - len_remainder = len_str - pos - - # Valid isoformat strings at this point can be: - # fff (length 3) - # ffffff (length 6) - # fff+HH:MM (length 9) - # fff+HH:MM:SS (length 12) - # ffffff+HH:MM (length 12) - # ffffff+HH:MM:SS (length 
15) - if not (15 >= len_remainder > 0) or len_remainder % 3: - raise ValueError('Invalid microseconds or offset') - - if len_remainder > 8: - if len_remainder == 15 or (len_remainder == 12 and - tstr[-6] == ':'): - len_tz = 9 - else: - len_tz = 6 + if pos < len_str: + if tstr[pos] != '.': + raise ValueError('Invalid microsecond component') else: - len_tz = 0 + pos += 1 - len_micro = len_remainder - len_tz - units = (1 if len_micro == 6 else 1000) # Micro- or milli- seconds - time_comps[3] = units * int(tstr[pos:pos+len_micro]) - pos += len_micro + len_remainder = len_str - pos + if len_remainder not in (3, 6): + raise ValueError('Invalid microsecond component') - # Parse tzinfo if it exists - if pos < len_str: - sep_char = tstr[pos] + time_comps[3] = int(tstr[pos:]) + if len_remainder == 3: + time_comps[3] *= 1000 - tzlen = len_str - pos - if (sep_char not in '-+' or - tzlen not in (6, 9) or tstr[pos+3] != ':'): - raise ValueError('Malformed time zone string') + return time_comps - hh = int(tstr[pos+1:pos+3]) - mm = int(tstr[pos+4:pos+6]) +def _parse_isoformat_time(tstr): + # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]] + len_str = len(tstr) + if len_str < 2: + raise ValueError('Isoformat time too short') + + # This is equivalent to re.search('[+-]', tstr), but faster + tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1) + timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr + + time_comps = _parse_hh_mm_ss_ff(timestr) - if tzlen == 9: - ss = int(tstr[pos+7:pos+9]) + tzi = None + if tz_pos > 0: + tzstr = tstr[tz_pos:] + + # Valid time zone strings are: + # HH:MM len: 5 + # HH:MM:SS len: 8 + # HH:MM:SS.ffffff len: 15 + + if len(tzstr) not in (5, 8, 15): + raise ValueError('Malformed time zone string') + + tz_comps = _parse_hh_mm_ss_ff(tzstr) + if all(x == 0 for x in tz_comps): + tzi = timezone.utc else: - ss = 0 + tzsign = -1 if tstr[tz_pos - 1] == '-' else 1 + + td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], + seconds=tz_comps[2], 
microseconds=tz_comps[3]) - pos += tzlen - td = (-1 if sep_char == '-' else 1) * timedelta(hours=hh, - minutes=mm, - seconds=ss) + tzi = timezone(tzsign * td) - time_comps[-1] = timezone(td) + time_comps.append(tzi) return time_comps @@ -2421,9 +2419,10 @@ def _name_from_offset(delta): _check_date_fields, _check_int_field, _check_time_fields, _check_tzinfo_arg, _check_tzname, _check_utc_offset, _cmp, _cmperror, _date_class, _days_before_month, _days_before_year, _days_in_month, - _format_time, _is_leap, _isoweek1monday, _math, _ord2ymd, - _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, - _divide_and_round, _parse_isoformat_date, _parse_isoformat_time) + _format_time, _format_offset, _is_leap, _isoweek1monday, _math, + _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, + _divide_and_round, _parse_isoformat_date, _parse_isoformat_time, + _parse_hh_mm_ss_ff) # XXX Since import * above excludes names that start with _, # docstring does not get overwritten. In the future, it may be # appropriate to maintain a single module level docstring and diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 6d15e0d3a9eae6..b3bcec51a9a856 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2440,8 +2440,7 @@ def test_fromisoformat_datetime(self): tzinfos = [None, timezone.utc, timezone(timedelta(hours=-5)), - timezone(timedelta(hours=2)), - timezone(timedelta(hours=6, minutes=27))] + timezone(timedelta(hours=2))] dts = [self.theclass(*date_tuple, *time_tuple, tzinfo=tzi) for date_tuple in base_dates @@ -2456,6 +2455,31 @@ def test_fromisoformat_datetime(self): dt_rt = self.theclass.fromisoformat(dtstr) self.assertEqual(dt, dt_rt) + def test_fromisoformat_timezone(self): + base_dt = self.theclass(2014, 12, 30, 12, 30, 45, 217456) + + tzoffsets = [ + timedelta(hours=5), timedelta(hours=2), + timedelta(hours=6, minutes=27), + timedelta(hours=12, minutes=32, seconds=30), + timedelta(hours=2, minutes=4, 
seconds=9, microseconds=123456) + ] + + tzoffsets += [-1 * td for td in tzoffsets] + + tzinfos = [None, timezone.utc, + timezone(timedelta(hours=0))] + + tzinfos += [timezone(td) for td in tzoffsets] + + for tzi in tzinfos: + dt = base_dt.replace(tzinfo=tzi) + dtstr = dt.isoformat() + + with self.subTest(tstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + assert dt == dt_rt, dt_rt + def test_fromisoformat_separators(self): separators = [ ' ', 'T', '\u007f', # 1-bit widths @@ -2529,8 +2553,9 @@ def test_fromisoformat_fails_datetime(self): ] for bad_str in bad_strs: - with self.assertRaises(ValueError): - self.theclass.fromisoformat(bad_str) + with self.subTest(bad_str=bad_str): + with self.assertRaises(ValueError): + self.theclass.fromisoformat(bad_str) def test_fromisoformat_utc(self): dt_str = '2014-04-19T13:21:13+00:00' @@ -3331,12 +3356,19 @@ def test_fromisoformat(self): def test_fromisoformat_timezone(self): base_time = self.theclass(12, 30, 45, 217456) + tzoffsets = [ + timedelta(hours=5), timedelta(hours=2), + timedelta(hours=6, minutes=27), + timedelta(hours=12, minutes=32, seconds=30), + timedelta(hours=2, minutes=4, seconds=9, microseconds=123456) + ] + + tzoffsets += [-1 * td for td in tzoffsets] + tzinfos = [None, timezone.utc, - timezone(timedelta(hours=0)), - timezone(timedelta(hours=-5)), - timezone(timedelta(hours=2)), - timezone(timedelta(hours=6, minutes=27)), - timezone(timedelta(hours=12, minutes=32, seconds=30))] + timezone(timedelta(hours=0))] + + tzinfos += [timezone(td) for td in tzoffsets] for tzi in tzinfos: t = base_time.replace(tzinfo=tzi) @@ -3344,7 +3376,7 @@ def test_fromisoformat_timezone(self): with self.subTest(tstr=tstr): t_rt = self.theclass.fromisoformat(tstr) - assert t == t_rt + assert t == t_rt, t_rt def test_fromisoformat_timespecs(self): time_bases = [ @@ -3398,8 +3430,9 @@ def test_fromisoformat_fails(self): ] for bad_str in bad_strs: - with self.assertRaises(ValueError): - self.theclass.fromisoformat(bad_str) + 
with self.subTest(bad_str=bad_str): + with self.assertRaises(ValueError): + self.theclass.fromisoformat(bad_str) def test_fromisoformat_fails_typeerror(self): # Test the fromisoformat fails when passed the wrong type diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 7c59a2e242840b..624196702b60a2 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -668,8 +668,7 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d) */ static const char* -parse_digits(const char* ptr, int* var, - size_t num_digits) +parse_digits(const char* ptr, int* var, size_t num_digits) { for (size_t i = 0; i < num_digits; ++i) { unsigned int tmp = (unsigned int)(*(ptr++) - '0'); @@ -719,26 +718,55 @@ static int parse_isoformat_date(const char *dtstr, return 0; } -// Macro that short-circuits to timezone parsing -#define PARSE_ISOFORMAT_ADVANCE_TIME_SEP(SEP) { \ - if (*p == '\0') { \ - return 0; \ - } \ - switch(*(p++)) { \ - case SEP: \ - break; \ - case '-': \ - tzsign = -1; \ - case '+': \ - goto parse_timezone; \ - default: \ - return -4; \ - } \ +static int +parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, + int* hour, int* minute, int *second, int *microsecond) { + const char *p = tstr; + const char *p_end = tstr_end; + int *vals[3] = {hour, minute, second}; + + // Parse [HH[:MM[:SS]]] + for (size_t i = 0; i < 3; ++i) { + p = parse_digits(p, vals[i], 2); + if (NULL == p) { + return -3; + } + + char c = *(p++); + if (p >= p_end) { + return c != '\0'; + } else if (c == ':') { + continue; + } else if (c == '.') { + break; + } else { + return -4; // Malformed time separator + } + } + + // Parse .fff[fff] + size_t len_remains = p_end - p; + if (!(len_remains == 6 || len_remains == 3)) { + return -3; + } + + p = parse_digits(p, microsecond, len_remains); + if (NULL == p) { + return -3; + } + + if (len_remains == 3) { + *microsecond *= 1000; + } + + // Return 1 if it's not the end of the string + return *p != '\0'; } static int 
-parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, - int* microsecond, int *tzoffset) { +parse_isoformat_time(const char *dtstr, size_t dtlen, + int* hour, int *minute, int *second, int *microsecond, + int* tzoffset, int *tzmicrosecond) { // Parse the time portion of a datetime.isoformat() string // // Return codes: @@ -747,89 +775,52 @@ parse_isoformat_time(const char *dtstr, int* hour, int *minute, int *second, // -3: Failed to parse time component // -4: Failed to parse time separator // -5: Malformed timezone string - // -6: Extra information at end of string const char *p = dtstr; - - // Parse time - all components are optional except hour - int tzsign = 1; - p = parse_digits(p, hour, 2); - if (NULL == p) { - return -3; - } - - PARSE_ISOFORMAT_ADVANCE_TIME_SEP(':'); - - p = parse_digits(p, minute, 2); - if (NULL == p) { - return -3; - } - - PARSE_ISOFORMAT_ADVANCE_TIME_SEP(':'); - - p = parse_digits(p, second, 2); - if (NULL == p) { - return -3; - } + const char *p_end = dtstr + dtlen; - PARSE_ISOFORMAT_ADVANCE_TIME_SEP('.'); - - p = parse_digits(p, microsecond, 3); - if (NULL == p) { - return -3; - } - - // Microseconds - if (*p >= '0' && *p <= '9') { - p = parse_digits(p, microsecond, 3); - if (NULL == p) { - return -3; + const char *tzinfo_pos = p; + do { + if (*tzinfo_pos == '+' || *tzinfo_pos == '-') { + break; } - } else { - *microsecond *= 1000; - } + } while(++tzinfo_pos < p_end); - if (*p == '\0') { - return 0; - } - - switch(*(p++)) { - case '-': - tzsign = -1; - case '+': - break; - default: + int rv = parse_hh_mm_ss_ff(dtstr, tzinfo_pos, + hour, minute, second, microsecond); + + if (rv < 0) { + return rv; + } else if (tzinfo_pos == p_end) { + // We know that there's no time zone, so if there's stuff at the + // end of the string it's an error. 
+ if (rv == 1) { return -5; + } else { + return 0; + } } -parse_timezone:; - int tzhours = 0, tzminutes = 0, tzseconds = 0; - p = parse_digits(p, &tzhours, 2); - if (NULL == p || *(p++) != ':') { - return -5; - } - - p = parse_digits(p, &tzminutes, 2); - if (NULL == p) { + // Parse time zone component + // Valid formats are: + // - +HH:MM (len 6) + // - +HH:MM:SS (len 9) + // - +HH:MM:SS.ffffff (len 16) + size_t tzlen = p_end - tzinfo_pos; + if (!(tzlen == 6 || tzlen == 9 || tzlen == 16)) { return -5; } - if (*p == ':') { - p++; - p = parse_digits(p, &tzseconds, 2); - if (NULL == p) { - return -5; - } - } + int tzsign = (*tzinfo_pos == '-')?-1:1; + tzinfo_pos++; + int tzhour = 0, tzminute = 0, tzsecond = 0; + rv = parse_hh_mm_ss_ff(tzinfo_pos, p_end, + &tzhour, &tzminute, &tzsecond, tzmicrosecond); - // Convert hours:minutes into seconds - *tzoffset = tzsign * ((3600 * tzhours) + (60 * tzminutes) + tzseconds); + *tzoffset = tzsign * ((tzhour * 3600) + (tzminute * 60) + tzsecond); + *tzmicrosecond *= tzsign; - if (*p != '\0') { - return -6; - } - - return 1; + return rv?-5:1; } @@ -1234,7 +1225,7 @@ append_keyword_fold(PyObject *repr, int fold) } static inline PyObject * -tzinfo_from_isoformat_results(int rv, int tzoffset) { +tzinfo_from_isoformat_results(int rv, int tzoffset, int tz_useconds) { PyObject *tzinfo; if (rv == 1) { // Create a timezone from offset in seconds (0 returns UTC) @@ -1243,7 +1234,7 @@ tzinfo_from_isoformat_results(int rv, int tzoffset) { return PyDateTime_TimeZone_UTC; } - PyObject *delta = new_delta(0, tzoffset, 0, 1); + PyObject *delta = new_delta(0, tzoffset, tz_useconds, 1); tzinfo = new_timezone(delta, NULL); Py_XDECREF(delta); } else { @@ -4217,18 +4208,22 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { return NULL; } - const char *p = PyUnicode_AsUTF8(tstr); + Py_ssize_t len; + const char *p = PyUnicode_AsUTF8AndSize(tstr, &len); - int hour = 0, minute = 0, second = 0, microsecond = 0, tzoffset = 0; - int rv = 
parse_isoformat_time(p, &hour, &minute, &second, &microsecond, - &tzoffset); + int hour = 0, minute = 0, second = 0, microsecond = 0; + int tzoffset, tzimicrosecond = 0; + int rv = parse_isoformat_time(p, len, + &hour, &minute, &second, &microsecond, + &tzoffset, &tzimicrosecond); if (rv < 0) { PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", p); return NULL; } - PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset); + PyObject *tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, + tzimicrosecond); if (tzinfo == NULL) { return NULL; @@ -4798,7 +4793,8 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { const char * p = dt_ptr; int year = 0, month = 0, day = 0; - int hour = 0, minute = 0, second = 0, microsecond = 0, tzoffset = 0; + int hour = 0, minute = 0, second = 0, microsecond = 0; + int tzoffset = 0, tzusec = 0; // date has a fixed length of 10 int rv = parse_isoformat_date(p, &year, &month, &day); @@ -4821,15 +4817,17 @@ datetime_fromisoformat(PyObject* cls, PyObject *dtstr) { } } - rv = parse_isoformat_time(p, &hour, &minute, &second, - &microsecond, &tzoffset); + len -= (p - dt_ptr); + rv = parse_isoformat_time(p, len, + &hour, &minute, &second, &microsecond, + &tzoffset, &tzusec); } if (rv < 0) { PyErr_Format(PyExc_ValueError, "Invalid isoformat string: %s", dt_ptr); return NULL; } - PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset); + PyObject* tzinfo = tzinfo_from_isoformat_results(rv, tzoffset, tzusec); if (tzinfo == NULL) { return NULL; } From 9fa91db1b718d4a8377eab3afccd16c717b5085b Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 17 Dec 2017 21:09:36 -0500 Subject: [PATCH 19/21] Fix documentation and pure python error catching in fromisoformat --- Doc/library/datetime.rst | 17 +++++++++-------- Lib/datetime.py | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 4c2ffa1ebc8d90..c1b164ebc1f23a 100644 --- a/Doc/library/datetime.rst +++ 
b/Doc/library/datetime.rst @@ -438,9 +438,9 @@ Other constructors, all class methods: .. classmethod:: date.fromisoformat(date_string) - Return a :class:`date` corresponding to a *date_string* in one of the ISO 8601 - formats emitted by :meth:`date.isoformat`. Specifically, this function supports - strings in the format(s) ``YYYY-MM-DD``. + Return a :class:`date` corresponding to a *date_string* in the format emitted + by :meth:`date.isoformat`. Specifically, this function supports strings in + the format(s) ``YYYY-MM-DD``. .. caution:: @@ -837,8 +837,9 @@ Other constructors, all class methods: .. classmethod:: datetime.fromisoformat(date_string) Return a :class:`datetime` corresponding to a *date_string* in one of the - ISO 8601 formats emitted by :meth:`datetime.isoformat`. Specifically, this function - supports strings in the format(s) ``YYYY-MM-DD[*HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM[:SS[.ffffff]]]``, + formats emitted by :meth:`date.isoformat` and :meth:`datetime.isoformat`. + Specifically, this function supports strings in the format(s) + ``YYYY-MM-DD[*HH[:MM[:SS[.mmm[mmm]]]][+HH:MM[:SS[.ffffff]]]]``, where ``*`` can match any single character. .. caution:: @@ -1518,11 +1519,11 @@ In boolean contexts, a :class:`.time` object is always considered to be true. Other constructor: -.. classmethod:: time.fromisoformat(date_string) +.. classmethod:: time.fromisoformat(time_string) - Return a :class:`time` corresponding to a *time_string* in one of the ISO 8601 + Return a :class:`time` corresponding to a *time_string* in one of the formats emitted by :meth:`time.isoformat`. Specifically, this function supports - strings in the format(s) ``HH[:MM[:SS[.mmm[mmm]]]]][+HH:MM[:SS[.ffffff]]]``. + strings in the format(s) ``HH[:MM[:SS[.mmm[mmm]]]][+HH:MM[:SS[.ffffff]]]``. .. 
caution:: diff --git a/Lib/datetime.py b/Lib/datetime.py index f4d7ad8170b28c..7b22c52b6b2870 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -855,7 +855,7 @@ def fromisoformat(cls, date_string): try: assert len(date_string) == 10 return cls(*_parse_isoformat_date(date_string)) - except: + except Exception: raise ValueError('Invalid isoformat string: %s' % date_string) @@ -1367,7 +1367,7 @@ def fromisoformat(cls, time_string): try: return cls(*_parse_isoformat_time(time_string)) - except: + except Exception: raise ValueError('Invalid isoformat string: %s' % time_string) From ffdb2afbca78ebcf7adbf42537c7d93cf1d0bb50 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 17 Dec 2017 21:28:09 -0500 Subject: [PATCH 20/21] Drop unsupported sep parameter in _tzstr --- Lib/datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 7b22c52b6b2870..8fa18a78932c04 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1317,8 +1317,8 @@ def __hash__(self): # Conversion to string - def _tzstr(self, sep=":"): - """Return formatted timezone offset (+xx:xx) or None.""" + def _tzstr(self): + """Return formatted timezone offset (+xx:xx) or an empty string.""" off = self.utcoffset() return _format_offset(off) From 18a5fa84b49a56acee0c9f9ef4773b786759d1c3 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 17 Dec 2017 21:28:22 -0500 Subject: [PATCH 21/21] Add test for ambiguous isoformat strings --- Lib/test/datetimetester.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index b3bcec51a9a856..1d0c1c5bd236f6 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2496,6 +2496,17 @@ def test_fromisoformat_separators(self): dt_rt = self.theclass.fromisoformat(dtstr) self.assertEqual(dt, dt_rt) + def test_fromisoformat_ambiguous(self): + # Test strings like 2018-01-31+12:15 (where +12:15 is not a time zone) + separators = 
['+', '-'] + for sep in separators: + dt = self.theclass(2018, 1, 31, 12, 15) + dtstr = dt.isoformat(sep=sep) + + with self.subTest(dtstr=dtstr): + dt_rt = self.theclass.fromisoformat(dtstr) + self.assertEqual(dt, dt_rt) + def test_fromisoformat_timespecs(self): datetime_bases = [ (2009, 12, 4, 8, 17, 45, 123456),