Skip to content

Commit

Permalink
Merge pull request #11146 from chris-b1/inference-padding
Browse files: browse the repository at this point in the history
PERF: infer_datetime_format without padding #11142
  • Loading branch information
jreback committed Sep 20, 2015
2 parents d35c84b + 417dbb4 commit 9e7dc17
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 13 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,7 @@ Performance Improvements
- 20x improvement in ``concat`` of Categoricals when categories are identical (:issue:`10587`)
- Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`)
- 2x improvement of ``Series.value_counts`` for float dtype (:issue:`10821`)
- Enable ``infer_datetime_format`` in ``to_datetime`` when date components do not have 0 padding (:issue:`11142`)

.. _whatsnew_0170.bug_fixes:

Expand Down
18 changes: 18 additions & 0 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4615,6 +4615,24 @@ def test_guess_datetime_format_invalid_inputs(self):
for invalid_dt in invalid_dts:
self.assertTrue(tools._guess_datetime_format(invalid_dt) is None)

def test_guess_datetime_format_nopadding(self):
    # GH 11142
    # Each case pairs a datetime string whose components are not
    # zero-padded with the strftime format the guesser should infer.
    cases = [
        ('2011-1-1', '%Y-%m-%d'),
        ('30-1-2011', '%d-%m-%Y'),
        ('1/1/2011', '%m/%d/%Y'),
        ('2011-1-1 00:00:00', '%Y-%m-%d %H:%M:%S'),
        ('2011-1-1 0:0:0', '%Y-%m-%d %H:%M:%S'),
        ('2011-1-3T00:00:0', '%Y-%m-%dT%H:%M:%S'),
    ]

    for raw_string, expected_format in cases:
        guessed_format = tools._guess_datetime_format(raw_string)
        self.assertEqual(guessed_format, expected_format)


def test_guess_datetime_format_for_array(self):
expected_format = '%Y-%m-%d %H:%M:%S.%f'
dt_string = datetime(2011, 12, 30, 0, 0, 0).strftime(expected_format)
Expand Down
31 changes: 18 additions & 13 deletions pandas/tseries/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,20 +86,21 @@ def _guess_datetime_format(dt_str, dayfirst=False,
if not isinstance(dt_str, compat.string_types):
return None

day_attribute_and_format = (('day',), '%d')
day_attribute_and_format = (('day',), '%d', 2)

# attr name, format, padding (if any)
datetime_attrs_to_format = [
(('year', 'month', 'day'), '%Y%m%d'),
(('year',), '%Y'),
(('month',), '%B'),
(('month',), '%b'),
(('month',), '%m'),
(('year', 'month', 'day'), '%Y%m%d', 0),
(('year',), '%Y', 0),
(('month',), '%B', 0),
(('month',), '%b', 0),
(('month',), '%m', 2),
day_attribute_and_format,
(('hour',), '%H'),
(('minute',), '%M'),
(('second',), '%S'),
(('microsecond',), '%f'),
(('second', 'microsecond'), '%S.%f'),
(('hour',), '%H', 2),
(('minute',), '%M', 2),
(('second',), '%S', 2),
(('microsecond',), '%f', 6),
(('second', 'microsecond'), '%S.%f', 0),
]

if dayfirst:
Expand All @@ -125,7 +126,7 @@ def _guess_datetime_format(dt_str, dayfirst=False,
format_guess = [None] * len(tokens)
found_attrs = set()

for attrs, attr_format in datetime_attrs_to_format:
for attrs, attr_format, padding in datetime_attrs_to_format:
# If a given attribute has been placed in the format string, skip
# over other formats for that same underlying attribute (IE, month
# can be represented in multiple different ways)
Expand All @@ -134,9 +135,11 @@ def _guess_datetime_format(dt_str, dayfirst=False,

if all(getattr(parsed_datetime, attr) is not None for attr in attrs):
for i, token_format in enumerate(format_guess):
token_filled = tokens[i].zfill(padding)
if (token_format is None and
tokens[i] == parsed_datetime.strftime(attr_format)):
token_filled == parsed_datetime.strftime(attr_format)):
format_guess[i] = attr_format
tokens[i] = token_filled
found_attrs.update(attrs)
break

Expand All @@ -163,6 +166,8 @@ def _guess_datetime_format(dt_str, dayfirst=False,

guessed_format = ''.join(output_format)

# rebuild string, capturing any inferred padding
dt_str = ''.join(tokens)
if parsed_datetime.strftime(guessed_format) == dt_str:
return guessed_format

Expand Down

0 comments on commit 9e7dc17

Please sign in to comment.