Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DateTimeParser Validation and Performance Improvements #4593

Merged
merged 5 commits into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions Foundation/src/DateTimeFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,25 @@ bool DateTimeFormat::hasFormat(const std::string& fmt)

bool DateTimeFormat::isValid(const std::string& dateTime)
{
for (const auto& f : REGEX_LIST)
{
if (RegularExpression(*f).match(dateTime)) return true;
}
return false;

static const RegularExpression regs[] = {
RegularExpression(DateTimeFormat::ISO8601_REGEX),
RegularExpression(DateTimeFormat::RFC822_REGEX),
RegularExpression(DateTimeFormat::RFC1123_REGEX),
RegularExpression(DateTimeFormat::HTTP_REGEX),
RegularExpression(DateTimeFormat::RFC850_REGEX),
RegularExpression(DateTimeFormat::RFC1036_REGEX),
RegularExpression(DateTimeFormat::ASCTIME_REGEX),
RegularExpression(DateTimeFormat::SORTABLE_REGEX)
};
// make sure the regex list and the array of regexes are in sync
poco_assert((sizeof(regs) / sizeof(regs[0])) == REGEX_LIST.size());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

REGEX_LIST is used only in this place. I propose to remove it as a class member and have it as a static member in this function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I left it where it is because it's part of the public API of this class, unfortunately.

Wasn't sure if this would be 1.13.x or 1.14 and didn't want to make a breaking change just in case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andrewauclair 1.14, you can break it


for (const auto& f : regs)
{
if (f.match(dateTime)) return true;
}
return false;
}


Expand Down
233 changes: 179 additions & 54 deletions Foundation/src/DateTimeParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,42 +18,142 @@
#include "Poco/Exception.h"
#include "Poco/Ascii.h"
#include "Poco/String.h"
#include <iostream>

namespace {
using parse_iter = std::string::const_iterator;

namespace Poco {
[[nodiscard]] parse_iter skip_non_digits(parse_iter it, parse_iter end)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shall these helper functions be inlined?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. inline is used in headers to avoid ODR issues. Free functions in cpp files should be either static or in an anonymous namespace to avoid IFNDR issues.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andrewauclair please don't introduce new naming conventions, we have our own

{
while (it != end && !Poco::Ascii::isDigit(*it))
{
++it;
}
return it;
}


#define SKIP_JUNK() \
while (it != end && !Ascii::isDigit(*it)) ++it
[[nodiscard]] parse_iter skip_digits(parse_iter it, parse_iter end)
{
while (it != end && Poco::Ascii::isDigit(*it))
{
++it;
}
return it;
}


#define SKIP_DIGITS() \
while (it != end && Ascii::isDigit(*it)) ++it
int parse_number_n(const std::string& dtStr, parse_iter& it, parse_iter end, int n)
{
parse_iter num_start = end;
int i = 0;

for (; it != end && i < n && Poco::Ascii::isDigit(*it); ++it, ++i)
{
if (num_start == end)
{
num_start = it;
}
}

#define PARSE_NUMBER(var) \
while (it != end && Ascii::isDigit(*it)) var = var*10 + ((*it++) - '0')
if (num_start == end)
{
throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", No number found to parse");
}

std::string number(num_start, it);
int result = 0;
Fixed Show fixed Hide fixed
try
{
return std::stoi(number);
}
catch(const std::exception& e)
{
throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number);
}
}
}

#define PARSE_NUMBER_N(var, n) \
{ int i = 0; while (i++ < n && it != end && Ascii::isDigit(*it)) var = var*10 + ((*it++) - '0'); }
namespace Poco {


#define PARSE_FRACTIONAL_N(var, n) \
{ int i = 0; while (i < n && it != end && Ascii::isDigit(*it)) { var = var*10 + ((*it++) - '0'); i++; } while (i++ < n) var *= 10; }
void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, DateTime& dateTime, int& timeZoneDifferential)
{
const auto str = Poco::trim(dtStr);

if (fmt.empty() || str.empty())
{
throw SyntaxException("Invalid DateTimeString: " + dtStr);
}
else if (DateTimeFormat::hasFormat(fmt) && !DateTimeFormat::isValid(str))
{
throw SyntaxException("Invalid DateTimeString: " + dtStr);
}

const auto parse_number = [&dtStr](parse_iter& it, parse_iter end)
{
parse_iter num_start = end;

inline std::string cleanedInputString(const std::string& str)
{
return Poco::trim(str);
}
for (; it != end && Poco::Ascii::isDigit(*it); ++it)
{
if (num_start == end)
{
num_start = it;
}
}

void DateTimeParser::parse(const std::string& fmt, const std::string& dtStr, DateTime& dateTime, int& timeZoneDifferential)
{
const auto str = cleanedInputString(dtStr);
if (num_start == end)
{
throw Poco::SyntaxException("Invalid DateTimeString: " + dtStr + ", No number found to parse");
}

std::string number(num_start, it);
int result = 0;
Fixed Show fixed Hide fixed
try
{
return std::stoi(number);
}
catch(const std::exception& e)
{
throw SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number);
}
};



if (fmt.empty() || str.empty() || (DateTimeFormat::hasFormat(fmt) && !DateTimeFormat::isValid(str)))
throw SyntaxException("Invalid DateTimeString:" + dtStr);
const auto parse_fractional_n = [dtStr](parse_iter& it, parse_iter end, int n)
{
parse_iter num_start = end;
int i = 0;

for (; it != end && i < n && Poco::Ascii::isDigit(*it); ++it, ++i)
{
if (num_start == end)
{
num_start = it;
}
}

if (num_start == end)
{
return 0;
}

std::string number(num_start, it);
int result = 0;
try
{
result = std::stoi(number);
}
catch(const std::exception& e)
{
throw SyntaxException("Invalid DateTimeString: " + dtStr + ", invalid number: " + number);
}

while (i++ < n) result *= 10;

return result;
};

int year = 0;
int month = 0;
Expand Down Expand Up @@ -81,8 +181,8 @@
{
switch (*itf)
{
case 'w':
case 'W':
case 'w': // Weekday, abbreviated
case 'W': // Weekday
while (it != end && Ascii::isSpace(*it)) ++it;
while (it != end && Ascii::isAlpha(*it)) ++it;
break;
Expand All @@ -94,32 +194,33 @@
case 'd':
case 'e':
case 'f':
SKIP_JUNK();
PARSE_NUMBER_N(day, 2);
it = skip_non_digits(it, end);
day = parse_number_n(dtStr, it, end, 2);
dayParsed = true;
break;
case 'm':
case 'n':
case 'o':
SKIP_JUNK();
PARSE_NUMBER_N(month, 2);
it = skip_non_digits(it, end);
month = parse_number_n(dtStr, it, end, 2);
monthParsed = true;
break;
case 'y':
SKIP_JUNK();
PARSE_NUMBER_N(year, 2);
it = skip_non_digits(it, end);
year = parse_number_n(dtStr, it, end, 2);
if (year >= 69)
year += 1900;
else
year += 2000;
break;
case 'Y':
SKIP_JUNK();
PARSE_NUMBER_N(year, 4);
it = skip_non_digits(it, end);
year = parse_number_n(dtStr, it, end, 4);
break;
case 'r':
SKIP_JUNK();
PARSE_NUMBER(year);
it = skip_non_digits(it, end);
year = parse_number(it, end);

if (year < 1000)
{
if (year >= 69)
Expand All @@ -130,46 +231,53 @@
break;
case 'H':
case 'h':
SKIP_JUNK();
PARSE_NUMBER_N(hour, 2);
it = skip_non_digits(it, end);
hour = parse_number_n(dtStr, it, end, 2);
break;
case 'a':
case 'A':
hour = parseAMPM(it, end, hour);
break;
case 'M':
SKIP_JUNK();
PARSE_NUMBER_N(minute, 2);
it = skip_non_digits(it, end);
minute = parse_number_n(dtStr, it, end, 2);
break;
case 'S':
SKIP_JUNK();
PARSE_NUMBER_N(second, 2);
it = skip_non_digits(it, end);
second = parse_number_n(dtStr, it, end, 2);
break;
case 's':
SKIP_JUNK();
PARSE_NUMBER_N(second, 2);
it = skip_non_digits(it, end);
second = parse_number_n(dtStr, it, end, 2);

if (it != end && (*it == '.' || *it == ','))
{
++it;
PARSE_FRACTIONAL_N(millis, 3);
PARSE_FRACTIONAL_N(micros, 3);
SKIP_DIGITS();

if (it != end && !Ascii::isDigit(*it))
{
throw SyntaxException("Invalid DateTimeString: " + dtStr + ", missing millisecond");
}

millis = parse_fractional_n(it, end, 3);
micros = parse_fractional_n(it, end, 3);
it = skip_digits(it, end);
}
break;
case 'i':
SKIP_JUNK();
PARSE_NUMBER_N(millis, 3);
it = skip_non_digits(it, end);
millis = parse_number_n(dtStr, it, end, 3);
break;
case 'c':
SKIP_JUNK();
PARSE_NUMBER_N(millis, 1);
it = skip_non_digits(it, end);
millis = parse_number_n(dtStr, it, end, 1);
millis *= 100;
break;
case 'F':
SKIP_JUNK();
PARSE_FRACTIONAL_N(millis, 3);
PARSE_FRACTIONAL_N(micros, 3);
SKIP_DIGITS();
it = skip_non_digits(it, end);
millis = parse_number_n(dtStr, it, end, 3);
micros = parse_number_n(dtStr, it, end, 3);
it = skip_digits(it, end);
break;
case 'z':
case 'Z':
Expand Down Expand Up @@ -233,7 +341,7 @@

bool DateTimeParser::tryParse(const std::string& dtStr, DateTime& dateTime, int& timeZoneDifferential)
{
const auto str = cleanedInputString(dtStr);
const auto str = Poco::trim(dtStr);

if (str.length() < 4) return false;

Expand Down Expand Up @@ -338,12 +446,29 @@
int sign = *it == '+' ? 1 : -1;
++it;
int hours = 0;
PARSE_NUMBER_N(hours, 2);
try
{
hours = parse_number_n("", it, end, 2);
}
catch(const SyntaxException& e)
{
throw SyntaxException("Timezone invalid number: hours");
}

if (hours < 0 || hours > 23)
throw SyntaxException("Timezone difference hours out of range");
if (it != end && *it == ':') ++it;
int minutes = 0;
PARSE_NUMBER_N(minutes, 2);
try
{
minutes = parse_number_n("", it, end, 2);
}
catch(const SyntaxException& e)
{
throw SyntaxException("Timezone invalid number: minutes");
}

// PARSE_NUMBER_N(minutes, 2);
Fixed Show fixed Hide fixed
if (minutes < 0 || minutes > 59)
throw SyntaxException("Timezone difference minutes out of range");
tzd += sign*(hours*3600 + minutes*60);
Expand Down
10 changes: 10 additions & 0 deletions Foundation/testsuite/src/DateTimeParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ void DateTimeParserTest::testISO8601Frac()
assertTrue (dt.microsecond() == 0);
assertTrue (tzd == 0);
testBad(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.1J", tzd);
testBad(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.Z", tzd);

dt = DateTimeParser::parse(DateTimeFormat::ISO8601_FRAC_FORMAT, "2005-01-08T12:30:00.123+01:00", tzd);
assertTrue (dt.year() == 2005);
Expand Down Expand Up @@ -616,6 +617,15 @@ void DateTimeParserTest::testCustom()
catch (SyntaxException&)
{
}

// bad year (not a number)
testBad("%y", "YY", tzd);

// bad year (number too big)
testBad("%r", "123456789101112131415", tzd);

// check that an invalid millisecond is detected with a custom format
testBad("T%H:%M:%s %z", "T12:30:00.Z", tzd);
}


Expand Down
Loading