From 2b87e0d17aedb990c39c900e7dbf4b99c0e010ac Mon Sep 17 00:00:00 2001 From: LionbridgeCS2 Date: Thu, 4 Nov 2021 17:32:14 +0100 Subject: [PATCH] [* DateTimeV2] Split extraction of "[day] [calendar date] into multiple entities if day doesn't match date (#2709) --- .../English/DateTimeDefinitions.cs | 20 +-- .../Constants.cs | 5 + .../Extractors/BaseDateExtractor.cs | 147 ++++++++++++++++-- .../Extractors/Metadata.cs | 3 + Patterns/English/English-DateTime.yaml | 20 +-- Specs/DateTime/English/DateExtractor.json | 6 +- Specs/DateTime/English/DateParser.json | 6 +- Specs/DateTime/English/DateTimeModel.json | 142 +++++++++++++++++ 8 files changed, 309 insertions(+), 40 deletions(-) diff --git a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs index 666f4defca..61596dfa1b 100644 --- a/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs +++ b/.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs @@ -126,17 +126,17 @@ public static class DateTimeDefinitions public const string DatePreposition = @"\b(on|in)"; public static readonly string DateExtractorYearTermRegex = $@"(\s+|\s*[/\\.,-]\s*|\s+of\s+){DateYearRegex}"; public static readonly string DayPrefix = $@"\b({WeekDayRegex}|{SpecialDayRegex})\b"; - public static readonly string DateExtractor1 = $@"\b({DayPrefix}\s*[,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-./]\s*{DayRegex}\)))(\s*\(\s*{DayPrefix}\s*\))?({DateExtractorYearTermRegex}\b)?"; - public static readonly string DateExtractor3 = $@"\b({DayPrefix}(\s+|\s*,\s*))?({DayRegex}[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?((\s+in)?{DateExtractorYearTermRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[-./]?\s*(the\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?)\b"; + public static readonly string DateExtractor1 = 
$@"\b(?{DayPrefix}\s*[,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-./]\s*{DayRegex}\)))(?\s*\(\s*{DayPrefix}\s*\))?({DateExtractorYearTermRegex}\b)?"; + public static readonly string DateExtractor3 = $@"\b(?{DayPrefix}(\s+|\s*,\s*))?({DayRegex}[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?((\s+in)?{DateExtractorYearTermRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[-./]?\s*(the\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?)\b"; public static readonly string DateExtractor4 = $@"\b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex}"; - public static readonly string DateExtractor5 = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; - public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; - public static readonly string DateExtractor7L = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; - public static readonly string DateExtractor7S = $@"\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; - public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; - public static readonly string DateExtractor9L = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b"; - public static readonly string DateExtractor9S = $@"\b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b"; - public static readonly string DateExtractorA = 
$@"\b({DayPrefix}(\s*,)?\s+)?(({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex})|({MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(the\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex}))"; + public static readonly string DateExtractor5 = $@"\b(?{DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+)"; + public static readonly string DateExtractor6 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?(?{DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor7L = $@"\b(?{DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor7S = $@"\b(?{DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor8 = $@"(?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?(?{DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b"; + public static readonly string DateExtractor9L = $@"\b(?{DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b"; + public static readonly string DateExtractor9S = $@"\b(?{DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b"; + public static readonly string DateExtractorA = $@"\b(?{DayPrefix}(\s*,)?\s+)?(({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex})|({MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(the\s+)?(?(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex}))"; public static readonly string 
OfMonth = $@"^\s*(day\s+)?of\s*{MonthRegex}"; public static readonly string MonthEnd = $@"{MonthRegex}\s*(the)?\s*$"; public static readonly string WeekDayEnd = $@"(this\s+)?{WeekDayRegex}\s*,?\s*$"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs index a689ff1a47..98d5e5efb1 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs @@ -181,6 +181,11 @@ public static class Constants public const string MinuteGroupName = "min"; public const string HourGroupName = "hour"; public const string YearGroupName = "year"; + public const string MonthGroupName = "month"; + public const string DayGroupName = "day"; + public const string WeekdayGroupName = "weekday"; + public const string DayPrefixGroupName = "dayprefix"; + public const string DayOfMonthGroupName = "DayOfMonth"; public const string TimeOfDayGroupName = "timeOfDay"; public const string BusinessDayGroupName = "business"; public const string LeftAmPmGroupName = "leftDesc"; diff --git a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs index 3d8444c926..3f236108ed 100644 --- a/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs +++ b/.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDateExtractor.cs @@ -146,6 +146,8 @@ private List ExtractImpl(string text, DateObject reference) tokens.AddRange(NumberWithMonth(text, reference)); tokens.AddRange(ExtractRelativeDurationDate(text, tokens, reference)); + tokens = TruncateInconsistentDates(tokens); + var results = Token.MergeAllTokens(tokens, text, ExtractorName); return results; @@ -179,6 +181,8 @@ private List BasicRegexMatch(string text) results.Add(new Token(match.Index, match.Index + match.Length)); } + // Check if prefix weekday and date agree + results = ValidateWeekdayPrefix(match, results); } } } 
@@ -191,11 +195,11 @@ private List BasicRegexMatch(string text) private bool ValidateMatch(Match match, string text) { // If the match doesn't contains "year" part, it will not be ambiguous and it's a valid match - var isValidMatch = !match.Groups["year"].Success; + var isValidMatch = !match.Groups[Constants.YearGroupName].Success; if (!isValidMatch) { - var yearGroup = match.Groups["year"]; + var yearGroup = match.Groups[Constants.YearGroupName]; // If the "year" part is not at the end of the match, it's a valid match if (yearGroup.Index + yearGroup.Length != match.Index + match.Length) @@ -222,11 +226,11 @@ private bool ValidateMatch(Match match, string text) } // Expressions with mixed separators are not considered valid dates e.g. "30/4.85" (unless one is a comma "30/4, 2016") - if (match.Groups["day"].Success && match.Groups["month"].Success) + if (match.Groups[Constants.DayGroupName].Success && match.Groups[Constants.MonthGroupName].Success) { - var noDateText = match.Value.Replace(match.Groups["year"].Value, string.Empty) - .Replace(match.Groups["month"].Value, string.Empty) - .Replace(match.Groups["day"].Value, string.Empty); + var noDateText = match.Value.Replace(match.Groups[Constants.YearGroupName].Value, string.Empty) + .Replace(match.Groups[Constants.MonthGroupName].Value, string.Empty) + .Replace(match.Groups[Constants.DayGroupName].Value, string.Empty); var separators = new List { '/', '\\', '-', '.' }; if (separators.Count(separator => noDateText.Contains(separator)) > 1) @@ -331,7 +335,7 @@ private List NumberWithMonth(string text, DateObject reference) var endIndex = match.Index + match.Length + (result.Length ?? 
0); ExtendWithWeekdayAndYear( - ref startIndex, ref endIndex, Config.MonthOfYear.GetValueOrDefault(match.Groups["month"].Value, reference.Month), + ref startIndex, ref endIndex, Config.MonthOfYear.GetValueOrDefault(match.Groups[Constants.MonthGroupName].Value, reference.Month), num, text, reference); ret.Add(new Token(startIndex, endIndex)); @@ -345,7 +349,7 @@ private List NumberWithMonth(string text, DateObject reference) { if (matchCase.Success) { - var ordinalNum = matchCase.Groups["DayOfMonth"].Value; + var ordinalNum = matchCase.Groups[Constants.DayOfMonthGroupName].Value; if (ordinalNum == result.Text) { var endLength = 0; @@ -372,8 +376,8 @@ private List NumberWithMonth(string text, DateObject reference) { if (matchCase.Success) { - var ordinalNum = matchCase.Groups["DayOfMonth"].Value; - if (ordinalNum == result.Text && matchCase.Groups["DayOfMonth"].Index == result.Start) + var ordinalNum = matchCase.Groups[Constants.DayOfMonthGroupName].Value; + if (ordinalNum == result.Text && matchCase.Groups[Constants.DayOfMonthGroupName].Index == result.Start) { // Get week of day for the ordinal number which is regarded as a date of reference month var date = DateObject.MinValue.SafeCreateFromValue(reference.Year, reference.Month, num); @@ -381,7 +385,7 @@ private List NumberWithMonth(string text, DateObject reference) // Get week day from text directly, compare it with the weekday generated above // to see whether they refer to the same week day - var extractedWeekDayStr = matchCase.Groups["weekday"].Value; + var extractedWeekDayStr = matchCase.Groups[Constants.WeekdayGroupName].Value; if (!date.Equals(DateObject.MinValue) && numWeekDayInt == Config.DayOfWeek[extractedWeekDayStr]) @@ -455,7 +459,7 @@ private List NumberWithMonth(string text, DateObject reference) if (beginMatch.Success && num >= 1 && num <= 5 && result.Type.Equals(Number.Constants.SYS_NUM_ORDINAL, StringComparison.Ordinal)) { - var weekDayStr = beginMatch.Groups["weekday"].Value; + var weekDayStr = 
beginMatch.Groups[Constants.WeekdayGroupName].Value; if (this.Config.DayOfWeek.ContainsKey(weekDayStr)) { var spaceLen = suffixStr.Length - suffixStr.Trim().Length; @@ -476,7 +480,7 @@ private List NumberWithMonth(string text, DateObject reference) var endIndex = (result.Start + result.Length ?? 0) + match.Length; ExtendWithWeekdayAndYear(ref startIndex, ref endIndex, - Config.MonthOfYear.GetValueOrDefault(match.Groups["month"].Value, reference.Month), + Config.MonthOfYear.GetValueOrDefault(match.Groups[Constants.MonthGroupName].Value, reference.Month), num, text, reference); ret.Add(new Token(startIndex, endIndex)); @@ -520,7 +524,7 @@ private void ExtendWithWeekdayAndYear(ref int startIndex, ref int endIndex, int { // Get weekday from context directly, compare it with the weekday extraction above // to see whether they reference the same weekday - var extractedWeekDayStr = matchWeekDay.Groups["weekday"].Value; + var extractedWeekDayStr = matchWeekDay.Groups[Constants.WeekdayGroupName].Value; var numWeekDayStr = date.DayOfWeek.ToString().ToLowerInvariant(); if (Config.DayOfWeek.TryGetValue(numWeekDayStr, out var weekDay1) && @@ -692,5 +696,120 @@ private int GetYearIndex(string affix, ref int year, out bool success, bool inPr return index; } + + // Remove weekday prefix when it does not agree with date + private List TruncateInconsistentDates(List tokens) + { + var newTokens = new List(); + var splitIndices = tokens.Select(o => o.Metadata != null ? 
o.Metadata.SplitIndex : 0).ToList(); + splitIndices.RemoveAll(i => i == 0); + + foreach (var token in tokens) + { + var newToken = token; + foreach (var index in splitIndices) + { + if (index < token.End && index > token.Start) + { + newToken = new Token(index, token.End); + break; + } + } + + newTokens.Add(newToken); + } + + return newTokens; + } + + // Check if weekday prefix agrees with date + private List ValidateWeekdayPrefix(Match match, List results) + { + if (match.Groups[Constants.WeekdayGroupName].Success && match.Groups[Constants.DayPrefixGroupName].Success) + { + // If a span has already been checked, skip + for (int i = 0; i < results.Count - 1; i++) + { + if (match.Index == results[i].Start && match.Index + match.Length == results[i].End) + { + return results; + } + } + + var date = ParseDate(match); + if (date > DateObject.MinValue) + { + // Get weekday from date + var numWeekDayInt = (int)date.DayOfWeek; + + // Get weekday from text directly, compare it with the weekday generated above + // to see whether they refer to the same day + var extractedWeekDayStr = match.Groups[Constants.WeekdayGroupName].Value; + + // If weekdays do not agree, add metadata to token so that prefix will be removed + if (numWeekDayInt != Config.DayOfWeek[extractedWeekDayStr]) + { + var startPrefix = match.Groups[Constants.DayPrefixGroupName].Index; + var endPrefix = startPrefix + match.Groups[Constants.DayPrefixGroupName].Length; + var splitIndex = match.Groups[Constants.DayGroupName].Index >= endPrefix ? 
endPrefix : 0; + + var metadata = new Metadata { SplitIndex = splitIndex }; + var newToken = new Token(results[results.Count - 1].Start, results[results.Count - 1].End, metadata); + results[results.Count - 1] = newToken; + } + } + } + + return results; + } + + // Parse date from match + private DateObject ParseDate(Match match) + { + // Get year + var isYearParsed = int.TryParse(match.Groups[Constants.YearGroupName].Value, out var year); + + // Get month + var isMonthParsed = int.TryParse(match.Groups[Constants.MonthGroupName].Value, out var month); + if (!isMonthParsed) + { + isMonthParsed = Config.MonthOfYear.TryGetValue(match.Groups[Constants.MonthGroupName].Value, out month); + } + + // Get day + var day = 0; + if (isYearParsed && isMonthParsed) + { + var dayStr = match.Groups[Constants.DayGroupName].Value; + + var isDayParsed = int.TryParse(dayStr, out day); + if (!isDayParsed) + { + var dayErs = Config.OrdinalExtractor.Extract(dayStr); + if (dayErs.Count == 0) + { + dayErs.AddRange(this.Config.IntegerExtractor.Extract(dayStr)); + } + + foreach (var er in dayErs) + { + isDayParsed = int.TryParse((this.Config.NumberParser.Parse(er).Value ?? 
0).ToString(), out day); + if (isDayParsed) + { + break; + } + } + } + } + + // Create date + var date = DateObject.MinValue; + if (day > 0 && day <= 31) + { + date = date.SafeCreateFromValue(year, month, day); + } + + return date; + } } } diff --git a/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs b/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs index 67c0b383b1..631ebc19cf 100644 --- a/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs +++ b/.NET/Microsoft.Recognizers.Text/Extractors/Metadata.cs @@ -30,6 +30,9 @@ public class Metadata // For cases where a language has variations in handling decimal separators public bool TreatAsInteger { get; set; } = false; + // Used to split dates when the weekday does not agree with the day + public int SplitIndex { get; set; } = 0; + public Metadata Clone() { return (Metadata)MemberwiseClone(); diff --git a/Patterns/English/English-DateTime.yaml b/Patterns/English/English-DateTime.yaml index 3d24d0d32c..b8de1664d8 100644 --- a/Patterns/English/English-DateTime.yaml +++ b/Patterns/English/English-DateTime.yaml @@ -267,42 +267,42 @@ DayPrefix: !nestedRegex def: \b({WeekDayRegex}|{SpecialDayRegex})\b references: [ WeekDayRegex, SpecialDayRegex ] DateExtractor1: !nestedRegex - def: \b({DayPrefix}\s*[,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-./]\s*{DayRegex}\)))(\s*\(\s*{DayPrefix}\s*\))?({DateExtractorYearTermRegex}\b)? + def: \b(?{DayPrefix}\s*[,-]?\s*)?(({MonthRegex}[\.]?\s*[/\\.,-]?\s*{DayRegex})|(\({MonthRegex}\s*[-./]\s*{DayRegex}\)))(\s*\(\s*{DayPrefix}\s*\))?({DateExtractorYearTermRegex}\b)? 
references: [ DayPrefix, MonthRegex, DayRegex, DateExtractorYearTermRegex ] DateExtractor3: !nestedRegex - def: \b({DayPrefix}(\s+|\s*,\s*))?({DayRegex}[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?((\s+in)?{DateExtractorYearTermRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[-./]?\s*(the\s+)?(?<day>(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?)\b + def: \b(?<dayprefix>{DayPrefix}(\s+|\s*,\s*))?({DayRegex}[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?((\s+in)?{DateExtractorYearTermRegex})?|{BaseDateTime.FourDigitYearRegex}\s*[-./]?\s*(the\s+)?(?<day>(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?)[\.]?(\s+|\s*[-,/]\s*|\s+of\s+){MonthRegex}[\.]?)\b references: [ DayPrefix, DayRegex, MonthRegex, DateExtractorYearTermRegex, BaseDateTime.FourDigitYearRegex ] DateExtractor4: !nestedRegex def: \b{MonthNumRegex}\s*[/\\\-]\s*{DayRegex}[\.]?\s*[/\\\-]\s*{DateYearRegex} references: [ MonthNumRegex, DayRegex, DateYearRegex ] # The final lookahead in DateExtractor5 avoids extracting as date "10/1-11" from an input like "10/1-11/2/2017" DateExtractor5: !nestedRegex - def: \b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+) + def: \b(?<dayprefix>{DayPrefix}(\s*,)?\s+)?{DayRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DateYearRegex}(?!\s*[/\\\-\.]\s*\d+) references: [ DayPrefix, DayRegex, MonthNumRegex, MonthRegex, DateYearRegex ] DateExtractor6: !nestedRegex - def: (?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b + def: (?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?(?<dayprefix>{DayPrefix}\s+)?{MonthNumRegex}[\-\.]{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b references: [ MonthNumRegex, DayRegex, DayPrefix, DatePreposition, StrictRelativeRegex, BaseDateTime.CheckDecimalRegex ] DateExtractor7L: !nestedRegex - def: 
\b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b + def: \b(?<dayprefix>{DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}{DateExtractorYearTermRegex}(?![%])\b references: [ MonthNumRegex, DayRegex, DayPrefix, DateExtractorYearTermRegex ] DateExtractor7S: !nestedRegex - def: \b({DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b + def: \b(?<dayprefix>{DayPrefix}(\s*,)?\s+)?{MonthNumRegex}\s*/\s*{DayRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b references: [ MonthNumRegex, DayRegex, DayPrefix, BaseDateTime.CheckDecimalRegex ] # The only difference between 7L and 7S is whether "Year" part is required # We have both the long and short Regex because we would like to catch both "11/20, 12" and "11/20, 12/20" # Only use the long Regex would ignore "11/20" in "11/20, 12/20" and it is hard to exhaust all characters after the "year" as we also have cases like "11/20, 12 of April" # Same for DateExtractor9L and DateExtractor9S DateExtractor8: !nestedRegex - def: (?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?({DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b + def: (?<={DatePreposition}\s+)({StrictRelativeRegex}\s+)?(?<dayprefix>{DayPrefix}\s+)?{DayRegex}[\\\-]{MonthNumRegex}(?![%]){BaseDateTime.CheckDecimalRegex}\b references: [ DayRegex, MonthNumRegex, DayPrefix, DatePreposition, StrictRelativeRegex, BaseDateTime.CheckDecimalRegex ] DateExtractor9L: !nestedRegex - def: \b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b + def: \b(?<dayprefix>{DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{DateExtractorYearTermRegex}(?![%])\b references: [ DayRegex, MonthNumRegex, DayPrefix, DateExtractorYearTermRegex ] DateExtractor9S: !nestedRegex - def: \b({DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b + def: 
\b(?<dayprefix>{DayPrefix}(\s*,)?\s+)?{DayRegex}\s*/\s*{MonthNumRegex}{BaseDateTime.CheckDecimalRegex}(?![%])\b references: [ DayRegex, MonthNumRegex, DayPrefix, BaseDateTime.CheckDecimalRegex ] DateExtractorA: !nestedRegex - def: \b({DayPrefix}(\s*,)?\s+)?(({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex})|({MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(the\s+)?(?<day>(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})) + def: \b(?<dayprefix>{DayPrefix}(\s*,)?\s+)?(({BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*({MonthNumRegex}|{MonthRegex})\s*[/\\\-\.]\s*{DayRegex})|({MonthRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*(the\s+)?(?<day>(?:3[0-1]|[1-2]\d|0?[1-9])(?:th|nd|rd|st)?))|({DayRegex}\s*[/\\\-\.]\s*{BaseDateTime.FourDigitYearRegex}\s*[/\\\-\.]\s*{MonthRegex})) references: [ BaseDateTime.FourDigitYearRegex, MonthNumRegex, MonthRegex, DayRegex, DayPrefix ] OfMonth: !nestedRegex def: ^\s*(day\s+)?of\s*{MonthRegex} diff --git a/Specs/DateTime/English/DateExtractor.json b/Specs/DateTime/English/DateExtractor.json index 037f121770..ee25b4cf10 100644 --- a/Specs/DateTime/English/DateExtractor.json +++ b/Specs/DateTime/English/DateExtractor.json @@ -77,13 +77,13 @@ ] }, { - "Input": "i'll go back monday january 12th, 2016", + "Input": "i'll go back tuesday january 12th, 2016", "Results": [ { - "Text": "monday january 12th, 2016", + "Text": "tuesday january 12th, 2016", "Type": "date", "Start": 13, - "Length": 25 + "Length": 26 } ] }, diff --git a/Specs/DateTime/English/DateParser.json b/Specs/DateTime/English/DateParser.json index 106f687c9d..92d0878d14 100644 --- a/Specs/DateTime/English/DateParser.json +++ b/Specs/DateTime/English/DateParser.json @@ -138,13 +138,13 @@ ] }, { - "Input": "i'll go back monday january 12th, 2016", + "Input": "i'll go back tuesday january 12th, 2016", "Context": { 
"ReferenceDateTime": "2016-11-07T00:00:00" }, "Results": [ { - "Text": "monday january 12th, 2016", + "Text": "tuesday january 12th, 2016", "Type": "date", "Value": { "Timex": "2016-01-12", @@ -156,7 +156,7 @@ } }, "Start": 13, - "Length": 25 + "Length": 26 } ] }, diff --git a/Specs/DateTime/English/DateTimeModel.json b/Specs/DateTime/English/DateTimeModel.json index bf6a7f9452..1c73f04c83 100644 --- a/Specs/DateTime/English/DateTimeModel.json +++ b/Specs/DateTime/English/DateTimeModel.json @@ -21356,5 +21356,147 @@ } } ] + }, + { + "Input": "This task must be completed on Monday, 9/2/2021 is the delivery date.", + "Context": { + "ReferenceDateTime": "2019-05-20T12:00:00" + }, + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "monday", + "Start": 31, + "End": 36, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "XXXX-WXX-1", + "type": "date", + "value": "2019-05-13" + }, + { + "timex": "XXXX-WXX-1", + "type": "date", + "value": "2019-05-20" + } + ] + } + }, + { + "Text": "9/2/2021", + "Start": 39, + "End": 46, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2021-09-02", + "type": "date", + "value": "2021-09-02" + } + ] + } + } + ] + }, + { + "Input": "This task must be completed on Thursday, 9/2/2021.", + "Context": { + "ReferenceDateTime": "2019-05-20T12:00:00" + }, + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "thursday, 9/2/2021", + "Start": 31, + "End": 48, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2021-09-02", + "type": "date", + "value": "2021-09-02" + } + ] + } + } + ] + }, + { + "Input": "Summer ends Tuesday, September the 22nd 2021 is the first day of Autumn.", + "Context": { + "ReferenceDateTime": "2021-09-17T12:00:00" + }, + "NotSupported": "java, javascript, python", + "Results": [ + { + "Text": "summer", + "Start": 0, + "End": 5, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ 
+ { + "timex": "SU", + "type": "daterange", + "value": "not resolved" + } + ] + } + }, + { + "Text": "tuesday", + "Start": 12, + "End": 18, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "XXXX-WXX-2", + "type": "date", + "value": "2021-09-14" + }, + { + "timex": "XXXX-WXX-2", + "type": "date", + "value": "2021-09-21" + } + ] + } + }, + { + "Text": "september the 22nd 2021", + "Start": 21, + "End": 43, + "TypeName": "datetimeV2.date", + "Resolution": { + "values": [ + { + "timex": "2021-09-22", + "type": "date", + "value": "2021-09-22" + } + ] + } + }, + { + "Text": "autumn", + "Start": 65, + "End": 70, + "TypeName": "datetimeV2.daterange", + "Resolution": { + "values": [ + { + "timex": "FA", + "type": "daterange", + "value": "not resolved" + } + ] + } + } + ] } ]