Skip to content

Commit

Permalink
Regex additions and testing
Browse files Browse the repository at this point in the history
  • Loading branch information
Conor-Keaney committed Jan 30, 2023
1 parent 3482407 commit bbb9e44
Show file tree
Hide file tree
Showing 27 changed files with 292 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Patterns/Dutch/Dutch-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ BetweenRegex: !nestedRegex
def: \b(tussen\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex ]
RelativeYearRegex: !nestedRegex
def: ({YearRegex}|{TwoDigitYearRegex}|(?<order>volgende?|komende?|aanstaande?|aankomende?|huidige?|vorige?|afgelopen|dit)\s+jaar)
def: ({YearRegex}|\'?{TwoDigitYearRegex}|(?<order>volgende?|komende?|aanstaande?|aankomende?|huidige?|vorige?|afgelopen|dit)\s+jaar)
references: [ YearRegex, TwoDigitYearRegex ]
MonthWithYear: !nestedRegex
def: \b(({WrittenMonthRegex}(\.)?(\s*)[/\\\-\.,]?(\s+(van|over|in))?(\s*){RelativeYearRegex})|({RelativeYearRegex}(\s*),?(\s*){WrittenMonthRegex}))\b
Expand Down
4 changes: 2 additions & 2 deletions Patterns/English/English-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ BetweenRegex: !nestedRegex
def: \b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex, WrittenOrdinalDayRegex ]
MonthWithYear: !nestedRegex
def: \b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|{TwoDigitYearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b
def: \b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|\'?{TwoDigitYearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b
references: [ WrittenMonthRegex, YearRegex, TwoDigitYearRegex ]
SpecialYearPrefixes: !simpleRegex
def: (calendar|(?<special>fiscal|school))
Expand Down Expand Up @@ -240,7 +240,7 @@ MonthOfRegex: !simpleRegex
# Year component of a date: either a four-digit year or a two-digit year, e.g. 11/25/16, where 16 means 2016.
# The two-digit alternatives also accept a leading apostrophe ('16).
# A two-digit year is taken when it is not preceded by a comma (negative look-behind), or when it is
# followed by a period that does not start a decimal, by one of ? ! ;, or by the end of the input (look-ahead).
# NOTE(review): the check that the digits are not attached to am/pm or an hour separator (:), which would
# make them a time, is not visible in this definition; presumably it lives in TwoDigitYearRegex. Confirm.
DateYearRegex: !nestedRegex
def: (?<year>{BaseDateTime.FourDigitYearRegex}|(?<!,\s?){TwoDigitYearRegex}|{TwoDigitYearRegex}(?=(\.(?!\d)|[?!;]|$)))
def: (?<year>{BaseDateTime.FourDigitYearRegex}|(?<!,\s?)\'?{TwoDigitYearRegex}|\'?{TwoDigitYearRegex}(?=(\.(?!\d)|[?!;]|$)))
references: [ BaseDateTime.FourDigitYearRegex, TwoDigitYearRegex ]
YearSuffix: !nestedRegex
def: ((,|\sof)?\s*({DateYearRegex}|{FullTextYearRegex}))
Expand Down
4 changes: 2 additions & 2 deletions Patterns/French/French-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ BetweenRegex: !nestedRegex
YearWordRegex: !simpleRegex
def: \b(?<year>l'ann[ée]e)\b
MonthWithYear: !nestedRegex
def: \b({WrittenMonthRegex}(\s*),?(\s+de)?(\s*)({YearRegex}|{TwoDigitYearRegex}|(?<order>cette)\s*{YearWordRegex})|{YearWordRegex}\s*({PastSuffixRegex}|{NextSuffixRegex}))
def: \b({WrittenMonthRegex}(\s*),?(\s+de)?(\s*)({YearRegex}|\'?{TwoDigitYearRegex}|(?<order>cette)\s*{YearWordRegex})|{YearWordRegex}\s*({PastSuffixRegex}|{NextSuffixRegex}))
references: [ WrittenMonthRegex, YearRegex, TwoDigitYearRegex, YearWordRegex, PastSuffixRegex, NextSuffixRegex ]
OneWordPeriodRegex: !nestedRegex
def: \b(({RelativeRegex}\s+)?{WrittenMonthRegex}|(la\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)\s+{StrictRelativeRegex}|{RelativeRegex}\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l'année)|weekend|week-end|mois|l'année|an)\b
Expand Down Expand Up @@ -183,7 +183,7 @@ SpecialDate: !nestedRegex
def: (?<=\b(au|le)\s+){DayRegex}(?!:)\b
references: [ DayRegex ]
DateYearRegex: !nestedRegex
def: (?<year>{YearRegex}|{TwoDigitYearRegex})
def: (?<year>{YearRegex}|\'?{TwoDigitYearRegex})
references: [ YearRegex, TwoDigitYearRegex ]
DateExtractor1: !nestedRegex
def: \b({WeekDayRegex}(\s+|\s*,\s*))?{MonthRegex}\s*[/\\\.\-]?\s*{DayRegex}(\s*([/\\\.\-]|\bde\b)?\s*{BaseDateTime.FourDigitYearRegex})?\b
Expand Down
4 changes: 2 additions & 2 deletions Patterns/German/German-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ BetweenRegex: !nestedRegex
def: ((zwischen\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{RangeConnectorRegex}\s*({DayRegex})(\s+{MonthSuffixRegex})((\s+|\s*,\s*){YearRegex})?|(zwischen\s+)({DayRegex})(\s+{MonthSuffixRegex})?\s*{RangeConnectorRegex}\s*({DayRegex})(\s+{MonthSuffixRegex})?((\s+|\s*,\s*){YearRegex})?)
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex ]
MonthWithYear: !nestedRegex
def: \b((?<month>apr(il|\.)|aug(ust|\.)|dez(ember|\.)|feb(ruar|ber|\.)|januar|j[äa]n(ner|\.)|jul(e?i|l\.)|jun([io]|\.)|märz|mai|nov(ember|\.)|okt(ober|\.)|sept?(ember|\.))(\s*),?(\s+des)?(\s*)({YearRegex}|{TwoDigitYearRegex}|(?<order>nächste[mn]|letzte[mn]|diese(s|n))\s+jahres))
def: \b((?<month>apr(il|\.)|aug(ust|\.)|dez(ember|\.)|feb(ruar|ber|\.)|januar|j[äa]n(ner|\.)|jul(e?i|l\.)|jun([io]|\.)|märz|mai|nov(ember|\.)|okt(ober|\.)|sept?(ember|\.))(\s*),?(\s+des)?(\s*)({YearRegex}|\'?{TwoDigitYearRegex}|(?<order>nächste[mn]|letzte[mn]|diese(s|n))\s+jahres))
references: [ YearRegex , TwoDigitYearRegex ]
OneWordPeriodRegex: !nestedRegex
def: \b((((im\s+)?monat\s+)?({RelativeRegex}\s*(jahr\s*(im\s*)?)?)?(?<month>apr(il|\.)|aug(ust|\.)|dez(ember|\.)|feb(ruar|ber|\.)|j[äa]n(uar|ner|\.)|jul(e?i|l\.)|jun([io]|\.)|märz|mai|nov(ember|\.)|okt(ober|\.)|sept?(ember|\.)))|(?<business>unter\s+der\s+woche)|({RelativeRegex}\s+)?((?<business>werktags|arbeitswoche)|woche(nende)?|monat(s)?|jahr|jahres)(?!(\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+bis\s+heute)))\b
Expand Down Expand Up @@ -162,7 +162,7 @@ MonthRegex: !simpleRegex
# Year component of a date: either a four-digit year or a two-digit year, e.g. 11/25/16, where 16 means 2016.
# The two-digit alternative also accepts a leading apostrophe ('16).
# NOTE(review): the check that the digits are not attached to am/pm or an hour separator (:), which would
# make them a time, is not visible in this definition; presumably it lives in TwoDigitYearRegex. Confirm.
DateYearRegex: !nestedRegex
def: (?<year>{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})
def: (?<year>{BaseDateTime.FourDigitYearRegex}|\'?{TwoDigitYearRegex})
references: [ BaseDateTime.FourDigitYearRegex, TwoDigitYearRegex ]
OnRegex: !nestedRegex
def: (?<=\bam\s+)({DayRegex}s?)\b
Expand Down
4 changes: 2 additions & 2 deletions Patterns/Italian/Italian-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ TwoDigitYearRegex: !nestedRegex
# Year component of a date: either a four-digit year or a two-digit year, e.g. 11/25/16, where 16 means 2016.
# The two-digit alternative also accepts a leading apostrophe ('16).
# NOTE(review): the check that the digits are not attached to am/pm or an hour separator (:), which would
# make them a time, is not visible in this definition; presumably it lives in TwoDigitYearRegex. Confirm.
DateYearRegex: !nestedRegex
def: (?<year>{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})
def: (?<year>{BaseDateTime.FourDigitYearRegex}|\'?{TwoDigitYearRegex})
references: [ BaseDateTime.FourDigitYearRegex, TwoDigitYearRegex ]
YearSuffix: !nestedRegex
def: (,?\s*({DateYearRegex}|{FullTextYearRegex}))
Expand All @@ -133,7 +133,7 @@ BetweenRegex: !nestedRegex
YearWordRegex: !simpleRegex
def: \b(?<year>l'anno)\b
MonthWithYear: !nestedRegex
def: \b({MonthRegex}(\.)?(\s*)[/\\\-\.,]?(((\s+del)?\s+{YearRegex}|{TwoDigitYearRegex})|((\s+(del|di|il))?\s+(?<order>prossim['o]|passato|quest['o])\s*anno)|((\s+(del)?l')anno\s+(?<order>prossimo|passato))))
def: \b({MonthRegex}(\.)?(\s*)[/\\\-\.,]?(((\s+del)?\s+{YearRegex}|\'?{TwoDigitYearRegex})|((\s+(del|di|il))?\s+(?<order>prossim['o]|passato|quest['o])\s*anno)|((\s+(del)?l')anno\s+(?<order>prossimo|passato))))
references: [ YearRegex, MonthRegex, TwoDigitYearRegex ]
SpecialYearPrefixes: !simpleRegex
def: (?<special>fiscale|scolastico)
Expand Down
4 changes: 2 additions & 2 deletions Patterns/Portuguese/Portuguese-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ OneWordPeriodRegex: !nestedRegex
def: \b(((pr[oó]xim[oa]?|[nd]?es[st]e|aquel[ea]|[uú]ltim[oa]?|em)\s+)?(?<month>abr(il)?|ago(sto)?|dez(embro)?|fev(ereiro)?|jan(eiro)?|ju[ln](ho)?|mar([çc]o)?|maio?|nov(embro)?|out(ubro)?|sep?t(embro)?)|({RelativeRegex}\s+)?(ano\s+{SpecialYearPrefixes}|{SpecialYearPrefixes}\s+ano)|(?<=\b(de|do|da|o|a)\s+)?(pr[oó]xim[oa](s)?|[uú]ltim[oa]s?|est(e|a))\s+(fim de semana|fins de semana|semana|m[êe]s|ano)|fim de semana|fins de semana|(m[êe]s|anos)? [àa] data)\b
references: [RelativeRegex, SpecialYearPrefixes]
MonthWithYearRegex: !nestedRegex
def: \b((((pr[oó]xim[oa](s)?|[nd]?es[st]e|aquele|[uú]ltim[oa]?|em)\s+)?{MonthRegex}|((n?o\s+)?(?<cardinal>primeiro|1o|segundo|2o|terceiro|3o|[cq]uarto|4o|quinto|5o|sexto|6o|s[eé]timo|7o|oitavo|8o|nono|9o|d[eé]cimo(\s+(primeiro|segundo))?|10o|11o|12o|[uú]ltimo)\s+m[eê]s(?=\s+(d[aeo]|[ao]))))\s+((d[aeo]|[ao])\s+)?({YearRegex}|{TwoDigitYearRegex}|(?<order>pr[oó]ximo(s)?|[uú]ltimo?|[nd]?es[st]e)\s+ano))\b
def: \b((((pr[oó]xim[oa](s)?|[nd]?es[st]e|aquele|[uú]ltim[oa]?|em)\s+)?{MonthRegex}|((n?o\s+)?(?<cardinal>primeiro|1o|segundo|2o|terceiro|3o|[cq]uarto|4o|quinto|5o|sexto|6o|s[eé]timo|7o|oitavo|8o|nono|9o|d[eé]cimo(\s+(primeiro|segundo))?|10o|11o|12o|[uú]ltimo)\s+m[eê]s(?=\s+(d[aeo]|[ao]))))\s+((d[aeo]|[ao])\s+)?({YearRegex}|\'?{TwoDigitYearRegex}|(?<order>pr[oó]ximo(s)?|[uú]ltimo?|[nd]?es[st]e)\s+ano))\b
references: [ MonthRegex, YearRegex, TwoDigitYearRegex ]
MonthNumWithYearRegex: !nestedRegex
def: ({YearRegex}(\s*?)[/\-\.](\s*?){MonthNumRegex})|({MonthNumRegex}(\s*?)[/\-](\s*?){YearRegex})
Expand Down Expand Up @@ -217,7 +217,7 @@ WeekDayEnd: !nestedRegex
WeekDayStart: !simpleRegex
def: ^\b$
DateYearRegex: !nestedRegex
def: (?<year>{YearRegex}|{TwoDigitYearRegex})
def: (?<year>{YearRegex}|\'?{TwoDigitYearRegex})
references: [ YearRegex, TwoDigitYearRegex ]
DateExtractor1: !nestedRegex
# (domingo,)? 5 de Abril
Expand Down
4 changes: 2 additions & 2 deletions Patterns/Spanish/Spanish-DateTime.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ OneWordPeriodRegex: !nestedRegex
def: \b(((((la|el)\s+)?mes\s+(({OfPrepositionRegex})\s+)?)|((pr[oó]xim[oa]?|est[ea]|[uú]ltim[oa]?)\s+))?({MonthRegex})|((el\s+)?{RelativeRegex}\s+)?(({SpecialYearPrefixes}\s+)año|año\s+{SpecialYearPrefixes})|(((la|el)\s+)?((({RelativeRegex}\s+)({DateUnitRegex}|(fin\s+de\s+)?semana|finde)(\s+{RelativeSuffixRegex})?)|{DateUnitRegex}(\s+{RelativeSuffixRegex}))|va\s+de\s+{DateUnitRegex}|((año|mes)(\s+(a|hasta)\s+la\s+fecha)?|((el\s+)?fin\s+de\s+)?semana|(el\s+)?finde))\b)
references: [MonthRegex, RelativeRegex, OfPrepositionRegex, RelativeSuffixRegex, DateUnitRegex, SpecialYearPrefixes]
# Matches a month name (optionally preceded by a relative word such as "próximo"/"este"/"último"),
# or an ordinal followed by "mes", and then either a year, a relative year ("próximo año", "este año", ...),
# or "de/del/en" plus a two-digit year (optionally written with a leading apostrophe).
# The ordinal alternative is captured in the "cardinal" group; it must spell "cuarto" in full, because the
# leading \b prevents a match from starting in the middle of the word.
MonthWithYearRegex: !nestedRegex
def: \b((((pr[oó]xim[oa](s)?|est?[ae]|[uú]ltim[oa]?)\s+)?{MonthRegex}|((el\s+)?(?<cardinal>primero?|1(er|ro)|segundo|2do|tercero?|3(er|ro)|cuarto|4to|quinto|5to|sexto|6to|s[eé]ptimo|7mo|octavo|8vo|noveno|9no|d[eé]cimo|10mo|und[eé]cimo|11mo|duod[eé]cimo|12mo|[uú]ltimo)\s+mes(?=\s+(del?|en))))((\s+|(\s*[,-]\s*))((de(l|\s+la)?|en)\s+)?({YearRegex}|(?<order>pr[oó]ximo(s)?|[uú]ltimo?|este)\s+año)|\s+(del?|en)\s+{TwoDigitYearRegex}))\b
def: \b((((pr[oó]xim[oa](s)?|est?[ae]|[uú]ltim[oa]?)\s+)?{MonthRegex}|((el\s+)?(?<cardinal>primero?|1(er|ro)|segundo|2do|tercero?|3(er|ro)|cuarto|4to|quinto|5to|sexto|6to|s[eé]ptimo|7mo|octavo|8vo|noveno|9no|d[eé]cimo|10mo|und[eé]cimo|11mo|duod[eé]cimo|12mo|[uú]ltimo)\s+mes(?=\s+(del?|en))))((\s+|(\s*[,-]\s*))((de(l|\s+la)?|en)\s+)?({YearRegex}|(?<order>pr[oó]ximo(s)?|[uú]ltimo?|este)\s+año)|\s+(del?|en)\s+\'?{TwoDigitYearRegex}))\b
references: [ MonthRegex, YearRegex, TwoDigitYearRegex ]
MonthNumWithYearRegex: !nestedRegex
def: \b(({YearRegex}(\s*?)[/\-\.~](\s*?){MonthNumRegex})|({MonthNumRegex}(\s*?)[/\-\.~](\s*?){YearRegex}))\b
Expand Down Expand Up @@ -237,7 +237,7 @@ WeekDayEnd: !nestedRegex
WeekDayStart: !simpleRegex
def: ^\b$
DateYearRegex: !nestedRegex
def: (?<year>{YearRegex}|(?<!,\s?){TwoDigitYearRegex}|{TwoDigitYearRegex}(?=(\.(?!\d)|[?!;]|$)))
def: (?<year>{YearRegex}|(?<!,\s?)\'?{TwoDigitYearRegex}|\'?{TwoDigitYearRegex}(?=(\.(?!\d)|[?!;]|$)))
references: [ YearRegex, TwoDigitYearRegex ]
DateExtractor1: !nestedRegex
# (domingo,)? 5 de Abril
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ class EnglishDateTime:
MonthFrontSimpleCasesRegex = f'\\b({RangePrefixRegex}\\s+)?{MonthSuffixRegex}\\s+((from)\\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\\s*{TillRegex}\\s*({DayRegex}|{WrittenOrdinalDayRegex})((\\s+|\\s*,\\s*){YearRegex})?\\b'
MonthFrontBetweenRegex = f'\\b{MonthSuffixRegex}\\s+(between\\s+)({DayRegex}|{WrittenOrdinalDayRegex})\\s*{RangeConnectorRegex}\\s*({DayRegex}|{WrittenOrdinalDayRegex})((\\s+|\\s*,\\s*){YearRegex})?\\b'
BetweenRegex = f'\\b(between\\s+)({DayRegex}|{WrittenOrdinalDayRegex})\\s*{RangeConnectorRegex}\\s*({DayRegex}|{WrittenOrdinalDayRegex})\\s+{MonthSuffixRegex}((\\s+|\\s*,\\s*){YearRegex})?\\b'
MonthWithYear = f'\\b((({WrittenMonthRegex}[\\.]?|((the\\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\\s+month(?=\\s+(of|in))))((\\s*)[/\\\\\\-\\.,]?(\\s+(of|in))?(\\s*)({YearRegex}|{TwoDigitYearRegex}|(?<order>following|next|last|this)\\s+year)|\\s+(of|in)\\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\\s+year)(\\s*),?(\\s*){WrittenMonthRegex}))\\b'
MonthWithYear = f'\\b((({WrittenMonthRegex}[\\.]?|((the\\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\\s+month(?=\\s+(of|in))))((\\s*)[/\\\\\\-\\.,]?(\\s+(of|in))?(\\s*)({YearRegex}|\\\'?{TwoDigitYearRegex}|(?<order>following|next|last|this)\\s+year)|\\s+(of|in)\\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\\s+year)(\\s*),?(\\s*){WrittenMonthRegex}))\\b'
SpecialYearPrefixes = f'(calendar|(?<special>fiscal|school))'
OneWordPeriodRegex = f'\\b((((the\\s+)?month of\\s+)?({StrictRelativeRegex}\\s+)?{MonthRegex})|(month|year) to date|(?<toDate>((un)?till?|to)\\s+date)|({RelativeRegex}\\s+)?(my\\s+)?((?<business>working\\s+week|workweek)|week(end)?|month|fortnight|(({SpecialYearPrefixes}\\s+)?year))(?!((\\s+of)?\\s+\\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\\s+to\\s+date))(\\s+{AfterNextSuffixRegex})?)\\b'
MonthNumWithYear = f'\\b(({BaseDateTime.FourDigitYearRegex}(\\s*)[/\\-\\.](\\s*){MonthNumRegex})|({MonthNumRegex}(\\s*)[/\\-](\\s*){BaseDateTime.FourDigitYearRegex}))\\b'
Expand Down Expand Up @@ -109,7 +109,7 @@ class EnglishDateTime:
WhichWeekRegex = f'\\b(week)(\\s*)(?<number>5[0-3]|[1-4]\\d|0?[1-9])(\\s+of\\s+({YearRegex}|{RelativeRegex}\\s+year))?\\b'
WeekOfRegex = f'(the\\s+)?((week)(\\s+(of|(commencing|starting|beginning)(\\s+on)?))|w/c)(\\s+the)?'
MonthOfRegex = f'(month)(\\s*)(of)'
DateYearRegex = f'(?<year>{BaseDateTime.FourDigitYearRegex}|(?<!,\\s?){TwoDigitYearRegex}|{TwoDigitYearRegex}(?=(\\.(?!\\d)|[?!;]|$)))'
DateYearRegex = f'(?<year>{BaseDateTime.FourDigitYearRegex}|(?<!,\\s?)\\\'?{TwoDigitYearRegex}|\\\'?{TwoDigitYearRegex}(?=(\\.(?!\\d)|[?!;]|$)))'
YearSuffix = f'((,|\\sof)?\\s*({DateYearRegex}|{FullTextYearRegex}))'
OnRegex = f'(?<=\\bon\\s+)({DayRegex}s?)\\b'
RelaxedOnRegex = f'(?<=\\b(on|at|in)\\s+)((?<day>(3[0-1]|[0-2]?\\d)(?:th|nd|rd|st))s?)\\b'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class FrenchDateTime:
MonthFrontBetweenRegex = f'\\b{MonthSuffixRegex}\\s+(entre|d[ue]\\s+)({DayRegex})\\s*{RangeConnectorRegex}\\s*({DayRegex})((\\s+|\\s*,\\s*){YearRegex})?\\b'
BetweenRegex = f'\\b(entre\\s+)({DayRegex})\\s*{RangeConnectorRegex}\\s*({DayRegex})\\s+{MonthSuffixRegex}((\\s+|\\s*,\\s*){YearRegex})?\\b'
YearWordRegex = f'\\b(?<year>l\'ann[ée]e)\\b'
MonthWithYear = f'\\b({WrittenMonthRegex}(\\s*),?(\\s+de)?(\\s*)({YearRegex}|{TwoDigitYearRegex}|(?<order>cette)\\s*{YearWordRegex})|{YearWordRegex}\\s*({PastSuffixRegex}|{NextSuffixRegex}))'
MonthWithYear = f'\\b({WrittenMonthRegex}(\\s*),?(\\s+de)?(\\s*)({YearRegex}|\\\'?{TwoDigitYearRegex}|(?<order>cette)\\s*{YearWordRegex})|{YearWordRegex}\\s*({PastSuffixRegex}|{NextSuffixRegex}))'
OneWordPeriodRegex = f'\\b(({RelativeRegex}\\s+)?{WrittenMonthRegex}|(la\\s+)?(weekend|(fin de )?semaine|week-end|mois|ans?|l\'année)\\s+{StrictRelativeRegex}|{RelativeRegex}\\s+(weekend|(fin de )?semaine|week-end|mois|ans?|l\'année)|weekend|week-end|mois|l\'année|an)\\b'
MonthNumWithYear = f'({YearRegex}(\\s*)[/\\-\\.](\\s*){MonthNumRegex})|({MonthNumRegex}(\\s*)[/\\-](\\s*){YearRegex})'
# Matches "<ordinal> semaine (de) <month>", optionally preceded by "le"; the ordinal is captured in the
# "cardinal" group and the whole expression in "wom". The ordinals are spelled "deuxième" and "cinquième"
# (accent optional) so that the correctly written words are recognized.
# NOTE(review): this attribute appears to mirror a YAML pattern definition — apply the same spelling fix there.
WeekOfMonthRegex = f'(?<wom>(le\\s+)?(?<cardinal>premier|1er|deuxi[èe]me|2|troisi[èe]me|3|quatri[èe]me|4|cinqui[èe]me|5)\\s+semaine(\\s+de)?\\s+{MonthSuffixRegex})'
Expand Down Expand Up @@ -85,7 +85,7 @@ class FrenchDateTime:
AmbiguousRangeModifierPrefix = f'^\\b$'
NumberEndingPattern = f'^\\b$'
SpecialDate = f'(?<=\\b(au|le)\\s+){DayRegex}(?!:)\\b'
DateYearRegex = f'(?<year>{YearRegex}|{TwoDigitYearRegex})'
DateYearRegex = f'(?<year>{YearRegex}|\\\'?{TwoDigitYearRegex})'
DateExtractor1 = f'\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{MonthRegex}\\s*[/\\\\\\.\\-]?\\s*{DayRegex}(\\s*([/\\\\\\.\\-]|\\bde\\b)?\\s*{BaseDateTime.FourDigitYearRegex})?\\b'
DateExtractor2 = f'\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?{DayRegex}(\\s+|\\s*,\\s*|\\s+){MonthRegex}\\s*([\\.\\-]|\\bde\\b)?\\s*{DateYearRegex}\\b'
DateExtractor3 = f'\\b({WeekDayRegex}(\\s+|\\s*,\\s*))?((?<!\\d\\s)(?<!\\d){DayRegex}(\\s+|\\s*[.,/-])({MonthRegex}((\\s+|\\s*[.,/-]\\s*){DateYearRegex}(?!\\s*\\d))?|{MonthNumRegex}(\\s+|\\s*[.,/-]\\s*){DateYearRegex}(?!\\s*\\d))|{BaseDateTime.FourDigitYearRegex}\\s*[.,/-]?\\s*{DayRegex}\\s*[.,/-]?\\s*{MonthRegex})\\b'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class GermanDateTime:
MonthFrontSimpleCasesRegex = f'((vom|zwischen)\\s*)?{MonthSuffixRegex}\\s*((vom|zwischen)\\s*)?({DayRegex})\\s*{TillRegex}\\s*({DayRegex})((\\s+|\\s*,\\s*){YearRegex})?'
MonthFrontBetweenRegex = f'({MonthSuffixRegex}\\s+(zwischen\\s+)({DayRegex})\\s*{RangeConnectorRegex}\\s*({DayRegex})((\\s+|\\s*,\\s*){YearRegex})?)'
BetweenRegex = f'((zwischen\\s+)({DayRegex})(\\s+{MonthSuffixRegex})?\\s*{RangeConnectorRegex}\\s*({DayRegex})(\\s+{MonthSuffixRegex})((\\s+|\\s*,\\s*){YearRegex})?|(zwischen\\s+)({DayRegex})(\\s+{MonthSuffixRegex})?\\s*{RangeConnectorRegex}\\s*({DayRegex})(\\s+{MonthSuffixRegex})?((\\s+|\\s*,\\s*){YearRegex})?)'
MonthWithYear = f'\\b((?<month>apr(il|\\.)|aug(ust|\\.)|dez(ember|\\.)|feb(ruar|ber|\\.)|januar|j[äa]n(ner|\\.)|jul(e?i|l\\.)|jun([io]|\\.)|märz|mai|nov(ember|\\.)|okt(ober|\\.)|sept?(ember|\\.))(\\s*),?(\\s+des)?(\\s*)({YearRegex}|{TwoDigitYearRegex}|(?<order>nächste[mn]|letzte[mn]|diese(s|n))\\s+jahres))'
MonthWithYear = f'\\b((?<month>apr(il|\\.)|aug(ust|\\.)|dez(ember|\\.)|feb(ruar|ber|\\.)|januar|j[äa]n(ner|\\.)|jul(e?i|l\\.)|jun([io]|\\.)|märz|mai|nov(ember|\\.)|okt(ober|\\.)|sept?(ember|\\.))(\\s*),?(\\s+des)?(\\s*)({YearRegex}|\\\'?{TwoDigitYearRegex}|(?<order>nächste[mn]|letzte[mn]|diese(s|n))\\s+jahres))'
OneWordPeriodRegex = f'\\b((((im\\s+)?monat\\s+)?({RelativeRegex}\\s*(jahr\\s*(im\\s*)?)?)?(?<month>apr(il|\\.)|aug(ust|\\.)|dez(ember|\\.)|feb(ruar|ber|\\.)|j[äa]n(uar|ner|\\.)|jul(e?i|l\\.)|jun([io]|\\.)|märz|mai|nov(ember|\\.)|okt(ober|\\.)|sept?(ember|\\.)))|(?<business>unter\\s+der\\s+woche)|({RelativeRegex}\\s+)?((?<business>werktags|arbeitswoche)|woche(nende)?|monat(s)?|jahr|jahres)(?!(\\s+\\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\\s+bis\\s+heute)))\\b'
MonthNumWithYear = f'({YearRegex}(\\s*)[/\\-\\.](\\s*){MonthNumRegex})|({MonthNumRegex}(\\s*)[/\\-\\.](\\s*){YearRegex})'
WeekOfMonthRegex = f'(?<wom>((die|der)\\s+)(?<cardinal>erste[rns]?|1\\.|zweite[rns]?|2\\.|dritte[rns]?|3\\.|vierte[rns]?|4\\.|fünfte[rns]?|5\\.|letzte[rmns]?)\\s+woche\\s+(des|diese(s|n)|im)\\s+({MonthSuffixRegex}|monat(s)?))'
Expand All @@ -78,7 +78,7 @@ class GermanDateTime:
WeekOfRegex = f'(die\\s+)?(woche)(\\s+des)'
MonthOfRegex = f'(monat)(\\s*)(des)'
MonthRegex = f'(?<month>apr((il)?\\b|\\.)|aug((ust)?\\b|\\.)|dez((ember)?\\b|\\.)|feb((ruar|ber)?\\b|\\.)|januar|j[äa]n((ner)?\\b|\\.)|jul((e?i|l)?\\b|l\\.)|jun([io]?\\b|\\.)|märz|mai|nov((ember)?\\b|\\.)|okt((ober)?\\b|\\.)|sept?((ember)?\\b|\\.))'
DateYearRegex = f'(?<year>{BaseDateTime.FourDigitYearRegex}|{TwoDigitYearRegex})'
DateYearRegex = f'(?<year>{BaseDateTime.FourDigitYearRegex}|\\\'?{TwoDigitYearRegex})'
OnRegex = f'(?<=\\bam\\s+)({DayRegex}s?)\\b'
RelaxedOnRegex = f'(?<=\\b(am|an dem)\\s+)((?<day>10|11|12|13|14|15|16|17|18|19|1|20|21|22|23|24|25|26|27|28|29|2|30|31|3|4|5|6|7|8|9)([\\.]))'
ThisRegex = f'(((diese((n|m)|(\\s*woche))(\\s*am)?\\s+){WeekDayRegex})|diese(n|r)?\\s*(sommer|winter|frühling|herbst))'
Expand Down

0 comments on commit bbb9e44

Please sign in to comment.