@@ -179,11 +179,7 @@ def array_strptime(
179179 npy_datetimestruct dts
180180 int64_t[::1 ] iresult
181181 object [::1 ] result_timezone
182- int year, month, day, minute, hour, second, weekday, julian
183- int week_of_year, week_of_year_start, parse_code, ordinal
184- int iso_week, iso_year
185- int64_t us, ns
186- object val, group_key, ampm, found, tz
182+ object val, tz
187183 bint is_raise = errors== " raise"
188184 bint is_ignore = errors== " ignore"
189185 bint is_coerce = errors== " coerce"
@@ -351,173 +347,9 @@ def array_strptime(
351347 if not string_to_dts_succeeded and fmt == " ISO8601" :
352348 raise ValueError (f" Time data {val} is not ISO8601 format" )
353349
354- # exact matching
355- if exact:
356- found = format_regex.match(val)
357- if not found:
358- raise ValueError (
359- f" time data \" {val}\" doesn't match format \" {fmt}\" "
360- )
361- if len (val) != found.end():
362- raise ValueError (
363- " unconverted data remains when parsing with "
364- f" format \" {fmt}\" : \" {val[found.end():]}\" "
365- )
366-
367- # search
368- else :
369- found = format_regex.search(val)
370- if not found:
371- raise ValueError (
372- f" time data \" {val}\" doesn't match format \" {fmt}\" "
373- )
374-
375- iso_year = - 1
376- year = 1900
377- month = day = 1
378- hour = minute = second = ns = us = 0
379- tz = None
380- # Default to -1 to signify that values not known; not critical to have,
381- # though
382- iso_week = week_of_year = - 1
383- week_of_year_start = - 1
384- # weekday and julian defaulted to -1 so as to signal need to calculate
385- # values
386- weekday = julian = - 1
387- found_dict = found.groupdict()
388- for group_key in found_dict.iterkeys():
389- # Directives not explicitly handled below:
390- # c, x, X
391- # handled by making out of other directives
392- # U, W
393- # worthless without day of the week
394- parse_code = _parse_code_table[group_key]
395-
396- if parse_code == 0 :
397- year = int (found_dict[" y" ])
398- # Open Group specification for strptime() states that a %y
399- # value in the range of [00, 68] is in the century 2000, while
400- # [69,99] is in the century 1900
401- if year <= 68 :
402- year += 2000
403- else :
404- year += 1900
405- elif parse_code == 1 :
406- year = int (found_dict[" Y" ])
407- elif parse_code == 2 :
408- month = int (found_dict[" m" ])
409- # elif group_key == 'B':
410- elif parse_code == 3 :
411- month = locale_time.f_month.index(found_dict[" B" ].lower())
412- # elif group_key == 'b':
413- elif parse_code == 4 :
414- month = locale_time.a_month.index(found_dict[" b" ].lower())
415- # elif group_key == 'd':
416- elif parse_code == 5 :
417- day = int (found_dict[" d" ])
418- # elif group_key == 'H':
419- elif parse_code == 6 :
420- hour = int (found_dict[" H" ])
421- elif parse_code == 7 :
422- hour = int (found_dict[" I" ])
423- ampm = found_dict.get(" p" , " " ).lower()
424- # If there was no AM/PM indicator, we'll treat this like AM
425- if ampm in (" " , locale_time.am_pm[0 ]):
426- # We're in AM so the hour is correct unless we're
427- # looking at 12 midnight.
428- # 12 midnight == 12 AM == hour 0
429- if hour == 12 :
430- hour = 0
431- elif ampm == locale_time.am_pm[1 ]:
432- # We're in PM so we need to add 12 to the hour unless
433- # we're looking at 12 noon.
434- # 12 noon == 12 PM == hour 12
435- if hour != 12 :
436- hour += 12
437- elif parse_code == 8 :
438- minute = int (found_dict[" M" ])
439- elif parse_code == 9 :
440- second = int (found_dict[" S" ])
441- elif parse_code == 10 :
442- s = found_dict[" f" ]
443- # Pad to always return nanoseconds
444- s += " 0" * (9 - len (s))
445- us = long (s)
446- ns = us % 1000
447- us = us // 1000
448- elif parse_code == 11 :
449- weekday = locale_time.f_weekday.index(found_dict[" A" ].lower())
450- elif parse_code == 12 :
451- weekday = locale_time.a_weekday.index(found_dict[" a" ].lower())
452- elif parse_code == 13 :
453- weekday = int (found_dict[" w" ])
454- if weekday == 0 :
455- weekday = 6
456- else :
457- weekday -= 1
458- elif parse_code == 14 :
459- julian = int (found_dict[" j" ])
460- elif parse_code == 15 or parse_code == 16 :
461- week_of_year = int (found_dict[group_key])
462- if group_key == " U" :
463- # U starts week on Sunday.
464- week_of_year_start = 6
465- else :
466- # W starts week on Monday.
467- week_of_year_start = 0
468- elif parse_code == 17 :
469- tz = pytz.timezone(found_dict[" Z" ])
470- elif parse_code == 19 :
471- tz = parse_timezone_directive(found_dict[" z" ])
472- elif parse_code == 20 :
473- iso_year = int (found_dict[" G" ])
474- elif parse_code == 21 :
475- iso_week = int (found_dict[" V" ])
476- elif parse_code == 22 :
477- weekday = int (found_dict[" u" ])
478- weekday -= 1
479-
480- # If we know the wk of the year and what day of that wk, we can figure
481- # out the Julian day of the year.
482- if julian == - 1 and weekday != - 1 :
483- if week_of_year != - 1 :
484- week_starts_Mon = week_of_year_start == 0
485- julian = _calc_julian_from_U_or_W(year, week_of_year, weekday,
486- week_starts_Mon)
487- elif iso_year != - 1 and iso_week != - 1 :
488- year, julian = _calc_julian_from_V(iso_year, iso_week,
489- weekday + 1 )
490- # Cannot pre-calculate date() since can change in Julian
491- # calculation and thus could have different value for the day of the wk
492- # calculation.
493- if julian == - 1 :
494- # Need to add 1 to result since first day of the year is 1, not
495- # 0.
496- ordinal = date(year, month, day).toordinal()
497- julian = ordinal - date(year, 1 , 1 ).toordinal() + 1
498- else :
499- # Assume that if they bothered to include Julian day it will
500- # be accurate.
501- datetime_result = date.fromordinal(
502- (julian - 1 ) + date(year, 1 , 1 ).toordinal())
503- year = datetime_result.year
504- month = datetime_result.month
505- day = datetime_result.day
506- if weekday == - 1 :
507- weekday = date(year, month, day).weekday()
508-
509- dts.year = year
510- dts.month = month
511- dts.day = day
512- dts.hour = hour
513- dts.min = minute
514- dts.sec = second
515- dts.us = us
516- dts.ps = ns * 1000
517-
518- iresult[i] = npy_datetimestruct_to_datetime(NPY_FR_ns, & dts)
519- check_dts_bounds(& dts)
520-
350+ tz = _parse_with_format(
351+ val, fmt, exact, format_regex, locale_time, & iresult[i]
352+ )
521353 result_timezone[i] = tz
522354
523355 except (ValueError , OutOfBoundsDatetime) as ex:
@@ -540,6 +372,190 @@ def array_strptime(
540372 return result, result_timezone.base
541373
542374
cdef tzinfo _parse_with_format(
    str val, str fmt, bint exact, format_regex, locale_time, int64_t* iresult
):
    """
    Parse a single string against a strptime-style format.

    Parameters
    ----------
    val : str
        The string to parse.
    fmt : str
        The format string; used here only to build error messages.
    exact : bint
        If True, ``format_regex`` must match at the start of ``val`` and
        consume all of it.  If False, the first match found anywhere in
        ``val`` is used.
    format_regex
        Compiled pattern whose named groups are keyed by directive letter
        (``"Y"``, ``"m"``, ``"d"``, ...).
    locale_time
        Object providing the ``f_month``, ``a_month``, ``f_weekday``,
        ``a_weekday`` and ``am_pm`` lookup sequences.
    iresult : int64_t*
        Output location; ``iresult[0]`` receives the value returned by
        ``npy_datetimestruct_to_datetime(NPY_FR_ns, ...)``.

    Returns
    -------
    tzinfo or None
        The timezone parsed from a ``%Z`` or ``%z`` directive, or None when
        neither directive was matched.

    Raises
    ------
    ValueError
        If ``val`` does not match the format, if (with ``exact``) unconverted
        data remains, or if the parsed fields do not form a valid date.

    Notes
    -----
    ``check_dts_bounds`` is called after the result is written and may raise
    as well (the visible caller also catches ``OutOfBoundsDatetime``).
    """
    cdef:
        npy_datetimestruct dts
        int year, month, day, minute, hour, second, weekday, julian
        int week_of_year, week_of_year_start, parse_code
        int iso_week, iso_year
        int64_t us, ns
        # Explicit flag instead of overloading ``julian == -1`` as "unknown":
        # a computed day-of-year can legitimately be -1 (see below).
        bint julian_known = False
        object found
        tzinfo tz
        dict found_dict
        str group_key, ampm, fraction

    if exact:
        # exact matching
        found = format_regex.match(val)
        if not found:
            raise ValueError(
                f"time data \"{val}\" doesn't match format \"{fmt}\""
            )
        if len(val) != found.end():
            raise ValueError(
                "unconverted data remains when parsing with "
                f"format \"{fmt}\": \"{val[found.end():]}\""
            )

    else:
        # search
        found = format_regex.search(val)
        if not found:
            raise ValueError(
                f"time data \"{val}\" doesn't match format \"{fmt}\""
            )

    iso_year = -1
    year = 1900
    month = day = 1
    hour = minute = second = ns = us = 0
    tz = None
    # Default to -1 to signify that values not known; not critical to have,
    # though
    iso_week = week_of_year = -1
    week_of_year_start = -1
    # weekday defaults to -1 to signal "not parsed"; julian's initial value
    # is never read unless julian_known is set.
    weekday = julian = -1
    found_dict = found.groupdict()
    for group_key in found_dict:
        # Directives not explicitly handled below:
        #   c, x, X
        #      handled by making out of other directives
        #   U, W
        #      worthless without day of the week
        parse_code = _parse_code_table[group_key]

        if parse_code == 0:
            # %y
            year = int(found_dict["y"])
            # Open Group specification for strptime() states that a %y
            # value in the range of [00, 68] is in the century 2000, while
            # [69,99] is in the century 1900
            if year <= 68:
                year += 2000
            else:
                year += 1900
        elif parse_code == 1:
            # %Y
            year = int(found_dict["Y"])
        elif parse_code == 2:
            # %m
            month = int(found_dict["m"])
        elif parse_code == 3:
            # %B
            month = locale_time.f_month.index(found_dict["B"].lower())
        elif parse_code == 4:
            # %b
            month = locale_time.a_month.index(found_dict["b"].lower())
        elif parse_code == 5:
            # %d
            day = int(found_dict["d"])
        elif parse_code == 6:
            # %H
            hour = int(found_dict["H"])
        elif parse_code == 7:
            # %I (optionally combined with %p)
            hour = int(found_dict["I"])
            ampm = found_dict.get("p", "").lower()
            # If there was no AM/PM indicator, we'll treat this like AM
            if ampm in ("", locale_time.am_pm[0]):
                # We're in AM so the hour is correct unless we're
                # looking at 12 midnight.
                # 12 midnight == 12 AM == hour 0
                if hour == 12:
                    hour = 0
            elif ampm == locale_time.am_pm[1]:
                # We're in PM so we need to add 12 to the hour unless
                # we're looking at 12 noon.
                # 12 noon == 12 PM == hour 12
                if hour != 12:
                    hour += 12
        elif parse_code == 8:
            # %M
            minute = int(found_dict["M"])
        elif parse_code == 9:
            # %S
            second = int(found_dict["S"])
        elif parse_code == 10:
            # %f
            fraction = found_dict["f"]
            # Pad to always return nanoseconds
            fraction += "0" * (9 - len(fraction))
            us = int(fraction)
            ns = us % 1000
            us = us // 1000
        elif parse_code == 11:
            # %A
            weekday = locale_time.f_weekday.index(found_dict["A"].lower())
        elif parse_code == 12:
            # %a
            weekday = locale_time.a_weekday.index(found_dict["a"].lower())
        elif parse_code == 13:
            # %w: 0 is Sunday; convert to Monday == 0 ... Sunday == 6
            weekday = int(found_dict["w"])
            if weekday == 0:
                weekday = 6
            else:
                weekday -= 1
        elif parse_code == 14:
            # %j
            julian = int(found_dict["j"])
            julian_known = True
        elif parse_code == 15 or parse_code == 16:
            # %U / %W
            week_of_year = int(found_dict[group_key])
            if group_key == "U":
                # U starts week on Sunday.
                week_of_year_start = 6
            else:
                # W starts week on Monday.
                week_of_year_start = 0
        elif parse_code == 17:
            # %Z
            tz = pytz.timezone(found_dict["Z"])
        elif parse_code == 19:
            # %z
            tz = parse_timezone_directive(found_dict["z"])
        elif parse_code == 20:
            # %G
            iso_year = int(found_dict["G"])
        elif parse_code == 21:
            # %V
            iso_week = int(found_dict["V"])
        elif parse_code == 22:
            # %u: 1 is Monday; convert to Monday == 0
            weekday = int(found_dict["u"])
            weekday -= 1

    # If we know the wk of the year and what day of that wk, we can figure
    # out the Julian day of the year.
    if not julian_known and weekday != -1:
        if week_of_year != -1:
            # NOTE(review): assumes _calc_julian_from_U_or_W mirrors CPython's
            # helper, which can return zero or negative values for days of
            # week 0 that fall in the previous year.  A result of exactly -1
            # used to be mistaken for "julian unknown" and the parsed week
            # information was silently dropped; the flag avoids that.
            julian = _calc_julian_from_U_or_W(
                year, week_of_year, weekday, week_of_year_start == 0
            )
            julian_known = True
        elif iso_year != -1 and iso_week != -1:
            year, julian = _calc_julian_from_V(iso_year, iso_week,
                                               weekday + 1)
            julian_known = True

    if not julian_known:
        # No day-of-year information: year/month/day are used as parsed.
        # Constructing the date validates them (raises ValueError on e.g.
        # February 30th); the object itself is not needed.
        date(year, month, day)
    else:
        # Assume that if they bothered to include Julian day it will
        # be accurate.  A julian value <= 0 resolves to a date in the
        # previous year via the ordinal arithmetic.
        datetime_result = date.fromordinal(
            (julian - 1) + date(year, 1, 1).toordinal())
        year = datetime_result.year
        month = datetime_result.month
        day = datetime_result.day

    dts.year = year
    dts.month = month
    dts.day = day
    dts.hour = hour
    dts.min = minute
    dts.sec = second
    dts.us = us
    dts.ps = ns * 1000

    iresult[0] = npy_datetimestruct_to_datetime(NPY_FR_ns, &dts)
    check_dts_bounds(&dts)

    return tz
557+
558+
543559class TimeRE (_TimeRE ):
544560 """
545561 Handle conversion from format directives to regexes.
0 commit comments