@@ -47,14 +47,18 @@ from numpy cimport (
4747)
4848
4949from pandas._libs.missing cimport checknull_with_nat_and_na
50- from pandas._libs.tslibs.conversion cimport get_datetime64_nanos
50+ from pandas._libs.tslibs.conversion cimport (
51+ get_datetime64_nanos,
52+ parse_pydatetime,
53+ )
5154from pandas._libs.tslibs.dtypes cimport (
5255 get_supported_reso,
5356 npy_unit_to_abbrev,
5457 npy_unit_to_attrname,
5558)
5659from pandas._libs.tslibs.nattype cimport (
5760 NPY_NAT,
61+ c_NaT as NaT,
5862 c_nat_strings as nat_strings,
5963)
6064from pandas._libs.tslibs.np_datetime cimport (
@@ -65,7 +69,6 @@ from pandas._libs.tslibs.np_datetime cimport (
6569 npy_datetimestruct,
6670 npy_datetimestruct_to_datetime,
6771 pydate_to_dt64,
68- pydatetime_to_dt64,
6972 string_to_dts,
7073)
7174
@@ -82,6 +85,8 @@ from pandas._libs.util cimport (
8285
8386from pandas._libs.tslibs.timestamps import Timestamp
8487
88+ from pandas._libs.tslibs.tzconversion cimport tz_localize_to_utc_single
89+
8590cnp.import_array()
8691
8792
@@ -314,11 +319,13 @@ def array_strptime(
314319 Py_ssize_t i, n = len (values)
315320 npy_datetimestruct dts
316321 int64_t[::1 ] iresult
317- object [::1 ] result_timezone
318322 object val, tz
323+ bint seen_datetime_offset = False
319324 bint is_raise = errors== " raise"
320325 bint is_ignore = errors== " ignore"
321326 bint is_coerce = errors== " coerce"
327+ bint is_same_offsets
328+ set out_tzoffset_vals = set ()
322329 tzinfo tz_out = None
323330 bint iso_format = format_is_iso(fmt)
324331 NPY_DATETIMEUNIT out_bestunit, item_reso
@@ -338,7 +345,6 @@ def array_strptime(
338345 abbrev = npy_unit_to_abbrev(creso)
339346 result = np.empty(n, dtype = f" M8[{abbrev}]" )
340347 iresult = result.view(" i8" )
341- result_timezone = np.empty(n, dtype = " object" )
342348
343349 dts.us = dts.ps = dts.as = 0
344350
@@ -361,23 +367,18 @@ def array_strptime(
361367 if infer_reso:
362368 creso = state.creso
363369 tz_out = state.process_datetime(val, tz_out, utc)
364- if isinstance (val, _Timestamp):
365- val = (< _Timestamp> val)._as_creso(creso)
366- iresult[i] = val.tz_localize(None )._value
367- else :
368- iresult[i] = pydatetime_to_dt64(
369- val.replace(tzinfo = None ), & dts, reso = creso
370- )
371- result_timezone[i] = val.tzinfo
370+ iresult[i] = parse_pydatetime(val, & dts, state.creso)
372371 continue
373372 elif PyDate_Check(val):
373+ state.found_other = True
374374 item_reso = NPY_DATETIMEUNIT.NPY_FR_s
375375 state.update_creso(item_reso)
376376 if infer_reso:
377377 creso = state.creso
378378 iresult[i] = pydate_to_dt64(val, & dts, reso = creso)
379379 continue
380380 elif cnp.is_datetime64_object(val):
381+ state.found_other = True
381382 item_reso = get_supported_reso(get_datetime64_unit(val))
382383 state.update_creso(item_reso)
383384 if infer_reso:
@@ -418,13 +419,17 @@ def array_strptime(
418419 f" Out of bounds {attrname} timestamp: {val}"
419420 ) from err
420421 if out_local == 1 :
421- # Store the out_tzoffset in seconds
422- # since we store the total_seconds of
423- # dateutil.tz.tzoffset objects
422+ nsecs = out_tzoffset * 60
423+ out_tzoffset_vals.add(nsecs)
424+ seen_datetime_offset = True
424425 tz = timezone(timedelta(minutes = out_tzoffset))
425- result_timezone[i] = tz
426- out_local = 0
427- out_tzoffset = 0
426+ value = tz_localize_to_utc_single(
427+ value, tz, ambiguous = " raise" , nonexistent = None , creso = creso
428+ )
429+ else :
430+ tz = None
431+ out_tzoffset_vals.add(" naive" )
432+ state.found_naive_str = True
428433 iresult[i] = value
429434 continue
430435
@@ -450,14 +455,34 @@ def array_strptime(
450455 state.update_creso(item_reso)
451456 if infer_reso:
452457 creso = state.creso
458+
453459 try :
454460 iresult[i] = npy_datetimestruct_to_datetime(creso, & dts)
455461 except OverflowError as err:
456462 attrname = npy_unit_to_attrname[creso]
457463 raise OutOfBoundsDatetime(
458464 f" Out of bounds {attrname} timestamp: {val}"
459465 ) from err
460- result_timezone[i] = tz
466+
467+ if tz is not None :
468+ ival = iresult[i]
469+ iresult[i] = tz_localize_to_utc_single(
470+ ival, tz, ambiguous = " raise" , nonexistent = None , creso = creso
471+ )
472+ nsecs = (ival - iresult[i])
473+ if creso == NPY_FR_ns:
474+ nsecs = nsecs // 10 ** 9
475+ elif creso == NPY_DATETIMEUNIT.NPY_FR_us:
476+ nsecs = nsecs // 10 ** 6
477+ elif creso == NPY_DATETIMEUNIT.NPY_FR_ms:
478+ nsecs = nsecs // 10 ** 3
479+
480+ out_tzoffset_vals.add(nsecs)
481+ seen_datetime_offset = True
482+ else :
483+ state.found_naive_str = True
484+ tz = None
485+ out_tzoffset_vals.add(" naive" )
461486
462487 except (ValueError , OutOfBoundsDatetime) as ex:
463488 ex.args = (
@@ -474,7 +499,37 @@ def array_strptime(
474499 continue
475500 elif is_raise:
476501 raise
477- return values, []
502+ return values, None
503+
504+ if seen_datetime_offset and not utc:
505+ is_same_offsets = len (out_tzoffset_vals) == 1
506+ if not is_same_offsets or (state.found_naive or state.found_other):
507+ result2 = _array_strptime_object_fallback(
508+ values, fmt = fmt, exact = exact, errors = errors, utc = utc
509+ )
510+ return result2, None
511+ elif tz_out is not None :
512+ # GH#55693
513+ tz_offset = out_tzoffset_vals.pop()
514+ tz_out2 = timezone(timedelta(seconds = tz_offset))
515+ if not tz_compare(tz_out, tz_out2):
516+ # e.g. test_to_datetime_mixed_offsets_with_utc_false_deprecated
517+ result2 = _array_strptime_object_fallback(
518+ values, fmt = fmt, exact = exact, errors = errors, utc = utc
519+ )
520+ return result2, None
521+ # e.g. test_guess_datetime_format_with_parseable_formats
522+ else :
523+ # e.g. test_to_datetime_iso8601_with_timezone_valid
524+ tz_offset = out_tzoffset_vals.pop()
525+ tz_out = timezone(timedelta(seconds = tz_offset))
526+ elif not utc:
527+ if tz_out and (state.found_other or state.found_naive_str):
528+ # found_other indicates a tz-naive int, float, dt64, or date
529+ result2 = _array_strptime_object_fallback(
530+ values, fmt = fmt, exact = exact, errors = errors, utc = utc
531+ )
532+ return result2, None
478533
479534 if infer_reso:
480535 if state.creso_ever_changed:
@@ -488,7 +543,6 @@ def array_strptime(
488543 utc = utc,
489544 creso = state.creso,
490545 )
491-
492546 elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
493547 # i.e. we never encountered anything non-NaT, default to "s". This
494548 # ensures that insert and concat-like operations with NaT
@@ -499,7 +553,7 @@ def array_strptime(
499553 # a second pass.
500554 abbrev = npy_unit_to_abbrev(state.creso)
501555 result = iresult.base.view(f" M8[{abbrev}]" )
502- return result, result_timezone.base
556+ return result, tz_out
503557
504558
505559cdef tzinfo _parse_with_format(
@@ -737,6 +791,157 @@ cdef tzinfo _parse_with_format(
737791 return tz
738792
739793
def _array_strptime_object_fallback(
    ndarray[object] values,
    str fmt,
    bint exact=True,
    errors="raise",
    bint utc=False,
):
    """
    Parse ``values`` element by element into an object-dtype array.

    ``array_strptime`` calls this when the parsed values cannot share a
    single timezone (mixed UTC offsets, or tz-aware mixed with tz-naive
    entries) and ``utc`` is False. Each element becomes a ``Timestamp``
    carrying its own tz (or ``NaT``), and a ``FutureWarning`` about
    mixed-timezone parsing is emitted before returning.

    Parameters
    ----------
    values : ndarray[object]
        Strings, datetimes, dates, datetime64 scalars or null-likes to parse.
    fmt : str
        strptime-style format, or the literal ``"ISO8601"``.
    exact : bool, default True
        Forwarded to ``string_to_dts`` and ``_parse_with_format``.
    errors : {"raise", "ignore", "coerce"}, default "raise"
        ``"raise"`` re-raises the parsing error with the position appended,
        ``"coerce"`` stores ``NaT`` for the failing element, ``"ignore"``
        returns the input ``values`` unchanged on the first failure.
    utc : bool, default False
        Forwarded to ``parse_today_now``.

    Returns
    -------
    ndarray[object]
        Array of ``Timestamp`` / ``NaT`` objects, or ``values`` itself when
        ``errors="ignore"`` and an element failed to parse.

    Raises
    ------
    ValueError, OutOfBoundsDatetime
        When an element cannot be parsed and ``errors="raise"``.
    """
    cdef:
        Py_ssize_t i, n = len(values)
        npy_datetimestruct dts
        int64_t iresult
        object val
        tzinfo tz
        bint is_raise = errors == "raise"
        bint is_ignore = errors == "ignore"
        bint is_coerce = errors == "coerce"
        bint iso_format = format_is_iso(fmt)
        NPY_DATETIMEUNIT creso, out_bestunit, item_reso
        int out_local = 0, out_tzoffset = 0
        bint string_to_dts_succeeded = 0

    assert is_raise or is_ignore or is_coerce

    item_reso = NPY_DATETIMEUNIT.NPY_FR_GENERIC
    format_regex, locale_time = _get_format_regex(fmt)

    result = np.empty(n, dtype=object)

    dts.us = dts.ps = dts.as = 0

    for i in range(n):
        val = values[i]
        try:
            if isinstance(val, str):
                if len(val) == 0 or val in nat_strings:
                    result[i] = NaT
                    continue
            elif checknull_with_nat_and_na(val):
                result[i] = NaT
                continue
            elif PyDateTime_Check(val):
                result[i] = Timestamp(val)
                continue
            elif PyDate_Check(val):
                result[i] = Timestamp(val)
                continue
            elif cnp.is_datetime64_object(val):
                result[i] = Timestamp(val)
                continue
            elif (
                    (is_integer_object(val) or is_float_object(val))
                    and (val != val or val == NPY_NAT)
            ):
                # NaN (val != val) or the NPY_NAT integer sentinel
                result[i] = NaT
                continue
            else:
                # Anything else is stringified and parsed like a string
                val = str(val)

            if fmt == "ISO8601":
                string_to_dts_succeeded = not string_to_dts(
                    val, &dts, &out_bestunit, &out_local,
                    &out_tzoffset, False, None, False
                )
            elif iso_format:
                string_to_dts_succeeded = not string_to_dts(
                    val, &dts, &out_bestunit, &out_local,
                    &out_tzoffset, False, fmt, exact
                )
            if string_to_dts_succeeded:
                # No error reported by string_to_dts, pick back up
                # where we left off
                creso = get_supported_reso(out_bestunit)
                try:
                    value = npy_datetimestruct_to_datetime(creso, &dts)
                except OverflowError as err:
                    # Name the resolution actually used, as array_strptime
                    # does, instead of always claiming "nanosecond".
                    attrname = npy_unit_to_attrname.get(creso, "nanosecond")
                    raise OutOfBoundsDatetime(
                        f"Out of bounds {attrname} timestamp: {val}"
                    ) from err
                if out_local == 1:
                    # string_to_dts reported an explicit offset, in minutes
                    tz = timezone(timedelta(minutes=out_tzoffset))
                    value = tz_localize_to_utc_single(
                        value, tz, ambiguous="raise", nonexistent=None, creso=creso
                    )
                else:
                    tz = None
                ts = Timestamp._from_value_and_reso(value, creso, tz)
                result[i] = ts
                continue

            if parse_today_now(val, &iresult, utc, NPY_FR_ns):
                # iresult is not used here; the Timestamp is rebuilt from
                # the original string.
                result[i] = Timestamp(val)
                continue

            # Some ISO formats can't be parsed by string_to_dts
            # For example, 6-digit YYYYMD. So, if there's an error, and a format
            # was specified, then try the string-matching code below. If the format
            # specified was 'ISO8601', then we need to error, because
            # only string_to_dts handles mixed ISO8601 formats.
            if not string_to_dts_succeeded and fmt == "ISO8601":
                raise ValueError(f"Time data {val} is not ISO8601 format")

            tz = _parse_with_format(
                val, fmt, exact, format_regex, locale_time, &dts, &item_reso
            )
            try:
                iresult = npy_datetimestruct_to_datetime(item_reso, &dts)
            except OverflowError as err:
                # Same unit-aware message as the string_to_dts path above.
                attrname = npy_unit_to_attrname.get(item_reso, "nanosecond")
                raise OutOfBoundsDatetime(
                    f"Out of bounds {attrname} timestamp: {val}"
                ) from err
            if tz is not None:
                iresult = tz_localize_to_utc_single(
                    iresult, tz, ambiguous="raise", nonexistent=None, creso=item_reso
                )
            ts = Timestamp._from_value_and_reso(iresult, item_reso, tz)
            result[i] = ts

        except (ValueError, OutOfBoundsDatetime) as ex:
            # Append the failing position and usage hints to the message
            ex.args = (
                f"{str(ex)}, at position {i}. You might want to try:\n"
                "    - passing `format` if your strings have a consistent format;\n"
                "    - passing `format='ISO8601'` if your strings are "
                "all ISO8601 but not necessarily in exactly the same format;\n"
                "    - passing `format='mixed'`, and the format will be "
                "inferred for each element individually. "
                "You might want to use `dayfirst` alongside this.",
            )
            if is_coerce:
                result[i] = NaT
                continue
            elif is_raise:
                raise
            # errors="ignore": hand the input back untouched
            return values

    import warnings

    from pandas.util._exceptions import find_stack_level
    warnings.warn(
        "In a future version of pandas, parsing datetimes with mixed time "
        "zones will raise an error unless `utc=True`. Please specify `utc=True` "
        "to opt in to the new behaviour and silence this warning. "
        "To create a `Series` with mixed offsets and `object` dtype, "
        "please use `apply` and `datetime.datetime.strptime`",
        FutureWarning,
        stacklevel=find_stack_level(),
    )

    return result
943+
944+
740945class TimeRE (_TimeRE ):
741946 """
742947 Handle conversion from format directives to regexes.
0 commit comments