Skip to content

Commit

Permalink
feat(rust, python): auto-infer fmt for tz-aware date strings (#7405)
Browse files — browse the repository at this point in the history
Co-authored-by: MarcoGorelli <>
  • Loading branch information
MarcoGorelli committed Apr 9, 2023
1 parent 68c6ea8 commit f3b6c14
Show file tree
Hide file tree
Showing 12 changed files with 289 additions and 84 deletions.
6 changes: 3 additions & 3 deletions polars/polars-arrow/src/kernels/time.rs
Expand Up @@ -90,18 +90,18 @@ pub fn replace_timezone(
Ok(to_tz) => convert_to_timestamp(from_tz, to_tz, arr, tu)?,
Err(_) => match parse_offset(to) {
Ok(to_tz) => convert_to_timestamp(from_tz, to_tz, arr, tu)?,
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: {}", to),
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: '{}'", to),
},
},
Err(_) => match parse_offset(from) {
Ok(from_tz) => match to.parse::<chrono_tz::Tz>() {
Ok(to_tz) => convert_to_timestamp(from_tz, to_tz, arr, tu)?,
Err(_) => match parse_offset(to) {
Ok(to_tz) => convert_to_timestamp(from_tz, to_tz, arr, tu)?,
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: {}", to),
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: '{}'", to),
},
},
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: {}", from),
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: '{}'", from),
},
})
}
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/temporal/datetime.rs
Expand Up @@ -26,7 +26,7 @@ fn validate_time_zone(tz: TimeZone) -> PolarsResult<()> {
Ok(_) => Ok(()),
Err(_) => match tz.parse::<Tz>() {
Ok(_) => Ok(()),
Err(_) => polars_bail!(ComputeError: "unable to parse timezone: '{}'", tz),
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: '{}'", tz),
},
}
}
Expand Down
24 changes: 13 additions & 11 deletions polars/polars-io/src/csv/buffer.rs
Expand Up @@ -428,18 +428,20 @@ where
buf.builder.append_null();
Ok(())
}
Some(pattern) => match DatetimeInfer::<T::Native>::try_from(pattern) {
Ok(mut infer) => {
let parsed = infer.parse(val);
buf.compiled = Some(infer);
buf.builder.append_option(parsed);
Ok(())
}
Err(_) => {
buf.builder.append_null();
Ok(())
Some(pattern_with_offset) => {
match DatetimeInfer::<T::Native>::try_from(pattern_with_offset.pattern) {
Ok(mut infer) => {
let parsed = infer.parse(val, pattern_with_offset.offset);
buf.compiled = Some(infer);
buf.builder.append_option(parsed);
Ok(())
}
Err(_) => {
buf.builder.append_null();
Ok(())
}
}
},
}
}
}

Expand Down
22 changes: 14 additions & 8 deletions polars/polars-io/src/csv/utils.rs
Expand Up @@ -110,10 +110,13 @@ fn infer_field_schema(string: &str, try_parse_dates: bool) -> DataType {
#[cfg(feature = "polars-time")]
{
match date_infer::infer_pattern_single(&string[1..string.len() - 1]) {
Some(Pattern::DatetimeYMD | Pattern::DatetimeDMY) => {
DataType::Datetime(TimeUnit::Microseconds, None)
}
Some(Pattern::DateYMD | Pattern::DateDMY) => DataType::Date,
Some(pattern_with_offset) => match pattern_with_offset.pattern {
Pattern::DatetimeYMD | Pattern::DatetimeDMY => {
DataType::Datetime(TimeUnit::Microseconds, None)
}
Pattern::DateYMD | Pattern::DateDMY => DataType::Date,
_ => DataType::Utf8, // TODO: support tz-aware patterns
},
None => DataType::Utf8,
}
}
Expand All @@ -136,10 +139,13 @@ fn infer_field_schema(string: &str, try_parse_dates: bool) -> DataType {
#[cfg(feature = "polars-time")]
{
match date_infer::infer_pattern_single(string) {
Some(Pattern::DatetimeYMD | Pattern::DatetimeDMY) => {
DataType::Datetime(TimeUnit::Microseconds, None)
}
Some(Pattern::DateYMD | Pattern::DateDMY) => DataType::Date,
Some(pattern_with_offset) => match pattern_with_offset.pattern {
Pattern::DatetimeYMD | Pattern::DatetimeDMY => {
DataType::Datetime(TimeUnit::Microseconds, None)
}
Pattern::DateYMD | Pattern::DateDMY => DataType::Date,
_ => DataType::Utf8, // TODO: support tz-aware patterns
},
None => DataType::Utf8,
}
}
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-io/src/ndjson_core/buffer.rs
Expand Up @@ -154,9 +154,9 @@ where
Value::String(s) => s,
_ => return None,
};
infer_pattern_single(val).and_then(|pattern| {
match DatetimeInfer::<T::Native>::try_from(pattern) {
Ok(mut infer) => infer.parse(val),
infer_pattern_single(val).and_then(|pattern_with_offset| {
match DatetimeInfer::<T::Native>::try_from(pattern_with_offset.pattern) {
Ok(mut infer) => infer.parse(val, pattern_with_offset.offset),
Err(_) => None,
}
})
Expand Down
Expand Up @@ -338,20 +338,18 @@ pub(super) fn strptime(s: &Series, options: &StrpTimeOptions) -> PolarsResult<Se
}
}
DataType::Datetime(tu, tz) => {
let tz = match (tz, tz_aware, options.utc) {
(Some(tz), false, false) => Some(tz.clone()),
match (tz, tz_aware, options.utc) {
(Some(_), true, _) => polars_bail!(
ComputeError:
"cannot use strptime with both a tz-aware format and a tz-aware dtype, \
please drop time zone from the dtype"
),
(Some(_), _, true) => polars_bail!(
ComputeError:
"cannot use strptime with both 'utc=True' and tz-aware datetime, \
"cannot use strptime with both 'utc=True' and tz-aware dtype, \
please drop time zone from the dtype"
),
(None, _, true) => Some("UTC".to_string()),
(None, _, false) => None,
_ => (),
};
if options.exact {
ca.as_datetime(
Expand Down

0 comments on commit f3b6c14

Please sign in to comment.