diff --git a/Cargo.lock b/Cargo.lock index bc2f4d3636..0806a1f072 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4071,6 +4071,7 @@ dependencies = [ "bytes", "chrono", "cidr", + "criterion", "database-utils", "eui48", "futures", diff --git a/psql-srv/Cargo.toml b/psql-srv/Cargo.toml index e3bdc122dc..87f30e4069 100644 --- a/psql-srv/Cargo.toml +++ b/psql-srv/Cargo.toml @@ -47,3 +47,8 @@ tokio-test = "0.4.1" database-utils = { path = "../database-utils" } readyset-tracing = { path = "../readyset-tracing" } postgres-native-tls = { workspace = true } +criterion = { workspace = true, features = ["html_reports"] } + +[[bench]] +name = "parse" +harness = false diff --git a/psql-srv/benches/parse.rs b/psql-srv/benches/parse.rs new file mode 100644 index 0000000000..d27259ec79 --- /dev/null +++ b/psql-srv/benches/parse.rs @@ -0,0 +1,59 @@ +use std::str::FromStr; + +use chrono::{DateTime, FixedOffset, NaiveDateTime, TimeZone}; +use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion}; + +const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.f"; +const TIMESTAMP_FORMAT_PGJDBC: &str = "%Y-%m-%d %H:%M:%S%.f %#z"; + +/// Benches `NaiveDateTime::parse_from_str()`, letting it fail, and reparsing +/// via `DateTime::parse_from_str()` with a "PGJDBC-formatted" timestamp string. +fn timestamp_text_parse_reparse(c: &mut Criterion) { + let run_benchmark = |b: &mut Bencher| { + b.iter(|| { + { + let input = "2024-02-13 18:35:23.176-08"; + let res = NaiveDateTime::parse_from_str(input, TIMESTAMP_FORMAT); + if res.is_err() { + let _ = DateTime::::parse_from_str(input, TIMESTAMP_FORMAT_PGJDBC) + .unwrap(); + } + }; + black_box(()) + }) + }; + + c.benchmark_group("timestamp_text_reparse") + .bench_function("reparse", run_benchmark); +} + +/// Benches `NaiveDateTime::parse_and_remainder()`, and passing the `remainder` to +/// `FixedOffset::from_str()` - modifying the `remainder` to be "timezone formatted" +/// as PGJDBC drops the minutes from the UTC offset (for most timezones). +fn timestamp_text_parse_offsets(c: &mut Criterion) { + let run_benchmark = |b: &mut Bencher| { + b.iter(|| { + { + let input = "2024-02-13 18:35:23.176-08"; + let (datetime, timezone_tag) = + NaiveDateTime::parse_and_remainder(input, TIMESTAMP_FORMAT).unwrap(); + + let mut pgjdbc_tz_tag = timezone_tag.to_owned(); + pgjdbc_tz_tag.push_str(String::from("00").as_str()); + let offset = FixedOffset::from_str(&pgjdbc_tz_tag).unwrap(); + let _ = offset.from_utc_datetime(&datetime); + }; + black_box(()) + }) + }; + + c.benchmark_group("timestamp_text_parse") + .bench_function("offsets", run_benchmark); +} + +criterion_group!( + benches, + timestamp_text_parse_offsets, + timestamp_text_parse_reparse +); +criterion_main!(benches); diff --git a/psql-srv/src/codec/decoder.rs b/psql-srv/src/codec/decoder.rs index 0dc656a6eb..f8a58792a0 100644 --- a/psql-srv/src/codec/decoder.rs +++ b/psql-srv/src/codec/decoder.rs @@ -4,7 +4,7 @@ use std::str; use bit_vec::BitVec; use bytes::{Buf, Bytes, BytesMut}; -use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime}; +use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeZone}; use cidr::IpInet; use eui48::MacAddress; use postgres_types::{FromSql, Kind, Type}; @@ -52,7 +52,6 @@ const HEADER_LENGTH: usize = 5; const LENGTH_NULL_SENTINEL: i32 = -1; const NUL_BYTE: u8 = b'\0'; const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.f"; -const TIMESTAMP_TZ_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.f %:z"; impl Decoder for Codec { type Item = FrontendMessage; @@ -439,14 +438,58 @@ fn get_text_value(src: &mut Bytes, t: &Type) -> Result { Type::TIMESTAMP => { // TODO: Does not correctly handle all valid timestamp representations. For example, // 8601/SQL timestamp format is assumed; infinity/-infinity are not supported. - Ok(PsqlValue::Timestamp(NaiveDateTime::parse_from_str( - text_str, - TIMESTAMP_FORMAT, - )?)) + let (datetime, timezone_tag) = + NaiveDateTime::parse_and_remainder(text_str, TIMESTAMP_FORMAT)?; + if timezone_tag.is_empty() { + return Ok(PsqlValue::Timestamp(datetime)); + } + + // hack for PGJDBC: pgjdbc always sends the UTC offset at the end of the Timestamp + // field string, but it may not contain the minutes information: for + // example, "-08" for UTC-08 (Pacific time zone); see + // pgjdbc/src/main/java/org/postgresql/jdbc/PgPreparedStatement.java# + // setTimestamp() for detailed commentary. First, though, parse for an offset, + // in the standard "%z" format (eg '-0800'), which must include + // minutes. we can use FixedOffset's native parse for this. + let timezone_tag = timezone_tag.trim_start(); + let offset_parse = FixedOffset::from_str(timezone_tag); + if let Ok(offset) = offset_parse { + return Ok(PsqlValue::Timestamp( + offset.from_utc_datetime(&datetime).naive_utc(), + )); + } + + // To support an offset without minutes, as pgjdbc truncates the minutes value + // if it's "00" (no minute offset from the hour offset), blindly append two zeros + // to the end of the timezone tag to see if we get lucky parsing it :homer-cry: + let mut pgjdbc_tz_tag = timezone_tag.to_owned(); + pgjdbc_tz_tag.push_str(String::from("00").as_str()); + let offset = FixedOffset::from_str(&pgjdbc_tz_tag)?; + Ok(PsqlValue::Timestamp( + offset.from_utc_datetime(&datetime).naive_utc(), + )) + } + Type::TIMESTAMPTZ => { + // Note: read commentary in Type::TIMESTAMP above for context around pgjdbc support. + let (datetime, timezone_tag) = + NaiveDateTime::parse_and_remainder(text_str, TIMESTAMP_FORMAT)?; + + let timezone_tag = timezone_tag.trim_start(); + let offset_parse = FixedOffset::from_str(timezone_tag); + if let Ok(offset) = offset_parse { + return Ok(PsqlValue::TimestampTz( + offset.from_local_datetime(&datetime).single().unwrap(), + )); + } + + // To support an offset without minutes, as pgjdbc truncates the minutes value + // if it's "00" (no minute offset from hout offset), blindly append two zeros + // to the end of the timezone tag to see if we get lucky parsing it :homer-cry: + let mut pgjdbc_tz_tag = timezone_tag.to_owned(); + pgjdbc_tz_tag.push_str(String::from("00").as_str()); + let offset = FixedOffset::from_str(&pgjdbc_tz_tag)?; + Ok(PsqlValue::TimestampTz(offset.from_utc_datetime(&datetime))) } - Type::TIMESTAMPTZ => Ok(PsqlValue::TimestampTz( - DateTime::::parse_from_str(text_str, TIMESTAMP_TZ_FORMAT)?, - )), Type::BYTEA => { let bytes = hex::decode(text_str).map_err(InvalidTextByteArrayValue)?; Ok(PsqlValue::ByteArray(bytes)) @@ -1412,6 +1455,42 @@ mod tests { ); } + #[test] + fn test_decode_text_timestamp_with_offset() { + let expected = + FixedOffset::east_opt(18000) + .unwrap() + .from_utc_datetime(&NaiveDateTime::new( + NaiveDate::from_ymd_opt(2020, 1, 2).unwrap(), + NaiveTime::from_hms_milli_opt(3, 4, 5, 660).unwrap(), + )); + let mut buf = BytesMut::new(); + buf.put_i32(27); // size + buf.extend_from_slice(b"2020-01-02 03:04:05.66-0800"); // value + assert_eq!( + get_text_value(&mut buf.freeze(), &Type::TIMESTAMP).unwrap(), + PsqlValue::Timestamp(expected.naive_utc()) + ); + } + + #[test] + fn test_decode_text_timestamp_pgjdbc_offset() { + let expected = + FixedOffset::east_opt(18000) + .unwrap() + .from_utc_datetime(&NaiveDateTime::new( + NaiveDate::from_ymd_opt(2020, 1, 2).unwrap(), + NaiveTime::from_hms_milli_opt(3, 4, 5, 660).unwrap(), + )); + let mut buf = BytesMut::new(); + buf.put_i32(25); // size + buf.extend_from_slice(b"2020-01-02 03:04:05.66-08"); // value + assert_eq!( + get_text_value(&mut buf.freeze(), &Type::TIMESTAMP).unwrap(), + PsqlValue::Timestamp(expected.naive_utc()) + ); + } + #[test] fn test_decode_text_bytes() { let mut buf = BytesMut::new(); @@ -1504,6 +1583,24 @@ mod tests { ); } + #[test] + fn test_decode_text_timestamptz_pgjdbc_offset() { + let expected = + FixedOffset::east_opt(18000) + .unwrap() + .from_utc_datetime(&NaiveDateTime::new( + NaiveDate::from_ymd_opt(2020, 1, 2).unwrap(), + NaiveTime::from_hms_milli_opt(3, 4, 5, 660).unwrap(), + )); + let mut buf = BytesMut::new(); + buf.put_i32(25); // size + buf.extend_from_slice(b"2020-01-02 03:04:05.66-08"); // value + assert_eq!( + get_text_value(&mut buf.freeze(), &Type::TIMESTAMPTZ).unwrap(), + PsqlValue::TimestampTz(expected) + ); + } + #[test] fn test_decode_text_array_int() { let array_string = "{1,2,32767}";