Skip to content

Commit

Permalink
psql-srv: Support pgjdbc Timestamp
Browse files Browse the repository at this point in the history
During `BIND` for prepared statements, pgjdbc sends
`javax.sql.Timestamp`s with timezone data (UTC offset), even if the
target data type is `timestamp` (not `timestamptz`). According to
the PG docs [0], the timezone is optional, but Readyset errors if
there is timezone information in the string that is sent.
Interestingly, pgjdbc will send along only the hour offset,
truncating the minutes data, if there's no minutes offset for the
timezone (as is the case for most timezones). Note that this
"appending timezone data" applies to the text format transfer as
pgjdbc only transfers timestamps in the text format.

This patch extends the existing timestamp and timestamptz parsing by
checking for any trailing text in the input timestamp string, and
attempts to parse it into a timezone offset.

Also included is a benchmark that compares parsing the timezone
offset versus reparsing the entire input string with a "pgjdbc-
friendly" format string. Parsing just for the offset is 40% faster
than reparsing the entire string:

timestamp_text_parse/offsets
  time:   [281.25 ns 282.11 ns 283.06 ns]
timestamp_text_reparse/reparse
  time:   [485.31 ns 486.00 ns 486.81 ns]

[0] https://www.postgresql.org/docs/current/datatype-datetime.html

Fixes: REA-2662
Release-Note-Core: Support timestamps in prepared statements from
  the Java PG driver, pgjdbc.
Change-Id: Ie861d63d1c0ec8d13778857c18204c86585d834d
Reviewed-on: https://gerrit.readyset.name/c/readyset/+/6932
Tested-by: Buildkite CI
Reviewed-by: Ethan Donowitz <ethan@readyset.io>
  • Loading branch information
jasobrown-rs committed Mar 1, 2024
1 parent a11beff commit 7a14b00
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 9 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions psql-srv/Cargo.toml
Expand Up @@ -47,3 +47,8 @@ tokio-test = "0.4.1"
database-utils = { path = "../database-utils" }
readyset-tracing = { path = "../readyset-tracing" }
postgres-native-tls = { workspace = true }
criterion = { workspace = true, features = ["html_reports"] }

[[bench]]
name = "parse"
harness = false
59 changes: 59 additions & 0 deletions psql-srv/benches/parse.rs
@@ -0,0 +1,59 @@
use std::str::FromStr;

use chrono::{DateTime, FixedOffset, NaiveDateTime, TimeZone};
use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion};

const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.f";
const TIMESTAMP_FORMAT_PGJDBC: &str = "%Y-%m-%d %H:%M:%S%.f %#z";

/// Benches `NaiveDateTime::parse_from_str()`, letting it fail, and reparsing
/// via `DateTime::parse_from_str()` with a "PGJDBC-formatted" timestamp string.
fn timestamp_text_parse_reparse(c: &mut Criterion) {
let run_benchmark = |b: &mut Bencher| {
b.iter(|| {
{
let input = "2024-02-13 18:35:23.176-08";
let res = NaiveDateTime::parse_from_str(input, TIMESTAMP_FORMAT);
if res.is_err() {
let _ = DateTime::<FixedOffset>::parse_from_str(input, TIMESTAMP_FORMAT_PGJDBC)
.unwrap();
}
};
black_box(())
})
};

c.benchmark_group("timestamp_text_reparse")
.bench_function("reparse", run_benchmark);
}

/// Benches `NaiveDateTime::parse_and_remainder()`, and passing the `remainder` to
/// `FixedOffset::from_str()` - modifying the `remainder` to be "timezone formatted"
/// as PGJDBC drops the minutes from the UTC offset (for most timezones).
fn timestamp_text_parse_offsets(c: &mut Criterion) {
let run_benchmark = |b: &mut Bencher| {
b.iter(|| {
{
let input = "2024-02-13 18:35:23.176-08";
let (datetime, timezone_tag) =
NaiveDateTime::parse_and_remainder(input, TIMESTAMP_FORMAT).unwrap();

let mut pgjdbc_tz_tag = timezone_tag.to_owned();
pgjdbc_tz_tag.push_str(String::from("00").as_str());
let offset = FixedOffset::from_str(&pgjdbc_tz_tag).unwrap();
let _ = offset.from_utc_datetime(&datetime);
};
black_box(())
})
};

c.benchmark_group("timestamp_text_parse")
.bench_function("offsets", run_benchmark);
}

criterion_group!(
benches,
timestamp_text_parse_offsets,
timestamp_text_parse_reparse
);
criterion_main!(benches);
115 changes: 106 additions & 9 deletions psql-srv/src/codec/decoder.rs
Expand Up @@ -4,7 +4,7 @@ use std::str;

use bit_vec::BitVec;
use bytes::{Buf, Bytes, BytesMut};
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, TimeZone};
use cidr::IpInet;
use eui48::MacAddress;
use postgres_types::{FromSql, Kind, Type};
Expand Down Expand Up @@ -52,7 +52,6 @@ const HEADER_LENGTH: usize = 5;
const LENGTH_NULL_SENTINEL: i32 = -1;
const NUL_BYTE: u8 = b'\0';
const TIMESTAMP_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.f";
const TIMESTAMP_TZ_FORMAT: &str = "%Y-%m-%d %H:%M:%S%.f %:z";

impl Decoder for Codec {
type Item = FrontendMessage;
Expand Down Expand Up @@ -439,14 +438,58 @@ fn get_text_value(src: &mut Bytes, t: &Type) -> Result<PsqlValue, Error> {
Type::TIMESTAMP => {
// TODO: Does not correctly handle all valid timestamp representations. For example,
// 8601/SQL timestamp format is assumed; infinity/-infinity are not supported.
Ok(PsqlValue::Timestamp(NaiveDateTime::parse_from_str(
text_str,
TIMESTAMP_FORMAT,
)?))
let (datetime, timezone_tag) =
NaiveDateTime::parse_and_remainder(text_str, TIMESTAMP_FORMAT)?;
if timezone_tag.is_empty() {
return Ok(PsqlValue::Timestamp(datetime));
}

// hack for PGJDBC: pgjdbc always sends the UTC offset at the end of the Timestamp
// field string, but it may not contain the minutes information: for
// example, "-08" for UTC-08 (Pacific time zone); see
// pgjdbc/src/main/java/org/postgresql/jdbc/PgPreparedStatement.java#
// setTimestamp() for detailed commentary. First, though, parse for an offset,
// in the standard "%z" format (eg '-0800'), which must include
// minutes. we can use FixedOffset's native parse for this.
let timezone_tag = timezone_tag.trim_start();
let offset_parse = FixedOffset::from_str(timezone_tag);
if let Ok(offset) = offset_parse {
return Ok(PsqlValue::Timestamp(
offset.from_utc_datetime(&datetime).naive_utc(),
));
}

// To support an offset without minutes, as pgjdbc truncates the minutes value
// if it's "00" (no minute offset from the hour offset), blindly append two zeros
// to the end of the timezone tag to see if we get lucky parsing it :homer-cry:
let mut pgjdbc_tz_tag = timezone_tag.to_owned();
pgjdbc_tz_tag.push_str(String::from("00").as_str());
let offset = FixedOffset::from_str(&pgjdbc_tz_tag)?;
Ok(PsqlValue::Timestamp(
offset.from_utc_datetime(&datetime).naive_utc(),
))
}
Type::TIMESTAMPTZ => {
// Note: read commentary in Type::TIMESTAMP above for context around pgjdbc support.
let (datetime, timezone_tag) =
NaiveDateTime::parse_and_remainder(text_str, TIMESTAMP_FORMAT)?;

let timezone_tag = timezone_tag.trim_start();
let offset_parse = FixedOffset::from_str(timezone_tag);
if let Ok(offset) = offset_parse {
return Ok(PsqlValue::TimestampTz(
offset.from_local_datetime(&datetime).single().unwrap(),
));
}

// To support an offset without minutes, as pgjdbc truncates the minutes value
// if it's "00" (no minute offset from hout offset), blindly append two zeros
// to the end of the timezone tag to see if we get lucky parsing it :homer-cry:
let mut pgjdbc_tz_tag = timezone_tag.to_owned();
pgjdbc_tz_tag.push_str(String::from("00").as_str());
let offset = FixedOffset::from_str(&pgjdbc_tz_tag)?;
Ok(PsqlValue::TimestampTz(offset.from_utc_datetime(&datetime)))
}
Type::TIMESTAMPTZ => Ok(PsqlValue::TimestampTz(
DateTime::<FixedOffset>::parse_from_str(text_str, TIMESTAMP_TZ_FORMAT)?,
)),
Type::BYTEA => {
let bytes = hex::decode(text_str).map_err(InvalidTextByteArrayValue)?;
Ok(PsqlValue::ByteArray(bytes))
Expand Down Expand Up @@ -1412,6 +1455,42 @@ mod tests {
);
}

#[test]
fn test_decode_text_timestamp_with_offset() {
let expected =
FixedOffset::east_opt(18000)
.unwrap()
.from_utc_datetime(&NaiveDateTime::new(
NaiveDate::from_ymd_opt(2020, 1, 2).unwrap(),
NaiveTime::from_hms_milli_opt(3, 4, 5, 660).unwrap(),
));
let mut buf = BytesMut::new();
buf.put_i32(27); // size
buf.extend_from_slice(b"2020-01-02 03:04:05.66-0800"); // value
assert_eq!(
get_text_value(&mut buf.freeze(), &Type::TIMESTAMP).unwrap(),
PsqlValue::Timestamp(expected.naive_utc())
);
}

#[test]
fn test_decode_text_timestamp_pgjdbc_offset() {
let expected =
FixedOffset::east_opt(18000)
.unwrap()
.from_utc_datetime(&NaiveDateTime::new(
NaiveDate::from_ymd_opt(2020, 1, 2).unwrap(),
NaiveTime::from_hms_milli_opt(3, 4, 5, 660).unwrap(),
));
let mut buf = BytesMut::new();
buf.put_i32(25); // size
buf.extend_from_slice(b"2020-01-02 03:04:05.66-08"); // value
assert_eq!(
get_text_value(&mut buf.freeze(), &Type::TIMESTAMP).unwrap(),
PsqlValue::Timestamp(expected.naive_utc())
);
}

#[test]
fn test_decode_text_bytes() {
let mut buf = BytesMut::new();
Expand Down Expand Up @@ -1504,6 +1583,24 @@ mod tests {
);
}

#[test]
fn test_decode_text_timestamptz_pgjdbc_offset() {
let expected =
FixedOffset::east_opt(18000)
.unwrap()
.from_utc_datetime(&NaiveDateTime::new(
NaiveDate::from_ymd_opt(2020, 1, 2).unwrap(),
NaiveTime::from_hms_milli_opt(3, 4, 5, 660).unwrap(),
));
let mut buf = BytesMut::new();
buf.put_i32(25); // size
buf.extend_from_slice(b"2020-01-02 03:04:05.66-08"); // value
assert_eq!(
get_text_value(&mut buf.freeze(), &Type::TIMESTAMPTZ).unwrap(),
PsqlValue::TimestampTz(expected)
);
}

#[test]
fn test_decode_text_array_int() {
let array_string = "{1,2,32767}";
Expand Down

0 comments on commit 7a14b00

Please sign in to comment.