Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# Change Log

## 0.27.1 - 2025-12-18

- Performance improvement: Skipped UTF-8 validation for map keys during
deserialization. This significantly speeds up full record decoding by
treating keys as raw bytes when matching against struct fields.
- Performance improvement: Optimized tree traversal by reducing bounds checks
during node reading.

## 0.27.0 - 2025-11-28

This release includes significant API changes. See [UPGRADING.md](UPGRADING.md)
Expand Down
30 changes: 25 additions & 5 deletions src/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -446,19 +446,26 @@ impl<'de> Decoder<'de> {
}
}

/// Reads a string directly, following pointers if needed.
pub(crate) fn read_string(&mut self) -> DecodeResult<&'de str> {
/// Reads a string's bytes directly, following pointers if needed.
/// Does NOT validate UTF-8.
pub(crate) fn read_str_as_bytes(&mut self) -> DecodeResult<&'de [u8]> {
let (size, type_num) = self.size_and_type();
if type_num == TYPE_POINTER {
// Pointer
let new_ptr = self.decode_pointer(size);
let saved_ptr = self.current_ptr;
self.current_ptr = new_ptr;
let result = self.read_string();
let result = self.read_str_as_bytes();
self.current_ptr = saved_ptr;
result
} else if type_num == TYPE_STRING {
self.decode_string(size)
let new_offset = self.current_ptr + size;
if new_offset > self.buf.len() {
return Err(self.invalid_db_error("string length exceeds buffer"));
}
let bytes = &self.buf[self.current_ptr..new_offset];
self.current_ptr = new_offset;
Ok(bytes)
} else {
Err(self.invalid_db_error(&format!("expected string, got type {type_num}")))
}
Expand Down Expand Up @@ -595,10 +602,23 @@ impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> {
visitor.visit_enum(EnumAccessor { de: self })
}

/// Deserializes a struct-field or enum-variant identifier.
///
/// When the next value's type is `TYPE_STRING`, the key is read with
/// `read_str_as_bytes` and handed to the visitor through
/// `visit_borrowed_bytes`, so the visitor matches field names against raw
/// bytes rather than a `&str`. Every other type is dispatched through
/// `decode_any`.
///
/// # Errors
///
/// Propagates any error from `peek_type`, `read_str_as_bytes`, `decode_any`,
/// or the visitor itself.
fn deserialize_identifier<V>(self, visitor: V) -> DecodeResult<V::Value>
where
    V: Visitor<'de>,
{
    let (_, kind) = self.peek_type()?;

    // Anything that is not a plain string takes the generic decoding path.
    if kind != TYPE_STRING {
        return self.decode_any(visitor);
    }

    // Fast path: give the visitor the borrowed key bytes directly.
    let raw_key = self.read_str_as_bytes()?;
    visitor.visit_borrowed_bytes(raw_key)
}

forward_to_deserialize_any! {
bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
bytes byte_buf unit unit_struct newtype_struct seq tuple
tuple_struct map struct identifier
tuple_struct map struct
}
Comment on lines +605 to 622
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Check if there are any custom Visitor implementations in the codebase that might be affected
rg -n "impl.*Visitor" --type rust -A 10 | rg -B5 "visit_str|visit_borrowed_str" | head -100

# Check for any explicit deserialize_identifier calls or custom identifier handling
rg -n "deserialize_identifier|visit_identifier" --type rust

# Also check the actual decoder.rs implementation around the mentioned lines
cat -n src/decoder.rs | sed -n '600,625p'

Repository: oschwald/maxminddb-rust

Length of output: 1059


deserialize_identifier should call visit_str instead of visit_borrowed_bytes.

Serde's recommended pattern for deserialize_identifier is to forward to deserialize_str, not to invoke byte visitors. While derived Deserialize implementations can handle both string and bytes methods for identifiers, this approach creates a compatibility risk. Custom Visitor implementations that only implement the string-based visit methods (visit_str / visit_borrowed_str) will fail at runtime with an "invalid type" deserialization error when the deserializer calls visit_borrowed_bytes, because the Visitor trait's default byte methods reject the input.

🤖 Prompt for AI Agents
In src/decoder.rs around lines 605–622, deserialize_identifier currently calls
visitor.visit_borrowed_bytes for string types which can break Visitors that only
implement string-based identifier methods; change this to forward to the
string-based visitor: either delegate to the deserializer's string path (e.g.,
call deserialize_str(visitor) / the equivalent method) or convert the read bytes
to &str (validate UTF-8 and return a decode error on invalid UTF-8) and call
visitor.visit_str (or visit_borrowed_str if appropriate) instead of
visit_borrowed_bytes so identifier deserialization uses the string visitor API.

}

Expand Down
21 changes: 11 additions & 10 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,9 +450,8 @@ impl<'de, S: AsRef<[u8]>> Reader<S> {
let val = match self.metadata.record_size {
24 => {
let offset = base_offset + index * 3;
(buf[offset] as usize) << 16
| (buf[offset + 1] as usize) << 8
| buf[offset + 2] as usize
let bytes = &buf[offset..offset + 3];
(bytes[0] as usize) << 16 | (bytes[1] as usize) << 8 | bytes[2] as usize
}
28 => {
let middle = if index != 0 {
Expand All @@ -461,17 +460,19 @@ impl<'de, S: AsRef<[u8]>> Reader<S> {
(buf[base_offset + 3] & 0xF0) >> 4
};
let offset = base_offset + index * 4;
let bytes = &buf[offset..offset + 3];
(middle as usize) << 24
| (buf[offset] as usize) << 16
| (buf[offset + 1] as usize) << 8
| buf[offset + 2] as usize
| (bytes[0] as usize) << 16
| (bytes[1] as usize) << 8
| bytes[2] as usize
}
32 => {
let offset = base_offset + index * 4;
(buf[offset] as usize) << 24
| (buf[offset + 1] as usize) << 16
| (buf[offset + 2] as usize) << 8
| buf[offset + 3] as usize
let bytes = &buf[offset..offset + 4];
(bytes[0] as usize) << 24
| (bytes[1] as usize) << 16
| (bytes[2] as usize) << 8
| bytes[3] as usize
}
s => {
return Err(MaxMindDbError::invalid_database(format!(
Expand Down
1 change: 1 addition & 0 deletions src/reader_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,7 @@ fn test_ignored_any() {
let _ = env_logger::try_init();

// Struct that only reads some fields, ignoring others via IgnoredAny
#[allow(dead_code)]
#[derive(Deserialize, Debug)]
struct PartialRead {
utf8_string: String,
Expand Down
5 changes: 3 additions & 2 deletions src/result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,10 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> {
let size = decoder.consume_map_header().map_err(with_path)?;

let mut found = false;
let key_bytes = key.as_bytes();
for _ in 0..size {
let k = decoder.read_string().map_err(with_path)?;
if k == key {
let k = decoder.read_str_as_bytes().map_err(with_path)?;
if k == key_bytes {
found = true;
break;
} else {
Expand Down