From 120bd2cfaed074c91c8ba482b7e9f175103ea438 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 26 Nov 2025 11:25:27 -0800 Subject: [PATCH 01/37] Inline byte manipulation in read_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Directly perform byte-to-usize conversion in read_node instead of calling the separate to_usize function. This makes the byte layout for each record size explicit and removes the now-unused to_usize function from the library. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/lib.rs | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index 959cb7e4..7d81e86f 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -609,21 +609,28 @@ impl<'de, S: AsRef<[u8]>> Reader { let val = match self.metadata.record_size { 24 => { let offset = base_offset + index * 3; - to_usize(0, &buf[offset..offset + 3]) + (buf[offset] as usize) << 16 + | (buf[offset + 1] as usize) << 8 + | buf[offset + 2] as usize } 28 => { - let mut middle = buf[base_offset + 3]; - if index != 0 { - middle &= 0x0F + let middle = if index != 0 { + buf[base_offset + 3] & 0x0F } else { - middle = (0xF0 & middle) >> 4 - } + (buf[base_offset + 3] & 0xF0) >> 4 + }; let offset = base_offset + index * 4; - to_usize(middle, &buf[offset..offset + 3]) + (middle as usize) << 24 + | (buf[offset] as usize) << 16 + | (buf[offset + 1] as usize) << 8 + | buf[offset + 2] as usize } 32 => { let offset = base_offset + index * 4; - to_usize(0, &buf[offset..offset + 4]) + (buf[offset] as usize) << 24 + | (buf[offset + 1] as usize) << 16 + | (buf[offset + 2] as usize) << 8 + | buf[offset + 3] as usize } s => { return Err(MaxMindDbError::InvalidDatabase(format!( @@ -662,15 +669,6 @@ impl<'de, S: AsRef<[u8]>> Reader { } } -// I haven't moved all patterns of this form to a generic function as -// the 
FromPrimitive trait is unstable -#[inline(always)] -fn to_usize(base: u8, bytes: &[u8]) -> usize { - bytes - .iter() - .fold(base as usize, |acc, &b| (acc << 8) | b as usize) -} - #[inline] fn bytes_and_prefix_to_net(bytes: &IpInt, prefix: u8) -> Result { let (ip, prefix) = match bytes { From de1decea5f31f6f978b1da103d491a757d7a1e08 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 26 Nov 2025 11:25:47 -0800 Subject: [PATCH 02/37] Add inline hints to lookup path functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add #[inline] hints to start_node and resolve_data_pointer functions which are called during the lookup hot path. While the compiler likely inlines these already, explicit hints make the optimization intent clear. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index 7d81e86f..57eec663 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -576,6 +576,7 @@ impl<'de, S: AsRef<[u8]>> Reader { } } + #[inline] fn start_node(&self, length: usize) -> usize { if length == 128 { 0 @@ -643,6 +644,7 @@ impl<'de, S: AsRef<[u8]>> Reader { } /// Resolves a pointer from the search tree to an offset in the data section. + #[inline] fn resolve_data_pointer(&self, pointer: usize) -> Result { let resolved = pointer - (self.metadata.node_count as usize) - 16; From 5f9833f045904d600e21c72f2338b0ce50375c96 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 26 Nov 2025 13:59:58 -0800 Subject: [PATCH 03/37] Add LookupResult API with lazy decoding support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BREAKING CHANGE: The lookup() method now returns LookupResult instead of Option. Data is only deserialized when decode() is called. Migration: - Old: reader.lookup::(ip)? 
returns Option<City> - New: reader.lookup(ip)?.decode::<City>()? returns City - Check if found: reader.lookup(ip)?.found() Other breaking changes: - lookup_prefix() removed - use result.network() instead - Within iterator now yields LookupResult instead of WithinItem New features: - LookupResult with found(), network(), offset(), decode(), decode_path(), and decoder() methods - PathElement enum for navigating nested structures with Python-style negative indexing - Low-level Decoder API (Kind, MapReader, ArrayReader) for FFI bindings and custom deserialization 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 26 +++ benches/lookup.rs | 10 +- examples/lookup.rs | 22 +- examples/within.rs | 19 +- src/maxminddb/decoder.rs | 171 ++++++++++++++++ src/maxminddb/geoip2.rs | 8 +- src/maxminddb/lib.rs | 376 ++++++++++++++++++----------------- src/maxminddb/reader_test.rs | 145 ++++++++------ src/maxminddb/result.rs | 364 +++++++++++++++++++++++++++++++++ 9 files changed, 880 insertions(+), 261 deletions(-) create mode 100644 src/maxminddb/result.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a2a75a0..3e9a0595 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Change Log +## 0.27.0 - UNRELEASED + +- **BREAKING CHANGE:** The `lookup` method now returns a `LookupResult` instead + of `Option<T>`. The new API enables lazy decoding - data is only deserialized + when explicitly requested via `decode()`. Migration: + - Old: `reader.lookup::<City>(ip)?` returns `Option<City>` + - New: `reader.lookup(ip)?.decode::<City>()?` returns `City` + - Check if found: `reader.lookup(ip)?.found()` returns `bool` +- **BREAKING CHANGE:** The `lookup_prefix` method has been removed. Use + `reader.lookup(ip)?.network()` to get the network containing the IP. +- **BREAKING CHANGE:** The `Within` iterator now yields `LookupResult` instead + of `WithinItem<T>`. Access the network via `result.network()?` and decode + data via `result.decode::<T>()?`.
+- Added `LookupResult` type with methods: + - `found()` - Check if IP was found in database + - `network()` - Get the network containing the IP + - `offset()` - Get data offset for caching/deduplication + - `decode()` - Deserialize full record using serde + - `decode_path()` - Selectively decode specific fields by path +- Added `PathElement` enum for navigating nested structures: + - `PathElement::Key("name")` - Navigate into map by key + - `PathElement::Index(0)` - Navigate into array by index + - `PathElement::Index(-1)` - Python-style negative indexing +- Added low-level `Decoder` API (`Kind`, `MapReader`, `ArrayReader`) for + FFI bindings and custom deserialization without serde overhead. + ## 0.26.0 - 2025-03-28 - **BREAKING CHANGE:** The `lookup` and `lookup_prefix` methods now return diff --git a/benches/lookup.rs b/benches/lookup.rs index 9a87d619..440fbc33 100644 --- a/benches/lookup.rs +++ b/benches/lookup.rs @@ -32,7 +32,10 @@ where T: AsRef<[u8]>, { for ip in ips.iter() { - let _ = reader.lookup::(*ip); + let result = reader.lookup(*ip).unwrap(); + if result.found() { + let _: geoip2::City = result.decode().unwrap(); + } } } @@ -42,7 +45,10 @@ where T: AsRef<[u8]> + std::marker::Sync, { ips.par_iter().for_each(|ip| { - let _ = reader.lookup::(*ip); + let result = reader.lookup(*ip).unwrap(); + if result.found() { + let _: geoip2::City = result.decode().unwrap(); + } }); } diff --git a/examples/lookup.rs b/examples/lookup.rs index 6b49dead..2241995b 100644 --- a/examples/lookup.rs +++ b/examples/lookup.rs @@ -16,13 +16,21 @@ fn main() -> Result<(), Box> { .parse() .map_err(|e| format!("Invalid IP address '{}': {}", ip_str, e))?; - match reader.lookup::(ip)? 
{ - Some(city) => { - println!("City data for IP {}: {city:#?}", ip); - } - None => { - println!("No city data found for IP {}", ip); - } + let result = reader.lookup(ip)?; + + if result.found() { + let city: geoip2::City = result.decode()?; + println!("City data for IP {}: {city:#?}", ip); + + // Also show the network + let network = result.network()?; + println!("Network: {}", network); + } else { + println!("No city data found for IP {}", ip); + + // Even if not found, we can still show the network + let network = result.network()?; + println!("Network (no data): {}", network); } Ok(()) } diff --git a/examples/within.rs b/examples/within.rs index d0d31d4e..29f61327 100644 --- a/examples/within.rs +++ b/examples/within.rs @@ -17,21 +17,24 @@ fn main() -> Result<(), Box> { .map_err(|e| format!("Invalid CIDR notation '{}': {}", cidr_str, e))?; let mut n = 0; - let iter: Within = reader.within(ip_net)?; + let iter: Within<_> = reader.within(ip_net)?; for next in iter { - let item = next?; - let continent = item.info.continent.and_then(|c| c.code).unwrap_or(""); - let country = item.info.country.and_then(|c| c.iso_code).unwrap_or(""); - let city = match item.info.city.and_then(|c| c.names) { + let lookup = next?; + let network = lookup.network()?; + let info: geoip2::City = lookup.decode()?; + + let continent = info.continent.and_then(|c| c.code).unwrap_or(""); + let country = info.country.and_then(|c| c.iso_code).unwrap_or(""); + let city = match info.city.and_then(|c| c.names) { Some(names) => names.get("en").unwrap_or(&""), None => "", }; if !city.is_empty() { - println!("{} {}-{}-{}", item.ip_net, continent, country, city); + println!("{} {}-{}-{}", network, continent, country, city); } else if !country.is_empty() { - println!("{} {}-{}", item.ip_net, continent, country); + println!("{} {}-{}", network, continent, country); } else if !continent.is_empty() { - println!("{} {}", item.ip_net, continent); + println!("{} {}", network, continent); } n += 1; } diff 
--git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index 745079c8..25f0d461 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -5,6 +5,11 @@ use std::convert::TryInto; use super::MaxMindDbError; +// MaxMind DB type constants (only those actually used) +pub(crate) const TYPE_POINTER: u8 = 1; +pub(crate) const TYPE_MAP: u8 = 7; +pub(crate) const TYPE_ARRAY: u8 = 11; + fn to_usize(base: u8, bytes: &[u8]) -> usize { bytes .iter() @@ -333,6 +338,172 @@ impl<'de> Decoder<'de> { )), } } + + // ========== Navigation methods for path decoding and verification ========== + + /// Peeks at the type and size without consuming it. + /// Returns (size, type_num) and restores the position. + pub(crate) fn peek_type(&mut self) -> DecodeResult<(usize, u8)> { + let saved_ptr = self.current_ptr; + let result = self.size_and_type_following_pointers()?; + self.current_ptr = saved_ptr; + Ok(result) + } + + /// Consumes a map header, returning its size. Follows pointers. + pub(crate) fn consume_map_header(&mut self) -> DecodeResult { + let (size, type_num) = self.size_and_type(); + if type_num == TYPE_POINTER { + let new_ptr = self.decode_pointer(size); + self.current_ptr = new_ptr; + self.consume_map_header() + } else if type_num == TYPE_MAP { + Ok(size) + } else { + Err(MaxMindDbError::Decoding(format!( + "expected map, got type {type_num}" + ))) + } + } + + /// Consumes an array header, returning its size. Follows pointers. + pub(crate) fn consume_array_header(&mut self) -> DecodeResult { + let (size, type_num) = self.size_and_type(); + if type_num == TYPE_POINTER { + let new_ptr = self.decode_pointer(size); + self.current_ptr = new_ptr; + self.consume_array_header() + } else if type_num == TYPE_ARRAY { + Ok(size) + } else { + Err(MaxMindDbError::Decoding(format!( + "expected array, got type {type_num}" + ))) + } + } + + /// Gets size and type, following any pointers. 
+ fn size_and_type_following_pointers(&mut self) -> DecodeResult<(usize, u8)> { + let (size, type_num) = self.size_and_type(); + if type_num == 1 { + // Pointer - follow it + let new_ptr = self.decode_pointer(size); + self.current_ptr = new_ptr; + self.size_and_type_following_pointers() + } else { + Ok((size, type_num)) + } + } + + /// Reads a string directly, following pointers if needed. + pub(crate) fn read_string(&mut self) -> DecodeResult<&'de str> { + let (size, type_num) = self.size_and_type(); + if type_num == TYPE_POINTER { + // Pointer + let new_ptr = self.decode_pointer(size); + let saved_ptr = self.current_ptr; + self.current_ptr = new_ptr; + let result = self.read_string(); + self.current_ptr = saved_ptr; + result + } else if type_num == 2 { + self.decode_string(size) + } else { + Err(MaxMindDbError::InvalidDatabase(format!( + "expected string, got type {type_num}" + ))) + } + } + + /// Skips the current value, following pointers. + pub(crate) fn skip_value(&mut self) -> DecodeResult<()> { + let (size, type_num) = self.size_and_type(); + self.skip_value_inner(size, type_num, true) + } + + /// Skips the current value without following pointers (for verification). 
+ pub(crate) fn skip_value_for_verification(&mut self) -> DecodeResult<()> { + let (size, type_num) = self.size_and_type(); + self.skip_value_inner(size, type_num, false) + } + + fn skip_value_inner( + &mut self, + size: usize, + type_num: u8, + follow_pointers: bool, + ) -> DecodeResult<()> { + match type_num { + 1 => { + // Pointer + let new_ptr = self.decode_pointer(size); + if follow_pointers { + let saved_ptr = self.current_ptr; + self.current_ptr = new_ptr; + self.skip_value()?; + self.current_ptr = saved_ptr; + } + Ok(()) + } + 2 | 4 => { + // String or Bytes - skip size bytes + self.current_ptr += size; + Ok(()) + } + 3 => { + // Double - must be exactly 8 bytes + if size != 8 { + return Err(MaxMindDbError::InvalidDatabase(format!( + "double of size {size}" + ))); + } + self.current_ptr += size; + Ok(()) + } + 15 => { + // Float - must be exactly 4 bytes + if size != 4 { + return Err(MaxMindDbError::InvalidDatabase(format!( + "float of size {size}" + ))); + } + self.current_ptr += size; + Ok(()) + } + 5 | 6 | 8 | 9 | 10 => { + // Numeric types - skip size bytes + self.current_ptr += size; + Ok(()) + } + 14 => { + // Boolean - size field IS the value, no data bytes to skip + Ok(()) + } + 7 => { + // Map - skip size key-value pairs + for _ in 0..size { + self.skip_value_inner_with_follow(follow_pointers)?; // key + self.skip_value_inner_with_follow(follow_pointers)?; // value + } + Ok(()) + } + 11 => { + // Array - skip size elements + for _ in 0..size { + self.skip_value_inner_with_follow(follow_pointers)?; + } + Ok(()) + } + u => Err(MaxMindDbError::InvalidDatabase(format!( + "Unknown data type: {u:?}" + ))), + } + } + + fn skip_value_inner_with_follow(&mut self, follow_pointers: bool) -> DecodeResult<()> { + let (size, type_num) = self.size_and_type(); + self.skip_value_inner(size, type_num, follow_pointers) + } } pub type DecodeResult = Result; diff --git a/src/maxminddb/geoip2.rs b/src/maxminddb/geoip2.rs index edb44fe4..65cfc285 100644 --- 
a/src/maxminddb/geoip2.rs +++ b/src/maxminddb/geoip2.rs @@ -27,7 +27,9 @@ //! let ip: IpAddr = "89.160.20.128".parse().unwrap(); //! //! // City lookup (most common) -//! if let Some(city) = reader.lookup::(ip)? { +//! let result = reader.lookup(ip)?; +//! if result.found() { +//! let city: geoip2::City = result.decode()?; //! if let Some(city_names) = city.city.and_then(|c| c.names) { //! if let Some(city_name) = city_names.get("en") { //! println!("City: {}", city_name); @@ -39,7 +41,9 @@ //! } //! //! // Country-only lookup (smaller/faster) -//! if let Some(country) = reader.lookup::(ip)? { +//! let result = reader.lookup(ip)?; +//! if result.found() { +//! let country: geoip2::Country = result.decode()?; //! if let Some(country_names) = country.country.and_then(|c| c.names) { //! if let Some(country_name) = country_names.get("en") { //! println!("Country: {}", country_name); diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index 57eec663..5186d589 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -43,7 +43,10 @@ //! //! // Look up an IP address //! let ip: IpAddr = "89.160.20.128".parse()?; -//! if let Some(city) = reader.lookup::(ip)? { +//! let result = reader.lookup(ip)?; +//! +//! if result.found() { +//! let city: geoip2::City = result.decode()?; //! if let Some(country) = city.country { //! println!("Country: {}", country.iso_code.unwrap_or("Unknown")); //! } @@ -52,14 +55,33 @@ //! Ok(()) //! } //! ``` +//! +//! ## Selective Field Access +//! +//! Use `decode_path` to extract specific fields without deserializing the entire record: +//! +//! ```rust +//! use maxminddb::{Reader, PathElement}; +//! use std::net::IpAddr; +//! +//! let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +//! let ip: IpAddr = "89.160.20.128".parse().unwrap(); +//! +//! let result = reader.lookup(ip).unwrap(); +//! let country_code: Option = result.decode_path(&[ +//! PathElement::Key("country"), +//! 
PathElement::Key("iso_code"), +//! ]).unwrap(); +//! +//! println!("Country: {:?}", country_code); +//! ``` use std::cmp::Ordering; use std::collections::BTreeMap; use std::fmt::Display; use std::fs; use std::io; -use std::marker::PhantomData; -use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; +use std::net::IpAddr; use std::path::Path; use ipnetwork::{IpNetwork, IpNetworkError}; @@ -129,18 +151,16 @@ struct WithinNode { prefix_len: usize, } +/// Iterator over IP networks within a CIDR range. +/// +/// This iterator yields [`LookupResult`] for each network in the database +/// that falls within the specified CIDR range. Use [`LookupResult::decode()`] +/// to deserialize the data for each result. #[derive(Debug)] -pub struct Within<'de, T: Deserialize<'de>, S: AsRef<[u8]>> { +pub struct Within<'de, S: AsRef<[u8]>> { reader: &'de Reader, node_count: usize, stack: Vec, - phantom: PhantomData<&'de T>, -} - -#[derive(Debug)] -pub struct WithinItem { - pub ip_net: IpNetwork, - pub info: T, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -180,8 +200,8 @@ impl IpInt { } } -impl<'de, T: Deserialize<'de>, S: AsRef<[u8]>> Iterator for Within<'de, T, S> { - type Item = Result, MaxMindDbError>; +impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { + type Item = Result, MaxMindDbError>; fn next(&mut self) -> Option { while let Some(current) = self.stack.pop() { @@ -199,17 +219,20 @@ impl<'de, T: Deserialize<'de>, S: AsRef<[u8]>> Iterator for Within<'de, T, S> { match current.node.cmp(&self.node_count) { Ordering::Greater => { // This is a data node, emit it and we're done (until the following next call) - let ip_net = - match bytes_and_prefix_to_net(&current.ip_int, current.prefix_len as u8) { - Ok(ip_net) => ip_net, - Err(e) => return Some(Err(e)), - }; + let ip_addr = ip_int_to_addr(&current.ip_int); - // Call the new helper method to decode data - return match self.reader.decode_data_at_pointer(current.node) { - Ok(info) => Some(Ok(WithinItem { ip_net, info })), - Err(e) => Some(Err(e)),
+ // Resolve the pointer to a data offset + let data_offset = match self.reader.resolve_data_pointer(current.node) { + Ok(offset) => offset, + Err(e) => return Some(Err(e)), }; + + return Some(Ok(LookupResult::new_found( + self.reader, + data_offset, + current.prefix_len as u8, + ip_addr, + ))); } Ordering::Equal => { // Dead end, nothing to do @@ -253,6 +276,21 @@ impl<'de, T: Deserialize<'de>, S: AsRef<[u8]>> Iterator for Within<'de, T, S> { } } +/// Convert IpInt to IpAddr +fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr { + match ip_int { + IpInt::V4(ip) => IpAddr::V4((*ip).into()), + IpInt::V6(ip) => { + // Check if this is an IPv4-mapped IPv6 address + if *ip <= 0xFFFFFFFF { + IpAddr::V4((*ip as u32).into()) + } else { + IpAddr::V6((*ip).into()) + } + } + } +} + /// A reader for the MaxMind DB format. The lifetime `'data` is tied to the /// lifetime of the underlying buffer holding the contents of the database file. /// @@ -338,8 +376,14 @@ impl<'de, S: AsRef<[u8]>> Reader { Ok(reader) } - /// Lookup the socket address in the opened MaxMind DB. - /// Returns `Ok(None)` if the address is not found in the database. + /// Lookup an IP address in the database. 
+ /// + /// Returns a [`LookupResult`] that can be used to: + /// - Check if the IP was found with [`found()`](LookupResult::found) + /// - Get the network containing the IP with [`network()`](LookupResult::network) + /// - Decode the full record with [`decode()`](LookupResult::decode) + /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path) + /// - Get a low-level decoder with [`decoder()`](LookupResult::decoder) /// /// # Examples /// @@ -347,134 +391,92 @@ impl<'de, S: AsRef<[u8]>> Reader { /// ``` /// # use maxminddb::geoip2; /// # use std::net::IpAddr; - /// # use std::str::FromStr; /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { /// let reader = maxminddb::Reader::open_readfile( /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; /// - /// let ip: IpAddr = FromStr::from_str("89.160.20.128").unwrap(); - /// match reader.lookup::(ip)? { - /// Some(city) => { - /// if let Some(city_names) = city.city.and_then(|c| c.names) { - /// if let Some(name) = city_names.get("en") { + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// let result = reader.lookup(ip)?; + /// + /// if result.found() { + /// let city: geoip2::City = result.decode()?; + /// if let Some(city_info) = city.city { + /// if let Some(names) = city_info.names { + /// if let Some(name) = names.get("en") { /// println!("City: {}", name); /// } /// } - /// if let Some(country) = city.country.and_then(|c| c.iso_code) { - /// println!("Country: {}", country); - /// } /// } - /// None => println!("No data found for IP {}", ip), + /// } else { + /// println!("No data found for IP {}", ip); /// } /// # Ok(()) /// # } /// ``` /// - /// Lookup with different record types: + /// Selective field access: /// ``` - /// # use maxminddb::geoip2; + /// # use maxminddb::{Reader, PathElement}; /// # use std::net::IpAddr; /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { - /// let reader = maxminddb::Reader::open_readfile( + /// let reader = Reader::open_readfile( /// 
"test-data/test-data/GeoIP2-City-Test.mmdb")?; /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); /// - /// // Different record types for the same IP - /// let city: Option = reader.lookup(ip)?; - /// let country: Option = reader.lookup(ip)?; - /// - /// println!("City data available: {}", city.is_some()); - /// println!("Country data available: {}", country.is_some()); - /// # Ok(()) - /// # } - /// ``` - pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> - where - T: Deserialize<'de>, - { - self.lookup_prefix(address) - .map(|(option_value, _prefix_len)| option_value) - } - - /// Lookup the socket address in the opened MaxMind DB, returning the found value (if any) - /// and the prefix length of the network associated with the lookup. - /// - /// Returns `Ok((None, prefix_len))` if the address is found in the tree but has no data record. - /// Returns `Err(...)` for database errors (IO, corruption, decoding). - /// - /// Example: - /// - /// ``` - /// # use maxminddb::geoip2; - /// # use std::net::IpAddr; - /// # use std::str::FromStr; - /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; - /// - /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); // Known IP - /// let ip_unknown: IpAddr = "10.0.0.1".parse().unwrap(); // Unknown IP - /// - /// let (city_option, prefix_len) = reader.lookup_prefix::(ip)?; - /// if let Some(city) = city_option { - /// println!("Found {:?} at prefix length {}", city.city.unwrap().names.unwrap().get("en").unwrap(), prefix_len); - /// } else { - /// // This case is less likely with lookup_prefix if the IP resolves in the tree - /// println!("IP found in tree but no data (prefix_len: {})", prefix_len); - /// } + /// let result = reader.lookup(ip)?; + /// let country_code: Option = result.decode_path(&[ + /// PathElement::Key("country"), + /// PathElement::Key("iso_code"), + /// ])?; /// - /// let 
(city_option_unknown, prefix_len_unknown) = reader.lookup_prefix::(ip_unknown)?; - /// assert!(city_option_unknown.is_none()); - /// println!("Unknown IP resolved to prefix_len: {}", prefix_len_unknown); + /// println!("Country: {:?}", country_code); /// # Ok(()) /// # } /// ``` - pub fn lookup_prefix( - &'de self, - address: IpAddr, - ) -> Result<(Option, usize), MaxMindDbError> - where - T: Deserialize<'de>, - { + pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> { let ip_int = IpInt::new(address); - // find_address_in_tree returns Result<(usize, usize), MaxMindDbError> -> (pointer, prefix_len) let (pointer, prefix_len) = self.find_address_in_tree(&ip_int)?; if pointer == 0 { - // If pointer is 0, it signifies no data record was associated during tree traversal. - // Return None for the data, but include the calculated prefix_len. - return Ok((None, prefix_len)); - } - - // If pointer > 0, attempt to resolve and decode data using the helper method - match self.decode_data_at_pointer(pointer) { - Ok(value) => Ok((Some(value), prefix_len)), - Err(e) => Err(e), + // IP not found in database + Ok(LookupResult::new_not_found(self, prefix_len as u8, address)) + } else { + // Resolve the pointer to a data offset + let data_offset = self.resolve_data_pointer(pointer)?; + Ok(LookupResult::new_found( + self, + data_offset, + prefix_len as u8, + address, + )) } } - /// Iterate over blocks of IP networks in the opened MaxMind DB + /// Iterate over IP networks within a CIDR range. /// - /// This method returns an iterator that yields all IP network blocks that - /// fall within the specified CIDR range and have associated data in the - /// database. + /// Returns an iterator that yields [`LookupResult`] for each network in the + /// database that falls within the specified CIDR range. 
/// /// # Examples /// /// Iterate over all IPv4 networks: /// ``` /// use ipnetwork::IpNetwork; - /// use maxminddb::{geoip2, Within}; + /// use maxminddb::{geoip2, Reader}; /// - /// let reader = maxminddb::Reader::open_readfile( + /// let reader = Reader::open_readfile( /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); /// /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap()); /// let mut count = 0; - /// for result in reader.within::(ipv4_all).unwrap() { - /// let item = result.unwrap(); - /// let city_name = item.info.city.as_ref().and_then(|c| c.names.as_ref()).and_then(|n| n.get("en")); - /// println!("Network: {}, City: {:?}", item.ip_net, city_name); + /// for result in reader.within(ipv4_all).unwrap() { + /// let lookup = result.unwrap(); + /// let network = lookup.network().unwrap(); + /// let city: geoip2::City = lookup.decode().unwrap(); + /// let city_name = city.city.as_ref() + /// .and_then(|c| c.names.as_ref()) + /// .and_then(|n| n.get("en")); + /// println!("Network: {}, City: {:?}", network, city_name); /// count += 1; /// if count >= 10 { break; } // Limit output for example /// } @@ -483,28 +485,23 @@ impl<'de, S: AsRef<[u8]>> Reader { /// Search within a specific subnet: /// ``` /// use ipnetwork::IpNetwork; - /// use maxminddb::geoip2; + /// use maxminddb::{geoip2, Reader}; /// - /// let reader = maxminddb::Reader::open_readfile( + /// let reader = Reader::open_readfile( /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); /// /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap()); - /// match reader.within::(subnet) { - /// Ok(iter) => { - /// for result in iter { - /// match result { - /// Ok(item) => println!("Found: {}", item.ip_net), - /// Err(e) => eprintln!("Error processing item: {}", e), - /// } + /// for result in reader.within(subnet).unwrap() { + /// match result { + /// Ok(lookup) => { + /// let network = lookup.network().unwrap(); + /// println!("Found: {}", network); /// } + /// Err(e) => 
eprintln!("Error: {}", e), /// } - /// Err(e) => eprintln!("Failed to create iterator: {}", e), /// } /// ``` - pub fn within(&'de self, cidr: IpNetwork) -> Result, MaxMindDbError> - where - T: Deserialize<'de>, - { + pub fn within(&'de self, cidr: IpNetwork) -> Result, MaxMindDbError> { let ip_address = cidr.network(); let prefix_len = cidr.prefix() as usize; let ip_int = IpInt::new(ip_address); @@ -540,11 +537,10 @@ impl<'de, S: AsRef<[u8]>> Reader { // else the stack will be empty and we'll be returning an iterator that visits nothing, // which makes sense. - let within: Within = Within { + let within = Within { reader: self, node_count, stack, - phantom: PhantomData, }; Ok(within) @@ -657,30 +653,6 @@ impl<'de, S: AsRef<[u8]>> Reader { Ok(resolved) } - - /// Decodes data at the given pointer offset. - /// Assumes the pointer is valid and points to the data section. - fn decode_data_at_pointer(&'de self, pointer: usize) -> Result - where - T: Deserialize<'de>, - { - let resolved_offset = self.resolve_data_pointer(pointer)?; - let mut decoder = - decoder::Decoder::new(&self.buf.as_ref()[self.pointer_base..], resolved_offset); - T::deserialize(&mut decoder) - } -} - -#[inline] -fn bytes_and_prefix_to_net(bytes: &IpInt, prefix: u8) -> Result { - let (ip, prefix) = match bytes { - IpInt::V4(ip) => (IpAddr::V4(Ipv4Addr::from(*ip)), prefix), - IpInt::V6(ip) if bytes.is_ipv4_in_ipv6() => { - (IpAddr::V4(Ipv4Addr::from(*ip as u32)), prefix - 96) - } - IpInt::V6(ip) => (IpAddr::V6(Ipv6Addr::from(*ip)), prefix), - }; - IpNetwork::new(ip, prefix).map_err(MaxMindDbError::InvalidNetwork) } fn find_metadata_start(buf: &[u8]) -> Result { @@ -697,6 +669,9 @@ fn find_metadata_start(buf: &[u8]) -> Result { mod decoder; pub mod geoip2; +mod result; + +pub use result::{LookupResult, PathElement}; #[cfg(test)] mod reader_test; @@ -744,59 +719,96 @@ mod tests { } #[test] - fn test_lookup_returns_none_for_unknown_address() { + fn test_lookup_not_found_for_unknown_address() { use 
super::Reader; - use crate::geoip2; use std::net::IpAddr; - use std::str::FromStr; let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = FromStr::from_str("10.0.0.1").unwrap(); + let ip: IpAddr = "10.0.0.1".parse().unwrap(); - let result_lookup = reader.lookup::(ip); + let result = reader.lookup(ip).unwrap(); assert!( - matches!(result_lookup, Ok(None)), - "lookup should return Ok(None) for unknown IP" + !result.found(), + "lookup should return found=false for unknown IP" ); - let result_lookup_prefix = reader.lookup_prefix::(ip); - assert!( - matches!(result_lookup_prefix, Ok((None, 8))), - "lookup_prefix should return Ok((None, 8)) for unknown IP, got {:?}", - result_lookup_prefix - ); + // Network should still be available + let network = result.network().unwrap(); + assert_eq!(network.prefix(), 8, "Expected prefix length 8"); } #[test] - fn test_lookup_returns_some_for_known_address() { + fn test_lookup_found_for_known_address() { use super::Reader; use crate::geoip2; use std::net::IpAddr; - use std::str::FromStr; let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = FromStr::from_str("89.160.20.128").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); - let result_lookup = reader.lookup::(ip); + let result = reader.lookup(ip).unwrap(); assert!( - matches!(result_lookup, Ok(Some(_))), - "lookup should return Ok(Some(_)) for known IP" - ); - assert!( - result_lookup.unwrap().unwrap().city.is_some(), - "Expected city data" + result.found(), + "lookup should return found=true for known IP" ); - let result_lookup_prefix = reader.lookup_prefix::(ip); - assert!( - matches!(result_lookup_prefix, Ok((Some(_), _))), - "lookup_prefix should return Ok(Some(_)) for known IP" - ); - let (city_data, prefix_len) = result_lookup_prefix.unwrap(); + // Decode the data + let city: geoip2::City = result.decode().unwrap(); + assert!(city.city.is_some(), "Expected 
city data"); + + // Check network + let network = result.network().unwrap(); + assert_eq!(network.prefix(), 25, "Expected prefix length 25"); + + // Check offset is available assert!( - city_data.unwrap().city.is_some(), - "Expected city data from prefix lookup" + result.offset().is_some(), + "Expected offset to be Some for found IP" ); - assert_eq!(prefix_len, 25, "Expected valid prefix length"); + } + + #[test] + fn test_decode_path() { + use super::{PathElement, Reader}; + use std::net::IpAddr; + + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + + // Navigate to country.iso_code + let iso_code: Option = result + .decode_path(&[PathElement::Key("country"), PathElement::Key("iso_code")]) + .unwrap(); + assert_eq!(iso_code, Some("SE".to_owned())); + + // Navigate to non-existent path + let missing: Option = result + .decode_path(&[PathElement::Key("nonexistent")]) + .unwrap(); + assert!(missing.is_none()); + } + + #[test] + fn test_decoder_api() { + use super::{Kind, Reader}; + use std::net::IpAddr; + + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + let mut decoder = result.decoder().unwrap(); + + // The root should be a map + assert_eq!(decoder.peek_kind().unwrap(), Kind::Map); + + let mut map = decoder.read_map().unwrap(); + assert!(map.len() > 0, "Expected non-empty map"); + + // Read first key + let key = map.next_key().unwrap(); + assert!(key.is_some(), "Expected at least one key"); } } diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index 9eb37af9..1dd60d8a 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -48,7 +48,9 @@ fn test_decoder() { } let r = r.unwrap(); let ip: IpAddr = FromStr::from_str("1.1.1.0").unwrap(); - let 
result: TestType = r.lookup(ip).unwrap().unwrap(); + let lookup = r.lookup(ip).unwrap(); + assert!(lookup.found(), "Expected IP to be found"); + let result: TestType = lookup.decode().unwrap(); assert_eq!(result.array, vec![1_u32, 2_u32, 3_u32]); assert!(result.boolean); @@ -103,13 +105,18 @@ fn test_broken_database() { #[derive(Deserialize, Debug)] struct TestType {} - match r.lookup::(ip) { - Err(e) => assert!(matches!( - e, - MaxMindDbError::InvalidDatabase(_) // Check variant, message might vary slightly - )), - Ok(Some(_)) => panic!("Unexpected success with broken data"), - Ok(None) => panic!("Got None, expected InvalidDatabase"), + + let lookup = r.lookup(ip).unwrap(); + if lookup.found() { + match lookup.decode::() { + Err(e) => assert!(matches!( + e, + MaxMindDbError::InvalidDatabase(_) // Check variant, message might vary slightly + )), + Ok(_) => panic!("Unexpected success with broken data"), + } + } else { + panic!("Expected IP to be found (with broken data)"); } } @@ -206,7 +213,9 @@ fn test_lookup_city() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); - let city: geoip2::City = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let city: geoip2::City = lookup.decode().unwrap(); let iso_code = city.country.and_then(|cy| cy.iso_code); @@ -222,7 +231,9 @@ fn test_lookup_country() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); - let country: geoip2::Country = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let country: geoip2::Country = lookup.decode().unwrap(); let country = country.country.unwrap(); assert_eq!(country.iso_code, Some("SE")); @@ -238,7 +249,9 @@ fn test_lookup_connection_type() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = 
FromStr::from_str("96.1.20.112").unwrap(); - let connection_type: geoip2::ConnectionType = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let connection_type: geoip2::ConnectionType = lookup.decode().unwrap(); assert_eq!(connection_type.connection_type, Some("Cable/DSL")); } @@ -252,7 +265,9 @@ fn test_lookup_annonymous_ip() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("81.2.69.123").unwrap(); - let anonymous_ip: geoip2::AnonymousIp = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let anonymous_ip: geoip2::AnonymousIp = lookup.decode().unwrap(); assert_eq!(anonymous_ip.is_anonymous, Some(true)); assert_eq!(anonymous_ip.is_public_proxy, Some(true)); @@ -270,7 +285,9 @@ fn test_lookup_density_income() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("5.83.124.123").unwrap(); - let density_income: geoip2::DensityIncome = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let density_income: geoip2::DensityIncome = lookup.decode().unwrap(); assert_eq!(density_income.average_income, Some(32323)); assert_eq!(density_income.population_density, Some(1232)) @@ -285,7 +302,9 @@ fn test_lookup_domain() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("66.92.80.123").unwrap(); - let domain: geoip2::Domain = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let domain: geoip2::Domain = lookup.decode().unwrap(); assert_eq!(domain.domain, Some("speakeasy.net")); } @@ -299,7 +318,9 @@ fn test_lookup_isp() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("12.87.118.123").unwrap(); - let isp: geoip2::Isp = reader.lookup(ip).unwrap().unwrap(); + let lookup = 
reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let isp: geoip2::Isp = lookup.decode().unwrap(); assert_eq!(isp.autonomous_system_number, Some(7018)); assert_eq!(isp.isp, Some("AT&T Services")); @@ -315,59 +336,60 @@ fn test_lookup_asn() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("1.128.0.123").unwrap(); - let asn: geoip2::Asn = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let asn: geoip2::Asn = lookup.decode().unwrap(); assert_eq!(asn.autonomous_system_number, Some(1221)); assert_eq!(asn.autonomous_system_organization, Some("Telstra Pty Ltd")); } #[test] -fn test_lookup_prefix() { +fn test_lookup_network() { let _ = env_logger::try_init(); let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; let reader = Reader::open_readfile(filename).unwrap(); // --- IPv4 Check (Known) --- let ip: IpAddr = "89.160.20.128".parse().unwrap(); - let result_v4 = reader.lookup_prefix::(ip); - assert!(result_v4.is_ok()); - let (city_opt_v4, prefix_len_v4) = result_v4.unwrap(); - assert!(city_opt_v4.is_some(), "Expected Some(City) for known IPv4"); - assert_eq!(prefix_len_v4, 25); - assert!(city_opt_v4.unwrap().country.is_some()); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found(), "Expected Some(City) for known IPv4"); + let network = lookup.network().unwrap(); + assert_eq!(network.prefix(), 25); + let city: geoip2::City = lookup.decode().unwrap(); + assert!(city.country.is_some()); // --- IPv4 Check (Last Host, Known) --- let ip_last: IpAddr = "89.160.20.254".parse().unwrap(); - let (city_opt_last, last_prefix_len) = reader.lookup_prefix::(ip_last).unwrap(); - assert!(city_opt_last.is_some(), "Expected Some(City) for last host"); - assert_eq!(last_prefix_len, 25); // Should be same network + let lookup_last = reader.lookup(ip_last).unwrap(); + assert!(lookup_last.found(), "Expected Some(City) for last host"); + 
assert_eq!(lookup_last.network().unwrap().prefix(), 25); // Should be same network // --- IPv6 Check (Not Found in Data) --- // This IP might resolve to a node in the tree, but that node might not point to data. let ip_v6_not_found: IpAddr = "2c0f:ff00::1".parse().unwrap(); - let result_not_found = reader.lookup_prefix::(ip_v6_not_found); - assert!(result_not_found.is_ok()); - let (city_opt_nf, prefix_len_nf) = result_not_found.unwrap(); + let lookup_nf = reader.lookup(ip_v6_not_found).unwrap(); assert!( - city_opt_nf.is_none(), - "Expected None data for non-existent IP 2c0f:ff00::1" + !lookup_nf.found(), + "Expected not found for non-existent IP 2c0f:ff00::1" ); assert_eq!( - prefix_len_nf, 6, + lookup_nf.network().unwrap().prefix(), + 6, "Expected valid prefix length for not-found IPv6" ); // --- IPv6 Check (Known Data) --- let ip_v6_known: IpAddr = "2001:218:85a3:0:0:8a2e:370:7334".parse().unwrap(); - let result_known_v6 = reader.lookup_prefix::(ip_v6_known); - assert!(result_known_v6.is_ok()); - let (city_opt_v6, prefix_len_v6_known) = result_known_v6.unwrap(); - assert!(city_opt_v6.is_some(), "Expected Some(City) for known IPv6"); + let lookup_v6 = reader.lookup(ip_v6_known).unwrap(); + assert!(lookup_v6.found(), "Expected Some(City) for known IPv6"); assert_eq!( - prefix_len_v6_known, 32, + lookup_v6.network().unwrap().prefix(), + 32, "Prefix length mismatch for known IPv6" ); - assert!(city_opt_v6.unwrap().country.is_some()); + let city_v6: geoip2::City = lookup_v6.decode().unwrap(); + assert!(city_v6.country.is_some()); } #[test] @@ -380,7 +402,7 @@ fn test_within_city() { // --- Test iteration over entire DB ("::/0") --- let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); - let mut iter_all: Within = reader.within(ip_net_all).unwrap(); + let mut iter_all: Within<_> = reader.within(ip_net_all).unwrap(); // Get the first item let first_item_result = iter_all.next(); @@ -388,7 +410,7 @@ fn test_within_city() { first_item_result.is_some(), "Iterator over 
::/0 yielded no items" ); - let _first_item = first_item_result.unwrap().unwrap(); + let _first_lookup = first_item_result.unwrap().unwrap(); // Count the remaining items to check total count let mut n = 1; // Start at 1 since we already took the first item @@ -400,7 +422,7 @@ fn test_within_city() { // --- Test iteration over a specific smaller network --- let specific = IpNetwork::V4("81.2.69.0/24".parse().unwrap()); - let mut iter_specific: Within = reader.within(specific).unwrap(); + let mut iter_specific: Within<_> = reader.within(specific).unwrap(); let expected = vec![ // In order of iteration: @@ -418,16 +440,18 @@ fn test_within_city() { item_res.is_some(), "Expected more items in specific iterator" ); - let item = item_res.unwrap().unwrap(); + let lookup = item_res.unwrap().unwrap(); + let network = lookup.network().unwrap(); assert_eq!( - item.ip_net, expected_net, + network, expected_net, "Mismatch in specific network iteration" ); // Check associated data for one of them - if item.ip_net.prefix() == 31 { + if network.prefix() == 31 { // 81.2.69.142/31 - assert!(item.info.city.is_some()); - assert_eq!(item.info.city.unwrap().geoname_id, Some(2643743)); // London + let city: geoip2::City = lookup.decode().unwrap(); + assert!(city.city.is_some()); + assert_eq!(city.city.unwrap().geoname_id, Some(2643743)); // London } found_count += 1; } @@ -501,21 +525,21 @@ fn check_ip>(reader: &Reader, ip_version: usize) { // Test lookups that are expected to succeed for subnet in &subnets { let ip: IpAddr = FromStr::from_str(subnet).unwrap(); - let result = reader.lookup::(ip); + let lookup = reader.lookup(ip); assert!( - result.is_ok(), + lookup.is_ok(), "Lookup failed unexpectedly for {}: {:?}", subnet, - result.err() + lookup.err() ); - let value_option = result.unwrap(); + let lookup = lookup.unwrap(); assert!( - value_option.is_some(), - "Lookup for {} returned None unexpectedly", + lookup.found(), + "Lookup for {} returned not found unexpectedly", subnet ); - 
let value = value_option.unwrap(); + let value: IpType = lookup.decode().unwrap(); // The value stored is often the network address, not the specific IP looked up // We need to parse the found IP and the subnet IP to check containment or equality. @@ -523,7 +547,7 @@ fn check_ip>(reader: &Reader, ip_version: usize) { assert_eq!(value.ip, *subnet); } - // Test lookups that are expected to return "not found" (Ok(None)) + // Test lookups that are expected to return "not found" let no_record = ["1.1.1.33", "255.254.253.123", "89fa::"]; for &address in &no_record { @@ -535,13 +559,12 @@ fn check_ip>(reader: &Reader, ip_version: usize) { } let ip: IpAddr = FromStr::from_str(address).unwrap(); - let result = reader.lookup::(ip); + let lookup = reader.lookup(ip).unwrap(); assert!( - matches!(result, Ok(None)), - "Expected Ok(None) for address {}, but got {:?}", - address, - result + !lookup.found(), + "Expected not found for address {}, but it was found", + address ); } } @@ -555,7 +578,9 @@ fn test_json_serialize() { let reader = Reader::open_readfile(filename).unwrap(); let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); - let city: geoip2::City = reader.lookup(ip).unwrap().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.found()); + let city: geoip2::City = lookup.decode().unwrap(); let json_value = json!(city); let json_string = json_value.to_string(); diff --git a/src/maxminddb/result.rs b/src/maxminddb/result.rs new file mode 100644 index 00000000..2fdaf4b5 --- /dev/null +++ b/src/maxminddb/result.rs @@ -0,0 +1,364 @@ +//! Lookup result types for deferred decoding. +//! +//! This module provides `LookupResult`, which enables lazy decoding of +//! MaxMind DB records. Instead of immediately deserializing data, you +//! get a lightweight handle that can be decoded later or navigated +//! selectively via paths. 
+ +use std::net::IpAddr; + +use ipnetwork::IpNetwork; +use serde::Deserialize; + +use super::decoder::{TYPE_ARRAY, TYPE_MAP}; +use super::{MaxMindDbError, Reader}; + +/// The result of looking up an IP address in a MaxMind DB. +/// +/// This is a lightweight handle (~40 bytes) that stores the lookup result +/// without immediately decoding the data. You can: +/// +/// - Check if the IP was found with [`found()`](Self::found) +/// - Get the network containing the IP with [`network()`](Self::network) +/// - Decode the full record with [`decode()`](Self::decode) +/// - Decode a specific path with [`decode_path()`](Self::decode_path) +/// +/// # Example +/// +/// ``` +/// use maxminddb::{Reader, geoip2, PathElement}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// let ip: IpAddr = "89.160.20.128".parse().unwrap(); +/// +/// let result = reader.lookup(ip).unwrap(); +/// +/// if result.found() { +/// // Full decode +/// let city: geoip2::City = result.decode().unwrap(); +/// +/// // Or selective decode via path +/// let country_code: Option = result.decode_path(&[ +/// PathElement::Key("country"), +/// PathElement::Key("iso_code"), +/// ]).unwrap(); +/// println!("Country: {:?}", country_code); +/// } +/// ``` +#[derive(Debug, Clone, Copy)] +pub struct LookupResult<'a, S: AsRef<[u8]>> { + reader: &'a Reader, + /// Offset into the data section, or usize::MAX if not found + data_offset: usize, + prefix_len: u8, + ip: IpAddr, +} + +/// Not found sentinel value +const NOT_FOUND: usize = usize::MAX; + +impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { + /// Creates a new LookupResult for a found IP. + pub(crate) fn new_found( + reader: &'a Reader, + data_offset: usize, + prefix_len: u8, + ip: IpAddr, + ) -> Self { + LookupResult { + reader, + data_offset, + prefix_len, + ip, + } + } + + /// Creates a new LookupResult for an IP not in the database. 
+ pub(crate) fn new_not_found(reader: &'a Reader, prefix_len: u8, ip: IpAddr) -> Self { + LookupResult { + reader, + data_offset: NOT_FOUND, + prefix_len, + ip, + } + } + + /// Returns true if the IP address was found in the database. + /// + /// Note that "not found" means the database has no data for this IP, + /// which is different from an error during lookup. + #[inline] + pub fn found(&self) -> bool { + self.data_offset != NOT_FOUND + } + + /// Returns the network containing the looked-up IP address. + /// + /// This is the most specific network in the database that contains + /// the IP, regardless of whether data was found. + pub fn network(&self) -> Result { + let (ip, prefix) = match self.ip { + IpAddr::V4(v4) => (IpAddr::V4(v4), self.prefix_len), + IpAddr::V6(v6) => { + let v6_int: u128 = v6.into(); + if v6_int <= 0xFFFFFFFF { + // IPv4-mapped in IPv6, convert back + use std::net::Ipv4Addr; + let v4 = Ipv4Addr::from(v6_int as u32); + (IpAddr::V4(v4), self.prefix_len.saturating_sub(96)) + } else { + (IpAddr::V6(v6), self.prefix_len) + } + } + }; + + // Mask the IP to the network address + let network_ip = mask_ip(ip, prefix); + IpNetwork::new(network_ip, prefix).map_err(MaxMindDbError::InvalidNetwork) + } + + /// Returns the data section offset if found, for use as a cache key. + /// + /// Multiple IP addresses often point to the same data record. This + /// offset can be used to deduplicate decoding or cache results. + /// + /// Returns `None` if the IP was not found. + #[inline] + pub fn offset(&self) -> Option { + if self.found() { + Some(self.data_offset) + } else { + None + } + } + + /// Decodes the full record into the specified type. + /// + /// Returns an error if the IP was not found or if decoding fails. 
+ /// + /// # Example + /// + /// ``` + /// use maxminddb::{Reader, geoip2}; + /// use std::net::IpAddr; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// + /// let result = reader.lookup(ip).unwrap(); + /// let city: geoip2::City = result.decode().unwrap(); + /// ``` + pub fn decode(&self) -> Result + where + T: Deserialize<'a>, + { + if !self.found() { + return Err(MaxMindDbError::Decoding( + "cannot decode: IP address not found in database".to_owned(), + )); + } + + let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; + let mut decoder = super::decoder::Decoder::new(buf, self.data_offset); + T::deserialize(&mut decoder) + } + + /// Decodes a value at a specific path within the record. + /// + /// Returns: + /// - `Ok(Some(T))` if the path exists and was successfully decoded + /// - `Ok(None)` if the path doesn't exist (key missing, index out of bounds) + /// - `Err(...)` if there's a type mismatch during navigation (e.g., `Key` on an array) + /// + /// If `found() == false`, returns `Ok(None)`. 
+ /// + /// # Path Elements + /// + /// - `PathElement::Key("name")` - Navigate into a map by key + /// - `PathElement::Index(0)` - Navigate into an array by index + /// - `PathElement::Index(-1)` - Last element (Python-style negative indexing) + /// + /// # Example + /// + /// ``` + /// use maxminddb::{Reader, PathElement}; + /// use std::net::IpAddr; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// + /// let result = reader.lookup(ip).unwrap(); + /// + /// // Navigate to country.iso_code + /// let iso_code: Option = result.decode_path(&[ + /// PathElement::Key("country"), + /// PathElement::Key("iso_code"), + /// ]).unwrap(); + /// + /// // Navigate to subdivisions[0].names.en + /// let subdiv_name: Option = result.decode_path(&[ + /// PathElement::Key("subdivisions"), + /// PathElement::Index(0), + /// PathElement::Key("names"), + /// PathElement::Key("en"), + /// ]).unwrap(); + /// ``` + pub fn decode_path(&self, path: &[PathElement<'_>]) -> Result, MaxMindDbError> + where + T: Deserialize<'a>, + { + if !self.found() { + return Ok(None); + } + + let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; + let mut decoder = super::decoder::Decoder::new(buf, self.data_offset); + + // Navigate through the path + for element in path { + match element { + PathElement::Key(key) => { + let (_, type_num) = decoder.peek_type()?; + if type_num != TYPE_MAP { + return Err(MaxMindDbError::Decoding(format!( + "expected map for Key navigation, got type {type_num}" + ))); + } + + // Consume the map header and get size + let size = decoder.consume_map_header()?; + + let mut found = false; + for _ in 0..size { + let k = decoder.read_string()?; + if k == *key { + found = true; + break; + } else { + decoder.skip_value()?; + } + } + + if !found { + return Ok(None); + } + } + PathElement::Index(idx) => { + let (_, type_num) = decoder.peek_type()?; + if type_num 
!= TYPE_ARRAY { + return Err(MaxMindDbError::Decoding(format!( + "expected array for Index navigation, got type {type_num}" + ))); + } + + // Consume the array header and get size + let size = decoder.consume_array_header()?; + + // Handle negative indexing (Python-style) + let actual_idx = if *idx < 0 { + let positive = (-*idx) as usize; + if positive > size { + return Ok(None); // Out of bounds + } + size - positive + } else { + let positive = *idx as usize; + if positive >= size { + return Ok(None); // Out of bounds + } + positive + }; + + // Skip to the target index + for _ in 0..actual_idx { + decoder.skip_value()?; + } + } + } + } + + // Decode the value at the current position + T::deserialize(&mut decoder).map(Some) + } +} + +/// A path element for navigating into nested data structures. +/// +/// Used with [`LookupResult::decode_path()`] to selectively decode +/// specific fields without parsing the entire record. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PathElement<'a> { + /// Navigate into a map by key. + Key(&'a str), + /// Navigate into an array by index. + /// + /// Supports Python-style negative indexing: + /// - `Index(0)` - first element + /// - `Index(-1)` - last element + /// - `Index(-2)` - second-to-last element + Index(isize), +} + +/// Masks an IP address to its network address given a prefix length. 
+fn mask_ip(ip: IpAddr, prefix: u8) -> IpAddr { + match ip { + IpAddr::V4(v4) => { + if prefix >= 32 { + IpAddr::V4(v4) + } else { + let int: u32 = v4.into(); + let mask = if prefix == 0 { + 0 + } else { + !0u32 << (32 - prefix) + }; + IpAddr::V4((int & mask).into()) + } + } + IpAddr::V6(v6) => { + if prefix >= 128 { + IpAddr::V6(v6) + } else { + let int: u128 = v6.into(); + let mask = if prefix == 0 { + 0 + } else { + !0u128 << (128 - prefix) + }; + IpAddr::V6((int & mask).into()) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mask_ipv4() { + let ip: IpAddr = "192.168.1.100".parse().unwrap(); + assert_eq!(mask_ip(ip, 24), "192.168.1.0".parse::().unwrap()); + assert_eq!(mask_ip(ip, 16), "192.168.0.0".parse::().unwrap()); + assert_eq!(mask_ip(ip, 32), "192.168.1.100".parse::().unwrap()); + assert_eq!(mask_ip(ip, 0), "0.0.0.0".parse::().unwrap()); + } + + #[test] + fn test_mask_ipv6() { + let ip: IpAddr = "2001:db8:85a3::8a2e:370:7334".parse().unwrap(); + assert_eq!( + mask_ip(ip, 64), + "2001:db8:85a3::".parse::().unwrap() + ); + assert_eq!(mask_ip(ip, 32), "2001:db8::".parse::().unwrap()); + } + + #[test] + fn test_path_element_debug() { + assert_eq!(format!("{:?}", PathElement::Key("test")), "Key(\"test\")"); + assert_eq!(format!("{:?}", PathElement::Index(5)), "Index(5)"); + assert_eq!(format!("{:?}", PathElement::Index(-1)), "Index(-1)"); + } +} From e5a565525001e136b5521c4013a410caf9821455 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 26 Nov 2025 14:12:37 -0800 Subject: [PATCH 04/37] Match Go reader network() behavior for IPv4/IPv6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - IPv4 lookups return IPv4 networks with correct prefix length - IPv6 lookups preserve IPv6 form (including IPv4-mapped addresses) - Handle no-ipv4-search-tree databases by tracking ipv4_start_bit_depth - Add comprehensive test_lookup_network covering 11 edge cases 🤖 Generated with [Claude 
Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/lib.rs | 177 ++++++++++++++++++++++++++++++++++------ src/maxminddb/result.rs | 36 ++++++-- 2 files changed, 180 insertions(+), 33 deletions(-) diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index 5186d589..b35e36aa 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -309,6 +309,9 @@ pub struct Reader> { buf: S, pub metadata: Metadata, ipv4_start: usize, + /// Bit depth at which ipv4_start was found (0-96). Used to calculate + /// correct prefix lengths for IPv4 lookups in IPv6 databases. + ipv4_start_bit_depth: usize, pointer_base: usize, } @@ -370,8 +373,11 @@ impl<'de, S: AsRef<[u8]>> Reader { pointer_base: search_tree_size + data_section_separator_size, metadata, ipv4_start: 0, + ipv4_start_bit_depth: 0, }; - reader.ipv4_start = reader.find_ipv4_start()?; + let (ipv4_start, ipv4_start_bit_depth) = reader.find_ipv4_start()?; + reader.ipv4_start = ipv4_start; + reader.ipv4_start_bit_depth = ipv4_start_bit_depth; Ok(reader) } @@ -437,6 +443,15 @@ impl<'de, S: AsRef<[u8]>> Reader { let ip_int = IpInt::new(address); let (pointer, prefix_len) = self.find_address_in_tree(&ip_int)?; + // For IPv4 addresses in IPv6 databases, adjust prefix_len to reflect + // the actual bit depth in the tree. The ipv4_start_bit_depth tells us + // how deep in the IPv6 tree we were when we found the IPv4 subtree. + let prefix_len = if matches!(address, IpAddr::V4(_)) && self.metadata.ip_version == 6 { + self.ipv4_start_bit_depth + prefix_len + } else { + prefix_len + }; + if pointer == 0 { // IP not found in database Ok(LookupResult::new_not_found(self, prefix_len as u8, address)) @@ -581,21 +596,26 @@ impl<'de, S: AsRef<[u8]>> Reader { } } - fn find_ipv4_start(&self) -> Result { + /// Find the IPv4 start node and the bit depth at which it was found. + /// Returns (node, depth) where depth is how far into the tree we traversed. 
+ fn find_ipv4_start(&self) -> Result<(usize, usize), MaxMindDbError> { if self.metadata.ip_version != 6 { - return Ok(0); + return Ok((0, 0)); } // We are looking up an IPv4 address in an IPv6 tree. Skip over the // first 96 nodes. let mut node: usize = 0_usize; - for _ in 0_u8..96 { + let mut depth: usize = 0; + for i in 0_u8..96 { if node >= self.metadata.node_count as usize { + depth = i as usize; break; } node = self.read_node(node, 0)?; + depth = (i + 1) as usize; } - Ok(node) + Ok((node, depth)) } #[inline(always)] @@ -719,26 +739,134 @@ mod tests { } #[test] - fn test_lookup_not_found_for_unknown_address() { + fn test_lookup_network() { use super::Reader; + use std::collections::HashMap; use std::net::IpAddr; - let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = "10.0.0.1".parse().unwrap(); + struct TestCase { + ip: &'static str, + db_file: &'static str, + expected_network: &'static str, + expected_found: bool, + } - let result = reader.lookup(ip).unwrap(); - assert!( - !result.found(), - "lookup should return found=false for unknown IP" - ); + let test_cases = [ + // IPv4 address in IPv6 database - not found, returns containing network + TestCase { + ip: "1.1.1.1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-32.mmdb", + expected_network: "1.0.0.0/8", + expected_found: false, + }, + // IPv6 exact match + TestCase { + ip: "::1:ffff:ffff", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", + expected_network: "::1:ffff:ffff/128", + expected_found: true, + }, + // IPv6 network match (not exact) + TestCase { + ip: "::2:0:1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", + expected_network: "::2:0:0/122", + expected_found: true, + }, + // IPv4 exact match + TestCase { + ip: "1.1.1.1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", + expected_network: "1.1.1.1/32", + expected_found: true, + }, + // IPv4 network match (not exact) + TestCase { + ip: 
"1.1.1.3", + db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", + expected_network: "1.1.1.2/31", + expected_found: true, + }, + // IPv4 in decoder test database + TestCase { + ip: "1.1.1.3", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "1.1.1.0/24", + expected_found: true, + }, + // IPv4-mapped IPv6 address - preserves IPv6 form + TestCase { + ip: "::ffff:1.1.1.128", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "::ffff:1.1.1.0/120", + expected_found: true, + }, + // IPv4-compatible IPv6 address - uses compressed IPv6 notation + TestCase { + ip: "::1.1.1.128", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "::101:100/120", + expected_found: true, + }, + // No IPv4 search tree - IPv4 address returns ::/64 + TestCase { + ip: "200.0.2.1", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - IPv6 address in IPv4 range + TestCase { + ip: "::200.0.2.1", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - high IPv6 address not found + TestCase { + ip: "ef00::", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "8000::/1", + expected_found: false, + }, + ]; + + // Cache readers to avoid reopening the same file multiple times + let mut readers: HashMap<&str, Reader>> = HashMap::new(); + + for test in &test_cases { + let reader = readers + .entry(test.db_file) + .or_insert_with(|| Reader::open_readfile(test.db_file).unwrap()); + + let ip: IpAddr = test.ip.parse().unwrap(); + let result = reader.lookup(ip).unwrap(); - // Network should still be available - let network = result.network().unwrap(); - assert_eq!(network.prefix(), 8, "Expected prefix length 8"); + assert_eq!( + result.found(), + test.expected_found, + 
"IP {} in {}: expected found={}, got found={}", + test.ip, + test.db_file, + test.expected_found, + result.found() + ); + + let network = result.network().unwrap(); + assert_eq!( + network.to_string(), + test.expected_network, + "IP {} in {}: expected network {}, got {}", + test.ip, + test.db_file, + test.expected_network, + network + ); + } } #[test] - fn test_lookup_found_for_known_address() { + fn test_lookup_with_geoip_data() { use super::Reader; use crate::geoip2; use std::net::IpAddr; @@ -747,20 +875,21 @@ mod tests { let ip: IpAddr = "89.160.20.128".parse().unwrap(); let result = reader.lookup(ip).unwrap(); - assert!( - result.found(), - "lookup should return found=true for known IP" - ); + assert!(result.found(), "lookup should find known IP"); // Decode the data let city: geoip2::City = result.decode().unwrap(); assert!(city.city.is_some(), "Expected city data"); - // Check network + // Check full network (not just prefix) let network = result.network().unwrap(); - assert_eq!(network.prefix(), 25, "Expected prefix length 25"); + assert_eq!( + network.to_string(), + "89.160.20.128/25", + "Expected network 89.160.20.128/25" + ); - // Check offset is available + // Check offset is available for caching assert!( result.offset().is_some(), "Expected offset to be Some for found IP" diff --git a/src/maxminddb/result.rs b/src/maxminddb/result.rs index 2fdaf4b5..4cfbddc8 100644 --- a/src/maxminddb/result.rs +++ b/src/maxminddb/result.rs @@ -97,20 +97,38 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// /// This is the most specific network in the database that contains /// the IP, regardless of whether data was found. 
+ /// + /// The returned network preserves the IP version of the original lookup: + /// - IPv4 lookups return IPv4 networks (unless prefix < 96, see below) + /// - IPv6 lookups return IPv6 networks (including IPv4-mapped addresses) + /// + /// Special case: If an IPv4 address is looked up in an IPv6 database but + /// the matching record is at a prefix length < 96 (e.g., a database with + /// no IPv4 subtree), an IPv6 network is returned since there's no valid + /// IPv4 representation. pub fn network(&self) -> Result { let (ip, prefix) = match self.ip { - IpAddr::V4(v4) => (IpAddr::V4(v4), self.prefix_len), - IpAddr::V6(v6) => { - let v6_int: u128 = v6.into(); - if v6_int <= 0xFFFFFFFF { - // IPv4-mapped in IPv6, convert back - use std::net::Ipv4Addr; - let v4 = Ipv4Addr::from(v6_int as u32); - (IpAddr::V4(v4), self.prefix_len.saturating_sub(96)) + IpAddr::V4(v4) => { + // For IPv4 lookups in IPv6 databases, prefix_len includes the + // 96-bit offset. Subtract it to get the IPv4 prefix. + // For IPv4 databases, prefix_len is already 0-32. + if self.prefix_len >= 96 { + // IPv6 database: subtract 96 to get IPv4 prefix + (IpAddr::V4(v4), self.prefix_len - 96) + } else if self.prefix_len > 32 { + // IPv6 database with record at prefix < 96 (e.g., ::/64). + // Return IPv6 network since there's no valid IPv4 representation. 
+ use std::net::Ipv6Addr; + (IpAddr::V6(Ipv6Addr::UNSPECIFIED), self.prefix_len) } else { - (IpAddr::V6(v6), self.prefix_len) + // IPv4 database: use prefix directly + (IpAddr::V4(v4), self.prefix_len) } } + IpAddr::V6(v6) => { + // For IPv6 lookups, preserve the IPv6 form (including IPv4-mapped) + (IpAddr::V6(v6), self.prefix_len) + } }; // Mask the IP to the network address From 8adb5492e0ca87d2b96e9ef31b169eb58730f353 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 26 Nov 2025 14:15:47 -0800 Subject: [PATCH 05/37] Add comprehensive test coverage matching Go reader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add IPv6 in IPv4-only database validation (returns error like Go) - Add missing network test case for 0:0:0:0:ffff:ffff:ffff:ffff - Add comprehensive decode_path tests (array indexing, negative index) - Test nested path navigation (map.mapX.arrayX[1]) - Test non-existent key handling 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/lib.rs | 112 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 100 insertions(+), 12 deletions(-) diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index b35e36aa..2722794c 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -440,6 +440,13 @@ impl<'de, S: AsRef<[u8]>> Reader { /// # } /// ``` pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> { + // Check for IPv6 address in IPv4-only database + if matches!(address, IpAddr::V6(_)) && self.metadata.ip_version == 4 { + return Err(MaxMindDbError::InvalidDatabase( + "you attempted to look up an IPv6 address in an IPv4-only database".to_string(), + )); + } + let ip_int = IpInt::new(address); let (pointer, prefix_len) = self.find_address_in_tree(&ip_int)?; @@ -822,6 +829,13 @@ mod tests { expected_network: "::/64", expected_found: true, }, + // No IPv4 search tree - IPv6 address at boundary of IPv4 space + TestCase { + ip: 
"0:0:0:0:ffff:ffff:ffff:ffff", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, // No IPv4 search tree - high IPv6 address not found TestCase { ip: "ef00::", @@ -920,24 +934,98 @@ mod tests { } #[test] - fn test_decoder_api() { - use super::{Kind, Reader}; + fn test_ipv6_in_ipv4_database() { + use super::{MaxMindDbError, Reader}; use std::net::IpAddr; - let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = "89.160.20.128".parse().unwrap(); + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb").unwrap(); + let ip: IpAddr = "2001::".parse().unwrap(); + + let result = reader.lookup(ip); + match result { + Err(MaxMindDbError::InvalidDatabase(msg)) => { + assert!( + msg.contains("IPv6") && msg.contains("IPv4"), + "Expected error message about IPv6 in IPv4 database, got: {}", + msg + ); + } + Err(e) => panic!( + "Expected InvalidDatabase error for IPv6 in IPv4 database, got: {:?}", + e + ), + Ok(_) => panic!("Expected error for IPv6 lookup in IPv4-only database"), + } + } + + #[test] + fn test_decode_path_comprehensive() { + use super::{PathElement, Reader}; + use std::net::IpAddr; + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); + let ip: IpAddr = "::1.1.1.0".parse().unwrap(); let result = reader.lookup(ip).unwrap(); - let mut decoder = result.decoder().unwrap(); + assert!(result.found()); - // The root should be a map - assert_eq!(decoder.peek_kind().unwrap(), Kind::Map); + // Test simple path: uint16 + let u16_val: Option = result.decode_path(&[PathElement::Key("uint16")]).unwrap(); + assert_eq!(u16_val, Some(100)); - let mut map = decoder.read_map().unwrap(); - assert!(map.len() > 0, "Expected non-empty map"); + // Test array access: first element + let arr_first: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(0)]) 
+ .unwrap(); + assert_eq!(arr_first, Some(1)); - // Read first key - let key = map.next_key().unwrap(); - assert!(key.is_some(), "Expected at least one key"); + // Test array access: last element (index 2) + let arr_last: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(2)]) + .unwrap(); + assert_eq!(arr_last, Some(3)); + + // Test array access: out of bounds (index 3) returns None + let arr_oob: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(3)]) + .unwrap(); + assert!(arr_oob.is_none()); + + // Test negative index: -1 means last element + let arr_neg1: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(-1)]) + .unwrap(); + assert_eq!(arr_neg1, Some(3)); + + // Test negative index: -3 means first element + let arr_neg3: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(-3)]) + .unwrap(); + assert_eq!(arr_neg3, Some(1)); + + // Test nested path: map.mapX.arrayX[1] + let nested: Option = result + .decode_path(&[ + PathElement::Key("map"), + PathElement::Key("mapX"), + PathElement::Key("arrayX"), + PathElement::Index(1), + ]) + .unwrap(); + assert_eq!(nested, Some(8)); + + // Test non-existent key returns None + let missing: Option = result + .decode_path(&[PathElement::Key("does-not-exist"), PathElement::Index(1)]) + .unwrap(); + assert!(missing.is_none()); + + // Test utf8_string path + let utf8: Option = result + .decode_path(&[PathElement::Key("utf8_string")]) + .unwrap(); + assert_eq!(utf8, Some("unicode! 
☯ - ♫".to_owned())); } } From 2bbe79da0dd210514683bc54ef54e05e14ba5369 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 26 Nov 2025 16:36:49 -0800 Subject: [PATCH 06/37] Add iteration options matching Go's Networks/NetworksWithin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add WithinOptions struct with builder methods to control network iteration behavior: - include_aliased_networks(): Include IPv4 via IPv6 aliases - include_networks_without_data(): Include networks with no data - skip_empty_values(): Skip empty maps/arrays Add networks() convenience method for iterating all networks. Update within() to take options as second parameter. Breaking change: within(cidr) -> within(cidr, options) 🤖 Generated with [Claude Code](https://claude.ai/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 15 ++ examples/within.rs | 2 +- src/maxminddb/lib.rs | 208 ++++++++++++++-- src/maxminddb/reader_test.rs | 451 ++++++++++++++++++++++++++++++++++- 4 files changed, 650 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e9a0595..e680b43f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,21 @@ - **BREAKING CHANGE:** The `Within` iterator now yields `LookupResult` instead of `WithinItem`. Access the network via `result.network()?` and decode data via `result.decode::()?`. +- **BREAKING CHANGE:** The `within()` method now takes a second `options` + parameter of type `WithinOptions`. Use `Default::default()` for the previous + behavior: + - Old: `reader.within(cidr)?` + - New: `reader.within(cidr, Default::default())?` +- Added `WithinOptions` struct to control network iteration behavior: + - `include_aliased_networks()` - Include IPv4 networks multiple times when + accessed via IPv6 aliases (e.g., `::ffff:0:0/96`, `2001::/32`, `2002::/16`) + - `include_networks_without_data()` - Include networks that have no associated + data record. `LookupResult::found()` returns `false` for these. 
+ - `skip_empty_values()` - Skip networks whose data is an empty map `{}` or + empty array `[]` +- Added `networks()` method as a convenience for iterating over all networks in + the database. Equivalent to `within("::/0", options)` for IPv6 databases or + `within("0.0.0.0/0", options)` for IPv4-only databases. - Added `LookupResult` type with methods: - `found()` - Check if IP was found in database - `network()` - Get the network containing the IP diff --git a/examples/within.rs b/examples/within.rs index 29f61327..28d0dfe7 100644 --- a/examples/within.rs +++ b/examples/within.rs @@ -17,7 +17,7 @@ fn main() -> Result<(), Box> { .map_err(|e| format!("Invalid CIDR notation '{}': {}", cidr_str, e))?; let mut n = 0; - let iter: Within<_> = reader.within(ip_net)?; + let iter: Within<_> = reader.within(ip_net, Default::default())?; for next in iter { let lookup = next?; let network = lookup.network()?; diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index 2722794c..cf520d1c 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -144,6 +144,74 @@ pub struct Metadata { pub record_size: u16, } +/// Options for network iteration. +/// +/// Controls which networks are yielded when iterating over the database +/// with [`Reader::within()`] or [`Reader::networks()`]. +/// +/// # Example +/// +/// ``` +/// use maxminddb::WithinOptions; +/// +/// // Default options (skip aliases, skip networks without data, include empty values) +/// let opts = WithinOptions::default(); +/// +/// // Include aliased networks (IPv4 networks via IPv6 aliases) +/// let opts = WithinOptions::default().include_aliased_networks(); +/// +/// // Skip empty values and include networks without data +/// let opts = WithinOptions::default() +/// .skip_empty_values() +/// .include_networks_without_data(); +/// ``` +#[derive(Debug, Clone, Copy, Default)] +pub struct WithinOptions { + /// Include IPv4 networks multiple times when accessed via IPv6 aliases. 
+ pub include_aliased_networks: bool, + /// Include networks that have no associated data record. + pub include_networks_without_data: bool, + /// Skip networks whose data is an empty map or empty array. + pub skip_empty_values: bool, +} + +impl WithinOptions { + /// Include IPv4 networks multiple times when accessed via IPv6 aliases. + /// + /// In IPv6 databases, IPv4 networks are stored at `::0/96`. However, the + /// same data is accessible through several IPv6 prefixes (e.g., + /// `::ffff:0:0/96` for IPv4-mapped IPv6). By default, these aliases are + /// skipped to avoid yielding the same network multiple times. + /// + /// When enabled, the iterator will yield these aliased networks. + #[must_use] + pub fn include_aliased_networks(mut self) -> Self { + self.include_aliased_networks = true; + self + } + + /// Include networks that have no associated data record. + /// + /// Some tree nodes point to "no data" (the node_count sentinel). By default + /// these are skipped. When enabled, these networks are yielded and + /// [`LookupResult::found()`] returns `false` for them. + #[must_use] + pub fn include_networks_without_data(mut self) -> Self { + self.include_networks_without_data = true; + self + } + + /// Skip networks whose data is an empty map or empty array. + /// + /// Some databases store empty maps `{}` or empty arrays `[]` for records + /// without meaningful data. This option filters them out. 
+ #[must_use] + pub fn skip_empty_values(mut self) -> Self { + self.skip_empty_values = true; + self + } +} + #[derive(Debug)] struct WithinNode { node: usize, @@ -161,6 +229,7 @@ pub struct Within<'de, S: AsRef<[u8]>> { reader: &'de Reader<S>, node_count: usize, stack: Vec<WithinNode>, + options: WithinOptions, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -207,8 +276,9 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { while let Some(current) = self.stack.pop() { let bit_count = current.ip_int.bit_count(); - // Skip networks that are aliases for the IPv4 network - if self.reader.ipv4_start != 0 + // Skip networks that are aliases for the IPv4 network (unless option is set) + if !self.options.include_aliased_networks + && self.reader.ipv4_start != 0 && current.node == self.reader.ipv4_start && bit_count == 128 && !current.ip_int.is_ipv4_in_ipv6() @@ -227,6 +297,15 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { Err(e) => return Some(Err(e)), }; + // Check if we should skip empty values + if self.options.skip_empty_values { + match self.is_empty_value_at(data_offset) { + Ok(true) => continue, // Skip empty value + Ok(false) => {} // Not empty, proceed + Err(e) => return Some(Err(e)), + } + } + return Some(Ok(LookupResult::new_found( self.reader, data_offset, @@ -235,7 +314,16 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { ))); } Ordering::Equal => { - // Dead end, nothing to do + // Dead end (no data) - include if option is set + if self.options.include_networks_without_data { + let ip_addr = ip_int_to_addr(&current.ip_int); + return Some(Ok(LookupResult::new_not_found( + self.reader, + current.prefix_len as u8, + ip_addr, + ))); + } + // Otherwise skip (current behavior) } Ordering::Less => { // In order traversal of our children @@ -276,6 +364,19 @@ impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { } } +impl<'de, S: AsRef<[u8]>> Within<'de, S> { + /// Check if the value at the given data offset is an empty map or array. 
+ fn is_empty_value_at(&self, data_offset: usize) -> Result { + let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; + let mut dec = decoder::Decoder::new(buf, data_offset); + let (size, type_num) = dec.peek_type()?; + match type_num { + decoder::TYPE_MAP | decoder::TYPE_ARRAY => Ok(size == 0), + _ => Ok(false), // Non-container types are never "empty" + } + } +} + /// Convert IpInt to IpAddr fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr { match ip_int { @@ -474,11 +575,53 @@ impl<'de, S: AsRef<[u8]>> Reader { } } + /// Iterate over all networks in the database. + /// + /// This is a convenience method equivalent to calling [`within()`](Self::within) + /// with `0.0.0.0/0` for IPv4-only databases or `::/0` for IPv6 databases. + /// + /// # Arguments + /// + /// * `options` - Controls which networks are yielded. Use [`Default::default()`] + /// for standard behavior. + /// + /// # Examples + /// + /// Iterate over all networks with default options: + /// ``` + /// use maxminddb::{geoip2, Reader}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// + /// let mut count = 0; + /// for result in reader.networks(Default::default()).unwrap() { + /// let lookup = result.unwrap(); + /// count += 1; + /// if count >= 10 { break; } + /// } + /// ``` + pub fn networks(&'de self, options: WithinOptions) -> Result, MaxMindDbError> { + let cidr = if self.metadata.ip_version == 6 { + IpNetwork::V6("::/0".parse().unwrap()) + } else { + IpNetwork::V4("0.0.0.0/0".parse().unwrap()) + }; + self.within(cidr, options) + } + /// Iterate over IP networks within a CIDR range. /// /// Returns an iterator that yields [`LookupResult`] for each network in the /// database that falls within the specified CIDR range. /// + /// # Arguments + /// + /// * `cidr` - The CIDR range to iterate over. + /// * `options` - Controls which networks are yielded. 
Use [`Default::default()`] + /// for standard behavior (skip aliases, skip networks without data, include + /// empty values). + /// /// # Examples /// /// Iterate over all IPv4 networks: @@ -491,7 +634,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap()); /// let mut count = 0; - /// for result in reader.within(ipv4_all).unwrap() { + /// for result in reader.within(ipv4_all, Default::default()).unwrap() { /// let lookup = result.unwrap(); /// let network = lookup.network().unwrap(); /// let city: geoip2::City = lookup.decode().unwrap(); @@ -513,7 +656,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); /// /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap()); - /// for result in reader.within(subnet).unwrap() { + /// for result in reader.within(subnet, Default::default()).unwrap() { /// match result { /// Ok(lookup) => { /// let network = lookup.network().unwrap(); @@ -523,7 +666,28 @@ impl<'de, S: AsRef<[u8]>> Reader { /// } /// } /// ``` - pub fn within(&'de self, cidr: IpNetwork) -> Result, MaxMindDbError> { + /// + /// Include networks without data: + /// ``` + /// use ipnetwork::IpNetwork; + /// use maxminddb::{Reader, WithinOptions}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + /// + /// let opts = WithinOptions::default().include_networks_without_data(); + /// for result in reader.within("1.0.0.0/8".parse().unwrap(), opts).unwrap() { + /// let lookup = result.unwrap(); + /// if !lookup.found() { + /// println!("Network {} has no data", lookup.network().unwrap()); + /// } + /// } + /// ``` + pub fn within( + &'de self, + cidr: IpNetwork, + options: WithinOptions, + ) -> Result, MaxMindDbError> { let ip_address = cidr.network(); let prefix_len = cidr.prefix() as usize; let ip_int = IpInt::new(ip_address); @@ -535,34 +699,34 @@ impl<'de, S: AsRef<[u8]>> Reader { let mut 
stack: Vec = Vec::with_capacity(bit_count - prefix_len); // Traverse down the tree to the level that matches the cidr mark - let mut i = 0_usize; - while i < prefix_len { + let mut depth = 0_usize; + for i in 0..prefix_len { let bit = ip_int.get_bit(i); node = self.read_node(node, bit as usize)?; + depth = i + 1; // We've now traversed i+1 bits (bits 0 through i) + if node >= node_count { - // We've hit a dead end before we exhausted our prefix + // We've hit a data node or dead end before we exhausted our prefix. + // This means the requested CIDR is contained in a single record. break; } - - i += 1; } - if node < node_count { - // Ok, now anything that's below node in the tree is "within", start with the node we - // traversed to as our to be processed stack. - stack.push(WithinNode { - node, - ip_int, - prefix_len, - }); - } - // else the stack will be empty and we'll be returning an iterator that visits nothing, - // which makes sense. + // Always push the node - it could be: + // - A data node (> node_count): will be yielded as a single record + // - The empty node (== node_count): will be skipped unless include_networks_without_data + // - An internal node (< node_count): will be traversed to find all contained records + stack.push(WithinNode { + node, + ip_int, + prefix_len: depth, + }); let within = Within { reader: self, node_count, stack, + options, }; Ok(within) diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index 1dd60d8a..a049a576 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -6,7 +6,7 @@ use serde::Deserialize; use serde_json::json; use crate::geoip2; -use crate::{MaxMindDbError, Reader, Within}; +use crate::{MaxMindDbError, Reader, Within, WithinOptions}; #[allow(clippy::float_cmp)] #[test] @@ -402,7 +402,7 @@ fn test_within_city() { // --- Test iteration over entire DB ("::/0") --- let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); - let mut iter_all: Within<_> = 
reader.within(ip_net_all).unwrap(); + let mut iter_all: Within<_> = reader.within(ip_net_all, Default::default()).unwrap(); // Get the first item let first_item_result = iter_all.next(); @@ -422,7 +422,7 @@ fn test_within_city() { // --- Test iteration over a specific smaller network --- let specific = IpNetwork::V4("81.2.69.0/24".parse().unwrap()); - let mut iter_specific: Within<_> = reader.within(specific).unwrap(); + let mut iter_specific: Within<_> = reader.within(specific, Default::default()).unwrap(); let expected = vec![ // In order of iteration: @@ -591,3 +591,448 @@ fn test_json_serialize() { assert_eq!(json_value, expected_value); assert_eq!(json_string, expected_json_str); } + +// ============================================================================ +// Iteration Options Tests +// ============================================================================ + +/// Test networks() method iterates over entire database +#[test] +fn test_networks() { + let _ = env_logger::try_init(); + + // Test with different record sizes and IP versions + for record_size in &[24_u32, 28, 32] { + for ip_version in &[4_u32, 6] { + let filename = + format!("test-data/test-data/MaxMind-DB-test-ipv{ip_version}-{record_size}.mmdb"); + let reader = Reader::open_readfile(&filename).unwrap(); + + for result in reader.networks(Default::default()).unwrap() { + let lookup = result.unwrap(); + assert!( + lookup.found(), + "networks() should only yield found records by default" + ); + + #[derive(Deserialize)] + struct IpRecord { + ip: String, + } + let record: IpRecord = lookup.decode().unwrap(); + let network = lookup.network().unwrap(); + assert_eq!( + record.ip, + network.ip().to_string(), + "record IP should match network IP" + ); + } + } + } +} + +/// Test that default options skip aliased networks +#[test] +fn test_default_skips_aliases() { + let _ = env_logger::try_init(); + + let reader = + 
Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + + // Without IncludeAliasedNetworks, iterating over ::/0 should yield IPv4 networks only once + let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); + + let expected_without_aliases = vec![ + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + "::1:ffff:ffff/128", + "::2:0:0/122", + "::2:0:40/124", + "::2:0:50/125", + "::2:0:58/127", + ]; + + let mut networks: Vec = Vec::new(); + for result in reader.within(ip_net_all, Default::default()).unwrap() { + let lookup = result.unwrap(); + networks.push(lookup.network().unwrap().to_string()); + } + + assert_eq!(networks, expected_without_aliases); +} + +/// Test IncludeAliasedNetworks option +#[test] +fn test_include_aliased_networks() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + + let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); + let opts = WithinOptions::default().include_aliased_networks(); + + // With IncludeAliasedNetworks, we should see IPv4 networks via various IPv6 prefixes + let expected_with_aliases = vec![ + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + "::1:ffff:ffff/128", + "::2:0:0/122", + "::2:0:40/124", + "::2:0:50/125", + "::2:0:58/127", + "::ffff:1.1.1.1/128", + "::ffff:1.1.1.2/127", + "::ffff:1.1.1.4/126", + "::ffff:1.1.1.8/125", + "::ffff:1.1.1.16/124", + "::ffff:1.1.1.32/128", + "2001:0:101:101::/64", + "2001:0:101:102::/63", + "2001:0:101:104::/62", + "2001:0:101:108::/61", + "2001:0:101:110::/60", + "2001:0:101:120::/64", + "2002:101:101::/48", + "2002:101:102::/47", + "2002:101:104::/46", + "2002:101:108::/45", + "2002:101:110::/44", + "2002:101:120::/48", + ]; + + let mut networks: Vec = Vec::new(); + for result in reader.within(ip_net_all, opts).unwrap() { + let lookup = result.unwrap(); + 
networks.push(lookup.network().unwrap().to_string()); + } + + assert_eq!(networks, expected_with_aliases); +} + +/// Test IncludeNetworksWithoutData option +#[test] +fn test_include_networks_without_data() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + + // Using 1.0.0.0/8 like the Go tests + let cidr: IpNetwork = "1.0.0.0/8".parse().unwrap(); + let opts = WithinOptions::default().include_networks_without_data(); + + let expected = vec![ + "1.0.0.0/16", + "1.1.0.0/24", + "1.1.1.0/32", + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + "1.1.1.33/32", + "1.1.1.34/31", + "1.1.1.36/30", + "1.1.1.40/29", + "1.1.1.48/28", + "1.1.1.64/26", + "1.1.1.128/25", + "1.1.2.0/23", + "1.1.4.0/22", + "1.1.8.0/21", + "1.1.16.0/20", + "1.1.32.0/19", + "1.1.64.0/18", + "1.1.128.0/17", + "1.2.0.0/15", + "1.4.0.0/14", + "1.8.0.0/13", + "1.16.0.0/12", + "1.32.0.0/11", + "1.64.0.0/10", + "1.128.0.0/9", + ]; + + let mut networks: Vec = Vec::new(); + let mut found_count = 0; + let mut not_found_count = 0; + + for result in reader.within(cidr, opts).unwrap() { + let lookup = result.unwrap(); + networks.push(lookup.network().unwrap().to_string()); + if lookup.found() { + found_count += 1; + } else { + not_found_count += 1; + } + } + + assert_eq!(networks, expected); + assert!( + not_found_count > 0, + "Should have some networks without data" + ); + assert!(found_count > 0, "Should have some networks with data"); +} + +/// Test SkipEmptyValues option +#[test] +fn test_skip_empty_values() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb").unwrap(); + + // Count networks without SkipEmptyValues + let mut count_without_skip = 0; + let mut empty_count = 0; + + for result in reader.networks(Default::default()).unwrap() { + let lookup = result.unwrap(); + count_without_skip += 1; 
+ + if lookup.found() { + let data: std::collections::BTreeMap = + lookup.decode().unwrap(); + if data.is_empty() { + empty_count += 1; + } + } + } + + // Count networks with SkipEmptyValues + let mut count_with_skip = 0; + let opts = WithinOptions::default().skip_empty_values(); + + for result in reader.networks(opts).unwrap() { + let lookup = result.unwrap(); + count_with_skip += 1; + + if lookup.found() { + let data: std::collections::BTreeMap = + lookup.decode().unwrap(); + assert!( + !data.is_empty(), + "Should not see empty maps with skip_empty_values" + ); + } + } + + // Verify the option works + assert!( + empty_count > 0, + "Test database should have empty values, found {} empty out of {}", + empty_count, + count_without_skip + ); + assert_eq!( + count_without_skip - empty_count, + count_with_skip, + "SkipEmptyValues should skip exactly the empty values" + ); +} + +/// Test SkipEmptyValues with other options combined +#[test] +fn test_skip_empty_values_with_other_options() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb").unwrap(); + + // Test with IncludeNetworksWithoutData - should still skip empty maps + let opts = WithinOptions::default() + .include_networks_without_data() + .skip_empty_values(); + + let mut count = 0; + for result in reader.networks(opts).unwrap() { + let lookup = result.unwrap(); + count += 1; + + if lookup.found() { + let data: std::collections::BTreeMap = + lookup.decode().unwrap(); + assert!( + !data.is_empty(), + "Should not see empty maps even with other options" + ); + } + } + + assert!(count > 0, "Should have some networks"); +} + +/// Test various NetworksWithin scenarios matching Go tests +#[test] +fn test_networks_within_scenarios() { + let _ = env_logger::try_init(); + + struct TestCase { + network: &'static str, + database: &'static str, + expected: Vec<&'static str>, + } + + let test_cases = vec![ + TestCase { + network: "0.0.0.0/0", + 
database: "ipv4", + expected: vec![ + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + ], + }, + TestCase { + network: "1.1.1.1/30", + database: "ipv4", + expected: vec!["1.1.1.1/32", "1.1.1.2/31"], + }, + TestCase { + network: "1.1.1.2/31", + database: "ipv4", + expected: vec!["1.1.1.2/31"], + }, + TestCase { + network: "1.1.1.1/32", + database: "ipv4", + expected: vec!["1.1.1.1/32"], + }, + TestCase { + network: "1.1.1.2/32", + database: "ipv4", + expected: vec!["1.1.1.2/31"], + }, + TestCase { + network: "1.1.1.3/32", + database: "ipv4", + expected: vec!["1.1.1.2/31"], + }, + TestCase { + network: "1.1.1.19/32", + database: "ipv4", + expected: vec!["1.1.1.16/28"], + }, + TestCase { + network: "255.255.255.0/24", + database: "ipv4", + expected: vec![], + }, + TestCase { + network: "1.1.1.1/32", + database: "mixed", + expected: vec!["1.1.1.1/32"], + }, + TestCase { + network: "255.255.255.0/24", + database: "mixed", + expected: vec![], + }, + TestCase { + network: "::1:ffff:ffff/128", + database: "ipv6", + expected: vec!["::1:ffff:ffff/128"], + }, + TestCase { + network: "::/0", + database: "ipv6", + expected: vec![ + "::1:ffff:ffff/128", + "::2:0:0/122", + "::2:0:40/124", + "::2:0:50/125", + "::2:0:58/127", + ], + }, + TestCase { + network: "::2:0:40/123", + database: "ipv6", + expected: vec!["::2:0:40/124", "::2:0:50/125", "::2:0:58/127"], + }, + TestCase { + network: "0:0:0:0:0:ffff:ffff:ff00/120", + database: "ipv6", + expected: vec![], + }, + TestCase { + network: "0.0.0.0/0", + database: "mixed", + expected: vec![ + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + ], + }, + TestCase { + network: "1.1.1.16/28", + database: "mixed", + expected: vec!["1.1.1.16/28"], + }, + TestCase { + network: "1.1.1.4/30", + database: "ipv4", + expected: vec!["1.1.1.4/30"], + }, + ]; + + for record_size in &[24_u32, 28, 32] { + for test in &test_cases { + let filename = format!( 
+ "test-data/test-data/MaxMind-DB-test-{}-{}.mmdb", + test.database, record_size + ); + let reader = Reader::open_readfile(&filename).unwrap(); + + let cidr: IpNetwork = test.network.parse().unwrap(); + let mut networks: Vec = Vec::new(); + + for result in reader.within(cidr, Default::default()).unwrap() { + let lookup = result.unwrap(); + networks.push(lookup.network().unwrap().to_string()); + } + + let expected: Vec = test.expected.iter().map(|s| s.to_string()).collect(); + assert_eq!( + networks, expected, + "Mismatch for {} in {}-{}: expected {:?}, got {:?}", + test.network, test.database, record_size, expected, networks + ); + } + } +} + +/// Test GeoIP database-specific NetworksWithin +#[test] +fn test_geoip_networks_within() { + let _ = env_logger::try_init(); + + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-Country-Test.mmdb").unwrap(); + + let cidr: IpNetwork = "81.2.69.128/26".parse().unwrap(); + let expected = vec!["81.2.69.142/31", "81.2.69.144/28", "81.2.69.160/27"]; + + let mut networks: Vec = Vec::new(); + for result in reader.within(cidr, Default::default()).unwrap() { + let lookup = result.unwrap(); + networks.push(lookup.network().unwrap().to_string()); + } + + assert_eq!(networks, expected); +} From 90488874cd09b259cae9fafc24f2f24c8eb04556 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Wed, 26 Nov 2025 17:03:14 -0800 Subject: [PATCH 07/37] Add verify() method for database integrity validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive database verification matching Go's Verify() method: - Metadata validation (format version, required fields, value constraints) - Search tree traversal with cycle detection - Data section separator verification (16 zero bytes) - Data record validation at each pointed-to offset - Type-specific size validation for floats, integers, and booleans 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude 
--- CHANGELOG.md | 5 +- src/maxminddb/lib.rs | 165 ++++++++++++++++++++++++++++++++++- src/maxminddb/reader_test.rs | 85 ++++++++++++++++++ 3 files changed, 252 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e680b43f..cfcd18ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,8 +38,9 @@ - `PathElement::Key("name")` - Navigate into map by key - `PathElement::Index(0)` - Navigate into array by index - `PathElement::Index(-1)` - Python-style negative indexing -- Added low-level `Decoder` API (`Kind`, `MapReader`, `ArrayReader`) for - FFI bindings and custom deserialization without serde overhead. +- Added `verify()` method for comprehensive database validation. Validates + metadata, search tree structure, data section separator, and data records. + Useful for validating database files after download or generation. ## 0.26.0 - 2025-03-28 diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index cf520d1c..76f5c813 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -77,7 +77,7 @@ //! ``` use std::cmp::Ordering; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashSet}; use std::fmt::Display; use std::fs; use std::io; @@ -98,6 +98,9 @@ use std::fs::File; #[cfg(all(feature = "simdutf8", feature = "unsafe-str-decode"))] compile_error!("features `simdutf8` and `unsafe-str-decode` are mutually exclusive"); +/// Size of the data section separator (16 zero bytes). +const DATA_SECTION_SEPARATOR_SIZE: usize = 16; + #[derive(Error, Debug)] pub enum MaxMindDbError { #[error("Invalid database: {0}")] @@ -844,6 +847,166 @@ impl<'de, S: AsRef<[u8]>> Reader { Ok(resolved) } + + /// Performs comprehensive validation of the MaxMind DB file. 
+ /// + /// This method validates: + /// - Metadata section: format versions, required fields, and value constraints + /// - Search tree: traverses all networks to verify tree structure integrity + /// - Data section separator: validates the 16-byte separator between tree and data + /// - Data section: verifies all data records referenced by the search tree + /// + /// The verifier is stricter than the MaxMind DB specification and may return + /// errors on some databases that are still readable by normal operations. + /// This method is useful for: + /// - Validating database files after download or generation + /// - Debugging database corruption issues + /// - Ensuring database integrity in critical applications + /// + /// Note: Verification traverses the entire database and may be slow on large files. + /// The method is thread-safe and can be called on an active Reader. + /// + /// # Example + /// + /// ``` + /// use maxminddb::Reader; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// reader.verify().expect("Database should be valid"); + /// ``` + pub fn verify(&self) -> Result<(), MaxMindDbError> { + self.verify_metadata()?; + self.verify_database() + } + + fn verify_metadata(&self) -> Result<(), MaxMindDbError> { + let m = &self.metadata; + + if m.binary_format_major_version != 2 { + return Err(MaxMindDbError::InvalidDatabase(format!( + "binary_format_major_version - Expected: 2 Actual: {}", + m.binary_format_major_version + ))); + } + if m.binary_format_minor_version != 0 { + return Err(MaxMindDbError::InvalidDatabase(format!( + "binary_format_minor_version - Expected: 0 Actual: {}", + m.binary_format_minor_version + ))); + } + if m.database_type.is_empty() { + return Err(MaxMindDbError::InvalidDatabase( + "database_type - Expected: non-empty string Actual: \"\"".to_owned(), + )); + } + if m.description.is_empty() { + return Err(MaxMindDbError::InvalidDatabase( + "description - Expected: non-empty 
map Actual: {}".to_owned(), + )); + } + if m.ip_version != 4 && m.ip_version != 6 { + return Err(MaxMindDbError::InvalidDatabase(format!( + "ip_version - Expected: 4 or 6 Actual: {}", + m.ip_version + ))); + } + if m.record_size != 24 && m.record_size != 28 && m.record_size != 32 { + return Err(MaxMindDbError::InvalidDatabase(format!( + "record_size - Expected: 24, 28, or 32 Actual: {}", + m.record_size + ))); + } + if m.node_count == 0 { + return Err(MaxMindDbError::InvalidDatabase( + "node_count - Expected: positive integer Actual: 0".to_owned(), + )); + } + Ok(()) + } + + fn verify_database(&self) -> Result<(), MaxMindDbError> { + let offsets = self.verify_search_tree()?; + self.verify_data_section_separator()?; + self.verify_data_section(offsets) + } + + fn verify_search_tree(&self) -> Result, MaxMindDbError> { + let mut offsets = HashSet::new(); + let opts = WithinOptions::default().include_networks_without_data(); + + // Maximum number of networks we can expect in a valid database. + // A database with N nodes can have at most 2N data entries (each leaf node + // can have data). We add some margin for safety. + let max_iterations = (self.metadata.node_count as usize).saturating_mul(3); + let mut iteration_count = 0usize; + + for result in self.networks(opts)? 
{ + let lookup = result?; + if let Some(offset) = lookup.offset() { + offsets.insert(offset); + } + + iteration_count += 1; + if iteration_count > max_iterations { + return Err(MaxMindDbError::InvalidDatabase(format!( + "search tree appears to have a cycle or invalid structure (exceeded {} iterations)", + max_iterations + ))); + } + } + Ok(offsets) + } + + fn verify_data_section_separator(&self) -> Result<(), MaxMindDbError> { + let separator_start = + self.metadata.node_count as usize * self.metadata.record_size as usize / 4; + let separator_end = separator_start + DATA_SECTION_SEPARATOR_SIZE; + + if separator_end > self.buf.as_ref().len() { + return Err(MaxMindDbError::InvalidDatabase( + "data section separator extends past end of file".to_owned(), + )); + } + + let separator = &self.buf.as_ref()[separator_start..separator_end]; + + for &b in separator { + if b != 0 { + return Err(MaxMindDbError::InvalidDatabase(format!( + "unexpected byte in data separator: {:?}", + separator + ))); + } + } + Ok(()) + } + + fn verify_data_section(&self, offsets: HashSet) -> Result<(), MaxMindDbError> { + let data_section = &self.buf.as_ref()[self.pointer_base..]; + + // Verify each offset from the search tree points to valid, decodable data + for &offset in &offsets { + if offset >= data_section.len() { + return Err(MaxMindDbError::InvalidDatabase(format!( + "search tree pointer {} is beyond data section (len: {})", + offset, + data_section.len() + ))); + } + + let mut dec = decoder::Decoder::new(data_section, offset); + + // Try to skip/decode the value to verify it's valid + if let Err(e) = dec.skip_value_for_verification() { + return Err(MaxMindDbError::InvalidDatabase(format!( + "received decoding error ({}) at offset {}", + e, offset + ))); + } + } + + Ok(()) + } } fn find_metadata_start(buf: &[u8]) -> Result { diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index a049a576..964480d4 100644 --- a/src/maxminddb/reader_test.rs +++ 
b/src/maxminddb/reader_test.rs @@ -1036,3 +1036,88 @@ fn test_geoip_networks_within() { assert_eq!(networks, expected); } + +/// Test that verify() succeeds on valid databases (matching Go's TestVerifyOnGoodDatabases) +#[test] +fn test_verify_good_databases() { + let _ = env_logger::try_init(); + + let databases = [ + "GeoIP2-Anonymous-IP-Test.mmdb", + "GeoIP2-City-Test.mmdb", + "GeoIP2-Connection-Type-Test.mmdb", + "GeoIP2-Country-Test.mmdb", + "GeoIP2-Domain-Test.mmdb", + "GeoIP2-ISP-Test.mmdb", + "GeoIP2-Precision-Enterprise-Test.mmdb", + "MaxMind-DB-no-ipv4-search-tree.mmdb", + "MaxMind-DB-string-value-entries.mmdb", + "MaxMind-DB-test-decoder.mmdb", + "MaxMind-DB-test-ipv4-24.mmdb", + "MaxMind-DB-test-ipv4-28.mmdb", + "MaxMind-DB-test-ipv4-32.mmdb", + "MaxMind-DB-test-ipv6-24.mmdb", + "MaxMind-DB-test-ipv6-28.mmdb", + "MaxMind-DB-test-ipv6-32.mmdb", + "MaxMind-DB-test-mixed-24.mmdb", + "MaxMind-DB-test-mixed-28.mmdb", + "MaxMind-DB-test-mixed-32.mmdb", + "MaxMind-DB-test-nested.mmdb", + ]; + + for database in &databases { + let path = format!("test-data/test-data/{}", database); + let reader = Reader::open_readfile(&path) + .unwrap_or_else(|e| panic!("Failed to open {}: {}", database, e)); + + reader + .verify() + .unwrap_or_else(|e| panic!("verify() failed for {}: {}", database, e)); + } +} + +/// Test that verify() returns errors on broken databases (matching Go's TestVerifyOnBrokenDatabases) +#[test] +fn test_verify_broken_double_format() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb") + .unwrap(); + + let result = reader.verify(); + assert!( + result.is_err(), + "Expected verify() to return error for Broken-Double-Format, but it succeeded" + ); +} + +#[test] +fn test_verify_broken_pointers() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-broken-pointers-24.mmdb") + .unwrap(); + + let result = 
reader.verify(); + assert!( + result.is_err(), + "Expected verify() to return error for broken-pointers, but it succeeded" + ); +} + +#[test] +fn test_verify_broken_search_tree() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-broken-search-tree-24.mmdb") + .unwrap(); + + let result = reader.verify(); + assert!( + result.is_err(), + "Expected verify() to return error for broken-search-tree, but it succeeded" + ); +} From 121a1cf1b551bb45454311f3736d9c8543b1f752 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 10:23:26 -0800 Subject: [PATCH 08/37] Add serde size hints for efficient collection pre-allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement size_hint() for SeqAccess and MapAccess traits in the decoder. This allows Vec, HashMap, and other collections to pre-allocate the correct capacity when deserializing, avoiding reallocations. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/decoder.rs | 8 ++++ src/maxminddb/reader_test.rs | 89 ++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index 25f0d461..b1869c34 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -546,6 +546,10 @@ struct ArrayAccess<'a, 'de: 'a> { impl<'de> SeqAccess<'de> for ArrayAccess<'_, 'de> { type Error = MaxMindDbError; + fn size_hint(&self) -> Option { + Some(self.count) + } + fn next_element_seed(&mut self, seed: T) -> DecodeResult> where T: DeserializeSeed<'de>, @@ -571,6 +575,10 @@ struct MapAccessor<'a, 'de: 'a> { impl<'de> MapAccess<'de> for MapAccessor<'_, 'de> { type Error = MaxMindDbError; + fn size_hint(&self) -> Option { + Some(self.count / 2) + } + fn next_key_seed(&mut self, seed: K) -> DecodeResult> where K: DeserializeSeed<'de>, diff --git a/src/maxminddb/reader_test.rs 
b/src/maxminddb/reader_test.rs index 964480d4..b1bf43a3 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -1121,3 +1121,92 @@ fn test_verify_broken_search_tree() { "Expected verify() to return error for broken-search-tree, but it succeeded" ); } + +/// Test that size hints are properly returned for sequences and maps +#[test] +fn test_size_hints() { + use serde::de::{Deserializer, MapAccess, SeqAccess, Visitor}; + use std::fmt; + + let _ = env_logger::try_init(); + + // Wrapper that captures size_hint for sequences + struct SeqSizeHint { + hint: Option, + values: Vec, + } + + impl<'de> Deserialize<'de> for SeqSizeHint { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct V; + impl<'de> Visitor<'de> for V { + type Value = SeqSizeHint; + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("sequence") + } + fn visit_seq>(self, mut seq: A) -> Result { + let hint = seq.size_hint(); + let mut values = Vec::new(); + while let Some(v) = seq.next_element()? 
{ + values.push(v); + } + Ok(SeqSizeHint { hint, values }) + } + } + deserializer.deserialize_seq(V) + } + } + + // Wrapper that captures size_hint for maps + struct MapSizeHint { + hint: Option, + len: usize, + } + + impl<'de> Deserialize<'de> for MapSizeHint { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct V; + impl<'de> Visitor<'de> for V { + type Value = MapSizeHint; + fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("map") + } + fn visit_map>(self, mut map: A) -> Result { + let hint = map.size_hint(); + let mut len = 0; + while map.next_entry::()?.is_some() { + len += 1; + } + Ok(MapSizeHint { hint, len }) + } + } + deserializer.deserialize_map(V) + } + } + + #[derive(Deserialize)] + struct TestType { + array: SeqSizeHint, + map: MapSizeHint, + } + + let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); + let ip: IpAddr = FromStr::from_str("1.1.1.0").unwrap(); + let lookup = r.lookup(ip).unwrap(); + assert!(lookup.found()); + let result: TestType = lookup.decode().unwrap(); + + // Verify array size hint matches actual length + assert_eq!(result.array.hint, Some(3)); + assert_eq!(result.array.values, vec![1, 2, 3]); + + // Verify map size hint matches actual entry count + assert_eq!(result.map.hint, Some(result.map.len)); + assert!(result.map.len > 0, "Map should have entries"); +} From 7c5274800b50e1e045fc32858b4560838839e758 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 15:57:17 -0800 Subject: [PATCH 09/37] Refactor: Use type constants throughout decoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace magic numbers with named constants for all MaxMind DB type codes. This improves readability and maintainability. 
Constants added: - TYPE_EXTENDED (0), TYPE_POINTER (1), TYPE_STRING (2) - TYPE_DOUBLE (3), TYPE_BYTES (4), TYPE_UINT16 (5) - TYPE_UINT32 (6), TYPE_MAP (7), TYPE_INT32 (8) - TYPE_UINT64 (9), TYPE_UINT128 (10), TYPE_ARRAY (11) - TYPE_BOOL (14), TYPE_FLOAT (15) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/decoder.rs | 70 +++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index b1869c34..c4dada12 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -5,10 +5,21 @@ use std::convert::TryInto; use super::MaxMindDbError; -// MaxMind DB type constants (only those actually used) +// MaxMind DB type constants +const TYPE_EXTENDED: u8 = 0; pub(crate) const TYPE_POINTER: u8 = 1; +const TYPE_STRING: u8 = 2; +const TYPE_DOUBLE: u8 = 3; +const TYPE_BYTES: u8 = 4; +const TYPE_UINT16: u8 = 5; +const TYPE_UINT32: u8 = 6; pub(crate) const TYPE_MAP: u8 = 7; +const TYPE_INT32: u8 = 8; +const TYPE_UINT64: u8 = 9; +const TYPE_UINT128: u8 = 10; pub(crate) const TYPE_ARRAY: u8 = 11; +const TYPE_BOOL: u8 = 14; +const TYPE_FLOAT: u8 = 15; fn to_usize(base: u8, bytes: &[u8]) -> usize { bytes @@ -56,8 +67,8 @@ impl<'de> Decoder<'de> { #[inline(always)] fn size_from_ctrl_byte(&mut self, ctrl_byte: u8, type_num: u8) -> usize { let size = (ctrl_byte & 0x1f) as usize; - // extended - if type_num == 0 { + // Extended type - size field is used differently + if type_num == TYPE_EXTENDED { return size; } @@ -79,9 +90,9 @@ impl<'de> Decoder<'de> { fn size_and_type(&mut self) -> (usize, u8) { let ctrl_byte = self.eat_byte(); let mut type_num = ctrl_byte >> 5; - // Extended type - if type_num == 0 { - type_num = self.eat_byte() + 7; + // Extended type: type 0 means read next byte for actual type + if type_num == TYPE_EXTENDED { + type_num = self.eat_byte() + TYPE_MAP; // Extended types start at 7 } let size = 
self.size_from_ctrl_byte(ctrl_byte, type_num); (size, type_num) @@ -114,25 +125,25 @@ impl<'de> Decoder<'de> { let (size, type_num) = self.size_and_type(); Ok(match type_num { - 1 => { + TYPE_POINTER => { let new_ptr = self.decode_pointer(size); let prev_ptr = self.current_ptr; self.current_ptr = new_ptr; Value::Any { prev_ptr } } - 2 => Value::String(self.decode_string(size)?), - 3 => Value::F64(self.decode_double(size)?), - 4 => Value::Bytes(self.decode_bytes(size)?), - 5 => Value::U16(self.decode_uint16(size)?), - 6 => Value::U32(self.decode_uint32(size)?), - 7 => self.decode_map(size), - 8 => Value::I32(self.decode_int(size)?), - 9 => Value::U64(self.decode_uint64(size)?), - 10 => Value::U128(self.decode_uint128(size)?), - 11 => self.decode_array(size), - 14 => Value::Bool(self.decode_bool(size)?), - 15 => Value::F32(self.decode_float(size)?), + TYPE_STRING => Value::String(self.decode_string(size)?), + TYPE_DOUBLE => Value::F64(self.decode_double(size)?), + TYPE_BYTES => Value::Bytes(self.decode_bytes(size)?), + TYPE_UINT16 => Value::U16(self.decode_uint16(size)?), + TYPE_UINT32 => Value::U32(self.decode_uint32(size)?), + TYPE_MAP => self.decode_map(size), + TYPE_INT32 => Value::I32(self.decode_int(size)?), + TYPE_UINT64 => Value::U64(self.decode_uint64(size)?), + TYPE_UINT128 => Value::U128(self.decode_uint128(size)?), + TYPE_ARRAY => self.decode_array(size), + TYPE_BOOL => Value::Bool(self.decode_bool(size)?), + TYPE_FLOAT => Value::F32(self.decode_float(size)?), u => { return Err(MaxMindDbError::InvalidDatabase(format!( "Unknown data type: {u:?}" @@ -385,7 +396,7 @@ impl<'de> Decoder<'de> { /// Gets size and type, following any pointers. 
fn size_and_type_following_pointers(&mut self) -> DecodeResult<(usize, u8)> { let (size, type_num) = self.size_and_type(); - if type_num == 1 { + if type_num == TYPE_POINTER { // Pointer - follow it let new_ptr = self.decode_pointer(size); self.current_ptr = new_ptr; @@ -406,7 +417,7 @@ impl<'de> Decoder<'de> { let result = self.read_string(); self.current_ptr = saved_ptr; result - } else if type_num == 2 { + } else if type_num == TYPE_STRING { self.decode_string(size) } else { Err(MaxMindDbError::InvalidDatabase(format!( @@ -434,8 +445,7 @@ impl<'de> Decoder<'de> { follow_pointers: bool, ) -> DecodeResult<()> { match type_num { - 1 => { - // Pointer + TYPE_POINTER => { let new_ptr = self.decode_pointer(size); if follow_pointers { let saved_ptr = self.current_ptr; @@ -445,12 +455,12 @@ impl<'de> Decoder<'de> { } Ok(()) } - 2 | 4 => { + TYPE_STRING | TYPE_BYTES => { // String or Bytes - skip size bytes self.current_ptr += size; Ok(()) } - 3 => { + TYPE_DOUBLE => { // Double - must be exactly 8 bytes if size != 8 { return Err(MaxMindDbError::InvalidDatabase(format!( @@ -460,7 +470,7 @@ impl<'de> Decoder<'de> { self.current_ptr += size; Ok(()) } - 15 => { + TYPE_FLOAT => { // Float - must be exactly 4 bytes if size != 4 { return Err(MaxMindDbError::InvalidDatabase(format!( @@ -470,16 +480,16 @@ impl<'de> Decoder<'de> { self.current_ptr += size; Ok(()) } - 5 | 6 | 8 | 9 | 10 => { + TYPE_UINT16 | TYPE_UINT32 | TYPE_INT32 | TYPE_UINT64 | TYPE_UINT128 => { // Numeric types - skip size bytes self.current_ptr += size; Ok(()) } - 14 => { + TYPE_BOOL => { // Boolean - size field IS the value, no data bytes to skip Ok(()) } - 7 => { + TYPE_MAP => { // Map - skip size key-value pairs for _ in 0..size { self.skip_value_inner_with_follow(follow_pointers)?; // key @@ -487,7 +497,7 @@ impl<'de> Decoder<'de> { } Ok(()) } - 11 => { + TYPE_ARRAY => { // Array - skip size elements for _ in 0..size { self.skip_value_inner_with_follow(follow_pointers)?; From 
0a1f5639508168abb4559212dec2dce02b50833e Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 19:39:34 -0800 Subject: [PATCH 10/37] Return false for is_human_readable() since MMDB is binary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The serde Deserializer trait's is_human_readable() method defaults to true, but MMDB is a binary format. This affects how some types like Duration and IpAddr choose their serialization format. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/decoder.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index c4dada12..0f1d55e9 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -539,6 +539,10 @@ impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> { visitor.visit_some(self) } + fn is_human_readable(&self) -> bool { + false + } + forward_to_deserialize_any! { bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string bytes byte_buf unit unit_struct newtype_struct seq tuple From 3ac1328f620a279d7acc9d593a26fc20e14cbfdc Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 19:41:50 -0800 Subject: [PATCH 11/37] Implement deserialize_ignored_any for efficient value skipping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When deserializing into serde::de::IgnoredAny, we can now skip values efficiently without fully decoding them. This reuses the existing skip_value() method which advances the decoder offset based on the control byte without parsing strings, validating UTF-8, etc. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/decoder.rs | 10 +++++++++- src/maxminddb/reader_test.rs | 26 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index 0f1d55e9..05625ca5 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -543,10 +543,18 @@ impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> { false } + fn deserialize_ignored_any(self, visitor: V) -> DecodeResult + where + V: Visitor<'de>, + { + self.skip_value()?; + visitor.visit_unit() + } + forward_to_deserialize_any! { bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string bytes byte_buf unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier ignored_any + tuple_struct map struct enum identifier } } diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index b1bf43a3..3675f77b 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -1210,3 +1210,29 @@ fn test_size_hints() { assert_eq!(result.map.hint, Some(result.map.len)); assert!(result.map.len > 0, "Map should have entries"); } + +/// Test that deserialize_ignored_any efficiently skips values +#[test] +fn test_ignored_any() { + use serde::de::IgnoredAny; + + let _ = env_logger::try_init(); + + // Struct that only reads some fields, ignoring others via IgnoredAny + #[derive(Deserialize, Debug)] + struct PartialRead { + utf8_string: String, + // These fields use IgnoredAny to skip decoding + array: IgnoredAny, + map: IgnoredAny, + uint128: IgnoredAny, + } + + let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); + let ip: IpAddr = FromStr::from_str("1.1.1.0").unwrap(); + let lookup = r.lookup(ip).unwrap(); + assert!(lookup.found()); + let result: PartialRead = lookup.decode().unwrap(); + + assert_eq!(result.utf8_string, "unicode! 
☯ - ♫"); +} From c21f5b999c8a640cae3350ed44681ab9ff3dba2f Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 19:43:01 -0800 Subject: [PATCH 12/37] Implement deserialize_enum for string-to-enum deserialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users can now deserialize string values from MMDB into Rust enums using serde's rename attribute: #[derive(Deserialize)] enum ConnType { #[serde(rename = "Cable/DSL")] CableDsl, } This adds EnumAccessor implementing EnumAccess and VariantAccess traits, supporting unit, newtype, tuple, and struct variants. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/decoder.rs | 65 +++++++++++++++++++++++++++++++++++- src/maxminddb/reader_test.rs | 25 ++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index 05625ca5..16a92e08 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -551,10 +551,22 @@ impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> { visitor.visit_unit() } + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> DecodeResult + where + V: Visitor<'de>, + { + visitor.visit_enum(EnumAccessor { de: self }) + } + forward_to_deserialize_any! 
{ bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string bytes byte_buf unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier + tuple_struct map struct identifier } } @@ -629,3 +641,54 @@ impl<'de> MapAccess<'de> for MapAccessor<'_, 'de> { seed.deserialize(&mut *self.de) } } + +struct EnumAccessor<'a, 'de: 'a> { + de: &'a mut Decoder<'de>, +} + +impl<'de> de::EnumAccess<'de> for EnumAccessor<'_, 'de> { + type Error = MaxMindDbError; + type Variant = Self; + + fn variant_seed(self, seed: V) -> DecodeResult<(V::Value, Self::Variant)> + where + V: DeserializeSeed<'de>, + { + // Deserialize the variant identifier (string) + let variant = seed.deserialize(&mut *self.de)?; + Ok((variant, self)) + } +} + +impl<'de> de::VariantAccess<'de> for EnumAccessor<'_, 'de> { + type Error = MaxMindDbError; + + fn unit_variant(self) -> DecodeResult<()> { + Ok(()) + } + + fn newtype_variant_seed(self, seed: T) -> DecodeResult + where + T: DeserializeSeed<'de>, + { + seed.deserialize(&mut *self.de) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> DecodeResult + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_seq(&mut *self.de, visitor) + } + + fn struct_variant( + self, + _fields: &'static [&'static str], + visitor: V, + ) -> DecodeResult + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_map(&mut *self.de, visitor) + } +} diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index 3675f77b..1bedc440 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -1236,3 +1236,28 @@ fn test_ignored_any() { assert_eq!(result.utf8_string, "unicode! 
☯ - ♫"); } + +/// Test that string values can be deserialized into enums +#[test] +fn test_enum_deserialization() { + let _ = env_logger::try_init(); + + #[derive(Deserialize, Debug, PartialEq)] + enum ConnType { + #[serde(rename = "Cable/DSL")] + CableDsl, + } + + #[derive(Deserialize)] + struct Record { + connection_type: ConnType, + } + + let r = Reader::open_readfile("test-data/test-data/GeoIP2-Connection-Type-Test.mmdb").unwrap(); + let ip: IpAddr = FromStr::from_str("96.1.20.112").unwrap(); + let lookup = r.lookup(ip).unwrap(); + assert!(lookup.found()); + let result: Record = lookup.decode().unwrap(); + + assert_eq!(result.connection_type, ConnType::CableDsl); +} From f5a703262cabafbfa121dcb9c886e8206f56a3dd Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 19:50:42 -0800 Subject: [PATCH 13/37] Add test for serde(flatten) attribute support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Real-world GeoIP2/GeoLite2 databases don't contain u128 values, so #[serde(flatten)] with HashMap works without issues. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/reader_test.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index 1bedc440..705f78f4 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -1261,3 +1261,34 @@ fn test_enum_deserialization() { assert_eq!(result.connection_type, ConnType::CableDsl); } + +/// Test serde flatten attribute with HashMap +/// +/// Real-world GeoIP2/GeoLite2 databases don't contain u128 values, so +/// `#[serde(flatten)]` works without issues. 
+#[test] +fn test_serde_flatten() { + use serde::de::IgnoredAny; + + let _ = env_logger::try_init(); + + #[derive(Deserialize, Debug)] + struct PartialCountry { + continent: Continent, + #[serde(flatten)] + _rest: std::collections::HashMap, + } + + #[derive(Deserialize, Debug)] + struct Continent { + code: String, + } + + let r = Reader::open_readfile("test-data/test-data/GeoIP2-Country-Test.mmdb").unwrap(); + let ip: IpAddr = FromStr::from_str("81.2.69.160").unwrap(); + let lookup = r.lookup(ip).unwrap(); + assert!(lookup.found()); + + let result: PartialCountry = lookup.decode().unwrap(); + assert_eq!(result.continent.code, "EU"); +} From c392a8cb4da36c1352a95784260da38917b2ab30 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 19:51:25 -0800 Subject: [PATCH 14/37] Add changelog entry for serde deserializer improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfcd18ae..0de3e79a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,12 @@ - Added `verify()` method for comprehensive database validation. Validates metadata, search tree structure, data section separator, and data records. Useful for validating database files after download or generation. 
+- Serde deserializer improvements: + - Added size hints to `SeqAccess` and `MapAccess` for efficient collection + pre-allocation + - `is_human_readable()` now returns `false` since MMDB is a binary format + - Implemented `deserialize_ignored_any` for efficient value skipping + - Implemented `deserialize_enum` for string-to-enum deserialization ## 0.26.0 - 2025-03-28 From 1385066e6a8bb0f22990f368150005d4cb78ed52 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 20:00:47 -0800 Subject: [PATCH 15/37] Add recursion depth limit matching libmaxminddb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a maximum data structure depth of 512 when decoding, matching the limit used in libmaxminddb and the Go reader. This prevents stack overflow when decoding malformed databases with excessively nested maps, arrays, or pointer chains. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 3 +++ src/maxminddb/decoder.rs | 50 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0de3e79a..6f0013a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,6 +47,9 @@ - `is_human_readable()` now returns `false` since MMDB is a binary format - Implemented `deserialize_ignored_any` for efficient value skipping - Implemented `deserialize_enum` for string-to-enum deserialization +- Added recursion depth limit (512) matching libmaxminddb and the Go reader. + This prevents stack overflow when decoding malformed databases with deeply + nested structures. ## 0.26.0 - 2025-03-28 diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index 16a92e08..eacdcf64 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -21,6 +21,10 @@ pub(crate) const TYPE_ARRAY: u8 = 11; const TYPE_BOOL: u8 = 14; const TYPE_FLOAT: u8 = 15; +/// Maximum recursion depth for nested data structures. 
+/// This matches the value used in libmaxminddb and the Go reader. +const MAXIMUM_DATA_STRUCTURE_DEPTH: u16 = 512; + fn to_usize(base: u8, bytes: &[u8]) -> usize { bytes .iter() @@ -47,6 +51,7 @@ enum Value<'a, 'de> { pub struct Decoder<'de> { buf: &'de [u8], current_ptr: usize, + depth: u16, } impl<'de> Decoder<'de> { @@ -54,7 +59,26 @@ impl<'de> Decoder<'de> { Decoder { buf, current_ptr: start_ptr, + depth: 0, + } + } + + /// Check and increment depth, returning error if limit exceeded. + #[inline] + fn enter_nested(&mut self) -> DecodeResult<()> { + if self.depth >= MAXIMUM_DATA_STRUCTURE_DEPTH { + return Err(MaxMindDbError::InvalidDatabase( + "exceeded maximum data structure depth; database is likely corrupt".to_owned(), + )); } + self.depth += 1; + Ok(()) + } + + /// Decrement depth when exiting a nested structure. + #[inline] + fn exit_nested(&mut self) { + self.depth = self.depth.saturating_sub(1); } #[inline(always)] @@ -101,7 +125,10 @@ impl<'de> Decoder<'de> { fn decode_any>(&mut self, visitor: V) -> DecodeResult { match self.decode_any_value()? 
{ Value::Any { prev_ptr } => { + // Pointer dereference - track depth + self.enter_nested()?; let res = self.decode_any(visitor); + self.exit_nested(); self.current_ptr = prev_ptr; res } @@ -115,8 +142,17 @@ impl<'de> Decoder<'de> { Value::U128(x) => visitor.visit_u128(x), Value::F64(x) => visitor.visit_f64(x), Value::F32(x) => visitor.visit_f32(x), - Value::Map(x) => visitor.visit_map(x), - Value::Array(x) => visitor.visit_seq(x), + // Maps and arrays enter_nested in decode_any_value; exit when done + Value::Map(x) => { + let res = visitor.visit_map(x); + self.exit_nested(); + res + } + Value::Array(x) => { + let res = visitor.visit_seq(x); + self.exit_nested(); + res + } } } @@ -137,11 +173,17 @@ impl<'de> Decoder<'de> { TYPE_BYTES => Value::Bytes(self.decode_bytes(size)?), TYPE_UINT16 => Value::U16(self.decode_uint16(size)?), TYPE_UINT32 => Value::U32(self.decode_uint32(size)?), - TYPE_MAP => self.decode_map(size), + TYPE_MAP => { + self.enter_nested()?; + self.decode_map(size) + } TYPE_INT32 => Value::I32(self.decode_int(size)?), TYPE_UINT64 => Value::U64(self.decode_uint64(size)?), TYPE_UINT128 => Value::U128(self.decode_uint128(size)?), - TYPE_ARRAY => self.decode_array(size), + TYPE_ARRAY => { + self.enter_nested()?; + self.decode_array(size) + } TYPE_BOOL => Value::Bool(self.decode_bool(size)?), TYPE_FLOAT => Value::F32(self.decode_float(size)?), u => { From 20723e11f54b426781537483860e838f687c5c28 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 20:03:23 -0800 Subject: [PATCH 16/37] Include offset in decoder error messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decoding errors now include the current offset in the data section, making it easier to debug malformed or corrupt databases. 
For example: "Invalid database: unknown data type: 13 at offset 1234" 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 2 + src/maxminddb/decoder.rs | 84 ++++++++++++++-------------------------- 2 files changed, 30 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f0013a1..0a7f4dfd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,8 @@ - Added recursion depth limit (512) matching libmaxminddb and the Go reader. This prevents stack overflow when decoding malformed databases with deeply nested structures. +- Decoding errors now include offset information for easier debugging of + malformed databases. ## 0.26.0 - 2025-03-28 diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index eacdcf64..610228bf 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -81,6 +81,18 @@ impl<'de> Decoder<'de> { self.depth = self.depth.saturating_sub(1); } + /// Create an InvalidDatabase error with current offset context. + #[inline] + fn invalid_db_error(&self, msg: &str) -> MaxMindDbError { + MaxMindDbError::InvalidDatabase(format!("{msg} at offset {}", self.current_ptr)) + } + + /// Create a Decoding error with current offset context. 
+ #[inline] + fn decode_error(&self, msg: &str) -> MaxMindDbError { + MaxMindDbError::Decoding(format!("{msg} at offset {}", self.current_ptr)) + } + #[inline(always)] fn eat_byte(&mut self) -> u8 { let b = self.buf[self.current_ptr]; @@ -186,11 +198,7 @@ impl<'de> Decoder<'de> { } TYPE_BOOL => Value::Bool(self.decode_bool(size)?), TYPE_FLOAT => Value::F32(self.decode_float(size)?), - u => { - return Err(MaxMindDbError::InvalidDatabase(format!( - "Unknown data type: {u:?}" - ))) - } + u => return Err(self.invalid_db_error(&format!("unknown data type: {u}"))), }) } @@ -204,9 +212,7 @@ impl<'de> Decoder<'de> { fn decode_bool(&mut self, size: usize) -> DecodeResult { match size { 0 | 1 => Ok(size != 0), - s => Err(MaxMindDbError::InvalidDatabase(format!( - "bool of size {s:?}" - ))), + s => Err(self.invalid_db_error(&format!("bool of size {s}"))), } } @@ -222,12 +228,7 @@ impl<'de> Decoder<'de> { let new_offset = self.current_ptr + size; let value: [u8; 4] = self.buf[self.current_ptr..new_offset] .try_into() - .map_err(|_| { - MaxMindDbError::InvalidDatabase(format!( - "float of size {:?}", - new_offset - self.current_ptr - )) - })?; + .map_err(|_| self.invalid_db_error(&format!("float of size {size}")))?; self.current_ptr = new_offset; let float_value = f32::from_be_bytes(value); Ok(float_value) @@ -237,12 +238,7 @@ impl<'de> Decoder<'de> { let new_offset = self.current_ptr + size; let value: [u8; 8] = self.buf[self.current_ptr..new_offset] .try_into() - .map_err(|_| { - MaxMindDbError::InvalidDatabase(format!( - "double of size {:?}", - new_offset - self.current_ptr - )) - })?; + .map_err(|_| self.invalid_db_error(&format!("double of size {size}")))?; self.current_ptr = new_offset; let float_value = f64::from_be_bytes(value); Ok(float_value) @@ -259,9 +255,7 @@ impl<'de> Decoder<'de> { self.current_ptr = new_offset; Ok(value) } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "u64 of size {s:?}" - ))), + s => Err(self.invalid_db_error(&format!("u64 of size 
{s}"))), } } @@ -276,9 +270,7 @@ impl<'de> Decoder<'de> { self.current_ptr = new_offset; Ok(value) } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "u128 of size {s:?}" - ))), + s => Err(self.invalid_db_error(&format!("u128 of size {s}"))), } } @@ -293,9 +285,7 @@ impl<'de> Decoder<'de> { self.current_ptr = new_offset; Ok(value) } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "u32 of size {s:?}" - ))), + s => Err(self.invalid_db_error(&format!("u32 of size {s}"))), } } @@ -310,9 +300,7 @@ impl<'de> Decoder<'de> { self.current_ptr = new_offset; Ok(value) } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "u16 of size {s:?}" - ))), + s => Err(self.invalid_db_error(&format!("u16 of size {s}"))), } } @@ -327,9 +315,7 @@ impl<'de> Decoder<'de> { self.current_ptr = new_offset; Ok(value) } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "int32 of size {s:?}" - ))), + s => Err(self.invalid_db_error(&format!("i32 of size {s}"))), } } @@ -386,9 +372,7 @@ impl<'de> Decoder<'de> { self.current_ptr = new_offset; match from_utf8(bytes) { Ok(v) => Ok(v), - Err(_) => Err(MaxMindDbError::InvalidDatabase( - "error decoding string".to_owned(), - )), + Err(_) => Err(self.invalid_db_error("invalid UTF-8 in string")), } } @@ -413,9 +397,7 @@ impl<'de> Decoder<'de> { } else if type_num == TYPE_MAP { Ok(size) } else { - Err(MaxMindDbError::Decoding(format!( - "expected map, got type {type_num}" - ))) + Err(self.decode_error(&format!("expected map, got type {type_num}"))) } } @@ -429,9 +411,7 @@ impl<'de> Decoder<'de> { } else if type_num == TYPE_ARRAY { Ok(size) } else { - Err(MaxMindDbError::Decoding(format!( - "expected array, got type {type_num}" - ))) + Err(self.decode_error(&format!("expected array, got type {type_num}"))) } } @@ -462,9 +442,7 @@ impl<'de> Decoder<'de> { } else if type_num == TYPE_STRING { self.decode_string(size) } else { - Err(MaxMindDbError::InvalidDatabase(format!( - "expected string, got type {type_num}" - ))) + 
Err(self.invalid_db_error(&format!("expected string, got type {type_num}"))) } } @@ -505,9 +483,7 @@ impl<'de> Decoder<'de> { TYPE_DOUBLE => { // Double - must be exactly 8 bytes if size != 8 { - return Err(MaxMindDbError::InvalidDatabase(format!( - "double of size {size}" - ))); + return Err(self.invalid_db_error(&format!("double of size {size}"))); } self.current_ptr += size; Ok(()) @@ -515,9 +491,7 @@ impl<'de> Decoder<'de> { TYPE_FLOAT => { // Float - must be exactly 4 bytes if size != 4 { - return Err(MaxMindDbError::InvalidDatabase(format!( - "float of size {size}" - ))); + return Err(self.invalid_db_error(&format!("float of size {size}"))); } self.current_ptr += size; Ok(()) @@ -546,9 +520,7 @@ impl<'de> Decoder<'de> { } Ok(()) } - u => Err(MaxMindDbError::InvalidDatabase(format!( - "Unknown data type: {u:?}" - ))), + u => Err(self.invalid_db_error(&format!("unknown data type: {u}"))), } } From 0b1c9efa5768443625b6b9204b1ae8654b5c2a51 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 27 Nov 2025 20:17:00 -0800 Subject: [PATCH 17/37] Restructure error types with structured fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Change InvalidDatabase and Decoding variants to use structured fields (message, offset, path) instead of a single String - Add helper constructors: invalid_database(), invalid_database_at(), decoding(), decoding_at(), decoding_at_path() - Add offset() method to Decoder for exposing current position - Add #[non_exhaustive] attribute to MaxMindDbError for future compat - Update all error creation sites to use new constructors with offsets - Follow Rust idiom: lowercase error messages without trailing punctuation This is a breaking change - pattern matching code must be updated from InvalidDatabase(msg) to InvalidDatabase { message, .. }. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 12 +- src/maxminddb/decoder.rs | 16 ++- src/maxminddb/lib.rs | 224 ++++++++++++++++++++++++++--------- src/maxminddb/reader_test.rs | 4 +- src/maxminddb/result.rs | 18 +-- 5 files changed, 198 insertions(+), 76 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a7f4dfd..720f5bfb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,8 +50,16 @@ - Added recursion depth limit (512) matching libmaxminddb and the Go reader. This prevents stack overflow when decoding malformed databases with deeply nested structures. -- Decoding errors now include offset information for easier debugging of - malformed databases. +- **BREAKING CHANGE:** The `InvalidDatabase` and `Decoding` error variants now + use structured fields instead of a single string: + - `InvalidDatabase { message, offset }` - includes optional byte offset + - `Decoding { message, offset, path }` - includes optional byte offset and + JSON-pointer-style path for locating the error + - Pattern matching code must be updated (e.g., `InvalidDatabase(msg)` becomes + `InvalidDatabase { message, .. }`) +- Error messages now include byte offsets when available, making it easier to + debug malformed databases. The `#[non_exhaustive]` attribute is added to + `MaxMindDbError` to allow future additions without breaking changes. 
## 0.26.0 - 2025-03-28 diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs index 610228bf..2346cfd0 100644 --- a/src/maxminddb/decoder.rs +++ b/src/maxminddb/decoder.rs @@ -67,8 +67,8 @@ impl<'de> Decoder<'de> { #[inline] fn enter_nested(&mut self) -> DecodeResult<()> { if self.depth >= MAXIMUM_DATA_STRUCTURE_DEPTH { - return Err(MaxMindDbError::InvalidDatabase( - "exceeded maximum data structure depth; database is likely corrupt".to_owned(), + return Err(self.invalid_db_error( + "exceeded maximum data structure depth; database is likely corrupt", )); } self.depth += 1; @@ -84,13 +84,19 @@ impl<'de> Decoder<'de> { /// Create an InvalidDatabase error with current offset context. #[inline] fn invalid_db_error(&self, msg: &str) -> MaxMindDbError { - MaxMindDbError::InvalidDatabase(format!("{msg} at offset {}", self.current_ptr)) + MaxMindDbError::invalid_database_at(msg, self.current_ptr) } /// Create a Decoding error with current offset context. #[inline] fn decode_error(&self, msg: &str) -> MaxMindDbError { - MaxMindDbError::Decoding(format!("{msg} at offset {}", self.current_ptr)) + MaxMindDbError::decoding_at(msg, self.current_ptr) + } + + /// Returns the current offset in the data section. + #[inline] + pub fn offset(&self) -> usize { + self.current_ptr } #[inline(always)] @@ -647,7 +653,7 @@ impl<'de> MapAccess<'de> for MapAccessor<'_, 'de> { { // Check if there are no more entries. if self.count == 0 { - return Err(MaxMindDbError::Decoding("no more entries".to_owned())); + return Err(self.de.decode_error("no more entries")); } self.count -= 1; diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index 76f5c813..b712f829 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -101,26 +101,45 @@ compile_error!("features `simdutf8` and `unsafe-str-decode` are mutually exclusi /// Size of the data section separator (16 zero bytes). const DATA_SECTION_SEPARATOR_SIZE: usize = 16; +/// Error returned by MaxMind DB operations. 
#[derive(Error, Debug)] +#[non_exhaustive] pub enum MaxMindDbError { - #[error("Invalid database: {0}")] - InvalidDatabase(String), - - #[error("I/O error: {0}")] + /// The database file is invalid or corrupted. + #[error("{}", format_invalid_database(.message, .offset))] + InvalidDatabase { + /// Description of what is invalid. + message: String, + /// Byte offset in the database where the error was detected. + offset: Option, + }, + + /// An I/O error occurred while reading the database. + #[error("i/o error: {0}")] Io( #[from] #[source] io::Error, ), + /// Memory mapping failed. #[cfg(feature = "mmap")] - #[error("Memory map error: {0}")] + #[error("memory map error: {0}")] Mmap(#[source] io::Error), - #[error("Decoding error: {0}")] - Decoding(String), - - #[error("Invalid network: {0}")] + /// Error decoding data from the database. + #[error("{}", format_decoding_error(.message, .offset, .path.as_deref()))] + Decoding { + /// Description of the decoding error. + message: String, + /// Byte offset in the data section where the error occurred. + offset: Option, + /// JSON-pointer-like path to the field (e.g., "/city/names/en"). + path: Option, + }, + + /// The provided network/CIDR is invalid. 
+ #[error("invalid network: {0}")] InvalidNetwork( #[from] #[source] @@ -128,9 +147,74 @@ pub enum MaxMindDbError { ), } +fn format_invalid_database(message: &str, offset: &Option) -> String { + match offset { + Some(off) => format!("invalid database at offset {off}: {message}"), + None => format!("invalid database: {message}"), + } +} + +fn format_decoding_error(message: &str, offset: &Option, path: Option<&str>) -> String { + match (offset, path) { + (Some(off), Some(p)) => format!("decoding error at offset {off} (path: {p}): {message}"), + (Some(off), None) => format!("decoding error at offset {off}: {message}"), + (None, Some(p)) => format!("decoding error (path: {p}): {message}"), + (None, None) => format!("decoding error: {message}"), + } +} + +impl MaxMindDbError { + /// Creates an InvalidDatabase error with just a message. + pub fn invalid_database(message: impl Into) -> Self { + MaxMindDbError::InvalidDatabase { + message: message.into(), + offset: None, + } + } + + /// Creates an InvalidDatabase error with message and offset. + pub fn invalid_database_at(message: impl Into, offset: usize) -> Self { + MaxMindDbError::InvalidDatabase { + message: message.into(), + offset: Some(offset), + } + } + + /// Creates a Decoding error with just a message. + pub fn decoding(message: impl Into) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: None, + path: None, + } + } + + /// Creates a Decoding error with message and offset. + pub fn decoding_at(message: impl Into, offset: usize) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: Some(offset), + path: None, + } + } + + /// Creates a Decoding error with message, offset, and path. 
+ pub fn decoding_at_path( + message: impl Into, + offset: usize, + path: impl Into, + ) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: Some(offset), + path: Some(path.into()), + } + } +} + impl de::Error for MaxMindDbError { fn custom(msg: T) -> Self { - MaxMindDbError::Decoding(format!("{msg}")) + MaxMindDbError::decoding(msg.to_string()) } } @@ -546,8 +630,8 @@ impl<'de, S: AsRef<[u8]>> Reader { pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> { // Check for IPv6 address in IPv4-only database if matches!(address, IpAddr::V6(_)) && self.metadata.ip_version == 4 { - return Err(MaxMindDbError::InvalidDatabase( - "you attempted to look up an IPv6 address in an IPv4-only database".to_string(), + return Err(MaxMindDbError::invalid_database( + "you attempted to look up an IPv6 address in an IPv4-only database", )); } @@ -755,8 +839,8 @@ impl<'de, S: AsRef<[u8]>> Reader { // return 0 as the pointer value to signify "not found". n if n == node => Ok((0, prefix_len)), n if node > n => Ok((node, prefix_len)), - _ => Err(MaxMindDbError::InvalidDatabase( - "invalid node in search tree".to_owned(), + _ => Err(MaxMindDbError::invalid_database( + "invalid node in search tree", )), } } @@ -824,9 +908,8 @@ impl<'de, S: AsRef<[u8]>> Reader { | buf[offset + 3] as usize } s => { - return Err(MaxMindDbError::InvalidDatabase(format!( - "unknown record size: \ - {s:?}" + return Err(MaxMindDbError::invalid_database(format!( + "unknown record size: {s}" ))) } }; @@ -840,8 +923,8 @@ impl<'de, S: AsRef<[u8]>> Reader { // Check bounds using pointer_base which marks the start of the data section if resolved >= (self.buf.as_ref().len() - self.pointer_base) { - return Err(MaxMindDbError::InvalidDatabase( - "the MaxMind DB file's data pointer resolves to an invalid location".to_owned(), + return Err(MaxMindDbError::invalid_database( + "the MaxMind DB file's data pointer resolves to an invalid location", )); } @@ -883,42 +966,42 @@ impl<'de, S: 
AsRef<[u8]>> Reader { let m = &self.metadata; if m.binary_format_major_version != 2 { - return Err(MaxMindDbError::InvalidDatabase(format!( + return Err(MaxMindDbError::invalid_database(format!( "binary_format_major_version - Expected: 2 Actual: {}", m.binary_format_major_version ))); } if m.binary_format_minor_version != 0 { - return Err(MaxMindDbError::InvalidDatabase(format!( + return Err(MaxMindDbError::invalid_database(format!( "binary_format_minor_version - Expected: 0 Actual: {}", m.binary_format_minor_version ))); } if m.database_type.is_empty() { - return Err(MaxMindDbError::InvalidDatabase( - "database_type - Expected: non-empty string Actual: \"\"".to_owned(), + return Err(MaxMindDbError::invalid_database( + "database_type - Expected: non-empty string Actual: \"\"", )); } if m.description.is_empty() { - return Err(MaxMindDbError::InvalidDatabase( - "description - Expected: non-empty map Actual: {}".to_owned(), + return Err(MaxMindDbError::invalid_database( + "description - Expected: non-empty map Actual: {}", )); } if m.ip_version != 4 && m.ip_version != 6 { - return Err(MaxMindDbError::InvalidDatabase(format!( + return Err(MaxMindDbError::invalid_database(format!( "ip_version - Expected: 4 or 6 Actual: {}", m.ip_version ))); } if m.record_size != 24 && m.record_size != 28 && m.record_size != 32 { - return Err(MaxMindDbError::InvalidDatabase(format!( + return Err(MaxMindDbError::invalid_database(format!( "record_size - Expected: 24, 28, or 32 Actual: {}", m.record_size ))); } if m.node_count == 0 { - return Err(MaxMindDbError::InvalidDatabase( - "node_count - Expected: positive integer Actual: 0".to_owned(), + return Err(MaxMindDbError::invalid_database( + "node_count - Expected: positive integer Actual: 0", )); } Ok(()) @@ -948,9 +1031,8 @@ impl<'de, S: AsRef<[u8]>> Reader { iteration_count += 1; if iteration_count > max_iterations { - return Err(MaxMindDbError::InvalidDatabase(format!( - "search tree appears to have a cycle or invalid structure 
(exceeded {} iterations)", - max_iterations + return Err(MaxMindDbError::invalid_database(format!( + "search tree appears to have a cycle or invalid structure (exceeded {max_iterations} iterations)" ))); } } @@ -963,8 +1045,9 @@ impl<'de, S: AsRef<[u8]>> Reader { let separator_end = separator_start + DATA_SECTION_SEPARATOR_SIZE; if separator_end > self.buf.as_ref().len() { - return Err(MaxMindDbError::InvalidDatabase( - "data section separator extends past end of file".to_owned(), + return Err(MaxMindDbError::invalid_database_at( + "data section separator extends past end of file", + separator_start, )); } @@ -972,10 +1055,10 @@ impl<'de, S: AsRef<[u8]>> Reader { for &b in separator { if b != 0 { - return Err(MaxMindDbError::InvalidDatabase(format!( - "unexpected byte in data separator: {:?}", - separator - ))); + return Err(MaxMindDbError::invalid_database_at( + format!("unexpected byte in data separator: {separator:?}"), + separator_start, + )); } } Ok(()) @@ -987,21 +1070,23 @@ impl<'de, S: AsRef<[u8]>> Reader { // Verify each offset from the search tree points to valid, decodable data for &offset in &offsets { if offset >= data_section.len() { - return Err(MaxMindDbError::InvalidDatabase(format!( - "search tree pointer {} is beyond data section (len: {})", + return Err(MaxMindDbError::invalid_database_at( + format!( + "search tree pointer is beyond data section (len: {})", + data_section.len() + ), offset, - data_section.len() - ))); + )); } let mut dec = decoder::Decoder::new(data_section, offset); // Try to skip/decode the value to verify it's valid if let Err(e) = dec.skip_value_for_verification() { - return Err(MaxMindDbError::InvalidDatabase(format!( - "received decoding error ({}) at offset {}", - e, offset - ))); + return Err(MaxMindDbError::invalid_database_at( + format!("decoding error: {e}"), + offset, + )); } } @@ -1015,9 +1100,7 @@ fn find_metadata_start(buf: &[u8]) -> Result { memchr::memmem::rfind(buf, METADATA_START_MARKER) .map(|x| x + 
METADATA_START_MARKER.len()) .ok_or_else(|| { - MaxMindDbError::InvalidDatabase( - "Could not find MaxMind DB metadata in file.".to_owned(), - ) + MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file") }) } @@ -1038,17 +1121,26 @@ mod tests { #[test] fn test_error_display() { + // Error without offset + assert_eq!( + format!( + "{}", + MaxMindDbError::invalid_database("something went wrong") + ), + "invalid database: something went wrong".to_owned(), + ); + // Error with offset assert_eq!( format!( "{}", - MaxMindDbError::InvalidDatabase("something went wrong".to_owned()) + MaxMindDbError::invalid_database_at("something went wrong", 42) ), - "Invalid database: something went wrong".to_owned(), + "invalid database at offset 42: something went wrong".to_owned(), ); let io_err = Error::new(ErrorKind::NotFound, "file not found"); assert_eq!( format!("{}", MaxMindDbError::from(io_err)), - "I/O error: file not found".to_owned(), + "i/o error: file not found".to_owned(), ); #[cfg(feature = "mmap")] @@ -1056,19 +1148,33 @@ mod tests { let mmap_io_err = Error::new(ErrorKind::PermissionDenied, "mmap failed"); assert_eq!( format!("{}", MaxMindDbError::Mmap(mmap_io_err)), - "Memory map error: mmap failed".to_owned(), + "memory map error: mmap failed".to_owned(), ); } + // Decoding error without offset + assert_eq!( + format!("{}", MaxMindDbError::decoding("unexpected type")), + "decoding error: unexpected type".to_owned(), + ); + // Decoding error with offset assert_eq!( - format!("{}", MaxMindDbError::Decoding("unexpected type".to_owned())), - "Decoding error: unexpected type".to_owned(), + format!("{}", MaxMindDbError::decoding_at("unexpected type", 100)), + "decoding error at offset 100: unexpected type".to_owned(), + ); + // Decoding error with offset and path + assert_eq!( + format!( + "{}", + MaxMindDbError::decoding_at_path("unexpected type", 100, "/city/names/en") + ), + "decoding error at offset 100 (path: /city/names/en): unexpected 
type".to_owned(), ); let net_err = IpNetworkError::InvalidPrefix; assert_eq!( format!("{}", MaxMindDbError::from(net_err)), - "Invalid network: invalid prefix".to_owned(), + "invalid network: invalid prefix".to_owned(), ); } @@ -1271,11 +1377,11 @@ mod tests { let result = reader.lookup(ip); match result { - Err(MaxMindDbError::InvalidDatabase(msg)) => { + Err(MaxMindDbError::InvalidDatabase { message, .. }) => { assert!( - msg.contains("IPv6") && msg.contains("IPv4"), + message.contains("IPv6") && message.contains("IPv4"), "Expected error message about IPv6 in IPv4 database, got: {}", - msg + message ); } Err(e) => panic!( diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index 705f78f4..f39a2132 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -111,7 +111,7 @@ fn test_broken_database() { match lookup.decode::() { Err(e) => assert!(matches!( e, - MaxMindDbError::InvalidDatabase(_) // Check variant, message might vary slightly + MaxMindDbError::InvalidDatabase { .. } // Check variant, message might vary slightly )), Ok(_) => panic!("Unexpected success with broken data"), } @@ -139,7 +139,7 @@ fn test_non_database() { match r { Ok(_) => panic!("Received Reader when opening a non-MMDB file"), Err(e) => assert!( - matches!(&e, MaxMindDbError::InvalidDatabase(s) if s == "Could not find MaxMind DB metadata in file."), + matches!(&e, MaxMindDbError::InvalidDatabase { message, .. 
} if message == "could not find MaxMind DB metadata in file"), "Expected InvalidDatabase error with specific message, but got: {:?}", e ), diff --git a/src/maxminddb/result.rs b/src/maxminddb/result.rs index 4cfbddc8..a3fbbbf0 100644 --- a/src/maxminddb/result.rs +++ b/src/maxminddb/result.rs @@ -172,8 +172,8 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { T: Deserialize<'a>, { if !self.found() { - return Err(MaxMindDbError::Decoding( - "cannot decode: IP address not found in database".to_owned(), + return Err(MaxMindDbError::decoding( + "cannot decode: IP address not found in database", )); } @@ -239,9 +239,10 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { PathElement::Key(key) => { let (_, type_num) = decoder.peek_type()?; if type_num != TYPE_MAP { - return Err(MaxMindDbError::Decoding(format!( - "expected map for Key navigation, got type {type_num}" - ))); + return Err(MaxMindDbError::decoding_at( + format!("expected map for Key navigation, got type {type_num}"), + decoder.offset(), + )); } // Consume the map header and get size @@ -265,9 +266,10 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { PathElement::Index(idx) => { let (_, type_num) = decoder.peek_type()?; if type_num != TYPE_ARRAY { - return Err(MaxMindDbError::Decoding(format!( - "expected array for Index navigation, got type {type_num}" - ))); + return Err(MaxMindDbError::decoding_at( + format!("expected array for Index navigation, got type {type_num}"), + decoder.offset(), + )); } // Consume the array header and get size From 4a0e7599f5589e2ca0a73e9b61f7bcf027f2f426 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 06:42:41 -0800 Subject: [PATCH 18/37] Modernize Rust idioms in codebase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use then_some() for Option transformation in LookupResult::offset() - Replace FromStr::from_str() with .parse() throughout tests - Simplify test error handling: if let Err + unwrap -> expect() - Remove 
unused FromStr import - Use _ instead of unused variable in match guards 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/maxminddb/lib.rs | 4 +-- src/maxminddb/reader_test.rs | 49 ++++++++++++++++-------------------- src/maxminddb/result.rs | 6 +---- 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs index b712f829..98f31575 100644 --- a/src/maxminddb/lib.rs +++ b/src/maxminddb/lib.rs @@ -837,8 +837,8 @@ impl<'de, S: AsRef<[u8]>> Reader { match node_count { // If node == node_count, it means we hit the placeholder "empty" node // return 0 as the pointer value to signify "not found". - n if n == node => Ok((0, prefix_len)), - n if node > n => Ok((node, prefix_len)), + _ if node == node_count => Ok((0, prefix_len)), + _ if node > node_count => Ok((node, prefix_len)), _ => Err(MaxMindDbError::invalid_database( "invalid node in search tree", )), diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs index f39a2132..8c9c4ed1 100644 --- a/src/maxminddb/reader_test.rs +++ b/src/maxminddb/reader_test.rs @@ -1,5 +1,4 @@ use std::net::IpAddr; -use std::str::FromStr; use ipnetwork::IpNetwork; use serde::Deserialize; @@ -42,12 +41,9 @@ fn test_decoder() { utf8_string: String, } - let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb"); - if let Err(err) = r { - panic!("error opening mmdb: {err:?}"); - } - let r = r.unwrap(); - let ip: IpAddr = FromStr::from_str("1.1.1.0").unwrap(); + let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb") + .expect("error opening mmdb"); + let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.found(), "Expected IP to be found"); let result: TestType = lookup.decode().unwrap(); @@ -87,11 +83,8 @@ fn test_decoder() { fn test_pointers_in_metadata() { let _ = env_logger::try_init(); - let r = 
Reader::open_readfile("test-data/test-data/MaxMind-DB-test-metadata-pointers.mmdb"); - if let Err(err) = r { - panic!("error opening mmdb: {err:?}"); - } - r.unwrap(); + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-metadata-pointers.mmdb") + .expect("error opening mmdb"); } #[test] @@ -101,7 +94,7 @@ fn test_broken_database() { let r = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb") .ok() .unwrap(); - let ip: IpAddr = FromStr::from_str("2001:220::").unwrap(); + let ip: IpAddr = "2001:220::".parse().unwrap(); #[derive(Deserialize, Debug)] struct TestType {} @@ -212,7 +205,7 @@ fn test_lookup_city() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); + let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let city: geoip2::City = lookup.decode().unwrap(); @@ -230,7 +223,7 @@ fn test_lookup_country() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); + let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let country: geoip2::Country = lookup.decode().unwrap(); @@ -248,7 +241,7 @@ fn test_lookup_connection_type() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("96.1.20.112").unwrap(); + let ip: IpAddr = "96.1.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let connection_type: geoip2::ConnectionType = lookup.decode().unwrap(); @@ -264,7 +257,7 @@ fn test_lookup_annonymous_ip() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("81.2.69.123").unwrap(); + let ip: IpAddr = "81.2.69.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let anonymous_ip: geoip2::AnonymousIp = 
lookup.decode().unwrap(); @@ -284,7 +277,7 @@ fn test_lookup_density_income() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("5.83.124.123").unwrap(); + let ip: IpAddr = "5.83.124.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let density_income: geoip2::DensityIncome = lookup.decode().unwrap(); @@ -301,7 +294,7 @@ fn test_lookup_domain() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("66.92.80.123").unwrap(); + let ip: IpAddr = "66.92.80.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let domain: geoip2::Domain = lookup.decode().unwrap(); @@ -317,7 +310,7 @@ fn test_lookup_isp() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("12.87.118.123").unwrap(); + let ip: IpAddr = "12.87.118.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let isp: geoip2::Isp = lookup.decode().unwrap(); @@ -335,7 +328,7 @@ fn test_lookup_asn() { let reader = Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("1.128.0.123").unwrap(); + let ip: IpAddr = "1.128.0.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let asn: geoip2::Asn = lookup.decode().unwrap(); @@ -524,7 +517,7 @@ fn check_ip>(reader: &Reader, ip_version: usize) { // Test lookups that are expected to succeed for subnet in &subnets { - let ip: IpAddr = FromStr::from_str(subnet).unwrap(); + let ip: IpAddr = subnet.parse().unwrap(); let lookup = reader.lookup(ip); assert!( @@ -558,7 +551,7 @@ fn check_ip>(reader: &Reader, ip_version: usize) { continue; // Skip IPv4 addresses if testing IPv6 db } - let ip: IpAddr = FromStr::from_str(address).unwrap(); + let ip: IpAddr = address.parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!( @@ -577,7 +570,7 @@ fn test_json_serialize() { let reader = 
Reader::open_readfile(filename).unwrap(); - let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); + let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.found()); let city: geoip2::City = lookup.decode().unwrap(); @@ -1197,7 +1190,7 @@ fn test_size_hints() { } let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); - let ip: IpAddr = FromStr::from_str("1.1.1.0").unwrap(); + let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.found()); let result: TestType = lookup.decode().unwrap(); @@ -1229,7 +1222,7 @@ fn test_ignored_any() { } let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); - let ip: IpAddr = FromStr::from_str("1.1.1.0").unwrap(); + let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.found()); let result: PartialRead = lookup.decode().unwrap(); @@ -1254,7 +1247,7 @@ fn test_enum_deserialization() { } let r = Reader::open_readfile("test-data/test-data/GeoIP2-Connection-Type-Test.mmdb").unwrap(); - let ip: IpAddr = FromStr::from_str("96.1.20.112").unwrap(); + let ip: IpAddr = "96.1.20.112".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.found()); let result: Record = lookup.decode().unwrap(); @@ -1285,7 +1278,7 @@ fn test_serde_flatten() { } let r = Reader::open_readfile("test-data/test-data/GeoIP2-Country-Test.mmdb").unwrap(); - let ip: IpAddr = FromStr::from_str("81.2.69.160").unwrap(); + let ip: IpAddr = "81.2.69.160".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.found()); diff --git a/src/maxminddb/result.rs b/src/maxminddb/result.rs index a3fbbbf0..20f8c2b6 100644 --- a/src/maxminddb/result.rs +++ b/src/maxminddb/result.rs @@ -144,11 +144,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// Returns `None` if the IP was not found. 
#[inline] pub fn offset(&self) -> Option { - if self.found() { - Some(self.data_offset) - } else { - None - } + self.found().then_some(self.data_offset) } /// Decodes the full record into the specified type. From 11fc430078dbdd04595b923c7f7216b0dc93e61f Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 06:52:25 -0800 Subject: [PATCH 19/37] Reorganize code into modern module structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move from non-standard src/maxminddb/lib.rs to standard src/lib.rs layout. Split the monolithic lib.rs into focused modules: - error.rs: MaxMindDbError and helper functions - metadata.rs: Metadata struct - reader.rs: Reader struct and all implementations - within.rs: Within iterator, WithinOptions, and IpInt helper - result.rs: LookupResult and PathElement (already separate) - decoder.rs: Binary format decoder (already separate) - geoip2.rs: GeoIP2 data structures (already separate) The lib.rs now serves as the crate root with re-exports, following modern Rust conventions. No public API changes. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Cargo.toml | 3 +- src/{maxminddb => }/decoder.rs | 2 +- src/error.rs | 190 ++++ src/{maxminddb => }/geoip2.rs | 0 src/lib.rs | 376 +++++++ src/maxminddb/lib.rs | 1464 ---------------------------- src/metadata.rs | 28 + src/reader.rs | 655 +++++++++++++ src/{maxminddb => }/reader_test.rs | 0 src/{maxminddb => }/result.rs | 5 +- src/within.rs | 257 +++++ 11 files changed, 1511 insertions(+), 1469 deletions(-) rename src/{maxminddb => }/decoder.rs (99%) create mode 100644 src/error.rs rename src/{maxminddb => }/geoip2.rs (100%) create mode 100644 src/lib.rs delete mode 100644 src/maxminddb/lib.rs create mode 100644 src/metadata.rs create mode 100644 src/reader.rs rename src/{maxminddb => }/reader_test.rs (100%) rename src/{maxminddb => }/result.rs (99%) create mode 100644 src/within.rs diff --git a/Cargo.toml b/Cargo.toml index 76123e21..460d18c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,7 @@ mmap = ["memmap2"] unsafe-str-decode = [] [lib] -name ="maxminddb" -path = "src/maxminddb/lib.rs" +name = "maxminddb" [dependencies] ipnetwork = "0.21.1" diff --git a/src/maxminddb/decoder.rs b/src/decoder.rs similarity index 99% rename from src/maxminddb/decoder.rs rename to src/decoder.rs index 2346cfd0..3c29f879 100644 --- a/src/maxminddb/decoder.rs +++ b/src/decoder.rs @@ -3,7 +3,7 @@ use serde::de::{self, DeserializeSeed, MapAccess, SeqAccess, Visitor}; use serde::forward_to_deserialize_any; use std::convert::TryInto; -use super::MaxMindDbError; +use crate::error::MaxMindDbError; // MaxMind DB type constants const TYPE_EXTENDED: u8 = 0; diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 00000000..4685b92b --- /dev/null +++ b/src/error.rs @@ -0,0 +1,190 @@ +//! Error types for MaxMind DB operations. + +use std::fmt::Display; +use std::io; + +use ipnetwork::IpNetworkError; +use serde::de; +use thiserror::Error; + +/// Error returned by MaxMind DB operations. 
+#[derive(Error, Debug)] +#[non_exhaustive] +pub enum MaxMindDbError { + /// The database file is invalid or corrupted. + #[error("{}", format_invalid_database(.message, .offset))] + InvalidDatabase { + /// Description of what is invalid. + message: String, + /// Byte offset in the database where the error was detected. + offset: Option, + }, + + /// An I/O error occurred while reading the database. + #[error("i/o error: {0}")] + Io( + #[from] + #[source] + io::Error, + ), + + /// Memory mapping failed. + #[cfg(feature = "mmap")] + #[error("memory map error: {0}")] + Mmap(#[source] io::Error), + + /// Error decoding data from the database. + #[error("{}", format_decoding_error(.message, .offset, .path.as_deref()))] + Decoding { + /// Description of the decoding error. + message: String, + /// Byte offset in the data section where the error occurred. + offset: Option, + /// JSON-pointer-like path to the field (e.g., "/city/names/en"). + path: Option, + }, + + /// The provided network/CIDR is invalid. + #[error("invalid network: {0}")] + InvalidNetwork( + #[from] + #[source] + IpNetworkError, + ), +} + +fn format_invalid_database(message: &str, offset: &Option) -> String { + match offset { + Some(off) => format!("invalid database at offset {off}: {message}"), + None => format!("invalid database: {message}"), + } +} + +fn format_decoding_error(message: &str, offset: &Option, path: Option<&str>) -> String { + match (offset, path) { + (Some(off), Some(p)) => format!("decoding error at offset {off} (path: {p}): {message}"), + (Some(off), None) => format!("decoding error at offset {off}: {message}"), + (None, Some(p)) => format!("decoding error (path: {p}): {message}"), + (None, None) => format!("decoding error: {message}"), + } +} + +impl MaxMindDbError { + /// Creates an InvalidDatabase error with just a message. 
+ pub fn invalid_database(message: impl Into) -> Self { + MaxMindDbError::InvalidDatabase { + message: message.into(), + offset: None, + } + } + + /// Creates an InvalidDatabase error with message and offset. + pub fn invalid_database_at(message: impl Into, offset: usize) -> Self { + MaxMindDbError::InvalidDatabase { + message: message.into(), + offset: Some(offset), + } + } + + /// Creates a Decoding error with just a message. + pub fn decoding(message: impl Into) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: None, + path: None, + } + } + + /// Creates a Decoding error with message and offset. + pub fn decoding_at(message: impl Into, offset: usize) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: Some(offset), + path: None, + } + } + + /// Creates a Decoding error with message, offset, and path. + pub fn decoding_at_path( + message: impl Into, + offset: usize, + path: impl Into, + ) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: Some(offset), + path: Some(path.into()), + } + } +} + +impl de::Error for MaxMindDbError { + fn custom(msg: T) -> Self { + MaxMindDbError::decoding(msg.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Error, ErrorKind}; + + #[test] + fn test_error_display() { + // Error without offset + assert_eq!( + format!( + "{}", + MaxMindDbError::invalid_database("something went wrong") + ), + "invalid database: something went wrong".to_owned(), + ); + // Error with offset + assert_eq!( + format!( + "{}", + MaxMindDbError::invalid_database_at("something went wrong", 42) + ), + "invalid database at offset 42: something went wrong".to_owned(), + ); + let io_err = Error::new(ErrorKind::NotFound, "file not found"); + assert_eq!( + format!("{}", MaxMindDbError::from(io_err)), + "i/o error: file not found".to_owned(), + ); + + #[cfg(feature = "mmap")] + { + let mmap_io_err = Error::new(ErrorKind::PermissionDenied, "mmap failed"); + 
assert_eq!( + format!("{}", MaxMindDbError::Mmap(mmap_io_err)), + "memory map error: mmap failed".to_owned(), + ); + } + + // Decoding error without offset + assert_eq!( + format!("{}", MaxMindDbError::decoding("unexpected type")), + "decoding error: unexpected type".to_owned(), + ); + // Decoding error with offset + assert_eq!( + format!("{}", MaxMindDbError::decoding_at("unexpected type", 100)), + "decoding error at offset 100: unexpected type".to_owned(), + ); + // Decoding error with offset and path + assert_eq!( + format!( + "{}", + MaxMindDbError::decoding_at_path("unexpected type", 100, "/city/names/en") + ), + "decoding error at offset 100 (path: /city/names/en): unexpected type".to_owned(), + ); + + let net_err = IpNetworkError::InvalidPrefix; + assert_eq!( + format!("{}", MaxMindDbError::from(net_err)), + "invalid network: invalid prefix".to_owned(), + ); + } +} diff --git a/src/maxminddb/geoip2.rs b/src/geoip2.rs similarity index 100% rename from src/maxminddb/geoip2.rs rename to src/geoip2.rs diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..3facb2c8 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,376 @@ +#![deny(trivial_casts, trivial_numeric_casts, unused_import_braces)] +//! # MaxMind DB Reader +//! +//! This library reads the MaxMind DB format, including the GeoIP2 and GeoLite2 databases. +//! +//! ## Features +//! +//! This crate provides several optional features for performance and functionality: +//! +//! - **`mmap`** (default: disabled): Enable memory-mapped file access for +//! better performance in long-running applications +//! - **`simdutf8`** (default: disabled): Use SIMD instructions for faster +//! UTF-8 validation during string decoding +//! - **`unsafe-str-decode`** (default: disabled): Skip UTF-8 validation +//! entirely for maximum performance (~20% faster lookups) +//! +//! **Note**: `simdutf8` and `unsafe-str-decode` are mutually exclusive. +//! +//! ## Database Compatibility +//! +//! 
This library supports all MaxMind DB format databases: +//! - **GeoIP2** databases (City, Country, Enterprise, ISP, etc.) +//! - **GeoLite2** databases (free versions) +//! - Custom MaxMind DB format databases +//! +//! ## Thread Safety +//! +//! The `Reader` is `Send` and `Sync`, making it safe to share across threads. +//! This makes it ideal for web servers and other concurrent applications. +//! +//! ## Quick Start +//! +//! ```rust +//! use maxminddb::{Reader, geoip2}; +//! use std::net::IpAddr; +//! +//! fn main() -> Result<(), Box> { +//! // Open database file +//! # let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb")?; +//! # /* +//! let reader = Reader::open_readfile("/path/to/GeoIP2-City.mmdb")?; +//! # */ +//! +//! // Look up an IP address +//! let ip: IpAddr = "89.160.20.128".parse()?; +//! let result = reader.lookup(ip)?; +//! +//! if result.found() { +//! let city: geoip2::City = result.decode()?; +//! if let Some(country) = city.country { +//! println!("Country: {}", country.iso_code.unwrap_or("Unknown")); +//! } +//! } +//! +//! Ok(()) +//! } +//! ``` +//! +//! ## Selective Field Access +//! +//! Use `decode_path` to extract specific fields without deserializing the entire record: +//! +//! ```rust +//! use maxminddb::{Reader, PathElement}; +//! use std::net::IpAddr; +//! +//! let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +//! let ip: IpAddr = "89.160.20.128".parse().unwrap(); +//! +//! let result = reader.lookup(ip).unwrap(); +//! let country_code: Option = result.decode_path(&[ +//! PathElement::Key("country"), +//! PathElement::Key("iso_code"), +//! ]).unwrap(); +//! +//! println!("Country: {:?}", country_code); +//! 
``` + +#[cfg(all(feature = "simdutf8", feature = "unsafe-str-decode"))] +compile_error!("features `simdutf8` and `unsafe-str-decode` are mutually exclusive"); + +mod decoder; +mod error; +pub mod geoip2; +mod metadata; +mod reader; +mod result; +mod within; + +// Re-export public types +pub use error::MaxMindDbError; +pub use metadata::Metadata; +pub use reader::Reader; +pub use result::{LookupResult, PathElement}; +pub use within::{Within, WithinOptions}; + +#[cfg(feature = "mmap")] +pub use memmap2::Mmap; + +#[cfg(test)] +mod reader_test; + +#[cfg(test)] +mod tests { + use super::*; + use std::net::IpAddr; + + #[test] + fn test_lookup_network() { + use std::collections::HashMap; + + struct TestCase { + ip: &'static str, + db_file: &'static str, + expected_network: &'static str, + expected_found: bool, + } + + let test_cases = [ + // IPv4 address in IPv6 database - not found, returns containing network + TestCase { + ip: "1.1.1.1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-32.mmdb", + expected_network: "1.0.0.0/8", + expected_found: false, + }, + // IPv6 exact match + TestCase { + ip: "::1:ffff:ffff", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", + expected_network: "::1:ffff:ffff/128", + expected_found: true, + }, + // IPv6 network match (not exact) + TestCase { + ip: "::2:0:1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", + expected_network: "::2:0:0/122", + expected_found: true, + }, + // IPv4 exact match + TestCase { + ip: "1.1.1.1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", + expected_network: "1.1.1.1/32", + expected_found: true, + }, + // IPv4 network match (not exact) + TestCase { + ip: "1.1.1.3", + db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", + expected_network: "1.1.1.2/31", + expected_found: true, + }, + // IPv4 in decoder test database + TestCase { + ip: "1.1.1.3", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "1.1.1.0/24", + 
expected_found: true, + }, + // IPv4-mapped IPv6 address - preserves IPv6 form + TestCase { + ip: "::ffff:1.1.1.128", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "::ffff:1.1.1.0/120", + expected_found: true, + }, + // IPv4-compatible IPv6 address - uses compressed IPv6 notation + TestCase { + ip: "::1.1.1.128", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "::101:100/120", + expected_found: true, + }, + // No IPv4 search tree - IPv4 address returns ::/64 + TestCase { + ip: "200.0.2.1", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - IPv6 address in IPv4 range + TestCase { + ip: "::200.0.2.1", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - IPv6 address at boundary of IPv4 space + TestCase { + ip: "0:0:0:0:ffff:ffff:ffff:ffff", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - high IPv6 address not found + TestCase { + ip: "ef00::", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "8000::/1", + expected_found: false, + }, + ]; + + // Cache readers to avoid reopening the same file multiple times + let mut readers: HashMap<&str, Reader>> = HashMap::new(); + + for test in &test_cases { + let reader = readers + .entry(test.db_file) + .or_insert_with(|| Reader::open_readfile(test.db_file).unwrap()); + + let ip: IpAddr = test.ip.parse().unwrap(); + let result = reader.lookup(ip).unwrap(); + + assert_eq!( + result.found(), + test.expected_found, + "IP {} in {}: expected found={}, got found={}", + test.ip, + test.db_file, + test.expected_found, + result.found() + ); + + let network = result.network().unwrap(); + assert_eq!( + network.to_string(), 
+ test.expected_network, + "IP {} in {}: expected network {}, got {}", + test.ip, + test.db_file, + test.expected_network, + network + ); + } + } + + #[test] + fn test_lookup_with_geoip_data() { + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + assert!(result.found(), "lookup should find known IP"); + + // Decode the data + let city: geoip2::City = result.decode().unwrap(); + assert!(city.city.is_some(), "Expected city data"); + + // Check full network (not just prefix) + let network = result.network().unwrap(); + assert_eq!( + network.to_string(), + "89.160.20.128/25", + "Expected network 89.160.20.128/25" + ); + + // Check offset is available for caching + assert!( + result.offset().is_some(), + "Expected offset to be Some for found IP" + ); + } + + #[test] + fn test_decode_path() { + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + + // Navigate to country.iso_code + let iso_code: Option = result + .decode_path(&[PathElement::Key("country"), PathElement::Key("iso_code")]) + .unwrap(); + assert_eq!(iso_code, Some("SE".to_owned())); + + // Navigate to non-existent path + let missing: Option = result + .decode_path(&[PathElement::Key("nonexistent")]) + .unwrap(); + assert!(missing.is_none()); + } + + #[test] + fn test_ipv6_in_ipv4_database() { + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb").unwrap(); + let ip: IpAddr = "2001::".parse().unwrap(); + + let result = reader.lookup(ip); + match result { + Err(MaxMindDbError::InvalidDatabase { message, .. 
}) => { + assert!( + message.contains("IPv6") && message.contains("IPv4"), + "Expected error message about IPv6 in IPv4 database, got: {}", + message + ); + } + Err(e) => panic!( + "Expected InvalidDatabase error for IPv6 in IPv4 database, got: {:?}", + e + ), + Ok(_) => panic!("Expected error for IPv6 lookup in IPv4-only database"), + } + } + + #[test] + fn test_decode_path_comprehensive() { + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); + let ip: IpAddr = "::1.1.1.0".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + assert!(result.found()); + + // Test simple path: uint16 + let u16_val: Option = result.decode_path(&[PathElement::Key("uint16")]).unwrap(); + assert_eq!(u16_val, Some(100)); + + // Test array access: first element + let arr_first: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(0)]) + .unwrap(); + assert_eq!(arr_first, Some(1)); + + // Test array access: last element (index 2) + let arr_last: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(2)]) + .unwrap(); + assert_eq!(arr_last, Some(3)); + + // Test array access: out of bounds (index 3) returns None + let arr_oob: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(3)]) + .unwrap(); + assert!(arr_oob.is_none()); + + // Test negative index: -1 means last element + let arr_neg1: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(-1)]) + .unwrap(); + assert_eq!(arr_neg1, Some(3)); + + // Test negative index: -3 means first element + let arr_neg3: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(-3)]) + .unwrap(); + assert_eq!(arr_neg3, Some(1)); + + // Test nested path: map.mapX.arrayX[1] + let nested: Option = result + .decode_path(&[ + PathElement::Key("map"), + PathElement::Key("mapX"), + PathElement::Key("arrayX"), + PathElement::Index(1), + ]) + .unwrap(); + assert_eq!(nested, 
Some(8)); + + // Test non-existent key returns None + let missing: Option = result + .decode_path(&[PathElement::Key("does-not-exist"), PathElement::Index(1)]) + .unwrap(); + assert!(missing.is_none()); + + // Test utf8_string path + let utf8: Option = result + .decode_path(&[PathElement::Key("utf8_string")]) + .unwrap(); + assert_eq!(utf8, Some("unicode! ☯ - ♫".to_owned())); + } +} diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs deleted file mode 100644 index 98f31575..00000000 --- a/src/maxminddb/lib.rs +++ /dev/null @@ -1,1464 +0,0 @@ -#![deny(trivial_casts, trivial_numeric_casts, unused_import_braces)] -//! # MaxMind DB Reader -//! -//! This library reads the MaxMind DB format, including the GeoIP2 and GeoLite2 databases. -//! -//! ## Features -//! -//! This crate provides several optional features for performance and functionality: -//! -//! - **`mmap`** (default: disabled): Enable memory-mapped file access for -//! better performance in long-running applications -//! - **`simdutf8`** (default: disabled): Use SIMD instructions for faster -//! UTF-8 validation during string decoding -//! - **`unsafe-str-decode`** (default: disabled): Skip UTF-8 validation -//! entirely for maximum performance (~20% faster lookups) -//! -//! **Note**: `simdutf8` and `unsafe-str-decode` are mutually exclusive. -//! -//! ## Database Compatibility -//! -//! This library supports all MaxMind DB format databases: -//! - **GeoIP2** databases (City, Country, Enterprise, ISP, etc.) -//! - **GeoLite2** databases (free versions) -//! - Custom MaxMind DB format databases -//! -//! ## Thread Safety -//! -//! The `Reader` is `Send` and `Sync`, making it safe to share across threads. -//! This makes it ideal for web servers and other concurrent applications. -//! -//! ## Quick Start -//! -//! ```rust -//! use maxminddb::{Reader, geoip2}; -//! use std::net::IpAddr; -//! -//! fn main() -> Result<(), Box> { -//! // Open database file -//! 
# let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb")?; -//! # /* -//! let reader = Reader::open_readfile("/path/to/GeoIP2-City.mmdb")?; -//! # */ -//! -//! // Look up an IP address -//! let ip: IpAddr = "89.160.20.128".parse()?; -//! let result = reader.lookup(ip)?; -//! -//! if result.found() { -//! let city: geoip2::City = result.decode()?; -//! if let Some(country) = city.country { -//! println!("Country: {}", country.iso_code.unwrap_or("Unknown")); -//! } -//! } -//! -//! Ok(()) -//! } -//! ``` -//! -//! ## Selective Field Access -//! -//! Use `decode_path` to extract specific fields without deserializing the entire record: -//! -//! ```rust -//! use maxminddb::{Reader, PathElement}; -//! use std::net::IpAddr; -//! -//! let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); -//! let ip: IpAddr = "89.160.20.128".parse().unwrap(); -//! -//! let result = reader.lookup(ip).unwrap(); -//! let country_code: Option = result.decode_path(&[ -//! PathElement::Key("country"), -//! PathElement::Key("iso_code"), -//! ]).unwrap(); -//! -//! println!("Country: {:?}", country_code); -//! ``` - -use std::cmp::Ordering; -use std::collections::{BTreeMap, HashSet}; -use std::fmt::Display; -use std::fs; -use std::io; -use std::net::IpAddr; -use std::path::Path; - -use ipnetwork::{IpNetwork, IpNetworkError}; -use serde::{de, Deserialize, Serialize}; -use thiserror::Error; - -#[cfg(feature = "mmap")] -pub use memmap2::Mmap; -#[cfg(feature = "mmap")] -use memmap2::MmapOptions; -#[cfg(feature = "mmap")] -use std::fs::File; - -#[cfg(all(feature = "simdutf8", feature = "unsafe-str-decode"))] -compile_error!("features `simdutf8` and `unsafe-str-decode` are mutually exclusive"); - -/// Size of the data section separator (16 zero bytes). -const DATA_SECTION_SEPARATOR_SIZE: usize = 16; - -/// Error returned by MaxMind DB operations. 
-#[derive(Error, Debug)] -#[non_exhaustive] -pub enum MaxMindDbError { - /// The database file is invalid or corrupted. - #[error("{}", format_invalid_database(.message, .offset))] - InvalidDatabase { - /// Description of what is invalid. - message: String, - /// Byte offset in the database where the error was detected. - offset: Option, - }, - - /// An I/O error occurred while reading the database. - #[error("i/o error: {0}")] - Io( - #[from] - #[source] - io::Error, - ), - - /// Memory mapping failed. - #[cfg(feature = "mmap")] - #[error("memory map error: {0}")] - Mmap(#[source] io::Error), - - /// Error decoding data from the database. - #[error("{}", format_decoding_error(.message, .offset, .path.as_deref()))] - Decoding { - /// Description of the decoding error. - message: String, - /// Byte offset in the data section where the error occurred. - offset: Option, - /// JSON-pointer-like path to the field (e.g., "/city/names/en"). - path: Option, - }, - - /// The provided network/CIDR is invalid. - #[error("invalid network: {0}")] - InvalidNetwork( - #[from] - #[source] - IpNetworkError, - ), -} - -fn format_invalid_database(message: &str, offset: &Option) -> String { - match offset { - Some(off) => format!("invalid database at offset {off}: {message}"), - None => format!("invalid database: {message}"), - } -} - -fn format_decoding_error(message: &str, offset: &Option, path: Option<&str>) -> String { - match (offset, path) { - (Some(off), Some(p)) => format!("decoding error at offset {off} (path: {p}): {message}"), - (Some(off), None) => format!("decoding error at offset {off}: {message}"), - (None, Some(p)) => format!("decoding error (path: {p}): {message}"), - (None, None) => format!("decoding error: {message}"), - } -} - -impl MaxMindDbError { - /// Creates an InvalidDatabase error with just a message. 
- pub fn invalid_database(message: impl Into) -> Self { - MaxMindDbError::InvalidDatabase { - message: message.into(), - offset: None, - } - } - - /// Creates an InvalidDatabase error with message and offset. - pub fn invalid_database_at(message: impl Into, offset: usize) -> Self { - MaxMindDbError::InvalidDatabase { - message: message.into(), - offset: Some(offset), - } - } - - /// Creates a Decoding error with just a message. - pub fn decoding(message: impl Into) -> Self { - MaxMindDbError::Decoding { - message: message.into(), - offset: None, - path: None, - } - } - - /// Creates a Decoding error with message and offset. - pub fn decoding_at(message: impl Into, offset: usize) -> Self { - MaxMindDbError::Decoding { - message: message.into(), - offset: Some(offset), - path: None, - } - } - - /// Creates a Decoding error with message, offset, and path. - pub fn decoding_at_path( - message: impl Into, - offset: usize, - path: impl Into, - ) -> Self { - MaxMindDbError::Decoding { - message: message.into(), - offset: Some(offset), - path: Some(path.into()), - } - } -} - -impl de::Error for MaxMindDbError { - fn custom(msg: T) -> Self { - MaxMindDbError::decoding(msg.to_string()) - } -} - -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Metadata { - pub binary_format_major_version: u16, - pub binary_format_minor_version: u16, - pub build_epoch: u64, - pub database_type: String, - pub description: BTreeMap, - pub ip_version: u16, - pub languages: Vec, - pub node_count: u32, - pub record_size: u16, -} - -/// Options for network iteration. -/// -/// Controls which networks are yielded when iterating over the database -/// with [`Reader::within()`] or [`Reader::networks()`]. 
-/// -/// # Example -/// -/// ``` -/// use maxminddb::WithinOptions; -/// -/// // Default options (skip aliases, skip networks without data, include empty values) -/// let opts = WithinOptions::default(); -/// -/// // Include aliased networks (IPv4 networks via IPv6 aliases) -/// let opts = WithinOptions::default().include_aliased_networks(); -/// -/// // Skip empty values and include networks without data -/// let opts = WithinOptions::default() -/// .skip_empty_values() -/// .include_networks_without_data(); -/// ``` -#[derive(Debug, Clone, Copy, Default)] -pub struct WithinOptions { - /// Include IPv4 networks multiple times when accessed via IPv6 aliases. - pub include_aliased_networks: bool, - /// Include networks that have no associated data record. - pub include_networks_without_data: bool, - /// Skip networks whose data is an empty map or empty array. - pub skip_empty_values: bool, -} - -impl WithinOptions { - /// Include IPv4 networks multiple times when accessed via IPv6 aliases. - /// - /// In IPv6 databases, IPv4 networks are stored at `::0/96`. However, the - /// same data is accessible through several IPv6 prefixes (e.g., - /// `::ffff:0:0/96` for IPv4-mapped IPv6). By default, these aliases are - /// skipped to avoid yielding the same network multiple times. - /// - /// When enabled, the iterator will yield these aliased networks. - #[must_use] - pub fn include_aliased_networks(mut self) -> Self { - self.include_aliased_networks = true; - self - } - - /// Include networks that have no associated data record. - /// - /// Some tree nodes point to "no data" (the node_count sentinel). By default - /// these are skipped. When enabled, these networks are yielded and - /// [`LookupResult::found()`] returns `false` for them. - #[must_use] - pub fn include_networks_without_data(mut self) -> Self { - self.include_networks_without_data = true; - self - } - - /// Skip networks whose data is an empty map or empty array. 
- /// - /// Some databases store empty maps `{}` or empty arrays `[]` for records - /// without meaningful data. This option filters them out. - #[must_use] - pub fn skip_empty_values(mut self) -> Self { - self.skip_empty_values = true; - self - } -} - -#[derive(Debug)] -struct WithinNode { - node: usize, - ip_int: IpInt, - prefix_len: usize, -} - -/// Iterator over IP networks within a CIDR range. -/// -/// This iterator yields [`LookupResult`] for each network in the database -/// that falls within the specified CIDR range. Use [`LookupResult::decode()`] -/// to deserialize the data for each result. -#[derive(Debug)] -pub struct Within<'de, S: AsRef<[u8]>> { - reader: &'de Reader, - node_count: usize, - stack: Vec, - options: WithinOptions, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum IpInt { - V4(u32), - V6(u128), -} - -impl IpInt { - fn new(ip_addr: IpAddr) -> Self { - match ip_addr { - IpAddr::V4(v4) => IpInt::V4(v4.into()), - IpAddr::V6(v6) => IpInt::V6(v6.into()), - } - } - - #[inline(always)] - fn get_bit(&self, index: usize) -> bool { - match self { - IpInt::V4(ip) => (ip >> (31 - index)) & 1 == 1, - IpInt::V6(ip) => (ip >> (127 - index)) & 1 == 1, - } - } - - fn bit_count(&self) -> usize { - match self { - IpInt::V4(_) => 32, - IpInt::V6(_) => 128, - } - } - - fn is_ipv4_in_ipv6(&self) -> bool { - match self { - IpInt::V4(_) => false, - IpInt::V6(ip) => *ip <= 0xFFFFFFFF, - } - } -} - -impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { - type Item = Result, MaxMindDbError>; - - fn next(&mut self) -> Option { - while let Some(current) = self.stack.pop() { - let bit_count = current.ip_int.bit_count(); - - // Skip networks that are aliases for the IPv4 network (unless option is set) - if !self.options.include_aliased_networks - && self.reader.ipv4_start != 0 - && current.node == self.reader.ipv4_start - && bit_count == 128 - && !current.ip_int.is_ipv4_in_ipv6() - { - continue; - } - - match current.node.cmp(&self.node_count) { - 
Ordering::Greater => { - // This is a data node, emit it and we're done (until the following next call) - let ip_addr = ip_int_to_addr(¤t.ip_int); - - // Resolve the pointer to a data offset - let data_offset = match self.reader.resolve_data_pointer(current.node) { - Ok(offset) => offset, - Err(e) => return Some(Err(e)), - }; - - // Check if we should skip empty values - if self.options.skip_empty_values { - match self.is_empty_value_at(data_offset) { - Ok(true) => continue, // Skip empty value - Ok(false) => {} // Not empty, proceed - Err(e) => return Some(Err(e)), - } - } - - return Some(Ok(LookupResult::new_found( - self.reader, - data_offset, - current.prefix_len as u8, - ip_addr, - ))); - } - Ordering::Equal => { - // Dead end (no data) - include if option is set - if self.options.include_networks_without_data { - let ip_addr = ip_int_to_addr(¤t.ip_int); - return Some(Ok(LookupResult::new_not_found( - self.reader, - current.prefix_len as u8, - ip_addr, - ))); - } - // Otherwise skip (current behavior) - } - Ordering::Less => { - // In order traversal of our children - // right/1-bit - let mut right_ip_int = current.ip_int; - - if current.prefix_len < bit_count { - let bit = current.prefix_len; - match &mut right_ip_int { - IpInt::V4(ip) => *ip |= 1 << (31 - bit), - IpInt::V6(ip) => *ip |= 1 << (127 - bit), - }; - } - - let node = match self.reader.read_node(current.node, 1) { - Ok(node) => node, - Err(e) => return Some(Err(e)), - }; - self.stack.push(WithinNode { - node, - ip_int: right_ip_int, - prefix_len: current.prefix_len + 1, - }); - // left/0-bit - let node = match self.reader.read_node(current.node, 0) { - Ok(node) => node, - Err(e) => return Some(Err(e)), - }; - self.stack.push(WithinNode { - node, - ip_int: current.ip_int, - prefix_len: current.prefix_len + 1, - }); - } - } - } - None - } -} - -impl<'de, S: AsRef<[u8]>> Within<'de, S> { - /// Check if the value at the given data offset is an empty map or array. 
- fn is_empty_value_at(&self, data_offset: usize) -> Result { - let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; - let mut dec = decoder::Decoder::new(buf, data_offset); - let (size, type_num) = dec.peek_type()?; - match type_num { - decoder::TYPE_MAP | decoder::TYPE_ARRAY => Ok(size == 0), - _ => Ok(false), // Non-container types are never "empty" - } - } -} - -/// Convert IpInt to IpAddr -fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr { - match ip_int { - IpInt::V4(ip) => IpAddr::V4((*ip).into()), - IpInt::V6(ip) => { - // Check if this is an IPv4-mapped IPv6 address - if *ip <= 0xFFFFFFFF { - IpAddr::V4((*ip as u32).into()) - } else { - IpAddr::V6((*ip).into()) - } - } - } -} - -/// A reader for the MaxMind DB format. The lifetime `'data` is tied to the -/// lifetime of the underlying buffer holding the contents of the database file. -/// -/// The `Reader` supports both file-based and memory-mapped access to MaxMind -/// DB files, including GeoIP2 and GeoLite2 databases. -/// -/// # Features -/// -/// - **`mmap`**: Enable memory-mapped file access for better performance -/// - **`simdutf8`**: Use SIMD-accelerated UTF-8 validation (faster string -/// decoding) -/// - **`unsafe-str-decode`**: Skip UTF-8 validation entirely (unsafe, but -/// ~20% faster) -#[derive(Debug)] -pub struct Reader> { - buf: S, - pub metadata: Metadata, - ipv4_start: usize, - /// Bit depth at which ipv4_start was found (0-96). Used to calculate - /// correct prefix lengths for IPv4 lookups in IPv6 databases. - ipv4_start_bit_depth: usize, - pointer_base: usize, -} - -#[cfg(feature = "mmap")] -impl Reader { - /// Open a MaxMind DB database file by memory mapping it. 
- /// - /// # Example - /// - /// ``` - /// # #[cfg(feature = "mmap")] - /// # { - /// let reader = maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// # } - /// ``` - pub fn open_mmap>(database: P) -> Result, MaxMindDbError> { - let file_read = File::open(database)?; - let mmap = unsafe { MmapOptions::new().map(&file_read) }.map_err(MaxMindDbError::Mmap)?; - Reader::from_source(mmap) - } -} - -impl Reader> { - /// Open a MaxMind DB database file by loading it into memory. - /// - /// # Example - /// - /// ``` - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// ``` - pub fn open_readfile>(database: P) -> Result>, MaxMindDbError> { - let buf: Vec = fs::read(&database)?; // IO error converted via #[from] - Reader::from_source(buf) - } -} - -impl<'de, S: AsRef<[u8]>> Reader { - /// Open a MaxMind DB database from anything that implements AsRef<[u8]> - /// - /// # Example - /// - /// ``` - /// use std::fs; - /// let buf = fs::read("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// let reader = maxminddb::Reader::from_source(buf).unwrap(); - /// ``` - pub fn from_source(buf: S) -> Result, MaxMindDbError> { - let data_section_separator_size = 16; - - let metadata_start = find_metadata_start(buf.as_ref())?; - let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0); - let metadata = Metadata::deserialize(&mut type_decoder)?; - - let search_tree_size = (metadata.node_count as usize) * (metadata.record_size as usize) / 4; - - let mut reader = Reader { - buf, - pointer_base: search_tree_size + data_section_separator_size, - metadata, - ipv4_start: 0, - ipv4_start_bit_depth: 0, - }; - let (ipv4_start, ipv4_start_bit_depth) = reader.find_ipv4_start()?; - reader.ipv4_start = ipv4_start; - reader.ipv4_start_bit_depth = ipv4_start_bit_depth; - - Ok(reader) - } - - /// Lookup an IP address in the database. 
- /// - /// Returns a [`LookupResult`] that can be used to: - /// - Check if the IP was found with [`found()`](LookupResult::found) - /// - Get the network containing the IP with [`network()`](LookupResult::network) - /// - Decode the full record with [`decode()`](LookupResult::decode) - /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path) - /// - Get a low-level decoder with [`decoder()`](LookupResult::decoder) - /// - /// # Examples - /// - /// Basic city lookup: - /// ``` - /// # use maxminddb::geoip2; - /// # use std::net::IpAddr; - /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; - /// - /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); - /// let result = reader.lookup(ip)?; - /// - /// if result.found() { - /// let city: geoip2::City = result.decode()?; - /// if let Some(city_info) = city.city { - /// if let Some(names) = city_info.names { - /// if let Some(name) = names.get("en") { - /// println!("City: {}", name); - /// } - /// } - /// } - /// } else { - /// println!("No data found for IP {}", ip); - /// } - /// # Ok(()) - /// # } - /// ``` - /// - /// Selective field access: - /// ``` - /// # use maxminddb::{Reader, PathElement}; - /// # use std::net::IpAddr; - /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { - /// let reader = Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; - /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); - /// - /// let result = reader.lookup(ip)?; - /// let country_code: Option = result.decode_path(&[ - /// PathElement::Key("country"), - /// PathElement::Key("iso_code"), - /// ])?; - /// - /// println!("Country: {:?}", country_code); - /// # Ok(()) - /// # } - /// ``` - pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> { - // Check for IPv6 address in IPv4-only database - if matches!(address, IpAddr::V6(_)) && 
self.metadata.ip_version == 4 { - return Err(MaxMindDbError::invalid_database( - "you attempted to look up an IPv6 address in an IPv4-only database", - )); - } - - let ip_int = IpInt::new(address); - let (pointer, prefix_len) = self.find_address_in_tree(&ip_int)?; - - // For IPv4 addresses in IPv6 databases, adjust prefix_len to reflect - // the actual bit depth in the tree. The ipv4_start_bit_depth tells us - // how deep in the IPv6 tree we were when we found the IPv4 subtree. - let prefix_len = if matches!(address, IpAddr::V4(_)) && self.metadata.ip_version == 6 { - self.ipv4_start_bit_depth + prefix_len - } else { - prefix_len - }; - - if pointer == 0 { - // IP not found in database - Ok(LookupResult::new_not_found(self, prefix_len as u8, address)) - } else { - // Resolve the pointer to a data offset - let data_offset = self.resolve_data_pointer(pointer)?; - Ok(LookupResult::new_found( - self, - data_offset, - prefix_len as u8, - address, - )) - } - } - - /// Iterate over all networks in the database. - /// - /// This is a convenience method equivalent to calling [`within()`](Self::within) - /// with `0.0.0.0/0` for IPv4-only databases or `::/0` for IPv6 databases. - /// - /// # Arguments - /// - /// * `options` - Controls which networks are yielded. Use [`Default::default()`] - /// for standard behavior. 
- /// - /// # Examples - /// - /// Iterate over all networks with default options: - /// ``` - /// use maxminddb::{geoip2, Reader}; - /// - /// let reader = Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// - /// let mut count = 0; - /// for result in reader.networks(Default::default()).unwrap() { - /// let lookup = result.unwrap(); - /// count += 1; - /// if count >= 10 { break; } - /// } - /// ``` - pub fn networks(&'de self, options: WithinOptions) -> Result, MaxMindDbError> { - let cidr = if self.metadata.ip_version == 6 { - IpNetwork::V6("::/0".parse().unwrap()) - } else { - IpNetwork::V4("0.0.0.0/0".parse().unwrap()) - }; - self.within(cidr, options) - } - - /// Iterate over IP networks within a CIDR range. - /// - /// Returns an iterator that yields [`LookupResult`] for each network in the - /// database that falls within the specified CIDR range. - /// - /// # Arguments - /// - /// * `cidr` - The CIDR range to iterate over. - /// * `options` - Controls which networks are yielded. Use [`Default::default()`] - /// for standard behavior (skip aliases, skip networks without data, include - /// empty values). 
- /// - /// # Examples - /// - /// Iterate over all IPv4 networks: - /// ``` - /// use ipnetwork::IpNetwork; - /// use maxminddb::{geoip2, Reader}; - /// - /// let reader = Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// - /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap()); - /// let mut count = 0; - /// for result in reader.within(ipv4_all, Default::default()).unwrap() { - /// let lookup = result.unwrap(); - /// let network = lookup.network().unwrap(); - /// let city: geoip2::City = lookup.decode().unwrap(); - /// let city_name = city.city.as_ref() - /// .and_then(|c| c.names.as_ref()) - /// .and_then(|n| n.get("en")); - /// println!("Network: {}, City: {:?}", network, city_name); - /// count += 1; - /// if count >= 10 { break; } // Limit output for example - /// } - /// ``` - /// - /// Search within a specific subnet: - /// ``` - /// use ipnetwork::IpNetwork; - /// use maxminddb::{geoip2, Reader}; - /// - /// let reader = Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// - /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap()); - /// for result in reader.within(subnet, Default::default()).unwrap() { - /// match result { - /// Ok(lookup) => { - /// let network = lookup.network().unwrap(); - /// println!("Found: {}", network); - /// } - /// Err(e) => eprintln!("Error: {}", e), - /// } - /// } - /// ``` - /// - /// Include networks without data: - /// ``` - /// use ipnetwork::IpNetwork; - /// use maxminddb::{Reader, WithinOptions}; - /// - /// let reader = Reader::open_readfile( - /// "test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); - /// - /// let opts = WithinOptions::default().include_networks_without_data(); - /// for result in reader.within("1.0.0.0/8".parse().unwrap(), opts).unwrap() { - /// let lookup = result.unwrap(); - /// if !lookup.found() { - /// println!("Network {} has no data", lookup.network().unwrap()); - /// } - /// } - /// ``` - 
pub fn within( - &'de self, - cidr: IpNetwork, - options: WithinOptions, - ) -> Result, MaxMindDbError> { - let ip_address = cidr.network(); - let prefix_len = cidr.prefix() as usize; - let ip_int = IpInt::new(ip_address); - let bit_count = ip_int.bit_count(); - - let mut node = self.start_node(bit_count); - let node_count = self.metadata.node_count as usize; - - let mut stack: Vec = Vec::with_capacity(bit_count - prefix_len); - - // Traverse down the tree to the level that matches the cidr mark - let mut depth = 0_usize; - for i in 0..prefix_len { - let bit = ip_int.get_bit(i); - node = self.read_node(node, bit as usize)?; - depth = i + 1; // We've now traversed i+1 bits (bits 0 through i) - - if node >= node_count { - // We've hit a data node or dead end before we exhausted our prefix. - // This means the requested CIDR is contained in a single record. - break; - } - } - - // Always push the node - it could be: - // - A data node (> node_count): will be yielded as a single record - // - The empty node (== node_count): will be skipped unless include_networks_without_data - // - An internal node (< node_count): will be traversed to find all contained records - stack.push(WithinNode { - node, - ip_int, - prefix_len: depth, - }); - - let within = Within { - reader: self, - node_count, - stack, - options, - }; - - Ok(within) - } - - fn find_address_in_tree(&self, ip_int: &IpInt) -> Result<(usize, usize), MaxMindDbError> { - let bit_count = ip_int.bit_count(); - let mut node = self.start_node(bit_count); - - let node_count = self.metadata.node_count as usize; - let mut prefix_len = bit_count; - - for i in 0..bit_count { - if node >= node_count { - prefix_len = i; - break; - } - let bit = ip_int.get_bit(i); - node = self.read_node(node, bit as usize)?; - } - match node_count { - // If node == node_count, it means we hit the placeholder "empty" node - // return 0 as the pointer value to signify "not found". 
- _ if node == node_count => Ok((0, prefix_len)), - _ if node > node_count => Ok((node, prefix_len)), - _ => Err(MaxMindDbError::invalid_database( - "invalid node in search tree", - )), - } - } - - #[inline] - fn start_node(&self, length: usize) -> usize { - if length == 128 { - 0 - } else { - self.ipv4_start - } - } - - /// Find the IPv4 start node and the bit depth at which it was found. - /// Returns (node, depth) where depth is how far into the tree we traversed. - fn find_ipv4_start(&self) -> Result<(usize, usize), MaxMindDbError> { - if self.metadata.ip_version != 6 { - return Ok((0, 0)); - } - - // We are looking up an IPv4 address in an IPv6 tree. Skip over the - // first 96 nodes. - let mut node: usize = 0_usize; - let mut depth: usize = 0; - for i in 0_u8..96 { - if node >= self.metadata.node_count as usize { - depth = i as usize; - break; - } - node = self.read_node(node, 0)?; - depth = (i + 1) as usize; - } - Ok((node, depth)) - } - - #[inline(always)] - fn read_node(&self, node_number: usize, index: usize) -> Result { - let buf = self.buf.as_ref(); - let base_offset = node_number * (self.metadata.record_size as usize) / 4; - - let val = match self.metadata.record_size { - 24 => { - let offset = base_offset + index * 3; - (buf[offset] as usize) << 16 - | (buf[offset + 1] as usize) << 8 - | buf[offset + 2] as usize - } - 28 => { - let middle = if index != 0 { - buf[base_offset + 3] & 0x0F - } else { - (buf[base_offset + 3] & 0xF0) >> 4 - }; - let offset = base_offset + index * 4; - (middle as usize) << 24 - | (buf[offset] as usize) << 16 - | (buf[offset + 1] as usize) << 8 - | buf[offset + 2] as usize - } - 32 => { - let offset = base_offset + index * 4; - (buf[offset] as usize) << 24 - | (buf[offset + 1] as usize) << 16 - | (buf[offset + 2] as usize) << 8 - | buf[offset + 3] as usize - } - s => { - return Err(MaxMindDbError::invalid_database(format!( - "unknown record size: {s}" - ))) - } - }; - Ok(val) - } - - /// Resolves a pointer from the search 
tree to an offset in the data section. - #[inline] - fn resolve_data_pointer(&self, pointer: usize) -> Result { - let resolved = pointer - (self.metadata.node_count as usize) - 16; - - // Check bounds using pointer_base which marks the start of the data section - if resolved >= (self.buf.as_ref().len() - self.pointer_base) { - return Err(MaxMindDbError::invalid_database( - "the MaxMind DB file's data pointer resolves to an invalid location", - )); - } - - Ok(resolved) - } - - /// Performs comprehensive validation of the MaxMind DB file. - /// - /// This method validates: - /// - Metadata section: format versions, required fields, and value constraints - /// - Search tree: traverses all networks to verify tree structure integrity - /// - Data section separator: validates the 16-byte separator between tree and data - /// - Data section: verifies all data records referenced by the search tree - /// - /// The verifier is stricter than the MaxMind DB specification and may return - /// errors on some databases that are still readable by normal operations. - /// This method is useful for: - /// - Validating database files after download or generation - /// - Debugging database corruption issues - /// - Ensuring database integrity in critical applications - /// - /// Note: Verification traverses the entire database and may be slow on large files. - /// The method is thread-safe and can be called on an active Reader. 
- /// - /// # Example - /// - /// ``` - /// use maxminddb::Reader; - /// - /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// reader.verify().expect("Database should be valid"); - /// ``` - pub fn verify(&self) -> Result<(), MaxMindDbError> { - self.verify_metadata()?; - self.verify_database() - } - - fn verify_metadata(&self) -> Result<(), MaxMindDbError> { - let m = &self.metadata; - - if m.binary_format_major_version != 2 { - return Err(MaxMindDbError::invalid_database(format!( - "binary_format_major_version - Expected: 2 Actual: {}", - m.binary_format_major_version - ))); - } - if m.binary_format_minor_version != 0 { - return Err(MaxMindDbError::invalid_database(format!( - "binary_format_minor_version - Expected: 0 Actual: {}", - m.binary_format_minor_version - ))); - } - if m.database_type.is_empty() { - return Err(MaxMindDbError::invalid_database( - "database_type - Expected: non-empty string Actual: \"\"", - )); - } - if m.description.is_empty() { - return Err(MaxMindDbError::invalid_database( - "description - Expected: non-empty map Actual: {}", - )); - } - if m.ip_version != 4 && m.ip_version != 6 { - return Err(MaxMindDbError::invalid_database(format!( - "ip_version - Expected: 4 or 6 Actual: {}", - m.ip_version - ))); - } - if m.record_size != 24 && m.record_size != 28 && m.record_size != 32 { - return Err(MaxMindDbError::invalid_database(format!( - "record_size - Expected: 24, 28, or 32 Actual: {}", - m.record_size - ))); - } - if m.node_count == 0 { - return Err(MaxMindDbError::invalid_database( - "node_count - Expected: positive integer Actual: 0", - )); - } - Ok(()) - } - - fn verify_database(&self) -> Result<(), MaxMindDbError> { - let offsets = self.verify_search_tree()?; - self.verify_data_section_separator()?; - self.verify_data_section(offsets) - } - - fn verify_search_tree(&self) -> Result, MaxMindDbError> { - let mut offsets = HashSet::new(); - let opts = 
WithinOptions::default().include_networks_without_data(); - - // Maximum number of networks we can expect in a valid database. - // A database with N nodes can have at most 2N data entries (each leaf node - // can have data). We add some margin for safety. - let max_iterations = (self.metadata.node_count as usize).saturating_mul(3); - let mut iteration_count = 0usize; - - for result in self.networks(opts)? { - let lookup = result?; - if let Some(offset) = lookup.offset() { - offsets.insert(offset); - } - - iteration_count += 1; - if iteration_count > max_iterations { - return Err(MaxMindDbError::invalid_database(format!( - "search tree appears to have a cycle or invalid structure (exceeded {max_iterations} iterations)" - ))); - } - } - Ok(offsets) - } - - fn verify_data_section_separator(&self) -> Result<(), MaxMindDbError> { - let separator_start = - self.metadata.node_count as usize * self.metadata.record_size as usize / 4; - let separator_end = separator_start + DATA_SECTION_SEPARATOR_SIZE; - - if separator_end > self.buf.as_ref().len() { - return Err(MaxMindDbError::invalid_database_at( - "data section separator extends past end of file", - separator_start, - )); - } - - let separator = &self.buf.as_ref()[separator_start..separator_end]; - - for &b in separator { - if b != 0 { - return Err(MaxMindDbError::invalid_database_at( - format!("unexpected byte in data separator: {separator:?}"), - separator_start, - )); - } - } - Ok(()) - } - - fn verify_data_section(&self, offsets: HashSet) -> Result<(), MaxMindDbError> { - let data_section = &self.buf.as_ref()[self.pointer_base..]; - - // Verify each offset from the search tree points to valid, decodable data - for &offset in &offsets { - if offset >= data_section.len() { - return Err(MaxMindDbError::invalid_database_at( - format!( - "search tree pointer is beyond data section (len: {})", - data_section.len() - ), - offset, - )); - } - - let mut dec = decoder::Decoder::new(data_section, offset); - - // Try to 
skip/decode the value to verify it's valid - if let Err(e) = dec.skip_value_for_verification() { - return Err(MaxMindDbError::invalid_database_at( - format!("decoding error: {e}"), - offset, - )); - } - } - - Ok(()) - } -} - -fn find_metadata_start(buf: &[u8]) -> Result { - const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com"; - - memchr::memmem::rfind(buf, METADATA_START_MARKER) - .map(|x| x + METADATA_START_MARKER.len()) - .ok_or_else(|| { - MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file") - }) -} - -mod decoder; -pub mod geoip2; -mod result; - -pub use result::{LookupResult, PathElement}; - -#[cfg(test)] -mod reader_test; - -#[cfg(test)] -mod tests { - use super::MaxMindDbError; - use ipnetwork::IpNetworkError; - use std::io::{Error, ErrorKind}; - - #[test] - fn test_error_display() { - // Error without offset - assert_eq!( - format!( - "{}", - MaxMindDbError::invalid_database("something went wrong") - ), - "invalid database: something went wrong".to_owned(), - ); - // Error with offset - assert_eq!( - format!( - "{}", - MaxMindDbError::invalid_database_at("something went wrong", 42) - ), - "invalid database at offset 42: something went wrong".to_owned(), - ); - let io_err = Error::new(ErrorKind::NotFound, "file not found"); - assert_eq!( - format!("{}", MaxMindDbError::from(io_err)), - "i/o error: file not found".to_owned(), - ); - - #[cfg(feature = "mmap")] - { - let mmap_io_err = Error::new(ErrorKind::PermissionDenied, "mmap failed"); - assert_eq!( - format!("{}", MaxMindDbError::Mmap(mmap_io_err)), - "memory map error: mmap failed".to_owned(), - ); - } - - // Decoding error without offset - assert_eq!( - format!("{}", MaxMindDbError::decoding("unexpected type")), - "decoding error: unexpected type".to_owned(), - ); - // Decoding error with offset - assert_eq!( - format!("{}", MaxMindDbError::decoding_at("unexpected type", 100)), - "decoding error at offset 100: unexpected type".to_owned(), - ); - // Decoding error 
with offset and path - assert_eq!( - format!( - "{}", - MaxMindDbError::decoding_at_path("unexpected type", 100, "/city/names/en") - ), - "decoding error at offset 100 (path: /city/names/en): unexpected type".to_owned(), - ); - - let net_err = IpNetworkError::InvalidPrefix; - assert_eq!( - format!("{}", MaxMindDbError::from(net_err)), - "invalid network: invalid prefix".to_owned(), - ); - } - - #[test] - fn test_lookup_network() { - use super::Reader; - use std::collections::HashMap; - use std::net::IpAddr; - - struct TestCase { - ip: &'static str, - db_file: &'static str, - expected_network: &'static str, - expected_found: bool, - } - - let test_cases = [ - // IPv4 address in IPv6 database - not found, returns containing network - TestCase { - ip: "1.1.1.1", - db_file: "test-data/test-data/MaxMind-DB-test-ipv6-32.mmdb", - expected_network: "1.0.0.0/8", - expected_found: false, - }, - // IPv6 exact match - TestCase { - ip: "::1:ffff:ffff", - db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", - expected_network: "::1:ffff:ffff/128", - expected_found: true, - }, - // IPv6 network match (not exact) - TestCase { - ip: "::2:0:1", - db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", - expected_network: "::2:0:0/122", - expected_found: true, - }, - // IPv4 exact match - TestCase { - ip: "1.1.1.1", - db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", - expected_network: "1.1.1.1/32", - expected_found: true, - }, - // IPv4 network match (not exact) - TestCase { - ip: "1.1.1.3", - db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", - expected_network: "1.1.1.2/31", - expected_found: true, - }, - // IPv4 in decoder test database - TestCase { - ip: "1.1.1.3", - db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", - expected_network: "1.1.1.0/24", - expected_found: true, - }, - // IPv4-mapped IPv6 address - preserves IPv6 form - TestCase { - ip: "::ffff:1.1.1.128", - db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", - 
expected_network: "::ffff:1.1.1.0/120", - expected_found: true, - }, - // IPv4-compatible IPv6 address - uses compressed IPv6 notation - TestCase { - ip: "::1.1.1.128", - db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", - expected_network: "::101:100/120", - expected_found: true, - }, - // No IPv4 search tree - IPv4 address returns ::/64 - TestCase { - ip: "200.0.2.1", - db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", - expected_network: "::/64", - expected_found: true, - }, - // No IPv4 search tree - IPv6 address in IPv4 range - TestCase { - ip: "::200.0.2.1", - db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", - expected_network: "::/64", - expected_found: true, - }, - // No IPv4 search tree - IPv6 address at boundary of IPv4 space - TestCase { - ip: "0:0:0:0:ffff:ffff:ffff:ffff", - db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", - expected_network: "::/64", - expected_found: true, - }, - // No IPv4 search tree - high IPv6 address not found - TestCase { - ip: "ef00::", - db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", - expected_network: "8000::/1", - expected_found: false, - }, - ]; - - // Cache readers to avoid reopening the same file multiple times - let mut readers: HashMap<&str, Reader>> = HashMap::new(); - - for test in &test_cases { - let reader = readers - .entry(test.db_file) - .or_insert_with(|| Reader::open_readfile(test.db_file).unwrap()); - - let ip: IpAddr = test.ip.parse().unwrap(); - let result = reader.lookup(ip).unwrap(); - - assert_eq!( - result.found(), - test.expected_found, - "IP {} in {}: expected found={}, got found={}", - test.ip, - test.db_file, - test.expected_found, - result.found() - ); - - let network = result.network().unwrap(); - assert_eq!( - network.to_string(), - test.expected_network, - "IP {} in {}: expected network {}, got {}", - test.ip, - test.db_file, - test.expected_network, - network - ); - } - } - - #[test] - fn 
test_lookup_with_geoip_data() { - use super::Reader; - use crate::geoip2; - use std::net::IpAddr; - - let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = "89.160.20.128".parse().unwrap(); - - let result = reader.lookup(ip).unwrap(); - assert!(result.found(), "lookup should find known IP"); - - // Decode the data - let city: geoip2::City = result.decode().unwrap(); - assert!(city.city.is_some(), "Expected city data"); - - // Check full network (not just prefix) - let network = result.network().unwrap(); - assert_eq!( - network.to_string(), - "89.160.20.128/25", - "Expected network 89.160.20.128/25" - ); - - // Check offset is available for caching - assert!( - result.offset().is_some(), - "Expected offset to be Some for found IP" - ); - } - - #[test] - fn test_decode_path() { - use super::{PathElement, Reader}; - use std::net::IpAddr; - - let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = "89.160.20.128".parse().unwrap(); - - let result = reader.lookup(ip).unwrap(); - - // Navigate to country.iso_code - let iso_code: Option = result - .decode_path(&[PathElement::Key("country"), PathElement::Key("iso_code")]) - .unwrap(); - assert_eq!(iso_code, Some("SE".to_owned())); - - // Navigate to non-existent path - let missing: Option = result - .decode_path(&[PathElement::Key("nonexistent")]) - .unwrap(); - assert!(missing.is_none()); - } - - #[test] - fn test_ipv6_in_ipv4_database() { - use super::{MaxMindDbError, Reader}; - use std::net::IpAddr; - - let reader = - Reader::open_readfile("test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb").unwrap(); - let ip: IpAddr = "2001::".parse().unwrap(); - - let result = reader.lookup(ip); - match result { - Err(MaxMindDbError::InvalidDatabase { message, .. 
}) => { - assert!( - message.contains("IPv6") && message.contains("IPv4"), - "Expected error message about IPv6 in IPv4 database, got: {}", - message - ); - } - Err(e) => panic!( - "Expected InvalidDatabase error for IPv6 in IPv4 database, got: {:?}", - e - ), - Ok(_) => panic!("Expected error for IPv6 lookup in IPv4-only database"), - } - } - - #[test] - fn test_decode_path_comprehensive() { - use super::{PathElement, Reader}; - use std::net::IpAddr; - - let reader = - Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); - let ip: IpAddr = "::1.1.1.0".parse().unwrap(); - - let result = reader.lookup(ip).unwrap(); - assert!(result.found()); - - // Test simple path: uint16 - let u16_val: Option = result.decode_path(&[PathElement::Key("uint16")]).unwrap(); - assert_eq!(u16_val, Some(100)); - - // Test array access: first element - let arr_first: Option = result - .decode_path(&[PathElement::Key("array"), PathElement::Index(0)]) - .unwrap(); - assert_eq!(arr_first, Some(1)); - - // Test array access: last element (index 2) - let arr_last: Option = result - .decode_path(&[PathElement::Key("array"), PathElement::Index(2)]) - .unwrap(); - assert_eq!(arr_last, Some(3)); - - // Test array access: out of bounds (index 3) returns None - let arr_oob: Option = result - .decode_path(&[PathElement::Key("array"), PathElement::Index(3)]) - .unwrap(); - assert!(arr_oob.is_none()); - - // Test negative index: -1 means last element - let arr_neg1: Option = result - .decode_path(&[PathElement::Key("array"), PathElement::Index(-1)]) - .unwrap(); - assert_eq!(arr_neg1, Some(3)); - - // Test negative index: -3 means first element - let arr_neg3: Option = result - .decode_path(&[PathElement::Key("array"), PathElement::Index(-3)]) - .unwrap(); - assert_eq!(arr_neg3, Some(1)); - - // Test nested path: map.mapX.arrayX[1] - let nested: Option = result - .decode_path(&[ - PathElement::Key("map"), - PathElement::Key("mapX"), - PathElement::Key("arrayX"), - 
PathElement::Index(1), - ]) - .unwrap(); - assert_eq!(nested, Some(8)); - - // Test non-existent key returns None - let missing: Option = result - .decode_path(&[PathElement::Key("does-not-exist"), PathElement::Index(1)]) - .unwrap(); - assert!(missing.is_none()); - - // Test utf8_string path - let utf8: Option = result - .decode_path(&[PathElement::Key("utf8_string")]) - .unwrap(); - assert_eq!(utf8, Some("unicode! ☯ - ♫".to_owned())); - } -} diff --git a/src/metadata.rs b/src/metadata.rs new file mode 100644 index 00000000..38812bb1 --- /dev/null +++ b/src/metadata.rs @@ -0,0 +1,28 @@ +//! Database metadata types. + +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +/// Metadata about the MaxMind DB file. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct Metadata { + /// Major version of the binary format (always 2). + pub binary_format_major_version: u16, + /// Minor version of the binary format (always 0). + pub binary_format_minor_version: u16, + /// Unix timestamp when the database was built. + pub build_epoch: u64, + /// Database type (e.g., "GeoIP2-City", "GeoLite2-Country"). + pub database_type: String, + /// Map of language codes to database descriptions. + pub description: BTreeMap, + /// IP version supported (4 or 6). + pub ip_version: u16, + /// Languages available in the database. + pub languages: Vec, + /// Number of nodes in the search tree. + pub node_count: u32, + /// Size of each record in bits (24, 28, or 32). + pub record_size: u16, +} diff --git a/src/reader.rs b/src/reader.rs new file mode 100644 index 00000000..15ccf95d --- /dev/null +++ b/src/reader.rs @@ -0,0 +1,655 @@ +//! MaxMind DB reader implementation. 
+ +use std::collections::HashSet; +use std::fs; +use std::net::IpAddr; +use std::path::Path; + +use ipnetwork::IpNetwork; +use serde::Deserialize; + +#[cfg(feature = "mmap")] +pub use memmap2::Mmap; +#[cfg(feature = "mmap")] +use memmap2::MmapOptions; +#[cfg(feature = "mmap")] +use std::fs::File; + +use crate::decoder; +use crate::error::MaxMindDbError; +use crate::metadata::Metadata; +use crate::result::LookupResult; +use crate::within::{IpInt, Within, WithinNode, WithinOptions}; + +/// Size of the data section separator (16 zero bytes). +const DATA_SECTION_SEPARATOR_SIZE: usize = 16; + +/// A reader for the MaxMind DB format. The lifetime `'data` is tied to the +/// lifetime of the underlying buffer holding the contents of the database file. +/// +/// The `Reader` supports both file-based and memory-mapped access to MaxMind +/// DB files, including GeoIP2 and GeoLite2 databases. +/// +/// # Features +/// +/// - **`mmap`**: Enable memory-mapped file access for better performance +/// - **`simdutf8`**: Use SIMD-accelerated UTF-8 validation (faster string +/// decoding) +/// - **`unsafe-str-decode`**: Skip UTF-8 validation entirely (unsafe, but +/// ~20% faster) +#[derive(Debug)] +pub struct Reader> { + pub(crate) buf: S, + /// Database metadata. + pub metadata: Metadata, + pub(crate) ipv4_start: usize, + /// Bit depth at which ipv4_start was found (0-96). Used to calculate + /// correct prefix lengths for IPv4 lookups in IPv6 databases. + pub(crate) ipv4_start_bit_depth: usize, + pub(crate) pointer_base: usize, +} + +#[cfg(feature = "mmap")] +impl Reader { + /// Open a MaxMind DB database file by memory mapping it. 
+ /// + /// # Example + /// + /// ``` + /// # #[cfg(feature = "mmap")] + /// # { + /// let reader = maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// # } + /// ``` + pub fn open_mmap>(database: P) -> Result, MaxMindDbError> { + let file_read = File::open(database)?; + let mmap = unsafe { MmapOptions::new().map(&file_read) }.map_err(MaxMindDbError::Mmap)?; + Reader::from_source(mmap) + } +} + +impl Reader> { + /// Open a MaxMind DB database file by loading it into memory. + /// + /// # Example + /// + /// ``` + /// let reader = maxminddb::Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// ``` + pub fn open_readfile>(database: P) -> Result>, MaxMindDbError> { + let buf: Vec = fs::read(&database)?; // IO error converted via #[from] + Reader::from_source(buf) + } +} + +impl<'de, S: AsRef<[u8]>> Reader { + /// Open a MaxMind DB database from anything that implements AsRef<[u8]> + /// + /// # Example + /// + /// ``` + /// use std::fs; + /// let buf = fs::read("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let reader = maxminddb::Reader::from_source(buf).unwrap(); + /// ``` + pub fn from_source(buf: S) -> Result, MaxMindDbError> { + let data_section_separator_size = 16; + + let metadata_start = find_metadata_start(buf.as_ref())?; + let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0); + let metadata = Metadata::deserialize(&mut type_decoder)?; + + let search_tree_size = (metadata.node_count as usize) * (metadata.record_size as usize) / 4; + + let mut reader = Reader { + buf, + pointer_base: search_tree_size + data_section_separator_size, + metadata, + ipv4_start: 0, + ipv4_start_bit_depth: 0, + }; + let (ipv4_start, ipv4_start_bit_depth) = reader.find_ipv4_start()?; + reader.ipv4_start = ipv4_start; + reader.ipv4_start_bit_depth = ipv4_start_bit_depth; + + Ok(reader) + } + + /// Lookup an IP address in the database. 
+ /// + /// Returns a [`LookupResult`] that can be used to: + /// - Check if the IP was found with [`found()`](LookupResult::found) + /// - Get the network containing the IP with [`network()`](LookupResult::network) + /// - Decode the full record with [`decode()`](LookupResult::decode) + /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path) + /// - Get a low-level decoder with [`decoder()`](LookupResult::decoder) + /// + /// # Examples + /// + /// Basic city lookup: + /// ``` + /// # use maxminddb::geoip2; + /// # use std::net::IpAddr; + /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { + /// let reader = maxminddb::Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; + /// + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// let result = reader.lookup(ip)?; + /// + /// if result.found() { + /// let city: geoip2::City = result.decode()?; + /// if let Some(city_info) = city.city { + /// if let Some(names) = city_info.names { + /// if let Some(name) = names.get("en") { + /// println!("City: {}", name); + /// } + /// } + /// } + /// } else { + /// println!("No data found for IP {}", ip); + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// Selective field access: + /// ``` + /// # use maxminddb::{Reader, PathElement}; + /// # use std::net::IpAddr; + /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// + /// let result = reader.lookup(ip)?; + /// let country_code: Option = result.decode_path(&[ + /// PathElement::Key("country"), + /// PathElement::Key("iso_code"), + /// ])?; + /// + /// println!("Country: {:?}", country_code); + /// # Ok(()) + /// # } + /// ``` + pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> { + // Check for IPv6 address in IPv4-only database + if matches!(address, IpAddr::V6(_)) && 
self.metadata.ip_version == 4 { + return Err(MaxMindDbError::invalid_database( + "you attempted to look up an IPv6 address in an IPv4-only database", + )); + } + + let ip_int = IpInt::new(address); + let (pointer, prefix_len) = self.find_address_in_tree(&ip_int)?; + + // For IPv4 addresses in IPv6 databases, adjust prefix_len to reflect + // the actual bit depth in the tree. The ipv4_start_bit_depth tells us + // how deep in the IPv6 tree we were when we found the IPv4 subtree. + let prefix_len = if matches!(address, IpAddr::V4(_)) && self.metadata.ip_version == 6 { + self.ipv4_start_bit_depth + prefix_len + } else { + prefix_len + }; + + if pointer == 0 { + // IP not found in database + Ok(LookupResult::new_not_found(self, prefix_len as u8, address)) + } else { + // Resolve the pointer to a data offset + let data_offset = self.resolve_data_pointer(pointer)?; + Ok(LookupResult::new_found( + self, + data_offset, + prefix_len as u8, + address, + )) + } + } + + /// Iterate over all networks in the database. + /// + /// This is a convenience method equivalent to calling [`within()`](Self::within) + /// with `0.0.0.0/0` for IPv4-only databases or `::/0` for IPv6 databases. + /// + /// # Arguments + /// + /// * `options` - Controls which networks are yielded. Use [`Default::default()`] + /// for standard behavior. 
+ /// + /// # Examples + /// + /// Iterate over all networks with default options: + /// ``` + /// use maxminddb::{geoip2, Reader}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// + /// let mut count = 0; + /// for result in reader.networks(Default::default()).unwrap() { + /// let lookup = result.unwrap(); + /// count += 1; + /// if count >= 10 { break; } + /// } + /// ``` + pub fn networks(&'de self, options: WithinOptions) -> Result, MaxMindDbError> { + let cidr = if self.metadata.ip_version == 6 { + IpNetwork::V6("::/0".parse().unwrap()) + } else { + IpNetwork::V4("0.0.0.0/0".parse().unwrap()) + }; + self.within(cidr, options) + } + + /// Iterate over IP networks within a CIDR range. + /// + /// Returns an iterator that yields [`LookupResult`] for each network in the + /// database that falls within the specified CIDR range. + /// + /// # Arguments + /// + /// * `cidr` - The CIDR range to iterate over. + /// * `options` - Controls which networks are yielded. Use [`Default::default()`] + /// for standard behavior (skip aliases, skip networks without data, include + /// empty values). 
+ /// + /// # Examples + /// + /// Iterate over all IPv4 networks: + /// ``` + /// use ipnetwork::IpNetwork; + /// use maxminddb::{geoip2, Reader}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// + /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap()); + /// let mut count = 0; + /// for result in reader.within(ipv4_all, Default::default()).unwrap() { + /// let lookup = result.unwrap(); + /// let network = lookup.network().unwrap(); + /// let city: geoip2::City = lookup.decode().unwrap(); + /// let city_name = city.city.as_ref() + /// .and_then(|c| c.names.as_ref()) + /// .and_then(|n| n.get("en")); + /// println!("Network: {}, City: {:?}", network, city_name); + /// count += 1; + /// if count >= 10 { break; } // Limit output for example + /// } + /// ``` + /// + /// Search within a specific subnet: + /// ``` + /// use ipnetwork::IpNetwork; + /// use maxminddb::{geoip2, Reader}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// + /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap()); + /// for result in reader.within(subnet, Default::default()).unwrap() { + /// match result { + /// Ok(lookup) => { + /// let network = lookup.network().unwrap(); + /// println!("Found: {}", network); + /// } + /// Err(e) => eprintln!("Error: {}", e), + /// } + /// } + /// ``` + /// + /// Include networks without data: + /// ``` + /// use ipnetwork::IpNetwork; + /// use maxminddb::{Reader, WithinOptions}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + /// + /// let opts = WithinOptions::default().include_networks_without_data(); + /// for result in reader.within("1.0.0.0/8".parse().unwrap(), opts).unwrap() { + /// let lookup = result.unwrap(); + /// if !lookup.found() { + /// println!("Network {} has no data", lookup.network().unwrap()); + /// } + /// } + /// ``` + 
pub fn within( + &'de self, + cidr: IpNetwork, + options: WithinOptions, + ) -> Result, MaxMindDbError> { + let ip_address = cidr.network(); + let prefix_len = cidr.prefix() as usize; + let ip_int = IpInt::new(ip_address); + let bit_count = ip_int.bit_count(); + + let mut node = self.start_node(bit_count); + let node_count = self.metadata.node_count as usize; + + let mut stack: Vec = Vec::with_capacity(bit_count - prefix_len); + + // Traverse down the tree to the level that matches the cidr mark + let mut depth = 0_usize; + for i in 0..prefix_len { + let bit = ip_int.get_bit(i); + node = self.read_node(node, bit as usize)?; + depth = i + 1; // We've now traversed i+1 bits (bits 0 through i) + + if node >= node_count { + // We've hit a data node or dead end before we exhausted our prefix. + // This means the requested CIDR is contained in a single record. + break; + } + } + + // Always push the node - it could be: + // - A data node (> node_count): will be yielded as a single record + // - The empty node (== node_count): will be skipped unless include_networks_without_data + // - An internal node (< node_count): will be traversed to find all contained records + stack.push(WithinNode { + node, + ip_int, + prefix_len: depth, + }); + + let within = Within { + reader: self, + node_count, + stack, + options, + }; + + Ok(within) + } + + fn find_address_in_tree(&self, ip_int: &IpInt) -> Result<(usize, usize), MaxMindDbError> { + let bit_count = ip_int.bit_count(); + let mut node = self.start_node(bit_count); + + let node_count = self.metadata.node_count as usize; + let mut prefix_len = bit_count; + + for i in 0..bit_count { + if node >= node_count { + prefix_len = i; + break; + } + let bit = ip_int.get_bit(i); + node = self.read_node(node, bit as usize)?; + } + match node_count { + // If node == node_count, it means we hit the placeholder "empty" node + // return 0 as the pointer value to signify "not found". 
+ _ if node == node_count => Ok((0, prefix_len)), + _ if node > node_count => Ok((node, prefix_len)), + _ => Err(MaxMindDbError::invalid_database( + "invalid node in search tree", + )), + } + } + + #[inline] + fn start_node(&self, length: usize) -> usize { + if length == 128 { + 0 + } else { + self.ipv4_start + } + } + + /// Find the IPv4 start node and the bit depth at which it was found. + /// Returns (node, depth) where depth is how far into the tree we traversed. + fn find_ipv4_start(&self) -> Result<(usize, usize), MaxMindDbError> { + if self.metadata.ip_version != 6 { + return Ok((0, 0)); + } + + // We are looking up an IPv4 address in an IPv6 tree. Skip over the + // first 96 nodes. + let mut node: usize = 0; + let mut depth: usize = 0; + for i in 0_u8..96 { + if node >= self.metadata.node_count as usize { + depth = i as usize; + break; + } + node = self.read_node(node, 0)?; + depth = (i + 1) as usize; + } + Ok((node, depth)) + } + + #[inline(always)] + pub(crate) fn read_node( + &self, + node_number: usize, + index: usize, + ) -> Result { + let buf = self.buf.as_ref(); + let base_offset = node_number * (self.metadata.record_size as usize) / 4; + + let val = match self.metadata.record_size { + 24 => { + let offset = base_offset + index * 3; + (buf[offset] as usize) << 16 + | (buf[offset + 1] as usize) << 8 + | buf[offset + 2] as usize + } + 28 => { + let middle = if index != 0 { + buf[base_offset + 3] & 0x0F + } else { + (buf[base_offset + 3] & 0xF0) >> 4 + }; + let offset = base_offset + index * 4; + (middle as usize) << 24 + | (buf[offset] as usize) << 16 + | (buf[offset + 1] as usize) << 8 + | buf[offset + 2] as usize + } + 32 => { + let offset = base_offset + index * 4; + (buf[offset] as usize) << 24 + | (buf[offset + 1] as usize) << 16 + | (buf[offset + 2] as usize) << 8 + | buf[offset + 3] as usize + } + s => { + return Err(MaxMindDbError::invalid_database(format!( + "unknown record size: {s}" + ))) + } + }; + Ok(val) + } + + /// Resolves a pointer 
from the search tree to an offset in the data section. + #[inline] + pub(crate) fn resolve_data_pointer(&self, pointer: usize) -> Result { + let resolved = pointer - (self.metadata.node_count as usize) - 16; + + // Check bounds using pointer_base which marks the start of the data section + if resolved >= (self.buf.as_ref().len() - self.pointer_base) { + return Err(MaxMindDbError::invalid_database( + "the MaxMind DB file's data pointer resolves to an invalid location", + )); + } + + Ok(resolved) + } + + /// Performs comprehensive validation of the MaxMind DB file. + /// + /// This method validates: + /// - Metadata section: format versions, required fields, and value constraints + /// - Search tree: traverses all networks to verify tree structure integrity + /// - Data section separator: validates the 16-byte separator between tree and data + /// - Data section: verifies all data records referenced by the search tree + /// + /// The verifier is stricter than the MaxMind DB specification and may return + /// errors on some databases that are still readable by normal operations. + /// This method is useful for: + /// - Validating database files after download or generation + /// - Debugging database corruption issues + /// - Ensuring database integrity in critical applications + /// + /// Note: Verification traverses the entire database and may be slow on large files. + /// The method is thread-safe and can be called on an active Reader. 
+ /// + /// # Example + /// + /// ``` + /// use maxminddb::Reader; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// reader.verify().expect("Database should be valid"); + /// ``` + pub fn verify(&self) -> Result<(), MaxMindDbError> { + self.verify_metadata()?; + self.verify_database() + } + + fn verify_metadata(&self) -> Result<(), MaxMindDbError> { + let m = &self.metadata; + + if m.binary_format_major_version != 2 { + return Err(MaxMindDbError::invalid_database(format!( + "binary_format_major_version - Expected: 2 Actual: {}", + m.binary_format_major_version + ))); + } + if m.binary_format_minor_version != 0 { + return Err(MaxMindDbError::invalid_database(format!( + "binary_format_minor_version - Expected: 0 Actual: {}", + m.binary_format_minor_version + ))); + } + if m.database_type.is_empty() { + return Err(MaxMindDbError::invalid_database( + "database_type - Expected: non-empty string Actual: \"\"", + )); + } + if m.description.is_empty() { + return Err(MaxMindDbError::invalid_database( + "description - Expected: non-empty map Actual: {}", + )); + } + if m.ip_version != 4 && m.ip_version != 6 { + return Err(MaxMindDbError::invalid_database(format!( + "ip_version - Expected: 4 or 6 Actual: {}", + m.ip_version + ))); + } + if m.record_size != 24 && m.record_size != 28 && m.record_size != 32 { + return Err(MaxMindDbError::invalid_database(format!( + "record_size - Expected: 24, 28, or 32 Actual: {}", + m.record_size + ))); + } + if m.node_count == 0 { + return Err(MaxMindDbError::invalid_database( + "node_count - Expected: positive integer Actual: 0", + )); + } + Ok(()) + } + + fn verify_database(&self) -> Result<(), MaxMindDbError> { + let offsets = self.verify_search_tree()?; + self.verify_data_section_separator()?; + self.verify_data_section(offsets) + } + + fn verify_search_tree(&self) -> Result, MaxMindDbError> { + let mut offsets = HashSet::new(); + let opts = 
WithinOptions::default().include_networks_without_data(); + + // Maximum number of networks we can expect in a valid database. + // A database with N nodes can have at most 2N data entries (each leaf node + // can have data). We add some margin for safety. + let max_iterations = (self.metadata.node_count as usize).saturating_mul(3); + let mut iteration_count = 0usize; + + for result in self.networks(opts)? { + let lookup = result?; + if let Some(offset) = lookup.offset() { + offsets.insert(offset); + } + + iteration_count += 1; + if iteration_count > max_iterations { + return Err(MaxMindDbError::invalid_database(format!( + "search tree appears to have a cycle or invalid structure (exceeded {max_iterations} iterations)" + ))); + } + } + Ok(offsets) + } + + fn verify_data_section_separator(&self) -> Result<(), MaxMindDbError> { + let separator_start = + self.metadata.node_count as usize * self.metadata.record_size as usize / 4; + let separator_end = separator_start + DATA_SECTION_SEPARATOR_SIZE; + + if separator_end > self.buf.as_ref().len() { + return Err(MaxMindDbError::invalid_database_at( + "data section separator extends past end of file", + separator_start, + )); + } + + let separator = &self.buf.as_ref()[separator_start..separator_end]; + + for &b in separator { + if b != 0 { + return Err(MaxMindDbError::invalid_database_at( + format!("unexpected byte in data separator: {separator:?}"), + separator_start, + )); + } + } + Ok(()) + } + + fn verify_data_section(&self, offsets: HashSet) -> Result<(), MaxMindDbError> { + let data_section = &self.buf.as_ref()[self.pointer_base..]; + + // Verify each offset from the search tree points to valid, decodable data + for &offset in &offsets { + if offset >= data_section.len() { + return Err(MaxMindDbError::invalid_database_at( + format!( + "search tree pointer is beyond data section (len: {})", + data_section.len() + ), + offset, + )); + } + + let mut dec = decoder::Decoder::new(data_section, offset); + + // Try to 
skip/decode the value to verify it's valid + if let Err(e) = dec.skip_value_for_verification() { + return Err(MaxMindDbError::invalid_database_at( + format!("decoding error: {e}"), + offset, + )); + } + } + + Ok(()) + } +} + +fn find_metadata_start(buf: &[u8]) -> Result { + const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com"; + + memchr::memmem::rfind(buf, METADATA_START_MARKER) + .map(|x| x + METADATA_START_MARKER.len()) + .ok_or_else(|| { + MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file") + }) +} diff --git a/src/maxminddb/reader_test.rs b/src/reader_test.rs similarity index 100% rename from src/maxminddb/reader_test.rs rename to src/reader_test.rs diff --git a/src/maxminddb/result.rs b/src/result.rs similarity index 99% rename from src/maxminddb/result.rs rename to src/result.rs index 20f8c2b6..1d5cd958 100644 --- a/src/maxminddb/result.rs +++ b/src/result.rs @@ -10,8 +10,9 @@ use std::net::IpAddr; use ipnetwork::IpNetwork; use serde::Deserialize; -use super::decoder::{TYPE_ARRAY, TYPE_MAP}; -use super::{MaxMindDbError, Reader}; +use crate::decoder::{TYPE_ARRAY, TYPE_MAP}; +use crate::error::MaxMindDbError; +use crate::reader::Reader; /// The result of looking up an IP address in a MaxMind DB. /// diff --git a/src/within.rs b/src/within.rs new file mode 100644 index 00000000..164272ad --- /dev/null +++ b/src/within.rs @@ -0,0 +1,257 @@ +//! Network iteration types. + +use std::cmp::Ordering; +use std::net::IpAddr; + +use crate::decoder; +use crate::error::MaxMindDbError; +use crate::reader::Reader; +use crate::result::LookupResult; + +/// Options for network iteration. +/// +/// Controls which networks are yielded when iterating over the database +/// with [`Reader::within()`] or [`Reader::networks()`]. 
+/// +/// # Example +/// +/// ``` +/// use maxminddb::WithinOptions; +/// +/// // Default options (skip aliases, skip networks without data, include empty values) +/// let opts = WithinOptions::default(); +/// +/// // Include aliased networks (IPv4 networks via IPv6 aliases) +/// let opts = WithinOptions::default().include_aliased_networks(); +/// +/// // Skip empty values and include networks without data +/// let opts = WithinOptions::default() +/// .skip_empty_values() +/// .include_networks_without_data(); +/// ``` +#[derive(Debug, Clone, Copy, Default)] +pub struct WithinOptions { + /// Include IPv4 networks multiple times when accessed via IPv6 aliases. + pub include_aliased_networks: bool, + /// Include networks that have no associated data record. + pub include_networks_without_data: bool, + /// Skip networks whose data is an empty map or empty array. + pub skip_empty_values: bool, +} + +impl WithinOptions { + /// Include IPv4 networks multiple times when accessed via IPv6 aliases. + /// + /// In IPv6 databases, IPv4 networks are stored at `::0/96`. However, the + /// same data is accessible through several IPv6 prefixes (e.g., + /// `::ffff:0:0/96` for IPv4-mapped IPv6). By default, these aliases are + /// skipped to avoid yielding the same network multiple times. + /// + /// When enabled, the iterator will yield these aliased networks. + #[must_use] + pub fn include_aliased_networks(mut self) -> Self { + self.include_aliased_networks = true; + self + } + + /// Include networks that have no associated data record. + /// + /// Some tree nodes point to "no data" (the node_count sentinel). By default + /// these are skipped. When enabled, these networks are yielded and + /// [`LookupResult::found()`] returns `false` for them. + #[must_use] + pub fn include_networks_without_data(mut self) -> Self { + self.include_networks_without_data = true; + self + } + + /// Skip networks whose data is an empty map or empty array. 
+ /// + /// Some databases store empty maps `{}` or empty arrays `[]` for records + /// without meaningful data. This option filters them out. + #[must_use] + pub fn skip_empty_values(mut self) -> Self { + self.skip_empty_values = true; + self + } +} + +#[derive(Debug)] +pub(crate) struct WithinNode { + pub(crate) node: usize, + pub(crate) ip_int: IpInt, + pub(crate) prefix_len: usize, +} + +/// Iterator over IP networks within a CIDR range. +/// +/// This iterator yields [`LookupResult`] for each network in the database +/// that falls within the specified CIDR range. Use [`LookupResult::decode()`] +/// to deserialize the data for each result. +#[derive(Debug)] +pub struct Within<'de, S: AsRef<[u8]>> { + pub(crate) reader: &'de Reader, + pub(crate) node_count: usize, + pub(crate) stack: Vec, + pub(crate) options: WithinOptions, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum IpInt { + V4(u32), + V6(u128), +} + +impl IpInt { + pub(crate) fn new(ip_addr: IpAddr) -> Self { + match ip_addr { + IpAddr::V4(v4) => IpInt::V4(v4.into()), + IpAddr::V6(v6) => IpInt::V6(v6.into()), + } + } + + #[inline(always)] + pub(crate) fn get_bit(&self, index: usize) -> bool { + match self { + IpInt::V4(ip) => (ip >> (31 - index)) & 1 == 1, + IpInt::V6(ip) => (ip >> (127 - index)) & 1 == 1, + } + } + + pub(crate) fn bit_count(&self) -> usize { + match self { + IpInt::V4(_) => 32, + IpInt::V6(_) => 128, + } + } + + pub(crate) fn is_ipv4_in_ipv6(&self) -> bool { + match self { + IpInt::V4(_) => false, + IpInt::V6(ip) => *ip <= 0xFFFFFFFF, + } + } +} + +impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { + type Item = Result, MaxMindDbError>; + + fn next(&mut self) -> Option { + while let Some(current) = self.stack.pop() { + let bit_count = current.ip_int.bit_count(); + + // Skip networks that are aliases for the IPv4 network (unless option is set) + if !self.options.include_aliased_networks + && self.reader.ipv4_start != 0 + && current.node == self.reader.ipv4_start 
+ && bit_count == 128 + && !current.ip_int.is_ipv4_in_ipv6() + { + continue; + } + + match current.node.cmp(&self.node_count) { + Ordering::Greater => { + // This is a data node, emit it and we're done (until the following next call) + let ip_addr = ip_int_to_addr(¤t.ip_int); + + // Resolve the pointer to a data offset + let data_offset = match self.reader.resolve_data_pointer(current.node) { + Ok(offset) => offset, + Err(e) => return Some(Err(e)), + }; + + // Check if we should skip empty values + if self.options.skip_empty_values { + match self.is_empty_value_at(data_offset) { + Ok(true) => continue, // Skip empty value + Ok(false) => {} // Not empty, proceed + Err(e) => return Some(Err(e)), + } + } + + return Some(Ok(LookupResult::new_found( + self.reader, + data_offset, + current.prefix_len as u8, + ip_addr, + ))); + } + Ordering::Equal => { + // Dead end (no data) - include if option is set + if self.options.include_networks_without_data { + let ip_addr = ip_int_to_addr(¤t.ip_int); + return Some(Ok(LookupResult::new_not_found( + self.reader, + current.prefix_len as u8, + ip_addr, + ))); + } + // Otherwise skip (current behavior) + } + Ordering::Less => { + // In order traversal of our children + // right/1-bit + let mut right_ip_int = current.ip_int; + + if current.prefix_len < bit_count { + let bit = current.prefix_len; + match &mut right_ip_int { + IpInt::V4(ip) => *ip |= 1 << (31 - bit), + IpInt::V6(ip) => *ip |= 1 << (127 - bit), + }; + } + + let node = match self.reader.read_node(current.node, 1) { + Ok(node) => node, + Err(e) => return Some(Err(e)), + }; + self.stack.push(WithinNode { + node, + ip_int: right_ip_int, + prefix_len: current.prefix_len + 1, + }); + // left/0-bit + let node = match self.reader.read_node(current.node, 0) { + Ok(node) => node, + Err(e) => return Some(Err(e)), + }; + self.stack.push(WithinNode { + node, + ip_int: current.ip_int, + prefix_len: current.prefix_len + 1, + }); + } + } + } + None + } +} + +impl<'de, S: AsRef<[u8]>> 
Within<'de, S> { + /// Check if the value at the given data offset is an empty map or array. + fn is_empty_value_at(&self, data_offset: usize) -> Result { + let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; + let mut dec = decoder::Decoder::new(buf, data_offset); + let (size, type_num) = dec.peek_type()?; + match type_num { + decoder::TYPE_MAP | decoder::TYPE_ARRAY => Ok(size == 0), + _ => Ok(false), // Non-container types are never "empty" + } + } +} + +/// Convert IpInt to IpAddr +pub(crate) fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr { + match ip_int { + IpInt::V4(ip) => IpAddr::V4((*ip).into()), + IpInt::V6(ip) => { + // Check if this is an IPv4-mapped IPv6 address + if *ip <= 0xFFFFFFFF { + IpAddr::V4((*ip as u32).into()) + } else { + IpAddr::V6((*ip).into()) + } + } + } +} From de151b00326b56c0c699942c49cb70259dc6a103 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:05:30 -0800 Subject: [PATCH 20/37] Make WithinOptions fields private MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fields should only be modified through builder methods, not directly. This ensures consistent option construction via the builder pattern. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/within.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/within.rs b/src/within.rs index 164272ad..3766b333 100644 --- a/src/within.rs +++ b/src/within.rs @@ -32,11 +32,11 @@ use crate::result::LookupResult; #[derive(Debug, Clone, Copy, Default)] pub struct WithinOptions { /// Include IPv4 networks multiple times when accessed via IPv6 aliases. - pub include_aliased_networks: bool, + include_aliased_networks: bool, /// Include networks that have no associated data record. - pub include_networks_without_data: bool, + include_networks_without_data: bool, /// Skip networks whose data is an empty map or empty array. 
- pub skip_empty_values: bool, + skip_empty_values: bool, } impl WithinOptions { From 0466e5fa775b35ea5101a961f2f01cef18b46ede Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:07:10 -0800 Subject: [PATCH 21/37] Use Option for LookupResult data offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace usize::MAX sentinel with Option for clearer semantics. This is an internal change that doesn't affect the public API. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/result.rs | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/result.rs b/src/result.rs index 1d5cd958..b25f3164 100644 --- a/src/result.rs +++ b/src/result.rs @@ -50,15 +50,12 @@ use crate::reader::Reader; #[derive(Debug, Clone, Copy)] pub struct LookupResult<'a, S: AsRef<[u8]>> { reader: &'a Reader, - /// Offset into the data section, or usize::MAX if not found - data_offset: usize, + /// Offset into the data section, or None if not found. + data_offset: Option, prefix_len: u8, ip: IpAddr, } -/// Not found sentinel value -const NOT_FOUND: usize = usize::MAX; - impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// Creates a new LookupResult for a found IP. pub(crate) fn new_found( @@ -69,7 +66,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { ) -> Self { LookupResult { reader, - data_offset, + data_offset: Some(data_offset), prefix_len, ip, } @@ -79,7 +76,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { pub(crate) fn new_not_found(reader: &'a Reader, prefix_len: u8, ip: IpAddr) -> Self { LookupResult { reader, - data_offset: NOT_FOUND, + data_offset: None, prefix_len, ip, } @@ -91,7 +88,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// which is different from an error during lookup. 
#[inline] pub fn found(&self) -> bool { - self.data_offset != NOT_FOUND + self.data_offset.is_some() } /// Returns the network containing the looked-up IP address. @@ -145,7 +142,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// Returns `None` if the IP was not found. #[inline] pub fn offset(&self) -> Option { - self.found().then_some(self.data_offset) + self.data_offset } /// Decodes the full record into the specified type. @@ -168,14 +165,14 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { where T: Deserialize<'a>, { - if !self.found() { + let Some(offset) = self.data_offset else { return Err(MaxMindDbError::decoding( "cannot decode: IP address not found in database", )); - } + }; let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; - let mut decoder = super::decoder::Decoder::new(buf, self.data_offset); + let mut decoder = super::decoder::Decoder::new(buf, offset); T::deserialize(&mut decoder) } @@ -223,12 +220,12 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { where T: Deserialize<'a>, { - if !self.found() { + let Some(offset) = self.data_offset else { return Ok(None); - } + }; let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; - let mut decoder = super::decoder::Decoder::new(buf, self.data_offset); + let mut decoder = super::decoder::Decoder::new(buf, offset); // Navigate through the path for element in path { From 30455fc6e11d6298b101dd711e208221d5f43463 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:09:16 -0800 Subject: [PATCH 22/37] Add InvalidInput error variant for user input errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looking up an IPv6 address in an IPv4-only database is a user input error, not a database corruption issue. Adding a separate error variant makes this distinction clear and allows callers to handle these cases appropriately. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 4 ++++ src/error.rs | 20 ++++++++++++++++++++ src/lib.rs | 4 ++-- src/reader.rs | 4 ++-- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 720f5bfb..bcccb06c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,10 @@ JSON-pointer-style path for locating the error - Pattern matching code must be updated (e.g., `InvalidDatabase(msg)` becomes `InvalidDatabase { message, .. }`) +- **BREAKING CHANGE:** A new `InvalidInput { message }` error variant has been + added for user input errors (e.g., looking up an IPv6 address in an IPv4-only + database). Previously this returned `InvalidDatabase`, which incorrectly + suggested the database was corrupted. - Error messages now include byte offsets when available, making it easier to debug malformed databases. The `#[non_exhaustive]` attribute is added to `MaxMindDbError` to allow future additions without breaking changes. diff --git a/src/error.rs b/src/error.rs index 4685b92b..c780a2fa 100644 --- a/src/error.rs +++ b/src/error.rs @@ -51,6 +51,13 @@ pub enum MaxMindDbError { #[source] IpNetworkError, ), + + /// The provided input is invalid for this operation. + #[error("invalid input: {message}")] + InvalidInput { + /// Description of what is invalid about the input. + message: String, + }, } fn format_invalid_database(message: &str, offset: &Option) -> String { @@ -116,6 +123,13 @@ impl MaxMindDbError { path: Some(path.into()), } } + + /// Creates an InvalidInput error. 
+ pub fn invalid_input(message: impl Into) -> Self { + MaxMindDbError::InvalidInput { + message: message.into(), + } + } } impl de::Error for MaxMindDbError { @@ -186,5 +200,11 @@ mod tests { format!("{}", MaxMindDbError::from(net_err)), "invalid network: invalid prefix".to_owned(), ); + + // InvalidInput error + assert_eq!( + format!("{}", MaxMindDbError::invalid_input("bad address")), + "invalid input: bad address".to_owned(), + ); } } diff --git a/src/lib.rs b/src/lib.rs index 3facb2c8..639c92fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -292,7 +292,7 @@ mod tests { let result = reader.lookup(ip); match result { - Err(MaxMindDbError::InvalidDatabase { message, .. }) => { + Err(MaxMindDbError::InvalidInput { message }) => { assert!( message.contains("IPv6") && message.contains("IPv4"), "Expected error message about IPv6 in IPv4 database, got: {}", @@ -300,7 +300,7 @@ mod tests { ); } Err(e) => panic!( - "Expected InvalidDatabase error for IPv6 in IPv4 database, got: {:?}", + "Expected InvalidInput error for IPv6 in IPv4 database, got: {:?}", e ), Ok(_) => panic!("Expected error for IPv6 lookup in IPv4-only database"), diff --git a/src/reader.rs b/src/reader.rs index 15ccf95d..81bc268b 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -176,8 +176,8 @@ impl<'de, S: AsRef<[u8]>> Reader { pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> { // Check for IPv6 address in IPv4-only database if matches!(address, IpAddr::V6(_)) && self.metadata.ip_version == 4 { - return Err(MaxMindDbError::invalid_database( - "you attempted to look up an IPv6 address in an IPv4-only database", + return Err(MaxMindDbError::invalid_input( + "cannot look up IPv6 address in IPv4-only database", )); } From 65df89ed252213ce0698cc8820d1758d1292dd32 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:11:07 -0800 Subject: [PATCH 23/37] Replace PathElement::Index(isize) with explicit variants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Use separate Index(usize) and IndexFromEnd(usize) variants instead of a signed integer. This is more idiomatic Rust and makes the intent clearer at the call site. - Index(0) is the first element - IndexFromEnd(0) is the last element 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 4 ++-- src/lib.rs | 16 ++++++------- src/result.rs | 64 +++++++++++++++++++++++++++++++++------------------ 3 files changed, 52 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bcccb06c..2ddf114a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,8 +36,8 @@ - `decode_path()` - Selectively decode specific fields by path - Added `PathElement` enum for navigating nested structures: - `PathElement::Key("name")` - Navigate into map by key - - `PathElement::Index(0)` - Navigate into array by index - - `PathElement::Index(-1)` - Python-style negative indexing + - `PathElement::Index(0)` - Navigate into array by index (0 = first element) + - `PathElement::IndexFromEnd(0)` - Navigate from the end (0 = last element) - Added `verify()` method for comprehensive database validation. Validates metadata, search tree structure, data section separator, and data records. Useful for validating database files after download or generation. 
diff --git a/src/lib.rs b/src/lib.rs index 639c92fa..fbeb2646 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -338,17 +338,17 @@ mod tests { .unwrap(); assert!(arr_oob.is_none()); - // Test negative index: -1 means last element - let arr_neg1: Option = result - .decode_path(&[PathElement::Key("array"), PathElement::Index(-1)]) + // Test IndexFromEnd: 0 means last element + let arr_last: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::IndexFromEnd(0)]) .unwrap(); - assert_eq!(arr_neg1, Some(3)); + assert_eq!(arr_last, Some(3)); - // Test negative index: -3 means first element - let arr_neg3: Option = result - .decode_path(&[PathElement::Key("array"), PathElement::Index(-3)]) + // Test IndexFromEnd: 2 means first element (array has 3 elements) + let arr_first: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::IndexFromEnd(2)]) .unwrap(); - assert_eq!(arr_neg3, Some(1)); + assert_eq!(arr_first, Some(1)); // Test nested path: map.mapX.arrayX[1] let nested: Option = result diff --git a/src/result.rs b/src/result.rs index b25f3164..987b9766 100644 --- a/src/result.rs +++ b/src/result.rs @@ -188,8 +188,8 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// # Path Elements /// /// - `PathElement::Key("name")` - Navigate into a map by key - /// - `PathElement::Index(0)` - Navigate into an array by index - /// - `PathElement::Index(-1)` - Last element (Python-style negative indexing) + /// - `PathElement::Index(0)` - Navigate into an array by index (0 = first element) + /// - `PathElement::IndexFromEnd(0)` - Navigate from the end (0 = last element) /// /// # Example /// @@ -269,20 +269,34 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { // Consume the array header and get size let size = decoder.consume_array_header()?; - // Handle negative indexing (Python-style) - let actual_idx = if *idx < 0 { - let positive = (-*idx) as usize; - if positive > size { - return Ok(None); // Out of bounds - } - size - positive - } else { - let 
positive = *idx as usize; - if positive >= size { - return Ok(None); // Out of bounds - } - positive - }; + if *idx >= size { + return Ok(None); // Out of bounds + } + + // Skip to the target index + for _ in 0..*idx { + decoder.skip_value()?; + } + } + PathElement::IndexFromEnd(idx) => { + let (_, type_num) = decoder.peek_type()?; + if type_num != TYPE_ARRAY { + return Err(MaxMindDbError::decoding_at( + format!( + "expected array for IndexFromEnd navigation, got type {type_num}" + ), + decoder.offset(), + )); + } + + // Consume the array header and get size + let size = decoder.consume_array_header()?; + + if *idx >= size { + return Ok(None); // Out of bounds + } + + let actual_idx = size - 1 - *idx; // Skip to the target index for _ in 0..actual_idx { @@ -305,13 +319,16 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { pub enum PathElement<'a> { /// Navigate into a map by key. Key(&'a str), - /// Navigate into an array by index. + /// Navigate into an array by index (0-based from the start). /// - /// Supports Python-style negative indexing: /// - `Index(0)` - first element - /// - `Index(-1)` - last element - /// - `Index(-2)` - second-to-last element - Index(isize), + /// - `Index(1)` - second element + Index(usize), + /// Navigate into an array by index from the end. + /// + /// - `IndexFromEnd(0)` - last element + /// - `IndexFromEnd(1)` - second-to-last element + IndexFromEnd(usize), } /// Masks an IP address to its network address given a prefix length. 
@@ -373,6 +390,9 @@ mod tests { fn test_path_element_debug() { assert_eq!(format!("{:?}", PathElement::Key("test")), "Key(\"test\")"); assert_eq!(format!("{:?}", PathElement::Index(5)), "Index(5)"); - assert_eq!(format!("{:?}", PathElement::Index(-1)), "Index(-1)"); + assert_eq!( + format!("{:?}", PathElement::IndexFromEnd(0)), + "IndexFromEnd(0)" + ); } } From 2852ddabd83f101afba1c631cf1d3bb92c4be69f Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:15:20 -0800 Subject: [PATCH 24/37] Rename found() to has_data() for clarity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name has_data() more clearly describes what the method checks: whether the database contains data for this IP address. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 6 +++--- benches/lookup.rs | 4 ++-- examples/lookup.rs | 2 +- src/geoip2.rs | 4 ++-- src/lib.rs | 12 +++++------ src/reader.rs | 6 +++--- src/reader_test.rs | 52 +++++++++++++++++++++++----------------------- src/result.rs | 12 +++++------ src/within.rs | 2 +- 9 files changed, 50 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ddf114a..912e02ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ when explicitly requested via `decode()`. Migration: - Old: `reader.lookup::(ip)?` returns `Option` - New: `reader.lookup(ip)?.decode::()` returns `City` - - Check if found: `reader.lookup(ip)?.found()` returns `bool` + - Check if data exists: `reader.lookup(ip)?.has_data()` returns `bool` - **BREAKING CHANGE:** The `lookup_prefix` method has been removed. Use `reader.lookup(ip)?.network()` to get the network containing the IP. 
- **BREAKING CHANGE:** The `Within` iterator now yields `LookupResult` instead @@ -22,14 +22,14 @@ - `include_aliased_networks()` - Include IPv4 networks multiple times when accessed via IPv6 aliases (e.g., `::ffff:0:0/96`, `2001::/32`, `2002::/16`) - `include_networks_without_data()` - Include networks that have no associated - data record. `LookupResult::found()` returns `false` for these. + data record. `LookupResult::has_data()` returns `false` for these. - `skip_empty_values()` - Skip networks whose data is an empty map `{}` or empty array `[]` - Added `networks()` method as a convenience for iterating over all networks in the database. Equivalent to `within("::/0", options)` for IPv6 databases or `within("0.0.0.0/0", options)` for IPv4-only databases. - Added `LookupResult` type with methods: - - `found()` - Check if IP was found in database + - `has_data()` - Check if data exists for this IP - `network()` - Get the network containing the IP - `offset()` - Get data offset for caching/deduplication - `decode()` - Deserialize full record using serde diff --git a/benches/lookup.rs b/benches/lookup.rs index 440fbc33..e4cfa1f8 100644 --- a/benches/lookup.rs +++ b/benches/lookup.rs @@ -33,7 +33,7 @@ where { for ip in ips.iter() { let result = reader.lookup(*ip).unwrap(); - if result.found() { + if result.has_data() { let _: geoip2::City = result.decode().unwrap(); } } @@ -46,7 +46,7 @@ where { ips.par_iter().for_each(|ip| { let result = reader.lookup(*ip).unwrap(); - if result.found() { + if result.has_data() { let _: geoip2::City = result.decode().unwrap(); } }); diff --git a/examples/lookup.rs b/examples/lookup.rs index 2241995b..9caaafbd 100644 --- a/examples/lookup.rs +++ b/examples/lookup.rs @@ -18,7 +18,7 @@ fn main() -> Result<(), Box> { let result = reader.lookup(ip)?; - if result.found() { + if result.has_data() { let city: geoip2::City = result.decode()?; println!("City data for IP {}: {city:#?}", ip); diff --git a/src/geoip2.rs b/src/geoip2.rs index 
65cfc285..470f8361 100644 --- a/src/geoip2.rs +++ b/src/geoip2.rs @@ -28,7 +28,7 @@ //! //! // City lookup (most common) //! let result = reader.lookup(ip)?; -//! if result.found() { +//! if result.has_data() { //! let city: geoip2::City = result.decode()?; //! if let Some(city_names) = city.city.and_then(|c| c.names) { //! if let Some(city_name) = city_names.get("en") { @@ -42,7 +42,7 @@ //! //! // Country-only lookup (smaller/faster) //! let result = reader.lookup(ip)?; -//! if result.found() { +//! if result.has_data() { //! let country: geoip2::Country = result.decode()?; //! if let Some(country_names) = country.country.and_then(|c| c.names) { //! if let Some(country_name) = country_names.get("en") { diff --git a/src/lib.rs b/src/lib.rs index fbeb2646..78f121c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,7 +45,7 @@ //! let ip: IpAddr = "89.160.20.128".parse()?; //! let result = reader.lookup(ip)?; //! -//! if result.found() { +//! if result.has_data() { //! let city: geoip2::City = result.decode()?; //! if let Some(country) = city.country { //! 
println!("Country: {}", country.iso_code.unwrap_or("Unknown")); @@ -215,13 +215,13 @@ mod tests { let result = reader.lookup(ip).unwrap(); assert_eq!( - result.found(), + result.has_data(), test.expected_found, - "IP {} in {}: expected found={}, got found={}", + "IP {} in {}: expected has_data={}, got has_data={}", test.ip, test.db_file, test.expected_found, - result.found() + result.has_data() ); let network = result.network().unwrap(); @@ -243,7 +243,7 @@ mod tests { let ip: IpAddr = "89.160.20.128".parse().unwrap(); let result = reader.lookup(ip).unwrap(); - assert!(result.found(), "lookup should find known IP"); + assert!(result.has_data(), "lookup should find known IP"); // Decode the data let city: geoip2::City = result.decode().unwrap(); @@ -314,7 +314,7 @@ mod tests { let ip: IpAddr = "::1.1.1.0".parse().unwrap(); let result = reader.lookup(ip).unwrap(); - assert!(result.found()); + assert!(result.has_data()); // Test simple path: uint16 let u16_val: Option = result.decode_path(&[PathElement::Key("uint16")]).unwrap(); diff --git a/src/reader.rs b/src/reader.rs index 81bc268b..2d3b7e56 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -119,7 +119,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// Lookup an IP address in the database. 
/// /// Returns a [`LookupResult`] that can be used to: - /// - Check if the IP was found with [`found()`](LookupResult::found) + /// - Check if data exists with [`has_data()`](LookupResult::has_data) /// - Get the network containing the IP with [`network()`](LookupResult::network) /// - Decode the full record with [`decode()`](LookupResult::decode) /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path) @@ -138,7 +138,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); /// let result = reader.lookup(ip)?; /// - /// if result.found() { + /// if result.has_data() { /// let city: geoip2::City = result.decode()?; /// if let Some(city_info) = city.city { /// if let Some(names) = city_info.names { @@ -311,7 +311,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// let opts = WithinOptions::default().include_networks_without_data(); /// for result in reader.within("1.0.0.0/8".parse().unwrap(), opts).unwrap() { /// let lookup = result.unwrap(); - /// if !lookup.found() { + /// if !lookup.has_data() { /// println!("Network {} has no data", lookup.network().unwrap()); /// } /// } diff --git a/src/reader_test.rs b/src/reader_test.rs index 8c9c4ed1..a4c5c590 100644 --- a/src/reader_test.rs +++ b/src/reader_test.rs @@ -45,7 +45,7 @@ fn test_decoder() { .expect("error opening mmdb"); let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); - assert!(lookup.found(), "Expected IP to be found"); + assert!(lookup.has_data(), "Expected IP to be found"); let result: TestType = lookup.decode().unwrap(); assert_eq!(result.array, vec![1_u32, 2_u32, 3_u32]); @@ -100,7 +100,7 @@ fn test_broken_database() { struct TestType {} let lookup = r.lookup(ip).unwrap(); - if lookup.found() { + if lookup.has_data() { match lookup.decode::() { Err(e) => assert!(matches!( e, @@ -207,7 +207,7 @@ fn test_lookup_city() { let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - 
assert!(lookup.found()); + assert!(lookup.has_data()); let city: geoip2::City = lookup.decode().unwrap(); let iso_code = city.country.and_then(|cy| cy.iso_code); @@ -225,7 +225,7 @@ fn test_lookup_country() { let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let country: geoip2::Country = lookup.decode().unwrap(); let country = country.country.unwrap(); @@ -243,7 +243,7 @@ fn test_lookup_connection_type() { let ip: IpAddr = "96.1.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let connection_type: geoip2::ConnectionType = lookup.decode().unwrap(); assert_eq!(connection_type.connection_type, Some("Cable/DSL")); @@ -259,7 +259,7 @@ fn test_lookup_annonymous_ip() { let ip: IpAddr = "81.2.69.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let anonymous_ip: geoip2::AnonymousIp = lookup.decode().unwrap(); assert_eq!(anonymous_ip.is_anonymous, Some(true)); @@ -279,7 +279,7 @@ fn test_lookup_density_income() { let ip: IpAddr = "5.83.124.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let density_income: geoip2::DensityIncome = lookup.decode().unwrap(); assert_eq!(density_income.average_income, Some(32323)); @@ -296,7 +296,7 @@ fn test_lookup_domain() { let ip: IpAddr = "66.92.80.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let domain: geoip2::Domain = lookup.decode().unwrap(); assert_eq!(domain.domain, Some("speakeasy.net")); @@ -312,7 +312,7 @@ fn test_lookup_isp() { let ip: IpAddr = "12.87.118.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let isp: geoip2::Isp = lookup.decode().unwrap(); 
assert_eq!(isp.autonomous_system_number, Some(7018)); @@ -330,7 +330,7 @@ fn test_lookup_asn() { let ip: IpAddr = "1.128.0.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let asn: geoip2::Asn = lookup.decode().unwrap(); assert_eq!(asn.autonomous_system_number, Some(1221)); @@ -346,7 +346,7 @@ fn test_lookup_network() { // --- IPv4 Check (Known) --- let ip: IpAddr = "89.160.20.128".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found(), "Expected Some(City) for known IPv4"); + assert!(lookup.has_data(), "Expected Some(City) for known IPv4"); let network = lookup.network().unwrap(); assert_eq!(network.prefix(), 25); let city: geoip2::City = lookup.decode().unwrap(); @@ -355,7 +355,7 @@ fn test_lookup_network() { // --- IPv4 Check (Last Host, Known) --- let ip_last: IpAddr = "89.160.20.254".parse().unwrap(); let lookup_last = reader.lookup(ip_last).unwrap(); - assert!(lookup_last.found(), "Expected Some(City) for last host"); + assert!(lookup_last.has_data(), "Expected Some(City) for last host"); assert_eq!(lookup_last.network().unwrap().prefix(), 25); // Should be same network // --- IPv6 Check (Not Found in Data) --- @@ -363,7 +363,7 @@ fn test_lookup_network() { let ip_v6_not_found: IpAddr = "2c0f:ff00::1".parse().unwrap(); let lookup_nf = reader.lookup(ip_v6_not_found).unwrap(); assert!( - !lookup_nf.found(), + !lookup_nf.has_data(), "Expected not found for non-existent IP 2c0f:ff00::1" ); assert_eq!( @@ -375,7 +375,7 @@ fn test_lookup_network() { // --- IPv6 Check (Known Data) --- let ip_v6_known: IpAddr = "2001:218:85a3:0:0:8a2e:370:7334".parse().unwrap(); let lookup_v6 = reader.lookup(ip_v6_known).unwrap(); - assert!(lookup_v6.found(), "Expected Some(City) for known IPv6"); + assert!(lookup_v6.has_data(), "Expected Some(City) for known IPv6"); assert_eq!( lookup_v6.network().unwrap().prefix(), 32, @@ -528,7 +528,7 @@ fn check_ip>(reader: &Reader, ip_version: 
usize) { ); let lookup = lookup.unwrap(); assert!( - lookup.found(), + lookup.has_data(), "Lookup for {} returned not found unexpectedly", subnet ); @@ -555,7 +555,7 @@ fn check_ip>(reader: &Reader, ip_version: usize) { let lookup = reader.lookup(ip).unwrap(); assert!( - !lookup.found(), + !lookup.has_data(), "Expected not found for address {}, but it was found", address ); @@ -572,7 +572,7 @@ fn test_json_serialize() { let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let city: geoip2::City = lookup.decode().unwrap(); let json_value = json!(city); @@ -604,7 +604,7 @@ fn test_networks() { for result in reader.networks(Default::default()).unwrap() { let lookup = result.unwrap(); assert!( - lookup.found(), + lookup.has_data(), "networks() should only yield found records by default" ); @@ -763,7 +763,7 @@ fn test_include_networks_without_data() { for result in reader.within(cidr, opts).unwrap() { let lookup = result.unwrap(); networks.push(lookup.network().unwrap().to_string()); - if lookup.found() { + if lookup.has_data() { found_count += 1; } else { not_found_count += 1; @@ -794,7 +794,7 @@ fn test_skip_empty_values() { let lookup = result.unwrap(); count_without_skip += 1; - if lookup.found() { + if lookup.has_data() { let data: std::collections::BTreeMap = lookup.decode().unwrap(); if data.is_empty() { @@ -811,7 +811,7 @@ fn test_skip_empty_values() { let lookup = result.unwrap(); count_with_skip += 1; - if lookup.found() { + if lookup.has_data() { let data: std::collections::BTreeMap = lookup.decode().unwrap(); assert!( @@ -853,7 +853,7 @@ fn test_skip_empty_values_with_other_options() { let lookup = result.unwrap(); count += 1; - if lookup.found() { + if lookup.has_data() { let data: std::collections::BTreeMap = lookup.decode().unwrap(); assert!( @@ -1192,7 +1192,7 @@ fn test_size_hints() { let r = 
Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let result: TestType = lookup.decode().unwrap(); // Verify array size hint matches actual length @@ -1224,7 +1224,7 @@ fn test_ignored_any() { let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let result: PartialRead = lookup.decode().unwrap(); assert_eq!(result.utf8_string, "unicode! ☯ - ♫"); @@ -1249,7 +1249,7 @@ fn test_enum_deserialization() { let r = Reader::open_readfile("test-data/test-data/GeoIP2-Connection-Type-Test.mmdb").unwrap(); let ip: IpAddr = "96.1.20.112".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let result: Record = lookup.decode().unwrap(); assert_eq!(result.connection_type, ConnType::CableDsl); @@ -1280,7 +1280,7 @@ fn test_serde_flatten() { let r = Reader::open_readfile("test-data/test-data/GeoIP2-Country-Test.mmdb").unwrap(); let ip: IpAddr = "81.2.69.160".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); - assert!(lookup.found()); + assert!(lookup.has_data()); let result: PartialCountry = lookup.decode().unwrap(); assert_eq!(result.continent.code, "EU"); diff --git a/src/result.rs b/src/result.rs index 987b9766..b13a320e 100644 --- a/src/result.rs +++ b/src/result.rs @@ -19,7 +19,7 @@ use crate::reader::Reader; /// This is a lightweight handle (~40 bytes) that stores the lookup result /// without immediately decoding the data. 
You can: /// -/// - Check if the IP was found with [`found()`](Self::found) +/// - Check if data exists with [`has_data()`](Self::has_data) /// - Get the network containing the IP with [`network()`](Self::network) /// - Decode the full record with [`decode()`](Self::decode) /// - Decode a specific path with [`decode_path()`](Self::decode_path) @@ -35,7 +35,7 @@ use crate::reader::Reader; /// /// let result = reader.lookup(ip).unwrap(); /// -/// if result.found() { +/// if result.has_data() { /// // Full decode /// let city: geoip2::City = result.decode().unwrap(); /// @@ -82,12 +82,12 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { } } - /// Returns true if the IP address was found in the database. + /// Returns true if the database contains data for this IP address. /// - /// Note that "not found" means the database has no data for this IP, + /// Note that `false` means the database has no data for this IP, /// which is different from an error during lookup. #[inline] - pub fn found(&self) -> bool { + pub fn has_data(&self) -> bool { self.data_offset.is_some() } @@ -183,7 +183,7 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// - `Ok(None)` if the path doesn't exist (key missing, index out of bounds) /// - `Err(...)` if there's a type mismatch during navigation (e.g., `Key` on an array) /// - /// If `found() == false`, returns `Ok(None)`. + /// If `has_data() == false`, returns `Ok(None)`. /// /// # Path Elements /// diff --git a/src/within.rs b/src/within.rs index 3766b333..dd1e4b14 100644 --- a/src/within.rs +++ b/src/within.rs @@ -58,7 +58,7 @@ impl WithinOptions { /// /// Some tree nodes point to "no data" (the node_count sentinel). By default /// these are skipped. When enabled, these networks are yielded and - /// [`LookupResult::found()`] returns `false` for them. + /// [`LookupResult::has_data()`] returns `false` for them. 
#[must_use] pub fn include_networks_without_data(mut self) -> Self { self.include_networks_without_data = true; From 84e8d009b55efc6b5817d3e35ed91abb7f6ebece Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:16:19 -0800 Subject: [PATCH 25/37] Add PartialEq and Eq trait implementations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add PartialEq and Eq to Metadata and WithinOptions for easier comparison and use in collections. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 1 + src/metadata.rs | 2 +- src/within.rs | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 912e02ee..5aaf7ddf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ - `PathElement::Key("name")` - Navigate into map by key - `PathElement::Index(0)` - Navigate into array by index (0 = first element) - `PathElement::IndexFromEnd(0)` - Navigate from the end (0 = last element) +- `Metadata` and `WithinOptions` now implement `PartialEq` and `Eq` traits. - Added `verify()` method for comprehensive database validation. Validates metadata, search tree structure, data section separator, and data records. Useful for validating database files after download or generation. diff --git a/src/metadata.rs b/src/metadata.rs index 38812bb1..3d7f5c39 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -5,7 +5,7 @@ use std::collections::BTreeMap; use serde::{Deserialize, Serialize}; /// Metadata about the MaxMind DB file. -#[derive(Deserialize, Serialize, Clone, Debug)] +#[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq)] pub struct Metadata { /// Major version of the binary format (always 2). 
pub binary_format_major_version: u16, diff --git a/src/within.rs b/src/within.rs index dd1e4b14..bac66b54 100644 --- a/src/within.rs +++ b/src/within.rs @@ -29,7 +29,7 @@ use crate::result::LookupResult; /// .skip_empty_values() /// .include_networks_without_data(); /// ``` -#[derive(Debug, Clone, Copy, Default)] +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub struct WithinOptions { /// Include IPv4 networks multiple times when accessed via IPv6 aliases. include_aliased_networks: bool, From 2de1a3e5d2868360166505cbb90b333439beba16 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:20:01 -0800 Subject: [PATCH 26/37] Change decode() to return Result<Option<T>> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of returning Err when the IP isn't found, decode() now returns Ok(None). This makes the "not found" case consistent with decode_path() and eliminates the need to check has_data() before decoding. - Ok(Some(T)) - found and decoded successfully - Ok(None) - IP not found in database - Err(e) - decoding error 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 2 +- benches/lookup.rs | 4 ++-- examples/lookup.rs | 15 +++++---------- examples/within.rs | 6 ++++-- src/geoip2.rs | 6 ++---- src/lib.rs | 5 ++--- src/reader.rs | 5 ++--- src/reader_test.rs | 44 ++++++++++++++++++++++---------------------- src/result.rs | 20 ++++++++++++-------- 9 files changed, 52 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5aaf7ddf..f2acea37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,7 @@ - `has_data()` - Check if data exists for this IP - `network()` - Get the network containing the IP - `offset()` - Get data offset for caching/deduplication - - `decode()` - Deserialize full record using serde + - `decode()` - Deserialize full record (returns `Result<Option<T>>`) - `decode_path()` - Selectively decode specific fields by path -
Added `PathElement` enum for navigating nested structures: - `PathElement::Key("name")` - Navigate into map by key diff --git a/benches/lookup.rs b/benches/lookup.rs index e4cfa1f8..639b26fa 100644 --- a/benches/lookup.rs +++ b/benches/lookup.rs @@ -34,7 +34,7 @@ where for ip in ips.iter() { let result = reader.lookup(*ip).unwrap(); if result.has_data() { - let _: geoip2::City = result.decode().unwrap(); + let _: geoip2::City = result.decode().unwrap().unwrap(); } } } @@ -47,7 +47,7 @@ where ips.par_iter().for_each(|ip| { let result = reader.lookup(*ip).unwrap(); if result.has_data() { - let _: geoip2::City = result.decode().unwrap(); + let _: geoip2::City = result.decode().unwrap().unwrap(); } }); } diff --git a/examples/lookup.rs b/examples/lookup.rs index 9caaafbd..16e1c8fe 100644 --- a/examples/lookup.rs +++ b/examples/lookup.rs @@ -18,19 +18,14 @@ fn main() -> Result<(), Box> { let result = reader.lookup(ip)?; - if result.has_data() { - let city: geoip2::City = result.decode()?; + if let Some(city) = result.decode::()? { println!("City data for IP {}: {city:#?}", ip); - - // Also show the network - let network = result.network()?; - println!("Network: {}", network); } else { println!("No city data found for IP {}", ip); - - // Even if not found, we can still show the network - let network = result.network()?; - println!("Network (no data): {}", network); } + + // Show the network (available regardless of whether data was found) + let network = result.network()?; + println!("Network: {}", network); Ok(()) } diff --git a/examples/within.rs b/examples/within.rs index 28d0dfe7..6c02ead9 100644 --- a/examples/within.rs +++ b/examples/within.rs @@ -21,12 +21,14 @@ fn main() -> Result<(), Box> { for next in iter { let lookup = next?; let network = lookup.network()?; - let info: geoip2::City = lookup.decode()?; + let Some(info) = lookup.decode::()? 
else { + continue; // Skip networks without data + }; let continent = info.continent.and_then(|c| c.code).unwrap_or(""); let country = info.country.and_then(|c| c.iso_code).unwrap_or(""); let city = match info.city.and_then(|c| c.names) { - Some(names) => names.get("en").unwrap_or(&""), + Some(names) => names.get("en").copied().unwrap_or(""), None => "", }; if !city.is_empty() { diff --git a/src/geoip2.rs b/src/geoip2.rs index 470f8361..99c4e679 100644 --- a/src/geoip2.rs +++ b/src/geoip2.rs @@ -28,8 +28,7 @@ //! //! // City lookup (most common) //! let result = reader.lookup(ip)?; -//! if result.has_data() { -//! let city: geoip2::City = result.decode()?; +//! if let Some(city) = result.decode::()? { //! if let Some(city_names) = city.city.and_then(|c| c.names) { //! if let Some(city_name) = city_names.get("en") { //! println!("City: {}", city_name); @@ -42,8 +41,7 @@ //! //! // Country-only lookup (smaller/faster) //! let result = reader.lookup(ip)?; -//! if result.has_data() { -//! let country: geoip2::Country = result.decode()?; +//! if let Some(country) = result.decode::()? { //! if let Some(country_names) = country.country.and_then(|c| c.names) { //! if let Some(country_name) = country_names.get("en") { //! println!("Country: {}", country_name); diff --git a/src/lib.rs b/src/lib.rs index 78f121c3..8cb58e19 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,8 +45,7 @@ //! let ip: IpAddr = "89.160.20.128".parse()?; //! let result = reader.lookup(ip)?; //! -//! if result.has_data() { -//! let city: geoip2::City = result.decode()?; +//! if let Some(city) = result.decode::()? { //! if let Some(country) = city.country { //! println!("Country: {}", country.iso_code.unwrap_or("Unknown")); //! 
} @@ -246,7 +245,7 @@ mod tests { assert!(result.has_data(), "lookup should find known IP"); // Decode the data - let city: geoip2::City = result.decode().unwrap(); + let city: geoip2::City = result.decode().unwrap().unwrap(); assert!(city.city.is_some(), "Expected city data"); // Check full network (not just prefix) diff --git a/src/reader.rs b/src/reader.rs index 2d3b7e56..462540d8 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -138,8 +138,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); /// let result = reader.lookup(ip)?; /// - /// if result.has_data() { - /// let city: geoip2::City = result.decode()?; + /// if let Some(city) = result.decode::()? { /// if let Some(city_info) = city.city { /// if let Some(names) = city_info.names { /// if let Some(name) = names.get("en") { @@ -270,7 +269,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// for result in reader.within(ipv4_all, Default::default()).unwrap() { /// let lookup = result.unwrap(); /// let network = lookup.network().unwrap(); - /// let city: geoip2::City = lookup.decode().unwrap(); + /// let city: geoip2::City = lookup.decode().unwrap().unwrap(); /// let city_name = city.city.as_ref() /// .and_then(|c| c.names.as_ref()) /// .and_then(|n| n.get("en")); diff --git a/src/reader_test.rs b/src/reader_test.rs index a4c5c590..a00280e5 100644 --- a/src/reader_test.rs +++ b/src/reader_test.rs @@ -46,7 +46,7 @@ fn test_decoder() { let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.has_data(), "Expected IP to be found"); - let result: TestType = lookup.decode().unwrap(); + let result: TestType = lookup.decode().unwrap().unwrap(); assert_eq!(result.array, vec![1_u32, 2_u32, 3_u32]); assert!(result.boolean); @@ -208,7 +208,7 @@ fn test_lookup_city() { let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let city: geoip2::City = lookup.decode().unwrap(); + let city: 
geoip2::City = lookup.decode().unwrap().unwrap(); let iso_code = city.country.and_then(|cy| cy.iso_code); @@ -226,7 +226,7 @@ fn test_lookup_country() { let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let country: geoip2::Country = lookup.decode().unwrap(); + let country: geoip2::Country = lookup.decode().unwrap().unwrap(); let country = country.country.unwrap(); assert_eq!(country.iso_code, Some("SE")); @@ -244,7 +244,7 @@ fn test_lookup_connection_type() { let ip: IpAddr = "96.1.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let connection_type: geoip2::ConnectionType = lookup.decode().unwrap(); + let connection_type: geoip2::ConnectionType = lookup.decode().unwrap().unwrap(); assert_eq!(connection_type.connection_type, Some("Cable/DSL")); } @@ -260,7 +260,7 @@ fn test_lookup_annonymous_ip() { let ip: IpAddr = "81.2.69.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let anonymous_ip: geoip2::AnonymousIp = lookup.decode().unwrap(); + let anonymous_ip: geoip2::AnonymousIp = lookup.decode().unwrap().unwrap(); assert_eq!(anonymous_ip.is_anonymous, Some(true)); assert_eq!(anonymous_ip.is_public_proxy, Some(true)); @@ -280,7 +280,7 @@ fn test_lookup_density_income() { let ip: IpAddr = "5.83.124.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let density_income: geoip2::DensityIncome = lookup.decode().unwrap(); + let density_income: geoip2::DensityIncome = lookup.decode().unwrap().unwrap(); assert_eq!(density_income.average_income, Some(32323)); assert_eq!(density_income.population_density, Some(1232)) @@ -297,7 +297,7 @@ fn test_lookup_domain() { let ip: IpAddr = "66.92.80.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let domain: geoip2::Domain = lookup.decode().unwrap(); + let domain: geoip2::Domain = 
lookup.decode().unwrap().unwrap(); assert_eq!(domain.domain, Some("speakeasy.net")); } @@ -313,7 +313,7 @@ fn test_lookup_isp() { let ip: IpAddr = "12.87.118.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let isp: geoip2::Isp = lookup.decode().unwrap(); + let isp: geoip2::Isp = lookup.decode().unwrap().unwrap(); assert_eq!(isp.autonomous_system_number, Some(7018)); assert_eq!(isp.isp, Some("AT&T Services")); @@ -331,7 +331,7 @@ fn test_lookup_asn() { let ip: IpAddr = "1.128.0.123".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let asn: geoip2::Asn = lookup.decode().unwrap(); + let asn: geoip2::Asn = lookup.decode().unwrap().unwrap(); assert_eq!(asn.autonomous_system_number, Some(1221)); assert_eq!(asn.autonomous_system_organization, Some("Telstra Pty Ltd")); @@ -349,7 +349,7 @@ fn test_lookup_network() { assert!(lookup.has_data(), "Expected Some(City) for known IPv4"); let network = lookup.network().unwrap(); assert_eq!(network.prefix(), 25); - let city: geoip2::City = lookup.decode().unwrap(); + let city: geoip2::City = lookup.decode().unwrap().unwrap(); assert!(city.country.is_some()); // --- IPv4 Check (Last Host, Known) --- @@ -381,7 +381,7 @@ fn test_lookup_network() { 32, "Prefix length mismatch for known IPv6" ); - let city_v6: geoip2::City = lookup_v6.decode().unwrap(); + let city_v6: geoip2::City = lookup_v6.decode().unwrap().unwrap(); assert!(city_v6.country.is_some()); } @@ -442,7 +442,7 @@ fn test_within_city() { // Check associated data for one of them if network.prefix() == 31 { // 81.2.69.142/31 - let city: geoip2::City = lookup.decode().unwrap(); + let city: geoip2::City = lookup.decode().unwrap().unwrap(); assert!(city.city.is_some()); assert_eq!(city.city.unwrap().geoname_id, Some(2643743)); // London } @@ -532,7 +532,7 @@ fn check_ip>(reader: &Reader, ip_version: usize) { "Lookup for {} returned not found unexpectedly", subnet ); - let value: IpType = 
lookup.decode().unwrap(); + let value: IpType = lookup.decode().unwrap().unwrap(); // The value stored is often the network address, not the specific IP looked up // We need to parse the found IP and the subnet IP to check containment or equality. @@ -573,7 +573,7 @@ fn test_json_serialize() { let ip: IpAddr = "89.160.20.112".parse().unwrap(); let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); - let city: geoip2::City = lookup.decode().unwrap(); + let city: geoip2::City = lookup.decode().unwrap().unwrap(); let json_value = json!(city); let json_string = json_value.to_string(); @@ -612,7 +612,7 @@ fn test_networks() { struct IpRecord { ip: String, } - let record: IpRecord = lookup.decode().unwrap(); + let record: IpRecord = lookup.decode().unwrap().unwrap(); let network = lookup.network().unwrap(); assert_eq!( record.ip, @@ -796,7 +796,7 @@ fn test_skip_empty_values() { if lookup.has_data() { let data: std::collections::BTreeMap = - lookup.decode().unwrap(); + lookup.decode().unwrap().unwrap(); if data.is_empty() { empty_count += 1; } @@ -813,7 +813,7 @@ fn test_skip_empty_values() { if lookup.has_data() { let data: std::collections::BTreeMap = - lookup.decode().unwrap(); + lookup.decode().unwrap().unwrap(); assert!( !data.is_empty(), "Should not see empty maps with skip_empty_values" @@ -855,7 +855,7 @@ fn test_skip_empty_values_with_other_options() { if lookup.has_data() { let data: std::collections::BTreeMap = - lookup.decode().unwrap(); + lookup.decode().unwrap().unwrap(); assert!( !data.is_empty(), "Should not see empty maps even with other options" @@ -1193,7 +1193,7 @@ fn test_size_hints() { let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.has_data()); - let result: TestType = lookup.decode().unwrap(); + let result: TestType = lookup.decode().unwrap().unwrap(); // Verify array size hint matches actual length assert_eq!(result.array.hint, Some(3)); @@ -1225,7 +1225,7 @@ fn test_ignored_any() { 
let ip: IpAddr = "1.1.1.0".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.has_data()); - let result: PartialRead = lookup.decode().unwrap(); + let result: PartialRead = lookup.decode().unwrap().unwrap(); assert_eq!(result.utf8_string, "unicode! ☯ - ♫"); } @@ -1250,7 +1250,7 @@ fn test_enum_deserialization() { let ip: IpAddr = "96.1.20.112".parse().unwrap(); let lookup = r.lookup(ip).unwrap(); assert!(lookup.has_data()); - let result: Record = lookup.decode().unwrap(); + let result: Record = lookup.decode().unwrap().unwrap(); assert_eq!(result.connection_type, ConnType::CableDsl); } @@ -1282,6 +1282,6 @@ fn test_serde_flatten() { let lookup = r.lookup(ip).unwrap(); assert!(lookup.has_data()); - let result: PartialCountry = lookup.decode().unwrap(); + let result: PartialCountry = lookup.decode().unwrap().unwrap(); assert_eq!(result.continent.code, "EU"); } diff --git a/src/result.rs b/src/result.rs index b13a320e..bd5f1604 100644 --- a/src/result.rs +++ b/src/result.rs @@ -37,7 +37,7 @@ use crate::reader::Reader; /// /// if result.has_data() { /// // Full decode -/// let city: geoip2::City = result.decode().unwrap(); +/// let city: geoip2::City = result.decode().unwrap().unwrap(); /// /// // Or selective decode via path /// let country_code: Option = result.decode_path(&[ @@ -147,7 +147,10 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// Decodes the full record into the specified type. /// - /// Returns an error if the IP was not found or if decoding fails. + /// Returns: + /// - `Ok(Some(T))` if found and successfully decoded + /// - `Ok(None)` if the IP was not found in the database + /// - `Err(...)` if decoding fails /// /// # Example /// @@ -159,21 +162,22 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); /// /// let result = reader.lookup(ip).unwrap(); - /// let city: geoip2::City = result.decode().unwrap(); + /// if let Some(city) = result.decode::()? 
{ + /// println!("Found city data"); + /// } + /// # Ok::<(), maxminddb::MaxMindDbError>(()) /// ``` - pub fn decode(&self) -> Result + pub fn decode(&self) -> Result, MaxMindDbError> where T: Deserialize<'a>, { let Some(offset) = self.data_offset else { - return Err(MaxMindDbError::decoding( - "cannot decode: IP address not found in database", - )); + return Ok(None); }; let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; let mut decoder = super::decoder::Decoder::new(buf, offset); - T::deserialize(&mut decoder) + T::deserialize(&mut decoder).map(Some) } /// Decodes a value at a specific path within the record. From da414ab5a0b53355f024ede227820d0892a71937 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 07:41:21 -0800 Subject: [PATCH 27/37] Add path! macro for ergonomic path construction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macro converts string literals to Key elements, non-negative integers to Index elements, and negative integers to IndexFromEnd elements (e.g., -1 becomes the last element). 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 4 ++ src/result.rs | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2acea37..a7d5d185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,10 @@ - `PathElement::Key("name")` - Navigate into map by key - `PathElement::Index(0)` - Navigate into array by index (0 = first element) - `PathElement::IndexFromEnd(0)` - Navigate from the end (0 = last element) +- Added `path!` macro for ergonomic path construction: + - String literals become `Key` elements: `path!["country", "iso_code"]` + - Non-negative integers become `Index` elements: `path!["array", 0]` + - Negative integers become `IndexFromEnd` elements: `path!["array", -1]` (last element) - `Metadata` and `WithinOptions` now implement `PartialEq` and `Eq` traits. - Added `verify()` method for comprehensive database validation. Validates metadata, search tree structure, data section separator, and data records. diff --git a/src/result.rs b/src/result.rs index bd5f1604..213788c8 100644 --- a/src/result.rs +++ b/src/result.rs @@ -319,6 +319,23 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { /// /// Used with [`LookupResult::decode_path()`] to selectively decode /// specific fields without parsing the entire record. 
+/// +/// # Creating Path Elements +/// +/// You can create path elements directly or use the [`path!`](crate::path) macro +/// for a more convenient syntax: +/// +/// ``` +/// use maxminddb::{path, PathElement}; +/// +/// // Direct construction +/// let path = [PathElement::Key("country"), PathElement::Key("iso_code")]; +/// +/// // Using the macro - string literals become Keys, integers become Indexes +/// let path = path!["country", "iso_code"]; +/// let path = path!["subdivisions", 0, "names"]; // Mixed keys and indexes +/// let path = path!["array", -1]; // Negative indexes count from the end +/// ``` #[derive(Debug, Clone, PartialEq, Eq)] pub enum PathElement<'a> { /// Navigate into a map by key. @@ -335,6 +352,97 @@ pub enum PathElement<'a> { IndexFromEnd(usize), } +impl<'a> From<&'a str> for PathElement<'a> { + fn from(s: &'a str) -> Self { + PathElement::Key(s) + } +} + +impl From for PathElement<'_> { + /// Converts an integer to a path element. + /// + /// - Non-negative values become `Index(n)` + /// - Negative values become `IndexFromEnd(-n - 1)`, so `-1` is the last element + fn from(n: i32) -> Self { + if n >= 0 { + PathElement::Index(n as usize) + } else { + PathElement::IndexFromEnd((-n - 1) as usize) + } + } +} + +impl From for PathElement<'_> { + fn from(n: usize) -> Self { + PathElement::Index(n) + } +} + +impl From for PathElement<'_> { + /// Converts a signed integer to a path element. + /// + /// - Non-negative values become `Index(n)` + /// - Negative values become `IndexFromEnd(-n - 1)`, so `-1` is the last element + fn from(n: isize) -> Self { + if n >= 0 { + PathElement::Index(n as usize) + } else { + PathElement::IndexFromEnd((-n - 1) as usize) + } + } +} + +/// Creates a path for use with [`LookupResult::decode_path()`](crate::LookupResult::decode_path). +/// +/// This macro provides a convenient way to construct paths with mixed string keys +/// and integer indexes. 
+/// +/// # Syntax +/// +/// - String literals become [`PathElement::Key`] +/// - Non-negative integers become [`PathElement::Index`] +/// - Negative integers become [`PathElement::IndexFromEnd`] (e.g., `-1` is the last element) +/// +/// # Examples +/// +/// ``` +/// use maxminddb::{Reader, path}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// let ip: IpAddr = "89.160.20.128".parse().unwrap(); +/// let result = reader.lookup(ip).unwrap(); +/// +/// // Navigate to country.iso_code +/// let iso_code: Option = result.decode_path(&path!["country", "iso_code"]).unwrap(); +/// +/// // Navigate to subdivisions[0].names.en +/// let subdiv: Option = result.decode_path(&path!["subdivisions", 0, "names", "en"]).unwrap(); +/// ``` +/// +/// ``` +/// use maxminddb::{Reader, path}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); +/// let ip: IpAddr = "::1.1.1.0".parse().unwrap(); +/// let result = reader.lookup(ip).unwrap(); +/// +/// // Access the last element of an array +/// let last: Option = result.decode_path(&path!["array", -1]).unwrap(); +/// assert_eq!(last, Some(3)); +/// +/// // Access the second-to-last element +/// let second_to_last: Option = result.decode_path(&path!["array", -2]).unwrap(); +/// assert_eq!(second_to_last, Some(2)); +/// ``` +#[macro_export] +macro_rules! path { + ($($elem:expr),* $(,)?) => { + [$($crate::PathElement::from($elem)),*] + }; +} + /// Masks an IP address to its network address given a prefix length. 
fn mask_ip(ip: IpAddr, prefix: u8) -> IpAddr { match ip { @@ -399,4 +507,92 @@ mod tests { "IndexFromEnd(0)" ); } + + #[test] + fn test_path_element_from_str() { + let elem: PathElement = "key".into(); + assert_eq!(elem, PathElement::Key("key")); + } + + #[test] + fn test_path_element_from_i32() { + // Positive values become Index + let elem: PathElement = PathElement::from(0i32); + assert_eq!(elem, PathElement::Index(0)); + + let elem: PathElement = PathElement::from(5i32); + assert_eq!(elem, PathElement::Index(5)); + + // Negative values become IndexFromEnd + // -1 → IndexFromEnd(0) (last element) + let elem: PathElement = PathElement::from(-1i32); + assert_eq!(elem, PathElement::IndexFromEnd(0)); + + // -2 → IndexFromEnd(1) (second-to-last) + let elem: PathElement = PathElement::from(-2i32); + assert_eq!(elem, PathElement::IndexFromEnd(1)); + + // -3 → IndexFromEnd(2) + let elem: PathElement = PathElement::from(-3i32); + assert_eq!(elem, PathElement::IndexFromEnd(2)); + } + + #[test] + fn test_path_element_from_usize() { + let elem: PathElement = PathElement::from(0usize); + assert_eq!(elem, PathElement::Index(0)); + + let elem: PathElement = PathElement::from(42usize); + assert_eq!(elem, PathElement::Index(42)); + } + + #[test] + fn test_path_element_from_isize() { + let elem: PathElement = PathElement::from(0isize); + assert_eq!(elem, PathElement::Index(0)); + + let elem: PathElement = PathElement::from(-1isize); + assert_eq!(elem, PathElement::IndexFromEnd(0)); + } + + #[test] + fn test_path_macro_keys_only() { + let p = path!["country", "iso_code"]; + assert_eq!(p.len(), 2); + assert_eq!(p[0], PathElement::Key("country")); + assert_eq!(p[1], PathElement::Key("iso_code")); + } + + #[test] + fn test_path_macro_mixed() { + let p = path!["subdivisions", 0, "names", "en"]; + assert_eq!(p.len(), 4); + assert_eq!(p[0], PathElement::Key("subdivisions")); + assert_eq!(p[1], PathElement::Index(0)); + assert_eq!(p[2], PathElement::Key("names")); + assert_eq!(p[3], 
PathElement::Key("en")); + } + + #[test] + fn test_path_macro_negative_indexes() { + let p = path!["array", -1]; + assert_eq!(p.len(), 2); + assert_eq!(p[0], PathElement::Key("array")); + assert_eq!(p[1], PathElement::IndexFromEnd(0)); // last element + + let p = path!["data", -2, "value"]; + assert_eq!(p[1], PathElement::IndexFromEnd(1)); // second-to-last + } + + #[test] + fn test_path_macro_trailing_comma() { + let p = path!["a", "b",]; + assert_eq!(p.len(), 2); + } + + #[test] + fn test_path_macro_empty() { + let p: [PathElement; 0] = path![]; + assert_eq!(p.len(), 0); + } } From 89eefd735b07804bcce3807c825f3ecbb5222d6a Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 08:49:11 -0800 Subject: [PATCH 28/37] Add path context to decode_path errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When decode_path fails during navigation, the error now includes the path traversed up to the point of failure. This helps users understand where in a nested structure the error occurred. For example, decode_path(&["city", "names", 0]) failing at the index will show "path: /city/names/0" in the error message. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 2 + src/result.rs | 152 +++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 127 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7d5d185..c2d3ce02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,8 @@ - String literals become `Key` elements: `path!["country", "iso_code"]` - Non-negative integers become `Index` elements: `path!["array", 0]` - Negative integers become `IndexFromEnd` elements: `path!["array", -1]` (last element) +- `decode_path()` errors now include path context showing where navigation failed + (e.g., `path: /city/names/0`), making it easier to debug issues with nested data. 
- `Metadata` and `WithinOptions` now implement `PartialEq` and `Eq` traits. - Added `verify()` method for comprehensive database validation. Validates metadata, search tree structure, data section separator, and data records. diff --git a/src/result.rs b/src/result.rs index 213788c8..d3ccaaa6 100644 --- a/src/result.rs +++ b/src/result.rs @@ -231,29 +231,34 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; let mut decoder = super::decoder::Decoder::new(buf, offset); - // Navigate through the path - for element in path { - match element { + // Navigate through the path, tracking position for error context + for (i, element) in path.iter().enumerate() { + // Closure to add path context to errors during navigation. + // Shows path up to and including the current element where the error occurred. + let with_path = |e| add_path_context(e, &path[..=i]); + + match *element { PathElement::Key(key) => { - let (_, type_num) = decoder.peek_type()?; + let (_, type_num) = decoder.peek_type().map_err(with_path)?; if type_num != TYPE_MAP { - return Err(MaxMindDbError::decoding_at( - format!("expected map for Key navigation, got type {type_num}"), + return Err(MaxMindDbError::decoding_at_path( + format!("expected map for Key(\"{key}\"), got type {type_num}"), decoder.offset(), + render_path(&path[..=i]), )); } // Consume the map header and get size - let size = decoder.consume_map_header()?; + let size = decoder.consume_map_header().map_err(with_path)?; let mut found = false; for _ in 0..size { - let k = decoder.read_string()?; - if k == *key { + let k = decoder.read_string().map_err(with_path)?; + if k == key { found = true; break; } else { - decoder.skip_value()?; + decoder.skip_value().map_err(with_path)?; } } @@ -262,59 +267,92 @@ impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { } } PathElement::Index(idx) => { - let (_, type_num) = decoder.peek_type()?; + let (_, type_num) = 
decoder.peek_type().map_err(with_path)?; if type_num != TYPE_ARRAY { - return Err(MaxMindDbError::decoding_at( - format!("expected array for Index navigation, got type {type_num}"), + return Err(MaxMindDbError::decoding_at_path( + format!("expected array for Index({idx}), got type {type_num}"), decoder.offset(), + render_path(&path[..=i]), )); } // Consume the array header and get size - let size = decoder.consume_array_header()?; + let size = decoder.consume_array_header().map_err(with_path)?; - if *idx >= size { + if idx >= size { return Ok(None); // Out of bounds } // Skip to the target index - for _ in 0..*idx { - decoder.skip_value()?; + for _ in 0..idx { + decoder.skip_value().map_err(with_path)?; } } PathElement::IndexFromEnd(idx) => { - let (_, type_num) = decoder.peek_type()?; + let (_, type_num) = decoder.peek_type().map_err(with_path)?; if type_num != TYPE_ARRAY { - return Err(MaxMindDbError::decoding_at( - format!( - "expected array for IndexFromEnd navigation, got type {type_num}" - ), + return Err(MaxMindDbError::decoding_at_path( + format!("expected array for IndexFromEnd({idx}), got type {type_num}"), decoder.offset(), + render_path(&path[..=i]), )); } // Consume the array header and get size - let size = decoder.consume_array_header()?; + let size = decoder.consume_array_header().map_err(with_path)?; - if *idx >= size { + if idx >= size { return Ok(None); // Out of bounds } - let actual_idx = size - 1 - *idx; + let actual_idx = size - 1 - idx; // Skip to the target index for _ in 0..actual_idx { - decoder.skip_value()?; + decoder.skip_value().map_err(with_path)?; } } } } // Decode the value at the current position - T::deserialize(&mut decoder).map(Some) + T::deserialize(&mut decoder) + .map(Some) + .map_err(|e| add_path_context(e, path)) + } +} + +/// Adds path context to a Decoding error if it doesn't already have one. 
+fn add_path_context(err: MaxMindDbError, path: &[PathElement<'_>]) -> MaxMindDbError { + match err { + MaxMindDbError::Decoding { + message, + offset, + path: None, + } => MaxMindDbError::Decoding { + message, + offset, + path: Some(render_path(path)), + }, + _ => err, } } +/// Renders path elements as a JSON-pointer-like string (e.g., "/city/names/0"). +fn render_path(path: &[PathElement<'_>]) -> String { + use std::fmt::Write; + let mut s = String::new(); + for elem in path { + s.push('/'); + match elem { + PathElement::Key(k) => s.push_str(k), + PathElement::Index(i) => write!(s, "{i}").unwrap(), + PathElement::IndexFromEnd(i) => write!(s, "{}", -((*i as isize) + 1)).unwrap(), + } + } + s +} + /// A path element for navigating into nested data structures. /// /// Used with [`LookupResult::decode_path()`] to selectively decode @@ -595,4 +633,64 @@ mod tests { let p: [PathElement; 0] = path![]; assert_eq!(p.len(), 0); } + + #[test] + fn test_render_path() { + assert_eq!(render_path(&[]), ""); + assert_eq!(render_path(&[PathElement::Key("city")]), "/city"); + assert_eq!( + render_path(&[PathElement::Key("city"), PathElement::Key("names")]), + "/city/names" + ); + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::Index(0)]), + "/arr/0" + ); + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::Index(42)]), + "/arr/42" + ); + // IndexFromEnd(0) = last = -1, IndexFromEnd(1) = second-to-last = -2 + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::IndexFromEnd(0)]), + "/arr/-1" + ); + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::IndexFromEnd(1)]), + "/arr/-2" + ); + } + + #[test] + fn test_decode_path_error_includes_path() { + use crate::Reader; + + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + let result = reader.lookup(ip).unwrap(); + + // Try to navigate with Index on a map (root is a map, not array) + 
let err = result + .decode_path::(&[PathElement::Index(0)]) + .unwrap_err(); + let err_str = err.to_string(); + assert!( + err_str.contains("path: /0"), + "error should include path context: {err_str}" + ); + assert!( + err_str.contains("expected array"), + "error should mention expected type: {err_str}" + ); + + // Try to navigate deeper and fail at second element + let err = result + .decode_path::(&[PathElement::Key("city"), PathElement::Index(0)]) + .unwrap_err(); + let err_str = err.to_string(); + assert!( + err_str.contains("path: /city/0"), + "error should include full path to failure: {err_str}" + ); + } } From 87078349f977cb1a96502fde281ed2c69caeb2f5 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 09:06:32 -0800 Subject: [PATCH 29/37] Implement Debug for Reader to avoid printing buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The derived Debug would print the entire database buffer (potentially gigabytes of binary data). The manual implementation omits the buffer contents and shows only the buffer length, the metadata, and the reader's internal offsets (ipv4_start, ipv4_start_bit_depth, pointer_base). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/reader.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/reader.rs b/src/reader.rs index 462540d8..aa703755 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -37,7 +37,6 @@ const DATA_SECTION_SEPARATOR_SIZE: usize = 16; /// decoding) /// - **`unsafe-str-decode`**: Skip UTF-8 validation entirely (unsafe, but /// ~20% faster) -#[derive(Debug)] pub struct Reader> { pub(crate) buf: S, /// Database metadata. 
@@ -49,6 +48,18 @@ pub struct Reader> { pub(crate) pointer_base: usize, } +impl> std::fmt::Debug for Reader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Reader") + .field("buf_len", &self.buf.as_ref().len()) + .field("metadata", &self.metadata) + .field("ipv4_start", &self.ipv4_start) + .field("ipv4_start_bit_depth", &self.ipv4_start_bit_depth) + .field("pointer_base", &self.pointer_base) + .finish_non_exhaustive() + } +} + #[cfg(feature = "mmap")] impl Reader { /// Open a MaxMind DB database file by memory mapping it. From 14e78721fea8315893680955b74f363083d62927 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 09:29:01 -0800 Subject: [PATCH 30/37] Refactor GeoIP2 structs for ergonomic access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace BTreeMap-based names with typed Names struct using Option<&str> fields for each language. Make nested struct fields non-optional with Default, while keeping leaf values as Option to preserve semantics. This eliminates nested Option unwrapping: - Old: city.city.as_ref().and_then(|c| c.names.english) - New: city.city.names.english Add is_empty() methods using *self == Self::default() pattern so they don't need updating when fields are added. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 16 ++ examples/within.rs | 9 +- src/geoip2.rs | 494 +++++++++++++++++++++++++++++++-------------- src/lib.rs | 7 +- src/reader.rs | 13 +- src/reader_test.rs | 15 +- 6 files changed, 373 insertions(+), 181 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2d3ce02..c0616596 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,6 +68,22 @@ added for user input errors (e.g., looking up an IPv6 address in an IPv4-only database). Previously this returned `InvalidDatabase`, which incorrectly suggested the database was corrupted. 
+- **BREAKING CHANGE:** The `names` fields in GeoIP2 structs now use a `Names` + struct instead of `BTreeMap<&str, &str>`. This improves performance (no map + allocation) and ergonomics. Each language field is `Option<&str>`: + - Old: `city.names.as_ref().and_then(|n| n.get("en"))` + - New: `city.city.names.english` + - Supported languages: `german`, `english`, `spanish`, `french`, `japanese`, + `brazilian_portuguese`, `russian`, `simplified_chinese` +- **BREAKING CHANGE:** Nested struct fields in GeoIP2 record types (`City`, + `Country`, `Enterprise`) are now non-optional with `Default`. This simplifies + access patterns by removing nested Option unwrapping: + - Old: `city.city.as_ref().and_then(|c| c.names.english)` + - New: `city.city.names.english` + - Old: `city.subdivisions.as_ref().map(|v| v.iter())` + - New: `city.subdivisions.iter()` (empty Vec if not present) + - Leaf values (strings, numbers, bools) remain `Option` to preserve + the distinction between "not present" and "present but empty" - Error messages now include byte offsets when available, making it easier to debug malformed databases. The `#[non_exhaustive]` attribute is added to `MaxMindDbError` to allow future additions without breaking changes. 
diff --git a/examples/within.rs b/examples/within.rs index 6c02ead9..ea6b1bdf 100644 --- a/examples/within.rs +++ b/examples/within.rs @@ -25,12 +25,9 @@ fn main() -> Result<(), Box> { continue; // Skip networks without data }; - let continent = info.continent.and_then(|c| c.code).unwrap_or(""); - let country = info.country.and_then(|c| c.iso_code).unwrap_or(""); - let city = match info.city.and_then(|c| c.names) { - Some(names) => names.get("en").copied().unwrap_or(""), - None => "", - }; + let continent = info.continent.code.unwrap_or(""); + let country = info.country.iso_code.unwrap_or(""); + let city = info.city.names.english.unwrap_or(""); if !city.is_empty() { println!("{} {}-{}-{}", network, continent, country, city); } else if !country.is_empty() { diff --git a/src/geoip2.rs b/src/geoip2.rs index 99c4e679..bbdf0698 100644 --- a/src/geoip2.rs +++ b/src/geoip2.rs @@ -26,26 +26,29 @@ //! "test-data/test-data/GeoIP2-City-Test.mmdb")?; //! let ip: IpAddr = "89.160.20.128".parse().unwrap(); //! -//! // City lookup (most common) +//! // City lookup - nested structs are always present (default to empty) //! let result = reader.lookup(ip)?; //! if let Some(city) = result.decode::()? { -//! if let Some(city_names) = city.city.and_then(|c| c.names) { -//! if let Some(city_name) = city_names.get("en") { -//! println!("City: {}", city_name); -//! } +//! // Direct access to nested structs - no Option unwrapping needed +//! if let Some(name) = city.city.names.english { +//! println!("City: {}", name); +//! } +//! if let Some(code) = city.country.iso_code { +//! println!("Country: {}", code); //! } -//! if let Some(country_code) = city.country.and_then(|c| c.iso_code) { -//! println!("Country: {}", country_code); +//! // Subdivisions is a Vec, empty if not present +//! for sub in &city.subdivisions { +//! if let Some(code) = sub.iso_code { +//! println!("Subdivision: {}", code); +//! } //! } //! } //! //! // Country-only lookup (smaller/faster) //! 
let result = reader.lookup(ip)?; //! if let Some(country) = result.decode::()? { -//! if let Some(country_names) = country.country.and_then(|c| c.names) { -//! if let Some(country_name) = country_names.get("en") { -//! println!("Country: {}", country_name); -//! } +//! if let Some(name) = country.country.names.english { +//! println!("Country: {}", name); //! } //! } //! # Ok(()) @@ -54,68 +57,142 @@ use serde::{Deserialize, Serialize}; +/// Localized names for geographic entities. +/// +/// Contains name translations in the languages supported by MaxMind databases. +/// Access names directly via fields like `names.english` or `names.german`. +/// Each field is `Option<&str>` - `None` if not available in that language. +/// +/// # Example +/// +/// ``` +/// use maxminddb::{Reader, geoip2}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// let ip: IpAddr = "89.160.20.128".parse().unwrap(); +/// let result = reader.lookup(ip).unwrap(); +/// +/// if let Some(city) = result.decode::().unwrap() { +/// // Access names directly - Option<&str> +/// if let Some(name) = city.city.names.english { +/// println!("City (en): {}", name); +/// } +/// if let Some(name) = city.city.names.german { +/// println!("City (de): {}", name); +/// } +/// } +/// ``` +#[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq, Eq)] +pub struct Names<'a> { + /// German name (de) + #[serde( + borrow, + rename = "de", + default, + skip_serializing_if = "Option::is_none" + )] + pub german: Option<&'a str>, + /// English name (en) + #[serde(rename = "en", default, skip_serializing_if = "Option::is_none")] + pub english: Option<&'a str>, + /// Spanish name (es) + #[serde(rename = "es", default, skip_serializing_if = "Option::is_none")] + pub spanish: Option<&'a str>, + /// French name (fr) + #[serde(rename = "fr", default, skip_serializing_if = "Option::is_none")] + pub french: Option<&'a str>, + /// Japanese 
name (ja) + #[serde(rename = "ja", default, skip_serializing_if = "Option::is_none")] + pub japanese: Option<&'a str>, + /// Brazilian Portuguese name (pt-BR) + #[serde(rename = "pt-BR", default, skip_serializing_if = "Option::is_none")] + pub brazilian_portuguese: Option<&'a str>, + /// Russian name (ru) + #[serde(rename = "ru", default, skip_serializing_if = "Option::is_none")] + pub russian: Option<&'a str>, + /// Simplified Chinese name (zh-CN) + #[serde(rename = "zh-CN", default, skip_serializing_if = "Option::is_none")] + pub simplified_chinese: Option<&'a str>, +} + +impl Names<'_> { + /// Returns true if all name fields are `None`. + #[must_use] + pub fn is_empty(&self) -> bool { + self.german.is_none() + && self.english.is_none() + && self.spanish.is_none() + && self.french.is_none() + && self.japanese.is_none() + && self.brazilian_portuguese.is_none() + && self.russian.is_none() + && self.simplified_chinese.is_none() + } +} + /// GeoIP2 Country record -#[derive(Deserialize, Serialize, Clone, Debug)] +#[derive(Deserialize, Serialize, Clone, Debug, Default)] pub struct Country<'a> { - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub continent: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub registered_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub represented_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub traits: Option, + #[serde(borrow, default, skip_serializing_if = "country::Continent::is_empty")] + pub continent: country::Continent<'a>, + #[serde(default, skip_serializing_if = "country::Country::is_empty")] + pub country: country::Country<'a>, + #[serde(default, skip_serializing_if = "country::Country::is_empty")] + pub registered_country: country::Country<'a>, + #[serde(default, skip_serializing_if = "country::RepresentedCountry::is_empty")] + pub represented_country: 
country::RepresentedCountry<'a>, + #[serde(default, skip_serializing_if = "country::Traits::is_empty")] + pub traits: country::Traits, } /// GeoIP2 City record -#[derive(Deserialize, Serialize, Clone, Debug)] +#[derive(Deserialize, Serialize, Clone, Debug, Default)] pub struct City<'a> { - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub city: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub continent: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub location: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub postal: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub registered_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub represented_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub subdivisions: Option>>, - #[serde(skip_serializing_if = "Option::is_none")] - pub traits: Option, + #[serde(borrow, default, skip_serializing_if = "city::City::is_empty")] + pub city: city::City<'a>, + #[serde(default, skip_serializing_if = "city::Continent::is_empty")] + pub continent: city::Continent<'a>, + #[serde(default, skip_serializing_if = "city::Country::is_empty")] + pub country: city::Country<'a>, + #[serde(default, skip_serializing_if = "city::Location::is_empty")] + pub location: city::Location<'a>, + #[serde(default, skip_serializing_if = "city::Postal::is_empty")] + pub postal: city::Postal<'a>, + #[serde(default, skip_serializing_if = "city::Country::is_empty")] + pub registered_country: city::Country<'a>, + #[serde(default, skip_serializing_if = "city::RepresentedCountry::is_empty")] + pub represented_country: city::RepresentedCountry<'a>, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub subdivisions: Vec>, + #[serde(default, skip_serializing_if = "city::Traits::is_empty")] + pub traits: city::Traits, } /// GeoIP2 Enterprise 
record -#[derive(Deserialize, Serialize, Clone, Debug)] +#[derive(Deserialize, Serialize, Clone, Debug, Default)] pub struct Enterprise<'a> { - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub city: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub continent: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub location: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub postal: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub registered_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub represented_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub subdivisions: Option>>, - #[serde(skip_serializing_if = "Option::is_none")] - pub traits: Option>, + #[serde(borrow, default, skip_serializing_if = "enterprise::City::is_empty")] + pub city: enterprise::City<'a>, + #[serde(default, skip_serializing_if = "enterprise::Continent::is_empty")] + pub continent: enterprise::Continent<'a>, + #[serde(default, skip_serializing_if = "enterprise::Country::is_empty")] + pub country: enterprise::Country<'a>, + #[serde(default, skip_serializing_if = "enterprise::Location::is_empty")] + pub location: enterprise::Location<'a>, + #[serde(default, skip_serializing_if = "enterprise::Postal::is_empty")] + pub postal: enterprise::Postal<'a>, + #[serde(default, skip_serializing_if = "enterprise::Country::is_empty")] + pub registered_country: enterprise::Country<'a>, + #[serde( + default, + skip_serializing_if = "enterprise::RepresentedCountry::is_empty" + )] + pub represented_country: enterprise::RepresentedCountry<'a>, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub subdivisions: Vec>, + #[serde(default, skip_serializing_if = "enterprise::Traits::is_empty")] + pub traits: enterprise::Traits<'a>, } /// GeoIP2 ISP record @@ -186,207 +263,316 @@ pub struct Asn<'a> 
{ /// Country model structs pub mod country { + use super::Names; use serde::{Deserialize, Serialize}; - use std::collections::BTreeMap; - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Continent<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Continent<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } } - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Country<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_in_european_union: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Country<'_> { + /// Returns true if all fields are empty/None. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } } - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct RepresentedCountry<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_in_european_union: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - #[serde(rename = "type")] - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + #[serde(rename = "type", default, skip_serializing_if = "Option::is_none")] pub representation_type: Option<&'a str>, } - #[derive(Deserialize, Serialize, Clone, Debug)] + impl RepresentedCountry<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Traits { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anycast: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_satellite_provider: Option, } + + impl Traits { + /// Returns true if all fields are None. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } } /// City model structs pub mod city { + use super::Names; use serde::{Deserialize, Serialize}; - use std::collections::BTreeMap; pub use super::country::{Continent, Country, RepresentedCountry, Traits}; - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct City<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl City<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } } - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Location<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub accuracy_radius: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub latitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub longitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub metro_code: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub time_zone: Option<&'a str>, } - #[derive(Deserialize, Serialize, Clone, Debug)] + impl Location<'_> { + /// Returns true if all fields are None. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Postal<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub code: Option<&'a str>, } - #[derive(Deserialize, Serialize, Clone, Debug)] + impl Postal<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Subdivision<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Subdivision<'_> { + /// Returns true if all fields are empty/None. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } } } /// Enterprise model structs pub mod enterprise { + use super::Names; use serde::{Deserialize, Serialize}; - use std::collections::BTreeMap; pub use super::country::{Continent, RepresentedCountry}; - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct City<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl City<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } } - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Country<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_in_european_union: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Country<'_> { + /// Returns true if all fields are empty/None. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } } - #[derive(Deserialize, Serialize, Clone, Debug)] + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Location<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub accuracy_radius: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub latitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub longitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub metro_code: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub time_zone: Option<&'a str>, } - #[derive(Deserialize, Serialize, Clone, Debug)] + impl Location<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Postal<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, } - #[derive(Deserialize, Serialize, Clone, Debug)] + impl Postal<'_> { + /// Returns true if all fields are None. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Subdivision<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, } - #[derive(Deserialize, Serialize, Clone, Debug)] + impl Subdivision<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Traits<'a> { - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub autonomous_system_number: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub autonomous_system_organization: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub connection_type: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub domain: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous_proxy: Option, - #[serde(skip_serializing_if 
= "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous_vpn: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anycast: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_hosting_provider: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub isp: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_public_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_residential_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_satellite_provider: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub is_tor_exit_node: Option, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub mobile_country_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub mobile_network_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub organization: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] + #[serde(default, skip_serializing_if = "Option::is_none")] pub user_type: Option<&'a str>, } + + impl Traits<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } } diff --git a/src/lib.rs b/src/lib.rs index 8cb58e19..f0fcb4a6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,9 +46,8 @@ //! 
let result = reader.lookup(ip)?; //! //! if let Some(city) = result.decode::()? { -//! if let Some(country) = city.country { -//! println!("Country: {}", country.iso_code.unwrap_or("Unknown")); -//! } +//! // Access nested structs directly - no Option unwrapping needed +//! println!("Country: {}", city.country.iso_code.unwrap_or("Unknown")); //! } //! //! Ok(()) @@ -246,7 +245,7 @@ mod tests { // Decode the data let city: geoip2::City = result.decode().unwrap().unwrap(); - assert!(city.city.is_some(), "Expected city data"); + assert!(!city.city.is_empty(), "Expected city data"); // Check full network (not just prefix) let network = result.network().unwrap(); diff --git a/src/reader.rs b/src/reader.rs index aa703755..13c73a05 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -150,12 +150,9 @@ impl<'de, S: AsRef<[u8]>> Reader { /// let result = reader.lookup(ip)?; /// /// if let Some(city) = result.decode::()? { - /// if let Some(city_info) = city.city { - /// if let Some(names) = city_info.names { - /// if let Some(name) = names.get("en") { - /// println!("City: {}", name); - /// } - /// } + /// // Access nested structs directly - no Option unwrapping needed + /// if let Some(name) = city.city.names.english { + /// println!("City: {}", name); /// } /// } else { /// println!("No data found for IP {}", ip); @@ -281,9 +278,7 @@ impl<'de, S: AsRef<[u8]>> Reader { /// let lookup = result.unwrap(); /// let network = lookup.network().unwrap(); /// let city: geoip2::City = lookup.decode().unwrap().unwrap(); - /// let city_name = city.city.as_ref() - /// .and_then(|c| c.names.as_ref()) - /// .and_then(|n| n.get("en")); + /// let city_name = city.city.names.english; /// println!("Network: {}, City: {:?}", network, city_name); /// count += 1; /// if count >= 10 { break; } // Limit output for example diff --git a/src/reader_test.rs b/src/reader_test.rs index a00280e5..96c34f03 100644 --- a/src/reader_test.rs +++ b/src/reader_test.rs @@ -210,7 +210,7 @@ fn test_lookup_city() { 
assert!(lookup.has_data()); let city: geoip2::City = lookup.decode().unwrap().unwrap(); - let iso_code = city.country.and_then(|cy| cy.iso_code); + let iso_code = city.country.iso_code; assert_eq!(iso_code, Some("SE")); } @@ -227,10 +227,9 @@ fn test_lookup_country() { let lookup = reader.lookup(ip).unwrap(); assert!(lookup.has_data()); let country: geoip2::Country = lookup.decode().unwrap().unwrap(); - let country = country.country.unwrap(); - assert_eq!(country.iso_code, Some("SE")); - assert_eq!(country.is_in_european_union, Some(true)); + assert_eq!(country.country.iso_code, Some("SE")); + assert_eq!(country.country.is_in_european_union, Some(true)); } #[test] @@ -350,7 +349,7 @@ fn test_lookup_network() { let network = lookup.network().unwrap(); assert_eq!(network.prefix(), 25); let city: geoip2::City = lookup.decode().unwrap().unwrap(); - assert!(city.country.is_some()); + assert!(!city.country.is_empty()); // --- IPv4 Check (Last Host, Known) --- let ip_last: IpAddr = "89.160.20.254".parse().unwrap(); @@ -382,7 +381,7 @@ fn test_lookup_network() { "Prefix length mismatch for known IPv6" ); let city_v6: geoip2::City = lookup_v6.decode().unwrap().unwrap(); - assert!(city_v6.country.is_some()); + assert!(!city_v6.country.is_empty()); } #[test] @@ -443,8 +442,8 @@ fn test_within_city() { if network.prefix() == 31 { // 81.2.69.142/31 let city: geoip2::City = lookup.decode().unwrap().unwrap(); - assert!(city.city.is_some()); - assert_eq!(city.city.unwrap().geoname_id, Some(2643743)); // London + assert!(!city.city.is_empty()); + assert_eq!(city.city.geoname_id, Some(2643743)); // London } found_count += 1; } From a0803b01adc04921d6250d8266550ff3c17497eb Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 09:43:20 -0800 Subject: [PATCH 31/37] Add Metadata::build_time() method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a convenience method to convert the build_epoch Unix timestamp to a 
SystemTime, matching the Go v2 library's BuildTime() method. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 1 + src/metadata.rs | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0616596..88d2e201 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ - `decode_path()` errors now include path context showing where navigation failed (e.g., `path: /city/names/0`), making it easier to debug issues with nested data. - `Metadata` and `WithinOptions` now implement `PartialEq` and `Eq` traits. +- Added `Metadata::build_time()` method to convert `build_epoch` to `SystemTime`. - Added `verify()` method for comprehensive database validation. Validates metadata, search tree structure, data section separator, and data records. Useful for validating database files after download or generation. diff --git a/src/metadata.rs b/src/metadata.rs index 3d7f5c39..2cb4bc29 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -26,3 +26,23 @@ pub struct Metadata { /// Size of each record in bits (24, 28, or 32). pub record_size: u16, } + +impl Metadata { + /// Returns the database build time as a `SystemTime`. + /// + /// This converts the `build_epoch` Unix timestamp to a `SystemTime`. 
+ /// + /// # Example + /// + /// ``` + /// use maxminddb::Reader; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let build_time = reader.metadata.build_time(); + /// println!("Database built: {:?}", build_time); + /// ``` + #[must_use] + pub fn build_time(&self) -> std::time::SystemTime { + std::time::UNIX_EPOCH + std::time::Duration::from_secs(self.build_epoch) + } +} From 5d3f620fd77cd91803c43a4699f02dd69ceace2d Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 09:44:21 -0800 Subject: [PATCH 32/37] Remove deprecated is_anonymous_proxy and is_satellite_provider fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These fields have been removed from MaxMind databases. Users should use the dedicated Anonymous IP database for anonymity detection. This matches the Go v2 library which also removed these fields. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 4 ++++ src/geoip2.rs | 8 -------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 88d2e201..90e26224 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -85,6 +85,10 @@ - New: `city.subdivisions.iter()` (empty Vec if not present) - Leaf values (strings, numbers, bools) remain `Option` to preserve the distinction between "not present" and "present but empty" +- **BREAKING CHANGE:** Removed `is_anonymous_proxy` and `is_satellite_provider` + fields from `country::Traits` and `enterprise::Traits`. These fields are no + longer present in MaxMind databases. Use the Anonymous IP database for + anonymity detection. - Error messages now include byte offsets when available, making it easier to debug malformed databases. The `#[non_exhaustive]` attribute is added to `MaxMindDbError` to allow future additions without breaking changes. 
diff --git a/src/geoip2.rs b/src/geoip2.rs index bbdf0698..0f06fff0 100644 --- a/src/geoip2.rs +++ b/src/geoip2.rs @@ -328,12 +328,8 @@ pub mod country { #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Traits { - #[serde(default, skip_serializing_if = "Option::is_none")] - pub is_anonymous_proxy: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anycast: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub is_satellite_provider: Option, } impl Traits { @@ -541,8 +537,6 @@ pub mod enterprise { #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub is_anonymous_proxy: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous_vpn: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anycast: Option, @@ -555,8 +549,6 @@ pub mod enterprise { #[serde(default, skip_serializing_if = "Option::is_none")] pub is_residential_proxy: Option, #[serde(default, skip_serializing_if = "Option::is_none")] - pub is_satellite_provider: Option, - #[serde(default, skip_serializing_if = "Option::is_none")] pub is_tor_exit_node: Option, #[serde(default, skip_serializing_if = "Option::is_none")] pub mobile_country_code: Option<&'a str>, From 736baa2a71aadc10344e195d05f744654265e69a Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 09:54:47 -0800 Subject: [PATCH 33/37] Improve GeoIP2 struct documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive documentation for all GeoIP2 structs and fields: - Document all top-level record types (Country, City, Enterprise, etc.) 
- Add field descriptions with semantic meaning - Include links to Wikipedia for ISO codes and standards - Mark deprecated fields (metro_code) - Document confidence scores for Enterprise fields - Describe possible values for enum-like fields (connection_type, user_type) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/geoip2.rs | 220 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 208 insertions(+), 12 deletions(-) diff --git a/src/geoip2.rs b/src/geoip2.rs index 0f06fff0..43cbea99 100644 --- a/src/geoip2.rs +++ b/src/geoip2.rs @@ -131,147 +131,229 @@ impl Names<'_> { } } -/// GeoIP2 Country record +/// GeoIP2/GeoLite2 Country database record. +/// +/// Contains country-level geolocation data for an IP address. This is the +/// simplest geolocation record type, suitable when you only need country +/// information. #[derive(Deserialize, Serialize, Clone, Debug, Default)] pub struct Country<'a> { + /// Continent data for the IP address. #[serde(borrow, default, skip_serializing_if = "country::Continent::is_empty")] pub continent: country::Continent<'a>, + /// Country where MaxMind believes the IP is located. #[serde(default, skip_serializing_if = "country::Country::is_empty")] pub country: country::Country<'a>, + /// Country where the ISP has registered the IP block. + /// May differ from `country` (e.g., for mobile networks or VPNs). #[serde(default, skip_serializing_if = "country::Country::is_empty")] pub registered_country: country::Country<'a>, + /// Country represented by users of this IP (e.g., military base or embassy). #[serde(default, skip_serializing_if = "country::RepresentedCountry::is_empty")] pub represented_country: country::RepresentedCountry<'a>, + /// Various traits associated with the IP address. #[serde(default, skip_serializing_if = "country::Traits::is_empty")] pub traits: country::Traits, } -/// GeoIP2 City record +/// GeoIP2/GeoLite2 City database record. 
+/// +/// Contains city-level geolocation data including location coordinates, +/// postal code, subdivisions (states/provinces), and country information. +/// This is the most comprehensive free geolocation record type. #[derive(Deserialize, Serialize, Clone, Debug, Default)] pub struct City<'a> { + /// City data for the IP address. #[serde(borrow, default, skip_serializing_if = "city::City::is_empty")] pub city: city::City<'a>, + /// Continent data for the IP address. #[serde(default, skip_serializing_if = "city::Continent::is_empty")] pub continent: city::Continent<'a>, + /// Country where MaxMind believes the IP is located. #[serde(default, skip_serializing_if = "city::Country::is_empty")] pub country: city::Country<'a>, + /// Location data including coordinates and time zone. #[serde(default, skip_serializing_if = "city::Location::is_empty")] pub location: city::Location<'a>, + /// Postal code data for the IP address. #[serde(default, skip_serializing_if = "city::Postal::is_empty")] pub postal: city::Postal<'a>, + /// Country where the ISP has registered the IP block. #[serde(default, skip_serializing_if = "city::Country::is_empty")] pub registered_country: city::Country<'a>, + /// Country represented by users of this IP (e.g., military base or embassy). #[serde(default, skip_serializing_if = "city::RepresentedCountry::is_empty")] pub represented_country: city::RepresentedCountry<'a>, + /// Subdivisions (states, provinces, etc.) ordered from largest to smallest. + /// For example, Oxford, UK would have England first, then Oxfordshire. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub subdivisions: Vec>, + /// Various traits associated with the IP address. #[serde(default, skip_serializing_if = "city::Traits::is_empty")] pub traits: city::Traits, } -/// GeoIP2 Enterprise record +/// GeoIP2 Enterprise database record. +/// +/// Contains all City data plus additional confidence scores and traits. 
+/// Enterprise records include confidence values (0-100) indicating MaxMind's +/// certainty about the accuracy of each field. #[derive(Deserialize, Serialize, Clone, Debug, Default)] pub struct Enterprise<'a> { + /// City data with confidence score. #[serde(borrow, default, skip_serializing_if = "enterprise::City::is_empty")] pub city: enterprise::City<'a>, + /// Continent data for the IP address. #[serde(default, skip_serializing_if = "enterprise::Continent::is_empty")] pub continent: enterprise::Continent<'a>, + /// Country data with confidence score. #[serde(default, skip_serializing_if = "enterprise::Country::is_empty")] pub country: enterprise::Country<'a>, + /// Location data including coordinates and time zone. #[serde(default, skip_serializing_if = "enterprise::Location::is_empty")] pub location: enterprise::Location<'a>, + /// Postal code data with confidence score. #[serde(default, skip_serializing_if = "enterprise::Postal::is_empty")] pub postal: enterprise::Postal<'a>, + /// Country where the ISP has registered the IP block. #[serde(default, skip_serializing_if = "enterprise::Country::is_empty")] pub registered_country: enterprise::Country<'a>, + /// Country represented by users of this IP (e.g., military base or embassy). #[serde( default, skip_serializing_if = "enterprise::RepresentedCountry::is_empty" )] pub represented_country: enterprise::RepresentedCountry<'a>, + /// Subdivisions with confidence scores, ordered from largest to smallest. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub subdivisions: Vec>, + /// Extended traits including ISP, organization, and connection information. #[serde(default, skip_serializing_if = "enterprise::Traits::is_empty")] pub traits: enterprise::Traits<'a>, } -/// GeoIP2 ISP record +/// GeoIP2 ISP database record. +/// +/// Contains Internet Service Provider and organization information for an IP. 
#[derive(Deserialize, Serialize, Clone, Debug)] pub struct Isp<'a> { + /// The autonomous system number (ASN) for the IP address. #[serde(skip_serializing_if = "Option::is_none")] pub autonomous_system_number: Option, + /// The organization associated with the registered ASN. #[serde(skip_serializing_if = "Option::is_none")] pub autonomous_system_organization: Option<&'a str>, + /// The name of the ISP associated with the IP address. #[serde(skip_serializing_if = "Option::is_none")] pub isp: Option<&'a str>, + /// The mobile country code (MCC) associated with the IP. + /// See . #[serde(skip_serializing_if = "Option::is_none")] pub mobile_country_code: Option<&'a str>, + /// The mobile network code (MNC) associated with the IP. + /// See . #[serde(skip_serializing_if = "Option::is_none")] pub mobile_network_code: Option<&'a str>, + /// The name of the organization associated with the IP address. #[serde(skip_serializing_if = "Option::is_none")] pub organization: Option<&'a str>, } -/// GeoIP2 Connection-Type record +/// GeoIP2 Connection-Type database record. +/// +/// Contains the connection type for an IP address. #[derive(Deserialize, Serialize, Clone, Debug)] pub struct ConnectionType<'a> { + /// The connection type. Possible values include "Dialup", "Cable/DSL", + /// "Corporate", "Cellular", and "Satellite". Additional values may be + /// added in the future. #[serde(skip_serializing_if = "Option::is_none")] pub connection_type: Option<&'a str>, } -/// GeoIP2 Anonymous Ip record +/// GeoIP2 Anonymous IP database record. +/// +/// Contains information about whether an IP address is associated with +/// anonymous or proxy services. #[derive(Deserialize, Serialize, Clone, Debug)] pub struct AnonymousIp { + /// True if the IP belongs to any sort of anonymous network. #[serde(skip_serializing_if = "Option::is_none")] pub is_anonymous: Option, + /// True if the IP is registered to an anonymous VPN provider. 
+ /// Note: If a VPN provider does not register subnets under names associated + /// with them, we will likely only flag their IP ranges using `is_hosting_provider`. #[serde(skip_serializing_if = "Option::is_none")] pub is_anonymous_vpn: Option, + /// True if the IP belongs to a hosting or VPN provider. #[serde(skip_serializing_if = "Option::is_none")] pub is_hosting_provider: Option, + /// True if the IP belongs to a public proxy. #[serde(skip_serializing_if = "Option::is_none")] pub is_public_proxy: Option, + /// True if the IP is on a suspected anonymizing network and belongs to + /// a residential ISP. #[serde(skip_serializing_if = "Option::is_none")] pub is_residential_proxy: Option, + /// True if the IP is a Tor exit node. #[serde(skip_serializing_if = "Option::is_none")] pub is_tor_exit_node: Option, } -/// GeoIP2 DensityIncome record +/// GeoIP2 DensityIncome database record. +/// +/// Contains population density and income data for an IP address location. #[derive(Deserialize, Serialize, Clone, Debug)] pub struct DensityIncome { + /// The average income in US dollars associated with the IP address. #[serde(skip_serializing_if = "Option::is_none")] pub average_income: Option, + /// The estimated number of people per square kilometer. #[serde(skip_serializing_if = "Option::is_none")] pub population_density: Option, } -/// GeoIP2 Domain record +/// GeoIP2 Domain database record. +/// +/// Contains the second-level domain associated with an IP address. #[derive(Deserialize, Serialize, Clone, Debug)] pub struct Domain<'a> { + /// The second-level domain associated with the IP address + /// (e.g., "example.com"). #[serde(skip_serializing_if = "Option::is_none")] pub domain: Option<&'a str>, } -/// GeoIP2 Asn record +/// GeoLite2 ASN database record. +/// +/// Contains Autonomous System Number (ASN) data for an IP address. #[derive(Deserialize, Serialize, Clone, Debug)] pub struct Asn<'a> { + /// The autonomous system number for the IP address. 
#[serde(skip_serializing_if = "Option::is_none")] pub autonomous_system_number: Option, + /// The organization associated with the registered ASN. #[serde(skip_serializing_if = "Option::is_none")] pub autonomous_system_organization: Option<&'a str>, } -/// Country model structs +/// Country/City database model structs. +/// +/// These structs are used by both [`super::Country`] and [`super::City`] records. pub mod country { use super::Names; use serde::{Deserialize, Serialize}; + /// Continent data for an IP address. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Continent<'a> { + /// Two-character continent code (e.g., "NA" for North America, "EU" for Europe). #[serde(default, skip_serializing_if = "Option::is_none")] pub code: Option<&'a str>, + /// GeoNames ID for the continent. #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// Localized continent names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, } @@ -284,14 +366,20 @@ pub mod country { } } + /// Country data for an IP address. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Country<'a> { + /// GeoNames ID for the country. #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// True if the country is a member state of the European Union. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_in_european_union: Option, + /// Two-character ISO 3166-1 alpha-2 country code. + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, + /// Localized country names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, } @@ -304,16 +392,26 @@ pub mod country { } } + /// Represented country data. + /// + /// The represented country is the country represented by something like a + /// military base or embassy. 
#[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct RepresentedCountry<'a> { + /// GeoNames ID for the represented country. #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// True if the represented country is a member state of the European Union. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_in_european_union: Option, + /// Two-character ISO 3166-1 alpha-2 country code. + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, + /// Localized country names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, + /// Type of entity representing the country (e.g., "military"). #[serde(rename = "type", default, skip_serializing_if = "Option::is_none")] pub representation_type: Option<&'a str>, } @@ -326,8 +424,11 @@ pub mod country { } } + /// Traits data for Country/City records. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Traits { + /// True if the IP belongs to an anycast network. + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anycast: Option, } @@ -341,17 +442,22 @@ pub mod country { } } -/// City model structs +/// City database model structs. +/// +/// City-specific structs. Country-level structs are re-exported from [`super::country`]. pub mod city { use super::Names; use serde::{Deserialize, Serialize}; pub use super::country::{Continent, Country, RepresentedCountry, Traits}; + /// City data for an IP address. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct City<'a> { + /// GeoNames ID for the city. #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// Localized city names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, } @@ -364,16 +470,29 @@ pub mod city { } } + /// Location data for an IP address. 
#[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Location<'a> { + /// Approximate accuracy radius in kilometers around the coordinates. + /// This is the radius where we have a 67% confidence that the device + /// using the IP address resides within. #[serde(default, skip_serializing_if = "Option::is_none")] pub accuracy_radius: Option, + /// Approximate latitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. #[serde(default, skip_serializing_if = "Option::is_none")] pub latitude: Option, + /// Approximate longitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. #[serde(default, skip_serializing_if = "Option::is_none")] pub longitude: Option, + /// Metro code for the location, used for targeting advertisements. + /// + /// **Deprecated:** Metro codes are no longer maintained and should not be used. #[serde(default, skip_serializing_if = "Option::is_none")] pub metro_code: Option, + /// Time zone associated with the location, as specified by the + /// IANA Time Zone Database (e.g., "America/New_York"). #[serde(default, skip_serializing_if = "Option::is_none")] pub time_zone: Option<&'a str>, } @@ -386,8 +505,11 @@ pub mod city { } } + /// Postal data for an IP address. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Postal<'a> { + /// Postal code for the location. Not available for all countries. + /// In some countries, this will only contain part of the postal code. #[serde(default, skip_serializing_if = "Option::is_none")] pub code: Option<&'a str>, } @@ -400,12 +522,17 @@ pub mod city { } } + /// Subdivision (state, province, etc.) data for an IP address. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Subdivision<'a> { + /// GeoNames ID for the subdivision. 
#[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// ISO 3166-2 subdivision code (up to 3 characters). + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, + /// Localized subdivision names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, } @@ -419,19 +546,27 @@ pub mod city { } } -/// Enterprise model structs +/// Enterprise database model structs. +/// +/// Enterprise-specific structs with confidence scores. Some structs are +/// re-exported from [`super::country`]. pub mod enterprise { use super::Names; use serde::{Deserialize, Serialize}; pub use super::country::{Continent, RepresentedCountry}; + /// City data with confidence score. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct City<'a> { + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// city is correct. #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, + /// GeoNames ID for the city. #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// Localized city names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, } @@ -444,16 +579,24 @@ pub mod enterprise { } } + /// Country data with confidence score. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Country<'a> { + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// country is correct. #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, + /// GeoNames ID for the country. #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// True if the country is a member state of the European Union. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_in_european_union: Option, + /// Two-character ISO 3166-1 alpha-2 country code. + /// See . 
#[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, + /// Localized country names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, } @@ -466,16 +609,29 @@ pub mod enterprise { } } + /// Location data for an IP address. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Location<'a> { + /// Approximate accuracy radius in kilometers around the coordinates. + /// This is the radius where we have a 67% confidence that the device + /// using the IP address resides within. #[serde(default, skip_serializing_if = "Option::is_none")] pub accuracy_radius: Option, + /// Approximate latitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. #[serde(default, skip_serializing_if = "Option::is_none")] pub latitude: Option, + /// Approximate longitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. #[serde(default, skip_serializing_if = "Option::is_none")] pub longitude: Option, + /// Metro code for the location, used for targeting advertisements. + /// + /// **Deprecated:** Metro codes are no longer maintained and should not be used. #[serde(default, skip_serializing_if = "Option::is_none")] pub metro_code: Option, + /// Time zone associated with the location, as specified by the + /// IANA Time Zone Database (e.g., "America/New_York"). #[serde(default, skip_serializing_if = "Option::is_none")] pub time_zone: Option<&'a str>, } @@ -488,10 +644,15 @@ pub mod enterprise { } } + /// Postal data with confidence score. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Postal<'a> { + /// Postal code for the location. Not available for all countries. + /// In some countries, this will only contain part of the postal code. 
#[serde(default, skip_serializing_if = "Option::is_none")] pub code: Option<&'a str>, + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// postal code is correct. #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, } @@ -504,14 +665,21 @@ pub mod enterprise { } } + /// Subdivision data with confidence score. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Subdivision<'a> { + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// subdivision is correct. #[serde(default, skip_serializing_if = "Option::is_none")] pub confidence: Option, + /// GeoNames ID for the subdivision. #[serde(default, skip_serializing_if = "Option::is_none")] pub geoname_id: Option, + /// ISO 3166-2 subdivision code (up to 3 characters). + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub iso_code: Option<&'a str>, + /// Localized subdivision names. #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] pub names: Names<'a>, } @@ -524,38 +692,66 @@ pub mod enterprise { } } + /// Extended traits data for Enterprise records. + /// + /// Contains ISP, organization, connection type, and anonymity information. #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] pub struct Traits<'a> { + /// The autonomous system number (ASN) for the IP address. #[serde(default, skip_serializing_if = "Option::is_none")] pub autonomous_system_number: Option, + /// The organization associated with the registered ASN. #[serde(default, skip_serializing_if = "Option::is_none")] pub autonomous_system_organization: Option<&'a str>, + /// The connection type. Possible values include "Dialup", "Cable/DSL", + /// "Corporate", "Cellular", and "Satellite". #[serde(default, skip_serializing_if = "Option::is_none")] pub connection_type: Option<&'a str>, + /// The second-level domain associated with the IP address + /// (e.g., "example.com"). 
#[serde(default, skip_serializing_if = "Option::is_none")] pub domain: Option<&'a str>, + /// True if the IP belongs to any sort of anonymous network. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous: Option, + /// True if the IP is registered to an anonymous VPN provider. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anonymous_vpn: Option, + /// True if the IP belongs to an anycast network. + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub is_anycast: Option, + /// True if the IP belongs to a hosting or VPN provider. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_hosting_provider: Option, + /// The name of the ISP associated with the IP address. #[serde(default, skip_serializing_if = "Option::is_none")] pub isp: Option<&'a str>, + /// True if the IP belongs to a public proxy. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_public_proxy: Option, + /// True if the IP is on a suspected anonymizing network and belongs to + /// a residential ISP. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_residential_proxy: Option, + /// True if the IP is a Tor exit node. #[serde(default, skip_serializing_if = "Option::is_none")] pub is_tor_exit_node: Option, + /// The mobile country code (MCC) associated with the IP. + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub mobile_country_code: Option<&'a str>, + /// The mobile network code (MNC) associated with the IP. + /// See . #[serde(default, skip_serializing_if = "Option::is_none")] pub mobile_network_code: Option<&'a str>, + /// The name of the organization associated with the IP address. #[serde(default, skip_serializing_if = "Option::is_none")] pub organization: Option<&'a str>, + /// The user type associated with the IP address. 
Possible values include + /// "business", "cafe", "cellular", "college", "government", "hosting", + /// "library", "military", "residential", "router", "school", + /// "search_engine_spider", and "traveler". #[serde(default, skip_serializing_if = "Option::is_none")] pub user_type: Option<&'a str>, } From b306a4635df2974f6a7b7a5f53872963815b5fc0 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 10:08:52 -0800 Subject: [PATCH 34/37] Improve documentation for README, Cargo.toml, and API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update README with inline example, features section, modern syntax - Fix Cargo.toml documentation URL to point to docs.rs - Add feature flag comments to Cargo.toml - Add module and struct documentation to decoder.rs - Expand Within iterator documentation with example - Add file-level docs and comments to example files 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- Cargo.toml | 5 ++++- README.md | 48 +++++++++++++++++++++++++++++++++++----------- examples/lookup.rs | 11 ++++++++++- examples/within.rs | 16 +++++++++++++++- src/decoder.rs | 17 ++++++++++++++++ src/within.rs | 25 +++++++++++++++++++++--- 6 files changed, 105 insertions(+), 17 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 460d18c8..7a7a49fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ readme = "README.md" keywords = ["MaxMind", "GeoIP2", "GeoIP", "geolocation", "ip"] categories = ["database", "network-programming"] homepage = "https://github.com/oschwald/maxminddb-rust" -documentation = "http://oschwald.github.io/maxminddb-rust/maxminddb/struct.Reader.html" +documentation = "https://docs.rs/maxminddb" repository = "https://github.com/oschwald/maxminddb-rust" license = "ISC" include = ["/Cargo.toml", "/benches/*.rs", "/src/**/*.rs", "/README.md", "/LICENSE"] @@ -15,8 +15,11 @@ edition = "2021" [features] default = [] +# SIMD-accelerated UTF-8 
validation during string decoding simdutf8 = ["dep:simdutf8"] +# Memory-mapped file access for better performance in long-running applications mmap = ["memmap2"] +# Skip UTF-8 validation for maximum performance (mutually exclusive with simdutf8) unsafe-str-decode = [] [lib] diff --git a/README.md b/README.md index 2bef9826..88d1d27c 100644 --- a/README.md +++ b/README.md @@ -28,22 +28,52 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -maxminddb = "0.26" +maxminddb = "0.27" ``` -and this to your crate root: +## Example ```rust -extern crate maxminddb; +use maxminddb::{geoip2, Reader}; +use std::net::IpAddr; + +fn main() -> Result<(), Box> { + let reader = Reader::open_readfile("/path/to/GeoLite2-City.mmdb")?; + + let ip: IpAddr = "89.160.20.128".parse()?; + let result = reader.lookup(ip)?; + + if let Some(city) = result.decode::()? { + println!("Country: {}", city.country.iso_code.unwrap_or("N/A")); + println!("City: {}", city.city.names.english.unwrap_or("N/A")); + } + + Ok(()) +} ``` -## API Documentation +See the [examples](examples/) directory for more usage patterns. -The API docs are on [Docs.rs](https://docs.rs/maxminddb/latest/maxminddb/struct.Reader.html). +## Features -## Example +Optional features: + +- **`mmap`**: Memory-mapped file access for long-running applications +- **`simdutf8`**: SIMD-accelerated UTF-8 validation +- **`unsafe-str-decode`**: Skip UTF-8 validation (requires trusted data) + +Enable in `Cargo.toml`: -See [`examples/lookup.rs`](https://github.com/oschwald/maxminddb-rust/blob/main/examples/lookup.rs) for a basic example. +```toml +[dependencies] +maxminddb = { version = "0.27", features = ["mmap"] } +``` + +Note: `simdutf8` and `unsafe-str-decode` are mutually exclusive. 
+ +## Documentation + +[API documentation on docs.rs](https://docs.rs/maxminddb) ## Benchmarks @@ -64,10 +94,6 @@ If [gnuplot](http://www.gnuplot.info/) is installed, Criterion.rs can generate an HTML report displaying the results of the benchmark under `target/criterion/report/index.html`. -Result of doing 100 random IP lookups: - -![](/assets/pdf_small.svg) - ## Contributing Contributions welcome! Please fork the repository and open a pull request diff --git a/examples/lookup.rs b/examples/lookup.rs index 16e1c8fe..d964c0d8 100644 --- a/examples/lookup.rs +++ b/examples/lookup.rs @@ -1,12 +1,19 @@ +//! Basic IP lookup example. +//! +//! Usage: cargo run --example lookup + use std::net::IpAddr; use maxminddb::geoip2; fn main() -> Result<(), Box> { + // Parse command line arguments let mut args = std::env::args().skip(1); let db_path = args .next() .ok_or("First argument must be the path to the IP database")?; + + // Open the database file let reader = maxminddb::Reader::open_readfile(db_path)?; let ip_str = args @@ -16,15 +23,17 @@ fn main() -> Result<(), Box> { .parse() .map_err(|e| format!("Invalid IP address '{}': {}", ip_str, e))?; + // Look up the IP address let result = reader.lookup(ip)?; + // Decode and display city data if present if let Some(city) = result.decode::()? { println!("City data for IP {}: {city:#?}", ip); } else { println!("No city data found for IP {}", ip); } - // Show the network (available regardless of whether data was found) + // The network is always available, even when no data is found let network = result.network()?; println!("Network: {}", network); Ok(()) diff --git a/examples/within.rs b/examples/within.rs index ea6b1bdf..2720d606 100644 --- a/examples/within.rs +++ b/examples/within.rs @@ -1,30 +1,44 @@ +//! Iterate over networks within a CIDR range. +//! +//! Usage: cargo run --example within +//! +//! 
Example: cargo run --example within GeoLite2-City.mmdb "89.160.20.0/24" + use ipnetwork::IpNetwork; use maxminddb::{geoip2, Within}; fn main() -> Result<(), Box> { + // Parse command line arguments let mut args = std::env::args().skip(1); let db_path = args .next() .ok_or("First argument must be the path to the IP database")?; + + // Open the database file let reader = maxminddb::Reader::open_readfile(db_path)?; let cidr_str = args.next().ok_or( "Second argument must be the IP address and mask in CIDR notation, e.g. 0.0.0.0/0 or ::/0", )?; + // Parse the CIDR notation let ip_net: IpNetwork = cidr_str .parse() .map_err(|e| format!("Invalid CIDR notation '{}': {}", cidr_str, e))?; + // Iterate over all networks within the specified range let mut n = 0; let iter: Within<_> = reader.within(ip_net, Default::default())?; for next in iter { let lookup = next?; let network = lookup.network()?; + + // Skip networks without data let Some(info) = lookup.decode::()? else { - continue; // Skip networks without data + continue; }; + // Display location hierarchy let continent = info.continent.code.unwrap_or(""); let country = info.country.iso_code.unwrap_or(""); let city = info.city.names.english.unwrap_or(""); diff --git a/src/decoder.rs b/src/decoder.rs index 3c29f879..23aedd92 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -1,3 +1,12 @@ +//! Binary format decoder for MaxMind DB files. +//! +//! This module implements deserialization of the MaxMind DB binary format +//! into Rust types via serde. The decoder handles all MaxMind DB data types +//! including pointers, maps, arrays, and primitive types. +//! +//! Most users should not need to interact with this module directly. +//! Use [`Reader::lookup()`](crate::Reader::lookup) for normal lookups. 
+ use log::debug; use serde::de::{self, DeserializeSeed, MapAccess, SeqAccess, Visitor}; use serde::forward_to_deserialize_any; @@ -47,6 +56,14 @@ enum Value<'a, 'de> { Array(ArrayAccess<'a, 'de>), } +/// Low-level decoder for MaxMind DB binary data. +/// +/// This decoder implements serde's `Deserializer` trait to convert +/// MaxMind DB binary format into Rust types. It handles pointer +/// resolution, type coercion, and nested data structures. +/// +/// Most users should use [`LookupResult::decode()`](crate::LookupResult::decode) +/// instead of this type directly. #[derive(Debug)] pub struct Decoder<'de> { buf: &'de [u8], diff --git a/src/within.rs b/src/within.rs index bac66b54..233bd2ae 100644 --- a/src/within.rs +++ b/src/within.rs @@ -85,9 +85,28 @@ pub(crate) struct WithinNode { /// Iterator over IP networks within a CIDR range. /// -/// This iterator yields [`LookupResult`] for each network in the database -/// that falls within the specified CIDR range. Use [`LookupResult::decode()`] -/// to deserialize the data for each result. +/// Created by [`Reader::within()`](crate::Reader::within) or +/// [`Reader::networks()`](crate::Reader::networks). Yields +/// [`LookupResult`] for each network in the database that falls +/// within the specified range. +/// +/// Networks are yielded in depth-first order through the search tree. +/// Use [`LookupResult::decode()`](crate::LookupResult::decode) to +/// deserialize the data for each result. 
+/// +/// # Example +/// +/// ``` +/// use maxminddb::{Reader, WithinOptions, geoip2}; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// for result in reader.within("89.160.20.0/24".parse().unwrap(), Default::default()).unwrap() { +/// let lookup = result.unwrap(); +/// if let Some(city) = lookup.decode::().unwrap() { +/// println!("{}: {:?}", lookup.network().unwrap(), city.city.names.english); +/// } +/// } +/// ``` #[derive(Debug)] pub struct Within<'de, S: AsRef<[u8]>> { pub(crate) reader: &'de Reader, From 54a11bd4017cb355d4cd6dfed44eb8b237d122e0 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 10:10:19 -0800 Subject: [PATCH 35/37] Make Decoder struct and methods pub(crate) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decoder is not exported from the crate, so pub visibility was misleading. Changed to pub(crate) for clarity. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/decoder.rs | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/decoder.rs b/src/decoder.rs index 23aedd92..99657664 100644 --- a/src/decoder.rs +++ b/src/decoder.rs @@ -56,23 +56,19 @@ enum Value<'a, 'de> { Array(ArrayAccess<'a, 'de>), } -/// Low-level decoder for MaxMind DB binary data. +/// Decoder for MaxMind DB binary format. /// -/// This decoder implements serde's `Deserializer` trait to convert -/// MaxMind DB binary format into Rust types. It handles pointer -/// resolution, type coercion, and nested data structures. -/// -/// Most users should use [`LookupResult::decode()`](crate::LookupResult::decode) -/// instead of this type directly. +/// Implements serde's `Deserializer` trait. Handles pointer resolution, +/// type coercion, and nested data structures. 
#[derive(Debug)] -pub struct Decoder<'de> { +pub(crate) struct Decoder<'de> { buf: &'de [u8], current_ptr: usize, depth: u16, } impl<'de> Decoder<'de> { - pub fn new(buf: &'de [u8], start_ptr: usize) -> Decoder<'de> { + pub(crate) fn new(buf: &'de [u8], start_ptr: usize) -> Decoder<'de> { Decoder { buf, current_ptr: start_ptr, @@ -110,9 +106,8 @@ impl<'de> Decoder<'de> { MaxMindDbError::decoding_at(msg, self.current_ptr) } - /// Returns the current offset in the data section. #[inline] - pub fn offset(&self) -> usize { + pub(crate) fn offset(&self) -> usize { self.current_ptr } From 75dd92ad47bc14b688f249a1dbddb9bde721c4e4 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 10:14:40 -0800 Subject: [PATCH 36/37] Prepare 0.27.0 release MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Reorganize changelog with categorized sections - Add UPGRADING.md with migration guide and code examples - Bump version to 0.27.0 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 145 +++++++++++++++--------------------- Cargo.toml | 4 +- UPGRADING.md | 203 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+), 90 deletions(-) create mode 100644 UPGRADING.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 90e26224..5c55e76b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,97 +1,66 @@ # Change Log -## 0.27.0 - UNRELEASED - -- **BREAKING CHANGE:** The `lookup` method now returns a `LookupResult` instead - of `Option`. The new API enables lazy decoding - data is only deserialized - when explicitly requested via `decode()`. Migration: - - Old: `reader.lookup::(ip)?` returns `Option` - - New: `reader.lookup(ip)?.decode::()` returns `City` - - Check if data exists: `reader.lookup(ip)?.has_data()` returns `bool` -- **BREAKING CHANGE:** The `lookup_prefix` method has been removed. 
Use - `reader.lookup(ip)?.network()` to get the network containing the IP. -- **BREAKING CHANGE:** The `Within` iterator now yields `LookupResult` instead - of `WithinItem`. Access the network via `result.network()?` and decode - data via `result.decode::()?`. -- **BREAKING CHANGE:** The `within()` method now takes a second `options` - parameter of type `WithinOptions`. Use `Default::default()` for the previous - behavior: - - Old: `reader.within(cidr)?` - - New: `reader.within(cidr, Default::default())?` -- Added `WithinOptions` struct to control network iteration behavior: - - `include_aliased_networks()` - Include IPv4 networks multiple times when - accessed via IPv6 aliases (e.g., `::ffff:0:0/96`, `2001::/32`, `2002::/16`) - - `include_networks_without_data()` - Include networks that have no associated - data record. `LookupResult::has_data()` returns `false` for these. - - `skip_empty_values()` - Skip networks whose data is an empty map `{}` or - empty array `[]` -- Added `networks()` method as a convenience for iterating over all networks in - the database. Equivalent to `within("::/0", options)` for IPv6 databases or - `within("0.0.0.0/0", options)` for IPv4-only databases. -- Added `LookupResult` type with methods: +## 0.27.0 - 2025-11-28 + +This release includes significant API changes. See [UPGRADING.md](UPGRADING.md) +for migration guidance. + +### Breaking Changes + +#### Lookup API + +- `lookup()` now returns `LookupResult` instead of `Option`. The new API + enables lazy decoding - data is only deserialized when explicitly requested. +- `lookup_prefix()` has been removed. Use `lookup(ip)?.network()` instead. + +#### Iteration API + +- `within()` now requires a second `WithinOptions` parameter. Use + `Default::default()` for the previous behavior. +- `Within` iterator now yields `LookupResult` instead of `WithinItem`. + +#### GeoIP2 Structs + +- The `names` fields now use a `Names` struct instead of `BTreeMap<&str, &str>`. 
+ Access names directly via language fields (e.g., `names.english`). +- Nested struct fields (`city`, `country`, `location`, etc.) are now + non-optional with `Default`, simplifying access patterns. +- Removed `is_anonymous_proxy` and `is_satellite_provider` from `Traits`. + These fields are no longer present in MaxMind databases. + +#### Error Types + +- `InvalidDatabase` and `Decoding` variants now use structured fields instead + of a single string. Pattern matching must be updated. +- New `InvalidInput` variant for user input errors (e.g., IPv6 lookup in + IPv4-only database). + +### Added + +- `LookupResult` type with lazy decoding support: - `has_data()` - Check if data exists for this IP - `network()` - Get the network containing the IP - `offset()` - Get data offset for caching/deduplication - - `decode()` - Deserialize full record (returns `Result>`) + - `decode()` - Deserialize full record - `decode_path()` - Selectively decode specific fields by path -- Added `PathElement` enum for navigating nested structures: - - `PathElement::Key("name")` - Navigate into map by key - - `PathElement::Index(0)` - Navigate into array by index (0 = first element) - - `PathElement::IndexFromEnd(0)` - Navigate from the end (0 = last element) -- Added `path!` macro for ergonomic path construction: - - String literals become `Key` elements: `path!["country", "iso_code"]` - - Non-negative integers become `Index` elements: `path!["array", 0]` - - Negative integers become `IndexFromEnd` elements: `path!["array", -1]` (last element) -- `decode_path()` errors now include path context showing where navigation failed - (e.g., `path: /city/names/0`), making it easier to debug issues with nested data. -- `Metadata` and `WithinOptions` now implement `PartialEq` and `Eq` traits. -- Added `Metadata::build_time()` method to convert `build_epoch` to `SystemTime`. -- Added `verify()` method for comprehensive database validation. 
Validates - metadata, search tree structure, data section separator, and data records. - Useful for validating database files after download or generation. -- Serde deserializer improvements: - - Added size hints to `SeqAccess` and `MapAccess` for efficient collection - pre-allocation - - `is_human_readable()` now returns `false` since MMDB is a binary format - - Implemented `deserialize_ignored_any` for efficient value skipping - - Implemented `deserialize_enum` for string-to-enum deserialization -- Added recursion depth limit (512) matching libmaxminddb and the Go reader. - This prevents stack overflow when decoding malformed databases with deeply - nested structures. -- **BREAKING CHANGE:** The `InvalidDatabase` and `Decoding` error variants now - use structured fields instead of a single string: - - `InvalidDatabase { message, offset }` - includes optional byte offset - - `Decoding { message, offset, path }` - includes optional byte offset and - JSON-pointer-style path for locating the error - - Pattern matching code must be updated (e.g., `InvalidDatabase(msg)` becomes - `InvalidDatabase { message, .. }`) -- **BREAKING CHANGE:** A new `InvalidInput { message }` error variant has been - added for user input errors (e.g., looking up an IPv6 address in an IPv4-only - database). Previously this returned `InvalidDatabase`, which incorrectly - suggested the database was corrupted. -- **BREAKING CHANGE:** The `names` fields in GeoIP2 structs now use a `Names` - struct instead of `BTreeMap<&str, &str>`. This improves performance (no map - allocation) and ergonomics. Each language field is `Option<&str>`: - - Old: `city.names.as_ref().and_then(|n| n.get("en"))` - - New: `city.city.names.english` - - Supported languages: `german`, `english`, `spanish`, `french`, `japanese`, - `brazilian_portuguese`, `russian`, `simplified_chinese` -- **BREAKING CHANGE:** Nested struct fields in GeoIP2 record types (`City`, - `Country`, `Enterprise`) are now non-optional with `Default`. 
This simplifies - access patterns by removing nested Option unwrapping: - - Old: `city.city.as_ref().and_then(|c| c.names.english)` - - New: `city.city.names.english` - - Old: `city.subdivisions.as_ref().map(|v| v.iter())` - - New: `city.subdivisions.iter()` (empty Vec if not present) - - Leaf values (strings, numbers, bools) remain `Option` to preserve - the distinction between "not present" and "present but empty" -- **BREAKING CHANGE:** Removed `is_anonymous_proxy` and `is_satellite_provider` - fields from `country::Traits` and `enterprise::Traits`. These fields are no - longer present in MaxMind databases. Use the Anonymous IP database for - anonymity detection. -- Error messages now include byte offsets when available, making it easier to - debug malformed databases. The `#[non_exhaustive]` attribute is added to - `MaxMindDbError` to allow future additions without breaking changes. +- `PathElement` enum and `path!` macro for navigating nested structures. +- `WithinOptions` to control network iteration behavior: + - `include_aliased_networks()` - Include IPv4 via IPv6 aliases + - `include_networks_without_data()` - Include networks without data records + - `skip_empty_values()` - Skip empty maps/arrays +- `networks()` method for iterating over all networks in the database. +- `verify()` method for comprehensive database validation. +- `Metadata::build_time()` to convert `build_epoch` to `SystemTime`. +- `PartialEq` and `Eq` implementations for `Metadata` and `WithinOptions`. + +### Changed + +- Error messages now include byte offsets when available. +- `decode_path()` errors include path context showing where navigation failed. +- Added recursion depth limit (512) matching libmaxminddb and Go reader. +- Serde deserializer improvements: size hints, `is_human_readable()` returns + false, `deserialize_ignored_any`, and `deserialize_enum` support. +- `MaxMindDbError` is now `#[non_exhaustive]`. 
## 0.26.0 - 2025-03-28 diff --git a/Cargo.toml b/Cargo.toml index 7a7a49fc..c2719799 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "maxminddb" -version = "0.26.0" +version = "0.27.0" authors = [ "Gregory J. Oschwald " ] description = "Library for reading MaxMind DB format used by GeoIP2 and GeoLite2" readme = "README.md" @@ -10,7 +10,7 @@ homepage = "https://github.com/oschwald/maxminddb-rust" documentation = "https://docs.rs/maxminddb" repository = "https://github.com/oschwald/maxminddb-rust" license = "ISC" -include = ["/Cargo.toml", "/benches/*.rs", "/src/**/*.rs", "/README.md", "/LICENSE"] +include = ["/Cargo.toml", "/benches/*.rs", "/src/**/*.rs", "/README.md", "/UPGRADING.md", "/LICENSE"] edition = "2021" [features] diff --git a/UPGRADING.md b/UPGRADING.md new file mode 100644 index 00000000..4043d847 --- /dev/null +++ b/UPGRADING.md @@ -0,0 +1,203 @@ +# Upgrading Guide + +## 0.26 to 0.27 + +This release includes significant API changes to improve ergonomics and enable +new features like lazy decoding and selective field access. + +### Lookup API + +The `lookup()` method now returns a `LookupResult` that supports lazy decoding. + +**Before (0.26):** + +```rust +let city: Option<geoip2::City> = reader.lookup(ip)?; +if let Some(city) = city { + println!("{:?}", city.city); +} +``` + +**After (0.27):** + +```rust +let result = reader.lookup(ip)?; +if let Some(city) = result.decode::<geoip2::City>()? { + println!("{:?}", city.city); +} +``` + +The new API allows you to: + +- Check if data exists without decoding: `result.has_data()` +- Get the network for the IP: `result.network()?` +- Decode only specific fields: `result.decode_path(&[...])?` + +### lookup_prefix Removal + +The `lookup_prefix()` method has been removed. Use `lookup()` with `network()`.
+ +**Before (0.26):** + +```rust +let (city, prefix_len) = reader.lookup_prefix(ip)?; +``` + +**After (0.27):** + +```rust +let result = reader.lookup(ip)?; +let city = result.decode::<geoip2::City>()?; +let network = result.network()?; // Returns IpNetwork with prefix +``` + +### Within Iterator + +The `within()` method now requires a `WithinOptions` parameter. + +**Before (0.26):** + +```rust +for item in reader.within::<geoip2::City>(cidr)? { + let item = item?; + println!("{}: {:?}", item.ip_net, item.info); +} +``` + +**After (0.27):** + +```rust +use maxminddb::WithinOptions; + +for result in reader.within(cidr, Default::default())? { + let result = result?; + let network = result.network()?; + if let Some(city) = result.decode::<geoip2::City>()? { + println!("{}: {:?}", network, city); + } +} +``` + +To customize iteration behavior: + +```rust +let options = WithinOptions::default() + .include_aliased_networks() // Include IPv4 via IPv6 aliases + .include_networks_without_data() // Include networks without data + .skip_empty_values(); // Skip empty maps/arrays + +for result in reader.within(cidr, options)? { + // ... +} +``` + +### GeoIP2 Name Fields + +The `names` fields now use a `Names` struct instead of `BTreeMap`. + +**Before (0.26):** + +```rust +let name = city.city + .as_ref() + .and_then(|c| c.names.as_ref()) + .and_then(|n| n.get("en")); +``` + +**After (0.27):** + +```rust +let name = city.city.names.english; +``` + +Available language fields: + +- `german` +- `english` +- `spanish` +- `french` +- `japanese` +- `brazilian_portuguese` +- `russian` +- `simplified_chinese` + +### GeoIP2 Nested Structs + +Nested struct fields are now non-optional with `Default`.
+ +**Before (0.26):** + +```rust +let iso_code = city.country + .as_ref() + .and_then(|c| c.iso_code.as_ref()); + +let subdivisions = city.subdivisions + .as_ref() + .map(|v| v.iter()) + .into_iter() + .flatten(); +``` + +**After (0.27):** + +```rust +let iso_code = city.country.iso_code; + +for subdivision in &city.subdivisions { + // ... +} +``` + +Leaf values (strings, numbers, bools) remain `Option<T>`. + +### Removed Trait Fields + +The `is_anonymous_proxy` and `is_satellite_provider` fields have been removed +from `country::Traits` and `enterprise::Traits`. These fields are no longer +present in MaxMind databases. + +For anonymity detection, use the [Anonymous IP database](https://www.maxmind.com/en/geoip2-anonymous-ip-database). + +### Error Types + +Error variants now use structured fields. + +**Before (0.26):** + +```rust +match error { + MaxMindDbError::InvalidDatabase(msg) => { + println!("Invalid database: {}", msg); + } + // ... +} +``` + +**After (0.27):** + +```rust +match error { + MaxMindDbError::InvalidDatabase { message, offset } => { + println!("Invalid database: {} at {:?}", message, offset); + } + MaxMindDbError::InvalidInput { message } => { + println!("Invalid input: {}", message); + } + // ... +} +``` + +The new `InvalidInput` variant is used for user errors like looking up an IPv6 +address in an IPv4-only database. + +### Quick Migration Checklist + +1. Update `lookup()` calls to use `.decode::<T>()?` +2. Replace `lookup_prefix()` with `lookup()` + `network()` +3. Add `Default::default()` as second argument to `within()` +4. Update `within()` loops to use `result.network()` and `result.decode()` +5. Replace `names.get("en")` with `names.english` +6. Remove `.as_ref()` chains for nested GeoIP2 fields +7. Remove references to `is_anonymous_proxy` and `is_satellite_provider` +8.
Update error matching to use struct patterns From 98f0e4fff9678c841ed33f3b8a46322f6163c32a Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Fri, 28 Nov 2025 10:20:04 -0800 Subject: [PATCH 37/37] Mark Reader::open_mmap as unsafe to fix soundness issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function wraps memmap2's MmapOptions::map(), which is unsafe because undefined behavior can occur if the underlying file is modified while mapped. By marking open_mmap as unsafe, callers must acknowledge this safety requirement. Fixes #86 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- CHANGELOG.md | 6 ++++++ benches/lookup.rs | 6 ++++-- src/reader.rs | 17 ++++++++++++++--- src/reader_test.rs | 3 ++- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c55e76b..269385e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,12 @@ for migration guidance. - New `InvalidInput` variant for user input errors (e.g., IPv6 lookup in IPv4-only database). +#### Memory Mapping + +- `Reader::open_mmap` is now `unsafe`. The caller must ensure the database + file is not modified or truncated while the `Reader` exists. This fixes a + soundness issue. Reported by paolobarbolini. GitHub #86. + ### Added - `LookupResult` type with lazy decoding support: diff --git a/benches/lookup.rs b/benches/lookup.rs index 639b26fa..e12c855b 100644 --- a/benches/lookup.rs +++ b/benches/lookup.rs @@ -59,7 +59,8 @@ pub fn criterion_benchmark(c: &mut Criterion) { #[cfg(not(feature = "mmap"))] let reader = maxminddb::Reader::open_readfile(DB_FILE).unwrap(); #[cfg(feature = "mmap")] - let reader = maxminddb::Reader::open_mmap(DB_FILE).unwrap(); + // SAFETY: The benchmark database file will not be modified during the benchmark. 
+ let reader = unsafe { maxminddb::Reader::open_mmap(DB_FILE) }.unwrap(); c.bench_function("maxminddb", |b| b.iter(|| bench_maxminddb(&ips, &reader))); } @@ -69,7 +70,8 @@ pub fn criterion_par_benchmark(c: &mut Criterion) { #[cfg(not(feature = "mmap"))] let reader = maxminddb::Reader::open_readfile(DB_FILE).unwrap(); #[cfg(feature = "mmap")] - let reader = maxminddb::Reader::open_mmap(DB_FILE).unwrap(); + // SAFETY: The benchmark database file will not be modified during the benchmark. + let reader = unsafe { maxminddb::Reader::open_mmap(DB_FILE) }.unwrap(); c.bench_function("maxminddb_par", |b| { b.iter(|| bench_par_maxminddb(&ips, &reader)) diff --git a/src/reader.rs b/src/reader.rs index 13c73a05..d255eb99 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -64,17 +64,28 @@ impl> std::fmt::Debug for Reader { impl Reader { /// Open a MaxMind DB database file by memory mapping it. /// + /// # Safety + /// + /// The caller must ensure that the database file is not modified or + /// truncated while the `Reader` exists. Modifying or truncating the + /// file while it is memory-mapped will result in undefined behavior. + /// /// # Example /// /// ``` /// # #[cfg(feature = "mmap")] /// # { - /// let reader = maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// // SAFETY: The database file will not be modified while the reader exists. 
+ /// let reader = unsafe { + /// maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb") + /// }.unwrap(); /// # } /// ``` - pub fn open_mmap<P: AsRef<Path>>(database: P) -> Result<Reader<Mmap>, MaxMindDbError> { + pub unsafe fn open_mmap<P: AsRef<Path>>(database: P) -> Result<Reader<Mmap>, MaxMindDbError> { let file_read = File::open(database)?; - let mmap = unsafe { MmapOptions::new().map(&file_read) }.map_err(MaxMindDbError::Mmap)?; + let mmap = MmapOptions::new() + .map(&file_read) + .map_err(MaxMindDbError::Mmap)?; Reader::from_source(mmap) } } diff --git a/src/reader_test.rs b/src/reader_test.rs index 96c34f03..f16785b1 100644 --- a/src/reader_test.rs +++ b/src/reader_test.rs @@ -189,7 +189,8 @@ fn test_reader_mmap() { "test-data/test-data/MaxMind-DB-test-ipv{}-{}.mmdb", ip_version, record_size ); - let reader = Reader::open_mmap(filename).ok().unwrap(); + // SAFETY: The test database file will not be modified during the test. + let reader = unsafe { Reader::open_mmap(filename) }.ok().unwrap(); check_metadata(&reader, *ip_version, *record_size); check_ip(&reader, *ip_version);