diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a2a75a0..269385e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,73 @@ # Change Log +## 0.27.0 - 2025-11-28 + +This release includes significant API changes. See [UPGRADING.md](UPGRADING.md) +for migration guidance. + +### Breaking Changes + +#### Lookup API + +- `lookup()` now returns `LookupResult` instead of `Option`. The new API + enables lazy decoding - data is only deserialized when explicitly requested. +- `lookup_prefix()` has been removed. Use `lookup(ip)?.network()` instead. + +#### Iteration API + +- `within()` now requires a second `WithinOptions` parameter. Use + `Default::default()` for the previous behavior. +- `Within` iterator now yields `LookupResult` instead of `WithinItem`. + +#### GeoIP2 Structs + +- The `names` fields now use a `Names` struct instead of `BTreeMap<&str, &str>`. + Access names directly via language fields (e.g., `names.english`). +- Nested struct fields (`city`, `country`, `location`, etc.) are now + non-optional with `Default`, simplifying access patterns. +- Removed `is_anonymous_proxy` and `is_satellite_provider` from `Traits`. + These fields are no longer present in MaxMind databases. + +#### Error Types + +- `InvalidDatabase` and `Decoding` variants now use structured fields instead + of a single string. Pattern matching must be updated. +- New `InvalidInput` variant for user input errors (e.g., IPv6 lookup in + IPv4-only database). + +#### Memory Mapping + +- `Reader::open_mmap` is now `unsafe`. The caller must ensure the database + file is not modified or truncated while the `Reader` exists. This fixes a + soundness issue. Reported by paolobarbolini. GitHub #86. 
+ +### Added + +- `LookupResult` type with lazy decoding support: + - `has_data()` - Check if data exists for this IP + - `network()` - Get the network containing the IP + - `offset()` - Get data offset for caching/deduplication + - `decode()` - Deserialize full record + - `decode_path()` - Selectively decode specific fields by path +- `PathElement` enum and `path!` macro for navigating nested structures. +- `WithinOptions` to control network iteration behavior: + - `include_aliased_networks()` - Include IPv4 via IPv6 aliases + - `include_networks_without_data()` - Include networks without data records + - `skip_empty_values()` - Skip empty maps/arrays +- `networks()` method for iterating over all networks in the database. +- `verify()` method for comprehensive database validation. +- `Metadata::build_time()` to convert `build_epoch` to `SystemTime`. +- `PartialEq` and `Eq` implementations for `Metadata` and `WithinOptions`. + +### Changed + +- Error messages now include byte offsets when available. +- `decode_path()` errors include path context showing where navigation failed. +- Added recursion depth limit (512) matching libmaxminddb and Go reader. +- Serde deserializer improvements: size hints, `is_human_readable()` returns + false, `deserialize_ignored_any`, and `deserialize_enum` support. +- `MaxMindDbError` is now `#[non_exhaustive]`. + ## 0.26.0 - 2025-03-28 - **BREAKING CHANGE:** The `lookup` and `lookup_prefix` methods now return diff --git a/Cargo.toml b/Cargo.toml index 76123e21..c2719799 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,27 +1,29 @@ [package] name = "maxminddb" -version = "0.26.0" +version = "0.27.0" authors = [ "Gregory J. 
Oschwald " ] description = "Library for reading MaxMind DB format used by GeoIP2 and GeoLite2" readme = "README.md" keywords = ["MaxMind", "GeoIP2", "GeoIP", "geolocation", "ip"] categories = ["database", "network-programming"] homepage = "https://github.com/oschwald/maxminddb-rust" -documentation = "http://oschwald.github.io/maxminddb-rust/maxminddb/struct.Reader.html" +documentation = "https://docs.rs/maxminddb" repository = "https://github.com/oschwald/maxminddb-rust" license = "ISC" -include = ["/Cargo.toml", "/benches/*.rs", "/src/**/*.rs", "/README.md", "/LICENSE"] +include = ["/Cargo.toml", "/benches/*.rs", "/src/**/*.rs", "/README.md", "/UPGRADING.md", "/LICENSE"] edition = "2021" [features] default = [] +# SIMD-accelerated UTF-8 validation during string decoding simdutf8 = ["dep:simdutf8"] +# Memory-mapped file access for better performance in long-running applications mmap = ["memmap2"] +# Skip UTF-8 validation for maximum performance (mutually exclusive with simdutf8) unsafe-str-decode = [] [lib] -name ="maxminddb" -path = "src/maxminddb/lib.rs" +name = "maxminddb" [dependencies] ipnetwork = "0.21.1" diff --git a/README.md b/README.md index 2bef9826..88d1d27c 100644 --- a/README.md +++ b/README.md @@ -28,22 +28,52 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -maxminddb = "0.26" +maxminddb = "0.27" ``` -and this to your crate root: +## Example ```rust -extern crate maxminddb; +use maxminddb::{geoip2, Reader}; +use std::net::IpAddr; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let reader = Reader::open_readfile("/path/to/GeoLite2-City.mmdb")?; + + let ip: IpAddr = "89.160.20.128".parse()?; + let result = reader.lookup(ip)?; + + if let Some(city) = result.decode::<geoip2::City>()? { + println!("Country: {}", city.country.iso_code.unwrap_or("N/A")); + println!("City: {}", city.city.names.english.unwrap_or("N/A")); + } + + Ok(()) +} ``` -## API Documentation +See the [examples](examples/) directory for more usage patterns. 
-The API docs are on [Docs.rs](https://docs.rs/maxminddb/latest/maxminddb/struct.Reader.html). +## Features -## Example +Optional features: + +- **`mmap`**: Memory-mapped file access for long-running applications +- **`simdutf8`**: SIMD-accelerated UTF-8 validation +- **`unsafe-str-decode`**: Skip UTF-8 validation (requires trusted data) + +Enable in `Cargo.toml`: -See [`examples/lookup.rs`](https://github.com/oschwald/maxminddb-rust/blob/main/examples/lookup.rs) for a basic example. +```toml +[dependencies] +maxminddb = { version = "0.27", features = ["mmap"] } +``` + +Note: `simdutf8` and `unsafe-str-decode` are mutually exclusive. + +## Documentation + +[API documentation on docs.rs](https://docs.rs/maxminddb) ## Benchmarks @@ -64,10 +94,6 @@ If [gnuplot](http://www.gnuplot.info/) is installed, Criterion.rs can generate an HTML report displaying the results of the benchmark under `target/criterion/report/index.html`. -Result of doing 100 random IP lookups: - -![](/assets/pdf_small.svg) - ## Contributing Contributions welcome! Please fork the repository and open a pull request diff --git a/UPGRADING.md b/UPGRADING.md new file mode 100644 index 00000000..4043d847 --- /dev/null +++ b/UPGRADING.md @@ -0,0 +1,203 @@ +# Upgrading Guide + +## 0.26 to 0.27 + +This release includes significant API changes to improve ergonomics and enable +new features like lazy decoding and selective field access. + +### Lookup API + +The `lookup()` method now returns a `LookupResult` that supports lazy decoding. + +**Before (0.26):** + +```rust +let city: Option<geoip2::City> = reader.lookup(ip)?; +if let Some(city) = city { + println!("{:?}", city.city); +} +``` + +**After (0.27):** + +```rust +let result = reader.lookup(ip)?; +if let Some(city) = result.decode::<geoip2::City>()? 
{ + println!("{:?}", city.city); +} +``` + +The new API allows you to: + +- Check if data exists without decoding: `result.has_data()` +- Get the network for the IP: `result.network()?` +- Decode only specific fields: `result.decode_path(&[...])?` + +### lookup_prefix Removal + +The `lookup_prefix()` method has been removed. Use `lookup()` with `network()`. + +**Before (0.26):** + +```rust +let (city, prefix_len) = reader.lookup_prefix(ip)?; +``` + +**After (0.27):** + +```rust +let result = reader.lookup(ip)?; +let city = result.decode::<geoip2::City>()?; +let network = result.network()?; // Returns IpNetwork with prefix +``` + +### Within Iterator + +The `within()` method now requires a `WithinOptions` parameter. + +**Before (0.26):** + +```rust +for item in reader.within::<geoip2::City>(cidr)? { + let item = item?; + println!("{}: {:?}", item.ip_net, item.info); +} +``` + +**After (0.27):** + +```rust +use maxminddb::WithinOptions; + +for result in reader.within(cidr, Default::default())? { + let result = result?; + let network = result.network()?; + if let Some(city) = result.decode::<geoip2::City>()? { + println!("{}: {:?}", network, city); + } +} +``` + +To customize iteration behavior: + +```rust +let options = WithinOptions::default() + .include_aliased_networks() // Include IPv4 via IPv6 aliases + .include_networks_without_data() // Include networks without data + .skip_empty_values(); // Skip empty maps/arrays + +for result in reader.within(cidr, options)? { + // ... +} +``` + +### GeoIP2 Name Fields + +The `names` fields now use a `Names` struct instead of `BTreeMap`. 
+ +**Before (0.26):** + +```rust +let name = city.city + .as_ref() + .and_then(|c| c.names.as_ref()) + .and_then(|n| n.get("en")); +``` + +**After (0.27):** + +```rust +let name = city.city.names.english; +``` + +Available language fields: + +- `german` +- `english` +- `spanish` +- `french` +- `japanese` +- `brazilian_portuguese` +- `russian` +- `simplified_chinese` + +### GeoIP2 Nested Structs + +Nested struct fields are now non-optional with `Default`. + +**Before (0.26):** + +```rust +let iso_code = city.country + .as_ref() + .and_then(|c| c.iso_code.as_ref()); + +let subdivisions = city.subdivisions + .as_ref() + .map(|v| v.iter()) + .into_iter() + .flatten(); +``` + +**After (0.27):** + +```rust +let iso_code = city.country.iso_code; + +for subdivision in &city.subdivisions { + // ... +} +``` + +Leaf values (strings, numbers, bools) remain `Option<T>`. + +### Removed Trait Fields + +The `is_anonymous_proxy` and `is_satellite_provider` fields have been removed +from `country::Traits` and `enterprise::Traits`. These fields are no longer +present in MaxMind databases. + +For anonymity detection, use the [Anonymous IP database](https://www.maxmind.com/en/geoip2-anonymous-ip-database). + +### Error Types + +Error variants now use structured fields. + +**Before (0.26):** + +```rust +match error { + MaxMindDbError::InvalidDatabase(msg) => { + println!("Invalid database: {}", msg); + } + // ... +} +``` + +**After (0.27):** + +```rust +match error { + MaxMindDbError::InvalidDatabase { message, offset } => { + println!("Invalid database: {} at {:?}", message, offset); + } + MaxMindDbError::InvalidInput { message } => { + println!("Invalid input: {}", message); + } + // ... +} +``` + +The new `InvalidInput` variant is used for user errors like looking up an IPv6 +address in an IPv4-only database. + +### Quick Migration Checklist + +1. Update `lookup()` calls to use `.decode::<T>()?` +2. Replace `lookup_prefix()` with `lookup()` + `network()` +3. 
Add `Default::default()` as second argument to `within()` +4. Update `within()` loops to use `result.network()` and `result.decode()` +5. Replace `names.get("en")` with `names.english` +6. Remove `.as_ref()` chains for nested GeoIP2 fields +7. Remove references to `is_anonymous_proxy` and `is_satellite_provider` +8. Update error matching to use struct patterns diff --git a/benches/lookup.rs b/benches/lookup.rs index 9a87d619..e12c855b 100644 --- a/benches/lookup.rs +++ b/benches/lookup.rs @@ -32,7 +32,10 @@ where T: AsRef<[u8]>, { for ip in ips.iter() { - let _ = reader.lookup::(*ip); + let result = reader.lookup(*ip).unwrap(); + if result.has_data() { + let _: geoip2::City = result.decode().unwrap().unwrap(); + } } } @@ -42,7 +45,10 @@ where T: AsRef<[u8]> + std::marker::Sync, { ips.par_iter().for_each(|ip| { - let _ = reader.lookup::(*ip); + let result = reader.lookup(*ip).unwrap(); + if result.has_data() { + let _: geoip2::City = result.decode().unwrap().unwrap(); + } }); } @@ -53,7 +59,8 @@ pub fn criterion_benchmark(c: &mut Criterion) { #[cfg(not(feature = "mmap"))] let reader = maxminddb::Reader::open_readfile(DB_FILE).unwrap(); #[cfg(feature = "mmap")] - let reader = maxminddb::Reader::open_mmap(DB_FILE).unwrap(); + // SAFETY: The benchmark database file will not be modified during the benchmark. + let reader = unsafe { maxminddb::Reader::open_mmap(DB_FILE) }.unwrap(); c.bench_function("maxminddb", |b| b.iter(|| bench_maxminddb(&ips, &reader))); } @@ -63,7 +70,8 @@ pub fn criterion_par_benchmark(c: &mut Criterion) { #[cfg(not(feature = "mmap"))] let reader = maxminddb::Reader::open_readfile(DB_FILE).unwrap(); #[cfg(feature = "mmap")] - let reader = maxminddb::Reader::open_mmap(DB_FILE).unwrap(); + // SAFETY: The benchmark database file will not be modified during the benchmark. 
+ let reader = unsafe { maxminddb::Reader::open_mmap(DB_FILE) }.unwrap(); c.bench_function("maxminddb_par", |b| { b.iter(|| bench_par_maxminddb(&ips, &reader)) diff --git a/examples/lookup.rs b/examples/lookup.rs index 6b49dead..d964c0d8 100644 --- a/examples/lookup.rs +++ b/examples/lookup.rs @@ -1,12 +1,19 @@ +//! Basic IP lookup example. +//! +//! Usage: cargo run --example lookup + use std::net::IpAddr; use maxminddb::geoip2; fn main() -> Result<(), Box> { + // Parse command line arguments let mut args = std::env::args().skip(1); let db_path = args .next() .ok_or("First argument must be the path to the IP database")?; + + // Open the database file let reader = maxminddb::Reader::open_readfile(db_path)?; let ip_str = args @@ -16,13 +23,18 @@ fn main() -> Result<(), Box> { .parse() .map_err(|e| format!("Invalid IP address '{}': {}", ip_str, e))?; - match reader.lookup::(ip)? { - Some(city) => { - println!("City data for IP {}: {city:#?}", ip); - } - None => { - println!("No city data found for IP {}", ip); - } + // Look up the IP address + let result = reader.lookup(ip)?; + + // Decode and display city data if present + if let Some(city) = result.decode::()? { + println!("City data for IP {}: {city:#?}", ip); + } else { + println!("No city data found for IP {}", ip); } + + // The network is always available, even when no data is found + let network = result.network()?; + println!("Network: {}", network); Ok(()) } diff --git a/examples/within.rs b/examples/within.rs index d0d31d4e..2720d606 100644 --- a/examples/within.rs +++ b/examples/within.rs @@ -1,37 +1,53 @@ +//! Iterate over networks within a CIDR range. +//! +//! Usage: cargo run --example within +//! +//! 
Example: cargo run --example within GeoLite2-City.mmdb "89.160.20.0/24" + use ipnetwork::IpNetwork; use maxminddb::{geoip2, Within}; fn main() -> Result<(), Box> { + // Parse command line arguments let mut args = std::env::args().skip(1); let db_path = args .next() .ok_or("First argument must be the path to the IP database")?; + + // Open the database file let reader = maxminddb::Reader::open_readfile(db_path)?; let cidr_str = args.next().ok_or( "Second argument must be the IP address and mask in CIDR notation, e.g. 0.0.0.0/0 or ::/0", )?; + // Parse the CIDR notation let ip_net: IpNetwork = cidr_str .parse() .map_err(|e| format!("Invalid CIDR notation '{}': {}", cidr_str, e))?; + // Iterate over all networks within the specified range let mut n = 0; - let iter: Within = reader.within(ip_net)?; + let iter: Within<_> = reader.within(ip_net, Default::default())?; for next in iter { - let item = next?; - let continent = item.info.continent.and_then(|c| c.code).unwrap_or(""); - let country = item.info.country.and_then(|c| c.iso_code).unwrap_or(""); - let city = match item.info.city.and_then(|c| c.names) { - Some(names) => names.get("en").unwrap_or(&""), - None => "", + let lookup = next?; + let network = lookup.network()?; + + // Skip networks without data + let Some(info) = lookup.decode::()? 
else { + continue; }; + + // Display location hierarchy + let continent = info.continent.code.unwrap_or(""); + let country = info.country.iso_code.unwrap_or(""); + let city = info.city.names.english.unwrap_or(""); if !city.is_empty() { - println!("{} {}-{}-{}", item.ip_net, continent, country, city); + println!("{} {}-{}-{}", network, continent, country, city); } else if !country.is_empty() { - println!("{} {}-{}", item.ip_net, continent, country); + println!("{} {}-{}", network, continent, country); } else if !continent.is_empty() { - println!("{} {}", item.ip_net, continent); + println!("{} {}", network, continent); } n += 1; } diff --git a/src/decoder.rs b/src/decoder.rs new file mode 100644 index 00000000..99657664 --- /dev/null +++ b/src/decoder.rs @@ -0,0 +1,726 @@ +//! Binary format decoder for MaxMind DB files. +//! +//! This module implements deserialization of the MaxMind DB binary format +//! into Rust types via serde. The decoder handles all MaxMind DB data types +//! including pointers, maps, arrays, and primitive types. +//! +//! Most users should not need to interact with this module directly. +//! Use [`Reader::lookup()`](crate::Reader::lookup) for normal lookups. + +use log::debug; +use serde::de::{self, DeserializeSeed, MapAccess, SeqAccess, Visitor}; +use serde::forward_to_deserialize_any; +use std::convert::TryInto; + +use crate::error::MaxMindDbError; + +// MaxMind DB type constants +const TYPE_EXTENDED: u8 = 0; +pub(crate) const TYPE_POINTER: u8 = 1; +const TYPE_STRING: u8 = 2; +const TYPE_DOUBLE: u8 = 3; +const TYPE_BYTES: u8 = 4; +const TYPE_UINT16: u8 = 5; +const TYPE_UINT32: u8 = 6; +pub(crate) const TYPE_MAP: u8 = 7; +const TYPE_INT32: u8 = 8; +const TYPE_UINT64: u8 = 9; +const TYPE_UINT128: u8 = 10; +pub(crate) const TYPE_ARRAY: u8 = 11; +const TYPE_BOOL: u8 = 14; +const TYPE_FLOAT: u8 = 15; + +/// Maximum recursion depth for nested data structures. +/// This matches the value used in libmaxminddb and the Go reader. 
+const MAXIMUM_DATA_STRUCTURE_DEPTH: u16 = 512; + +fn to_usize(base: u8, bytes: &[u8]) -> usize { + bytes + .iter() + .fold(base as usize, |acc, &b| (acc << 8) | b as usize) +} + +enum Value<'a, 'de> { + Any { prev_ptr: usize }, + Bytes(&'de [u8]), + String(&'de str), + Bool(bool), + I32(i32), + U16(u16), + U32(u32), + U64(u64), + U128(u128), + F64(f64), + F32(f32), + Map(MapAccessor<'a, 'de>), + Array(ArrayAccess<'a, 'de>), +} + +/// Decoder for MaxMind DB binary format. +/// +/// Implements serde's `Deserializer` trait. Handles pointer resolution, +/// type coercion, and nested data structures. +#[derive(Debug)] +pub(crate) struct Decoder<'de> { + buf: &'de [u8], + current_ptr: usize, + depth: u16, +} + +impl<'de> Decoder<'de> { + pub(crate) fn new(buf: &'de [u8], start_ptr: usize) -> Decoder<'de> { + Decoder { + buf, + current_ptr: start_ptr, + depth: 0, + } + } + + /// Check and increment depth, returning error if limit exceeded. + #[inline] + fn enter_nested(&mut self) -> DecodeResult<()> { + if self.depth >= MAXIMUM_DATA_STRUCTURE_DEPTH { + return Err(self.invalid_db_error( + "exceeded maximum data structure depth; database is likely corrupt", + )); + } + self.depth += 1; + Ok(()) + } + + /// Decrement depth when exiting a nested structure. + #[inline] + fn exit_nested(&mut self) { + self.depth = self.depth.saturating_sub(1); + } + + /// Create an InvalidDatabase error with current offset context. + #[inline] + fn invalid_db_error(&self, msg: &str) -> MaxMindDbError { + MaxMindDbError::invalid_database_at(msg, self.current_ptr) + } + + /// Create a Decoding error with current offset context. 
+ #[inline] + fn decode_error(&self, msg: &str) -> MaxMindDbError { + MaxMindDbError::decoding_at(msg, self.current_ptr) + } + + #[inline] + pub(crate) fn offset(&self) -> usize { + self.current_ptr + } + + #[inline(always)] + fn eat_byte(&mut self) -> u8 { + let b = self.buf[self.current_ptr]; + self.current_ptr += 1; + b + } + + #[inline(always)] + fn size_from_ctrl_byte(&mut self, ctrl_byte: u8, type_num: u8) -> usize { + let size = (ctrl_byte & 0x1f) as usize; + // Extended type - size field is used differently + if type_num == TYPE_EXTENDED { + return size; + } + + let bytes_to_read = size.saturating_sub(28); + + let new_offset = self.current_ptr + bytes_to_read; + let size_bytes = &self.buf[self.current_ptr..new_offset]; + self.current_ptr = new_offset; + + match size { + s if s < 29 => s, + 29 => 29_usize + size_bytes[0] as usize, + 30 => 285_usize + to_usize(0, size_bytes), + _ => 65_821_usize + to_usize(0, size_bytes), + } + } + + #[inline(always)] + fn size_and_type(&mut self) -> (usize, u8) { + let ctrl_byte = self.eat_byte(); + let mut type_num = ctrl_byte >> 5; + // Extended type: type 0 means read next byte for actual type + if type_num == TYPE_EXTENDED { + type_num = self.eat_byte() + TYPE_MAP; // Extended types start at 7 + } + let size = self.size_from_ctrl_byte(ctrl_byte, type_num); + (size, type_num) + } + + fn decode_any>(&mut self, visitor: V) -> DecodeResult { + match self.decode_any_value()? 
{ + Value::Any { prev_ptr } => { + // Pointer dereference - track depth + self.enter_nested()?; + let res = self.decode_any(visitor); + self.exit_nested(); + self.current_ptr = prev_ptr; + res + } + Value::Bool(x) => visitor.visit_bool(x), + Value::Bytes(x) => visitor.visit_borrowed_bytes(x), + Value::String(x) => visitor.visit_borrowed_str(x), + Value::I32(x) => visitor.visit_i32(x), + Value::U16(x) => visitor.visit_u16(x), + Value::U32(x) => visitor.visit_u32(x), + Value::U64(x) => visitor.visit_u64(x), + Value::U128(x) => visitor.visit_u128(x), + Value::F64(x) => visitor.visit_f64(x), + Value::F32(x) => visitor.visit_f32(x), + // Maps and arrays enter_nested in decode_any_value; exit when done + Value::Map(x) => { + let res = visitor.visit_map(x); + self.exit_nested(); + res + } + Value::Array(x) => { + let res = visitor.visit_seq(x); + self.exit_nested(); + res + } + } + } + + #[inline(always)] + fn decode_any_value(&mut self) -> DecodeResult> { + let (size, type_num) = self.size_and_type(); + + Ok(match type_num { + TYPE_POINTER => { + let new_ptr = self.decode_pointer(size); + let prev_ptr = self.current_ptr; + self.current_ptr = new_ptr; + + Value::Any { prev_ptr } + } + TYPE_STRING => Value::String(self.decode_string(size)?), + TYPE_DOUBLE => Value::F64(self.decode_double(size)?), + TYPE_BYTES => Value::Bytes(self.decode_bytes(size)?), + TYPE_UINT16 => Value::U16(self.decode_uint16(size)?), + TYPE_UINT32 => Value::U32(self.decode_uint32(size)?), + TYPE_MAP => { + self.enter_nested()?; + self.decode_map(size) + } + TYPE_INT32 => Value::I32(self.decode_int(size)?), + TYPE_UINT64 => Value::U64(self.decode_uint64(size)?), + TYPE_UINT128 => Value::U128(self.decode_uint128(size)?), + TYPE_ARRAY => { + self.enter_nested()?; + self.decode_array(size) + } + TYPE_BOOL => Value::Bool(self.decode_bool(size)?), + TYPE_FLOAT => Value::F32(self.decode_float(size)?), + u => return Err(self.invalid_db_error(&format!("unknown data type: {u}"))), + }) + } + + fn 
decode_array(&mut self, size: usize) -> Value<'_, 'de> { + Value::Array(ArrayAccess { + de: self, + count: size, + }) + } + + fn decode_bool(&mut self, size: usize) -> DecodeResult { + match size { + 0 | 1 => Ok(size != 0), + s => Err(self.invalid_db_error(&format!("bool of size {s}"))), + } + } + + fn decode_bytes(&mut self, size: usize) -> DecodeResult<&'de [u8]> { + let new_offset = self.current_ptr + size; + let u8_slice = &self.buf[self.current_ptr..new_offset]; + self.current_ptr = new_offset; + + Ok(u8_slice) + } + + fn decode_float(&mut self, size: usize) -> DecodeResult { + let new_offset = self.current_ptr + size; + let value: [u8; 4] = self.buf[self.current_ptr..new_offset] + .try_into() + .map_err(|_| self.invalid_db_error(&format!("float of size {size}")))?; + self.current_ptr = new_offset; + let float_value = f32::from_be_bytes(value); + Ok(float_value) + } + + fn decode_double(&mut self, size: usize) -> DecodeResult { + let new_offset = self.current_ptr + size; + let value: [u8; 8] = self.buf[self.current_ptr..new_offset] + .try_into() + .map_err(|_| self.invalid_db_error(&format!("double of size {size}")))?; + self.current_ptr = new_offset; + let float_value = f64::from_be_bytes(value); + Ok(float_value) + } + + fn decode_uint64(&mut self, size: usize) -> DecodeResult { + match size { + s if s <= 8 => { + let new_offset = self.current_ptr + size; + + let value = self.buf[self.current_ptr..new_offset] + .iter() + .fold(0_u64, |acc, &b| (acc << 8) | u64::from(b)); + self.current_ptr = new_offset; + Ok(value) + } + s => Err(self.invalid_db_error(&format!("u64 of size {s}"))), + } + } + + fn decode_uint128(&mut self, size: usize) -> DecodeResult { + match size { + s if s <= 16 => { + let new_offset = self.current_ptr + size; + + let value = self.buf[self.current_ptr..new_offset] + .iter() + .fold(0_u128, |acc, &b| (acc << 8) | u128::from(b)); + self.current_ptr = new_offset; + Ok(value) + } + s => Err(self.invalid_db_error(&format!("u128 of size 
{s}"))), + } + } + + fn decode_uint32(&mut self, size: usize) -> DecodeResult { + match size { + s if s <= 4 => { + let new_offset = self.current_ptr + size; + + let value = self.buf[self.current_ptr..new_offset] + .iter() + .fold(0_u32, |acc, &b| (acc << 8) | u32::from(b)); + self.current_ptr = new_offset; + Ok(value) + } + s => Err(self.invalid_db_error(&format!("u32 of size {s}"))), + } + } + + fn decode_uint16(&mut self, size: usize) -> DecodeResult { + match size { + s if s <= 2 => { + let new_offset = self.current_ptr + size; + + let value = self.buf[self.current_ptr..new_offset] + .iter() + .fold(0_u16, |acc, &b| (acc << 8) | u16::from(b)); + self.current_ptr = new_offset; + Ok(value) + } + s => Err(self.invalid_db_error(&format!("u16 of size {s}"))), + } + } + + fn decode_int(&mut self, size: usize) -> DecodeResult { + match size { + s if s <= 4 => { + let new_offset = self.current_ptr + size; + + let value = self.buf[self.current_ptr..new_offset] + .iter() + .fold(0_i32, |acc, &b| (acc << 8) | i32::from(b)); + self.current_ptr = new_offset; + Ok(value) + } + s => Err(self.invalid_db_error(&format!("i32 of size {s}"))), + } + } + + fn decode_map(&mut self, size: usize) -> Value<'_, 'de> { + Value::Map(MapAccessor { + de: self, + count: size * 2, + }) + } + + fn decode_pointer(&mut self, size: usize) -> usize { + let pointer_value_offset = [0, 0, 2048, 526_336, 0]; + let pointer_size = ((size >> 3) & 0x3) + 1; + let new_offset = self.current_ptr + pointer_size; + let pointer_bytes = &self.buf[self.current_ptr..new_offset]; + self.current_ptr = new_offset; + + let base = if pointer_size == 4 { + 0 + } else { + (size & 0x7) as u8 + }; + let unpacked = to_usize(base, pointer_bytes); + + unpacked + pointer_value_offset[pointer_size] + } + + #[cfg(feature = "unsafe-str-decode")] + fn decode_string(&mut self, size: usize) -> DecodeResult<&'de str> { + use std::str::from_utf8_unchecked; + + let new_offset: usize = self.current_ptr + size; + let bytes = 
&self.buf[self.current_ptr..new_offset]; + self.current_ptr = new_offset; + // SAFETY: + // A corrupt maxminddb will cause undefined behaviour. + // If the caller has verified the integrity of their database and trusts their upstream + // provider, they can opt-into the performance gains provided by this unsafe function via + // the `unsafe-str-decode` feature flag. + // This can provide around 20% performance increase in the lookup benchmark. + let v = unsafe { from_utf8_unchecked(bytes) }; + Ok(v) + } + + #[cfg(not(feature = "unsafe-str-decode"))] + fn decode_string(&mut self, size: usize) -> DecodeResult<&'de str> { + #[cfg(feature = "simdutf8")] + use simdutf8::basic::from_utf8; + #[cfg(not(feature = "simdutf8"))] + use std::str::from_utf8; + + let new_offset: usize = self.current_ptr + size; + let bytes = &self.buf[self.current_ptr..new_offset]; + self.current_ptr = new_offset; + match from_utf8(bytes) { + Ok(v) => Ok(v), + Err(_) => Err(self.invalid_db_error("invalid UTF-8 in string")), + } + } + + // ========== Navigation methods for path decoding and verification ========== + + /// Peeks at the type and size without consuming it. + /// Returns (size, type_num) and restores the position. + pub(crate) fn peek_type(&mut self) -> DecodeResult<(usize, u8)> { + let saved_ptr = self.current_ptr; + let result = self.size_and_type_following_pointers()?; + self.current_ptr = saved_ptr; + Ok(result) + } + + /// Consumes a map header, returning its size. Follows pointers. + pub(crate) fn consume_map_header(&mut self) -> DecodeResult { + let (size, type_num) = self.size_and_type(); + if type_num == TYPE_POINTER { + let new_ptr = self.decode_pointer(size); + self.current_ptr = new_ptr; + self.consume_map_header() + } else if type_num == TYPE_MAP { + Ok(size) + } else { + Err(self.decode_error(&format!("expected map, got type {type_num}"))) + } + } + + /// Consumes an array header, returning its size. Follows pointers. 
+ pub(crate) fn consume_array_header(&mut self) -> DecodeResult { + let (size, type_num) = self.size_and_type(); + if type_num == TYPE_POINTER { + let new_ptr = self.decode_pointer(size); + self.current_ptr = new_ptr; + self.consume_array_header() + } else if type_num == TYPE_ARRAY { + Ok(size) + } else { + Err(self.decode_error(&format!("expected array, got type {type_num}"))) + } + } + + /// Gets size and type, following any pointers. + fn size_and_type_following_pointers(&mut self) -> DecodeResult<(usize, u8)> { + let (size, type_num) = self.size_and_type(); + if type_num == TYPE_POINTER { + // Pointer - follow it + let new_ptr = self.decode_pointer(size); + self.current_ptr = new_ptr; + self.size_and_type_following_pointers() + } else { + Ok((size, type_num)) + } + } + + /// Reads a string directly, following pointers if needed. + pub(crate) fn read_string(&mut self) -> DecodeResult<&'de str> { + let (size, type_num) = self.size_and_type(); + if type_num == TYPE_POINTER { + // Pointer + let new_ptr = self.decode_pointer(size); + let saved_ptr = self.current_ptr; + self.current_ptr = new_ptr; + let result = self.read_string(); + self.current_ptr = saved_ptr; + result + } else if type_num == TYPE_STRING { + self.decode_string(size) + } else { + Err(self.invalid_db_error(&format!("expected string, got type {type_num}"))) + } + } + + /// Skips the current value, following pointers. + pub(crate) fn skip_value(&mut self) -> DecodeResult<()> { + let (size, type_num) = self.size_and_type(); + self.skip_value_inner(size, type_num, true) + } + + /// Skips the current value without following pointers (for verification). 
+ pub(crate) fn skip_value_for_verification(&mut self) -> DecodeResult<()> { + let (size, type_num) = self.size_and_type(); + self.skip_value_inner(size, type_num, false) + } + + fn skip_value_inner( + &mut self, + size: usize, + type_num: u8, + follow_pointers: bool, + ) -> DecodeResult<()> { + match type_num { + TYPE_POINTER => { + let new_ptr = self.decode_pointer(size); + if follow_pointers { + let saved_ptr = self.current_ptr; + self.current_ptr = new_ptr; + self.skip_value()?; + self.current_ptr = saved_ptr; + } + Ok(()) + } + TYPE_STRING | TYPE_BYTES => { + // String or Bytes - skip size bytes + self.current_ptr += size; + Ok(()) + } + TYPE_DOUBLE => { + // Double - must be exactly 8 bytes + if size != 8 { + return Err(self.invalid_db_error(&format!("double of size {size}"))); + } + self.current_ptr += size; + Ok(()) + } + TYPE_FLOAT => { + // Float - must be exactly 4 bytes + if size != 4 { + return Err(self.invalid_db_error(&format!("float of size {size}"))); + } + self.current_ptr += size; + Ok(()) + } + TYPE_UINT16 | TYPE_UINT32 | TYPE_INT32 | TYPE_UINT64 | TYPE_UINT128 => { + // Numeric types - skip size bytes + self.current_ptr += size; + Ok(()) + } + TYPE_BOOL => { + // Boolean - size field IS the value, no data bytes to skip + Ok(()) + } + TYPE_MAP => { + // Map - skip size key-value pairs + for _ in 0..size { + self.skip_value_inner_with_follow(follow_pointers)?; // key + self.skip_value_inner_with_follow(follow_pointers)?; // value + } + Ok(()) + } + TYPE_ARRAY => { + // Array - skip size elements + for _ in 0..size { + self.skip_value_inner_with_follow(follow_pointers)?; + } + Ok(()) + } + u => Err(self.invalid_db_error(&format!("unknown data type: {u}"))), + } + } + + fn skip_value_inner_with_follow(&mut self, follow_pointers: bool) -> DecodeResult<()> { + let (size, type_num) = self.size_and_type(); + self.skip_value_inner(size, type_num, follow_pointers) + } +} + +pub type DecodeResult = Result; + +impl<'de: 'a, 'a> de::Deserializer<'de> for 
&'a mut Decoder<'de> { + type Error = MaxMindDbError; + + fn deserialize_any(self, visitor: V) -> DecodeResult + where + V: Visitor<'de>, + { + debug!("deserialize_any"); + + self.decode_any(visitor) + } + + fn deserialize_option(self, visitor: V) -> DecodeResult + where + V: Visitor<'de>, + { + debug!("deserialize_option"); + + visitor.visit_some(self) + } + + fn is_human_readable(&self) -> bool { + false + } + + fn deserialize_ignored_any(self, visitor: V) -> DecodeResult + where + V: Visitor<'de>, + { + self.skip_value()?; + visitor.visit_unit() + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> DecodeResult + where + V: Visitor<'de>, + { + visitor.visit_enum(EnumAccessor { de: self }) + } + + forward_to_deserialize_any! { + bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string + bytes byte_buf unit unit_struct newtype_struct seq tuple + tuple_struct map struct identifier + } +} + +struct ArrayAccess<'a, 'de: 'a> { + de: &'a mut Decoder<'de>, + count: usize, +} + +// `SeqAccess` is provided to the `Visitor` to give it the ability to iterate +// through elements of the sequence. +impl<'de> SeqAccess<'de> for ArrayAccess<'_, 'de> { + type Error = MaxMindDbError; + + fn size_hint(&self) -> Option { + Some(self.count) + } + + fn next_element_seed(&mut self, seed: T) -> DecodeResult> + where + T: DeserializeSeed<'de>, + { + // Check if there are no more elements. + if self.count == 0 { + return Ok(None); + } + self.count -= 1; + + // Deserialize an array element. + seed.deserialize(&mut *self.de).map(Some) + } +} + +struct MapAccessor<'a, 'de: 'a> { + de: &'a mut Decoder<'de>, + count: usize, +} + +// `MapAccess` is provided to the `Visitor` to give it the ability to iterate +// through entries of the map. 
+impl<'de> MapAccess<'de> for MapAccessor<'_, 'de> { + type Error = MaxMindDbError; + + fn size_hint(&self) -> Option { + Some(self.count / 2) + } + + fn next_key_seed(&mut self, seed: K) -> DecodeResult> + where + K: DeserializeSeed<'de>, + { + // Check if there are no more entries. + if self.count == 0 { + return Ok(None); + } + self.count -= 1; + + // Deserialize a map key. + seed.deserialize(&mut *self.de).map(Some) + } + + fn next_value_seed(&mut self, seed: V) -> DecodeResult + where + V: DeserializeSeed<'de>, + { + // Check if there are no more entries. + if self.count == 0 { + return Err(self.de.decode_error("no more entries")); + } + self.count -= 1; + + // Deserialize a map value. + seed.deserialize(&mut *self.de) + } +} + +struct EnumAccessor<'a, 'de: 'a> { + de: &'a mut Decoder<'de>, +} + +impl<'de> de::EnumAccess<'de> for EnumAccessor<'_, 'de> { + type Error = MaxMindDbError; + type Variant = Self; + + fn variant_seed(self, seed: V) -> DecodeResult<(V::Value, Self::Variant)> + where + V: DeserializeSeed<'de>, + { + // Deserialize the variant identifier (string) + let variant = seed.deserialize(&mut *self.de)?; + Ok((variant, self)) + } +} + +impl<'de> de::VariantAccess<'de> for EnumAccessor<'_, 'de> { + type Error = MaxMindDbError; + + fn unit_variant(self) -> DecodeResult<()> { + Ok(()) + } + + fn newtype_variant_seed(self, seed: T) -> DecodeResult + where + T: DeserializeSeed<'de>, + { + seed.deserialize(&mut *self.de) + } + + fn tuple_variant(self, _len: usize, visitor: V) -> DecodeResult + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_seq(&mut *self.de, visitor) + } + + fn struct_variant( + self, + _fields: &'static [&'static str], + visitor: V, + ) -> DecodeResult + where + V: Visitor<'de>, + { + de::Deserializer::deserialize_map(&mut *self.de, visitor) + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 00000000..c780a2fa --- /dev/null +++ b/src/error.rs @@ -0,0 +1,210 @@ +//! 
Error types for MaxMind DB operations. + +use std::fmt::Display; +use std::io; + +use ipnetwork::IpNetworkError; +use serde::de; +use thiserror::Error; + +/// Error returned by MaxMind DB operations. +#[derive(Error, Debug)] +#[non_exhaustive] +pub enum MaxMindDbError { + /// The database file is invalid or corrupted. + #[error("{}", format_invalid_database(.message, .offset))] + InvalidDatabase { + /// Description of what is invalid. + message: String, + /// Byte offset in the database where the error was detected. + offset: Option, + }, + + /// An I/O error occurred while reading the database. + #[error("i/o error: {0}")] + Io( + #[from] + #[source] + io::Error, + ), + + /// Memory mapping failed. + #[cfg(feature = "mmap")] + #[error("memory map error: {0}")] + Mmap(#[source] io::Error), + + /// Error decoding data from the database. + #[error("{}", format_decoding_error(.message, .offset, .path.as_deref()))] + Decoding { + /// Description of the decoding error. + message: String, + /// Byte offset in the data section where the error occurred. + offset: Option, + /// JSON-pointer-like path to the field (e.g., "/city/names/en"). + path: Option, + }, + + /// The provided network/CIDR is invalid. + #[error("invalid network: {0}")] + InvalidNetwork( + #[from] + #[source] + IpNetworkError, + ), + + /// The provided input is invalid for this operation. + #[error("invalid input: {message}")] + InvalidInput { + /// Description of what is invalid about the input. 
+ message: String, + }, +} + +fn format_invalid_database(message: &str, offset: &Option) -> String { + match offset { + Some(off) => format!("invalid database at offset {off}: {message}"), + None => format!("invalid database: {message}"), + } +} + +fn format_decoding_error(message: &str, offset: &Option, path: Option<&str>) -> String { + match (offset, path) { + (Some(off), Some(p)) => format!("decoding error at offset {off} (path: {p}): {message}"), + (Some(off), None) => format!("decoding error at offset {off}: {message}"), + (None, Some(p)) => format!("decoding error (path: {p}): {message}"), + (None, None) => format!("decoding error: {message}"), + } +} + +impl MaxMindDbError { + /// Creates an InvalidDatabase error with just a message. + pub fn invalid_database(message: impl Into) -> Self { + MaxMindDbError::InvalidDatabase { + message: message.into(), + offset: None, + } + } + + /// Creates an InvalidDatabase error with message and offset. + pub fn invalid_database_at(message: impl Into, offset: usize) -> Self { + MaxMindDbError::InvalidDatabase { + message: message.into(), + offset: Some(offset), + } + } + + /// Creates a Decoding error with just a message. + pub fn decoding(message: impl Into) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: None, + path: None, + } + } + + /// Creates a Decoding error with message and offset. + pub fn decoding_at(message: impl Into, offset: usize) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: Some(offset), + path: None, + } + } + + /// Creates a Decoding error with message, offset, and path. + pub fn decoding_at_path( + message: impl Into, + offset: usize, + path: impl Into, + ) -> Self { + MaxMindDbError::Decoding { + message: message.into(), + offset: Some(offset), + path: Some(path.into()), + } + } + + /// Creates an InvalidInput error. 
+ pub fn invalid_input(message: impl Into) -> Self { + MaxMindDbError::InvalidInput { + message: message.into(), + } + } +} + +impl de::Error for MaxMindDbError { + fn custom(msg: T) -> Self { + MaxMindDbError::decoding(msg.to_string()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::{Error, ErrorKind}; + + #[test] + fn test_error_display() { + // Error without offset + assert_eq!( + format!( + "{}", + MaxMindDbError::invalid_database("something went wrong") + ), + "invalid database: something went wrong".to_owned(), + ); + // Error with offset + assert_eq!( + format!( + "{}", + MaxMindDbError::invalid_database_at("something went wrong", 42) + ), + "invalid database at offset 42: something went wrong".to_owned(), + ); + let io_err = Error::new(ErrorKind::NotFound, "file not found"); + assert_eq!( + format!("{}", MaxMindDbError::from(io_err)), + "i/o error: file not found".to_owned(), + ); + + #[cfg(feature = "mmap")] + { + let mmap_io_err = Error::new(ErrorKind::PermissionDenied, "mmap failed"); + assert_eq!( + format!("{}", MaxMindDbError::Mmap(mmap_io_err)), + "memory map error: mmap failed".to_owned(), + ); + } + + // Decoding error without offset + assert_eq!( + format!("{}", MaxMindDbError::decoding("unexpected type")), + "decoding error: unexpected type".to_owned(), + ); + // Decoding error with offset + assert_eq!( + format!("{}", MaxMindDbError::decoding_at("unexpected type", 100)), + "decoding error at offset 100: unexpected type".to_owned(), + ); + // Decoding error with offset and path + assert_eq!( + format!( + "{}", + MaxMindDbError::decoding_at_path("unexpected type", 100, "/city/names/en") + ), + "decoding error at offset 100 (path: /city/names/en): unexpected type".to_owned(), + ); + + let net_err = IpNetworkError::InvalidPrefix; + assert_eq!( + format!("{}", MaxMindDbError::from(net_err)), + "invalid network: invalid prefix".to_owned(), + ); + + // InvalidInput error + assert_eq!( + format!("{}", 
MaxMindDbError::invalid_input("bad address")), + "invalid input: bad address".to_owned(), + ); + } +} diff --git a/src/geoip2.rs b/src/geoip2.rs new file mode 100644 index 00000000..43cbea99 --- /dev/null +++ b/src/geoip2.rs @@ -0,0 +1,766 @@ +//! GeoIP2 and GeoLite2 database record structures +//! +//! This module provides strongly-typed Rust structures that correspond to the +//! various GeoIP2 and GeoLite2 database record formats. +//! +//! # Record Types +//! +//! - [`City`] - Complete city-level geolocation data (most comprehensive) +//! - [`Country`] - Country-level geolocation data +//! - [`Enterprise`] - Enterprise database with additional confidence scores +//! - [`Isp`] - Internet Service Provider information +//! - [`AnonymousIp`] - Anonymous proxy and VPN detection +//! - [`ConnectionType`] - Connection type classification +//! - [`Domain`] - Domain information +//! - [`Asn`] - Autonomous System Number data +//! - [`DensityIncome`] - Population density and income data +//! +//! # Usage Examples +//! +//! ```rust +//! use maxminddb::{Reader, geoip2}; +//! use std::net::IpAddr; +//! +//! # fn main() -> Result<(), maxminddb::MaxMindDbError> { +//! let reader = Reader::open_readfile( +//! "test-data/test-data/GeoIP2-City-Test.mmdb")?; +//! let ip: IpAddr = "89.160.20.128".parse().unwrap(); +//! +//! // City lookup - nested structs are always present (default to empty) +//! let result = reader.lookup(ip)?; +//! if let Some(city) = result.decode::()? { +//! // Direct access to nested structs - no Option unwrapping needed +//! if let Some(name) = city.city.names.english { +//! println!("City: {}", name); +//! } +//! if let Some(code) = city.country.iso_code { +//! println!("Country: {}", code); +//! } +//! // Subdivisions is a Vec, empty if not present +//! for sub in &city.subdivisions { +//! if let Some(code) = sub.iso_code { +//! println!("Subdivision: {}", code); +//! } +//! } +//! } +//! +//! // Country-only lookup (smaller/faster) +//! 
let result = reader.lookup(ip)?; +//! if let Some(country) = result.decode::()? { +//! if let Some(name) = country.country.names.english { +//! println!("Country: {}", name); +//! } +//! } +//! # Ok(()) +//! # } +//! ``` + +use serde::{Deserialize, Serialize}; + +/// Localized names for geographic entities. +/// +/// Contains name translations in the languages supported by MaxMind databases. +/// Access names directly via fields like `names.english` or `names.german`. +/// Each field is `Option<&str>` - `None` if not available in that language. +/// +/// # Example +/// +/// ``` +/// use maxminddb::{Reader, geoip2}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// let ip: IpAddr = "89.160.20.128".parse().unwrap(); +/// let result = reader.lookup(ip).unwrap(); +/// +/// if let Some(city) = result.decode::().unwrap() { +/// // Access names directly - Option<&str> +/// if let Some(name) = city.city.names.english { +/// println!("City (en): {}", name); +/// } +/// if let Some(name) = city.city.names.german { +/// println!("City (de): {}", name); +/// } +/// } +/// ``` +#[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq, Eq)] +pub struct Names<'a> { + /// German name (de) + #[serde( + borrow, + rename = "de", + default, + skip_serializing_if = "Option::is_none" + )] + pub german: Option<&'a str>, + /// English name (en) + #[serde(rename = "en", default, skip_serializing_if = "Option::is_none")] + pub english: Option<&'a str>, + /// Spanish name (es) + #[serde(rename = "es", default, skip_serializing_if = "Option::is_none")] + pub spanish: Option<&'a str>, + /// French name (fr) + #[serde(rename = "fr", default, skip_serializing_if = "Option::is_none")] + pub french: Option<&'a str>, + /// Japanese name (ja) + #[serde(rename = "ja", default, skip_serializing_if = "Option::is_none")] + pub japanese: Option<&'a str>, + /// Brazilian Portuguese name (pt-BR) + #[serde(rename = 
"pt-BR", default, skip_serializing_if = "Option::is_none")] + pub brazilian_portuguese: Option<&'a str>, + /// Russian name (ru) + #[serde(rename = "ru", default, skip_serializing_if = "Option::is_none")] + pub russian: Option<&'a str>, + /// Simplified Chinese name (zh-CN) + #[serde(rename = "zh-CN", default, skip_serializing_if = "Option::is_none")] + pub simplified_chinese: Option<&'a str>, +} + +impl Names<'_> { + /// Returns true if all name fields are `None`. + #[must_use] + pub fn is_empty(&self) -> bool { + self.german.is_none() + && self.english.is_none() + && self.spanish.is_none() + && self.french.is_none() + && self.japanese.is_none() + && self.brazilian_portuguese.is_none() + && self.russian.is_none() + && self.simplified_chinese.is_none() + } +} + +/// GeoIP2/GeoLite2 Country database record. +/// +/// Contains country-level geolocation data for an IP address. This is the +/// simplest geolocation record type, suitable when you only need country +/// information. +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +pub struct Country<'a> { + /// Continent data for the IP address. + #[serde(borrow, default, skip_serializing_if = "country::Continent::is_empty")] + pub continent: country::Continent<'a>, + /// Country where MaxMind believes the IP is located. + #[serde(default, skip_serializing_if = "country::Country::is_empty")] + pub country: country::Country<'a>, + /// Country where the ISP has registered the IP block. + /// May differ from `country` (e.g., for mobile networks or VPNs). + #[serde(default, skip_serializing_if = "country::Country::is_empty")] + pub registered_country: country::Country<'a>, + /// Country represented by users of this IP (e.g., military base or embassy). + #[serde(default, skip_serializing_if = "country::RepresentedCountry::is_empty")] + pub represented_country: country::RepresentedCountry<'a>, + /// Various traits associated with the IP address. 
+ #[serde(default, skip_serializing_if = "country::Traits::is_empty")] + pub traits: country::Traits, +} + +/// GeoIP2/GeoLite2 City database record. +/// +/// Contains city-level geolocation data including location coordinates, +/// postal code, subdivisions (states/provinces), and country information. +/// This is the most comprehensive free geolocation record type. +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +pub struct City<'a> { + /// City data for the IP address. + #[serde(borrow, default, skip_serializing_if = "city::City::is_empty")] + pub city: city::City<'a>, + /// Continent data for the IP address. + #[serde(default, skip_serializing_if = "city::Continent::is_empty")] + pub continent: city::Continent<'a>, + /// Country where MaxMind believes the IP is located. + #[serde(default, skip_serializing_if = "city::Country::is_empty")] + pub country: city::Country<'a>, + /// Location data including coordinates and time zone. + #[serde(default, skip_serializing_if = "city::Location::is_empty")] + pub location: city::Location<'a>, + /// Postal code data for the IP address. + #[serde(default, skip_serializing_if = "city::Postal::is_empty")] + pub postal: city::Postal<'a>, + /// Country where the ISP has registered the IP block. + #[serde(default, skip_serializing_if = "city::Country::is_empty")] + pub registered_country: city::Country<'a>, + /// Country represented by users of this IP (e.g., military base or embassy). + #[serde(default, skip_serializing_if = "city::RepresentedCountry::is_empty")] + pub represented_country: city::RepresentedCountry<'a>, + /// Subdivisions (states, provinces, etc.) ordered from largest to smallest. + /// For example, Oxford, UK would have England first, then Oxfordshire. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub subdivisions: Vec>, + /// Various traits associated with the IP address. 
+ #[serde(default, skip_serializing_if = "city::Traits::is_empty")] + pub traits: city::Traits, +} + +/// GeoIP2 Enterprise database record. +/// +/// Contains all City data plus additional confidence scores and traits. +/// Enterprise records include confidence values (0-100) indicating MaxMind's +/// certainty about the accuracy of each field. +#[derive(Deserialize, Serialize, Clone, Debug, Default)] +pub struct Enterprise<'a> { + /// City data with confidence score. + #[serde(borrow, default, skip_serializing_if = "enterprise::City::is_empty")] + pub city: enterprise::City<'a>, + /// Continent data for the IP address. + #[serde(default, skip_serializing_if = "enterprise::Continent::is_empty")] + pub continent: enterprise::Continent<'a>, + /// Country data with confidence score. + #[serde(default, skip_serializing_if = "enterprise::Country::is_empty")] + pub country: enterprise::Country<'a>, + /// Location data including coordinates and time zone. + #[serde(default, skip_serializing_if = "enterprise::Location::is_empty")] + pub location: enterprise::Location<'a>, + /// Postal code data with confidence score. + #[serde(default, skip_serializing_if = "enterprise::Postal::is_empty")] + pub postal: enterprise::Postal<'a>, + /// Country where the ISP has registered the IP block. + #[serde(default, skip_serializing_if = "enterprise::Country::is_empty")] + pub registered_country: enterprise::Country<'a>, + /// Country represented by users of this IP (e.g., military base or embassy). + #[serde( + default, + skip_serializing_if = "enterprise::RepresentedCountry::is_empty" + )] + pub represented_country: enterprise::RepresentedCountry<'a>, + /// Subdivisions with confidence scores, ordered from largest to smallest. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub subdivisions: Vec>, + /// Extended traits including ISP, organization, and connection information. 
+ #[serde(default, skip_serializing_if = "enterprise::Traits::is_empty")] + pub traits: enterprise::Traits<'a>, +} + +/// GeoIP2 ISP database record. +/// +/// Contains Internet Service Provider and organization information for an IP. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct Isp<'a> { + /// The autonomous system number (ASN) for the IP address. + #[serde(skip_serializing_if = "Option::is_none")] + pub autonomous_system_number: Option, + /// The organization associated with the registered ASN. + #[serde(skip_serializing_if = "Option::is_none")] + pub autonomous_system_organization: Option<&'a str>, + /// The name of the ISP associated with the IP address. + #[serde(skip_serializing_if = "Option::is_none")] + pub isp: Option<&'a str>, + /// The mobile country code (MCC) associated with the IP. + /// See . + #[serde(skip_serializing_if = "Option::is_none")] + pub mobile_country_code: Option<&'a str>, + /// The mobile network code (MNC) associated with the IP. + /// See . + #[serde(skip_serializing_if = "Option::is_none")] + pub mobile_network_code: Option<&'a str>, + /// The name of the organization associated with the IP address. + #[serde(skip_serializing_if = "Option::is_none")] + pub organization: Option<&'a str>, +} + +/// GeoIP2 Connection-Type database record. +/// +/// Contains the connection type for an IP address. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct ConnectionType<'a> { + /// The connection type. Possible values include "Dialup", "Cable/DSL", + /// "Corporate", "Cellular", and "Satellite". Additional values may be + /// added in the future. + #[serde(skip_serializing_if = "Option::is_none")] + pub connection_type: Option<&'a str>, +} + +/// GeoIP2 Anonymous IP database record. +/// +/// Contains information about whether an IP address is associated with +/// anonymous or proxy services. 
+#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct AnonymousIp { + /// True if the IP belongs to any sort of anonymous network. + #[serde(skip_serializing_if = "Option::is_none")] + pub is_anonymous: Option, + /// True if the IP is registered to an anonymous VPN provider. + /// Note: If a VPN provider does not register subnets under names associated + /// with them, we will likely only flag their IP ranges using `is_hosting_provider`. + #[serde(skip_serializing_if = "Option::is_none")] + pub is_anonymous_vpn: Option, + /// True if the IP belongs to a hosting or VPN provider. + #[serde(skip_serializing_if = "Option::is_none")] + pub is_hosting_provider: Option, + /// True if the IP belongs to a public proxy. + #[serde(skip_serializing_if = "Option::is_none")] + pub is_public_proxy: Option, + /// True if the IP is on a suspected anonymizing network and belongs to + /// a residential ISP. + #[serde(skip_serializing_if = "Option::is_none")] + pub is_residential_proxy: Option, + /// True if the IP is a Tor exit node. + #[serde(skip_serializing_if = "Option::is_none")] + pub is_tor_exit_node: Option, +} + +/// GeoIP2 DensityIncome database record. +/// +/// Contains population density and income data for an IP address location. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct DensityIncome { + /// The average income in US dollars associated with the IP address. + #[serde(skip_serializing_if = "Option::is_none")] + pub average_income: Option, + /// The estimated number of people per square kilometer. + #[serde(skip_serializing_if = "Option::is_none")] + pub population_density: Option, +} + +/// GeoIP2 Domain database record. +/// +/// Contains the second-level domain associated with an IP address. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct Domain<'a> { + /// The second-level domain associated with the IP address + /// (e.g., "example.com"). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub domain: Option<&'a str>, +} + +/// GeoLite2 ASN database record. +/// +/// Contains Autonomous System Number (ASN) data for an IP address. +#[derive(Deserialize, Serialize, Clone, Debug)] +pub struct Asn<'a> { + /// The autonomous system number for the IP address. + #[serde(skip_serializing_if = "Option::is_none")] + pub autonomous_system_number: Option, + /// The organization associated with the registered ASN. + #[serde(skip_serializing_if = "Option::is_none")] + pub autonomous_system_organization: Option<&'a str>, +} + +/// Country/City database model structs. +/// +/// These structs are used by both [`super::Country`] and [`super::City`] records. +pub mod country { + use super::Names; + use serde::{Deserialize, Serialize}; + + /// Continent data for an IP address. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Continent<'a> { + /// Two-character continent code (e.g., "NA" for North America, "EU" for Europe). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub code: Option<&'a str>, + /// GeoNames ID for the continent. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// Localized continent names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Continent<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Country data for an IP address. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Country<'a> { + /// GeoNames ID for the country. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// True if the country is a member state of the European Union. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_in_european_union: Option, + /// Two-character ISO 3166-1 alpha-2 country code. 
+ /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub iso_code: Option<&'a str>, + /// Localized country names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Country<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Represented country data. + /// + /// The represented country is the country represented by something like a + /// military base or embassy. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct RepresentedCountry<'a> { + /// GeoNames ID for the represented country. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// True if the represented country is a member state of the European Union. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_in_european_union: Option, + /// Two-character ISO 3166-1 alpha-2 country code. + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub iso_code: Option<&'a str>, + /// Localized country names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + /// Type of entity representing the country (e.g., "military"). + #[serde(rename = "type", default, skip_serializing_if = "Option::is_none")] + pub representation_type: Option<&'a str>, + } + + impl RepresentedCountry<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Traits data for Country/City records. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Traits { + /// True if the IP belongs to an anycast network. + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_anycast: Option, + } + + impl Traits { + /// Returns true if all fields are None. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } +} + +/// City database model structs. +/// +/// City-specific structs. Country-level structs are re-exported from [`super::country`]. +pub mod city { + use super::Names; + use serde::{Deserialize, Serialize}; + + pub use super::country::{Continent, Country, RepresentedCountry, Traits}; + + /// City data for an IP address. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct City<'a> { + /// GeoNames ID for the city. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// Localized city names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl City<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Location data for an IP address. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Location<'a> { + /// Approximate accuracy radius in kilometers around the coordinates. + /// This is the radius where we have a 67% confidence that the device + /// using the IP address resides within. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub accuracy_radius: Option, + /// Approximate latitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub latitude: Option, + /// Approximate longitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub longitude: Option, + /// Metro code for the location, used for targeting advertisements. + /// + /// **Deprecated:** Metro codes are no longer maintained and should not be used. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub metro_code: Option, + /// Time zone associated with the location, as specified by the + /// IANA Time Zone Database (e.g., "America/New_York"). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub time_zone: Option<&'a str>, + } + + impl Location<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Postal data for an IP address. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Postal<'a> { + /// Postal code for the location. Not available for all countries. + /// In some countries, this will only contain part of the postal code. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub code: Option<&'a str>, + } + + impl Postal<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Subdivision (state, province, etc.) data for an IP address. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Subdivision<'a> { + /// GeoNames ID for the subdivision. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// ISO 3166-2 subdivision code (up to 3 characters). + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub iso_code: Option<&'a str>, + /// Localized subdivision names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Subdivision<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } +} + +/// Enterprise database model structs. +/// +/// Enterprise-specific structs with confidence scores. Some structs are +/// re-exported from [`super::country`]. 
+pub mod enterprise { + use super::Names; + use serde::{Deserialize, Serialize}; + + pub use super::country::{Continent, RepresentedCountry}; + + /// City data with confidence score. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct City<'a> { + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// city is correct. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub confidence: Option, + /// GeoNames ID for the city. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// Localized city names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl City<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Country data with confidence score. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Country<'a> { + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// country is correct. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub confidence: Option, + /// GeoNames ID for the country. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// True if the country is a member state of the European Union. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_in_european_union: Option, + /// Two-character ISO 3166-1 alpha-2 country code. + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub iso_code: Option<&'a str>, + /// Localized country names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Country<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Location data for an IP address. 
+ #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Location<'a> { + /// Approximate accuracy radius in kilometers around the coordinates. + /// This is the radius where we have a 67% confidence that the device + /// using the IP address resides within. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub accuracy_radius: Option, + /// Approximate latitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub latitude: Option, + /// Approximate longitude of the location. This value is not precise and + /// should not be used to identify a particular address or household. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub longitude: Option, + /// Metro code for the location, used for targeting advertisements. + /// + /// **Deprecated:** Metro codes are no longer maintained and should not be used. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub metro_code: Option, + /// Time zone associated with the location, as specified by the + /// IANA Time Zone Database (e.g., "America/New_York"). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub time_zone: Option<&'a str>, + } + + impl Location<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Postal data with confidence score. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Postal<'a> { + /// Postal code for the location. Not available for all countries. + /// In some countries, this will only contain part of the postal code. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub code: Option<&'a str>, + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// postal code is correct. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub confidence: Option, + } + + impl Postal<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Subdivision data with confidence score. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Subdivision<'a> { + /// Confidence score (0-100) indicating MaxMind's certainty that the + /// subdivision is correct. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub confidence: Option, + /// GeoNames ID for the subdivision. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub geoname_id: Option, + /// ISO 3166-2 subdivision code (up to 3 characters). + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub iso_code: Option<&'a str>, + /// Localized subdivision names. + #[serde(borrow, default, skip_serializing_if = "Names::is_empty")] + pub names: Names<'a>, + } + + impl Subdivision<'_> { + /// Returns true if all fields are empty/None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } + + /// Extended traits data for Enterprise records. + /// + /// Contains ISP, organization, connection type, and anonymity information. + #[derive(Deserialize, Serialize, Clone, Debug, Default, PartialEq)] + pub struct Traits<'a> { + /// The autonomous system number (ASN) for the IP address. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub autonomous_system_number: Option, + /// The organization associated with the registered ASN. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub autonomous_system_organization: Option<&'a str>, + /// The connection type. Possible values include "Dialup", "Cable/DSL", + /// "Corporate", "Cellular", and "Satellite". 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub connection_type: Option<&'a str>, + /// The second-level domain associated with the IP address + /// (e.g., "example.com"). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub domain: Option<&'a str>, + /// True if the IP belongs to any sort of anonymous network. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_anonymous: Option, + /// True if the IP is registered to an anonymous VPN provider. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_anonymous_vpn: Option, + /// True if the IP belongs to an anycast network. + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_anycast: Option, + /// True if the IP belongs to a hosting or VPN provider. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_hosting_provider: Option, + /// The name of the ISP associated with the IP address. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub isp: Option<&'a str>, + /// True if the IP belongs to a public proxy. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_public_proxy: Option, + /// True if the IP is on a suspected anonymizing network and belongs to + /// a residential ISP. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_residential_proxy: Option, + /// True if the IP is a Tor exit node. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_tor_exit_node: Option, + /// The mobile country code (MCC) associated with the IP. + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub mobile_country_code: Option<&'a str>, + /// The mobile network code (MNC) associated with the IP. + /// See . + #[serde(default, skip_serializing_if = "Option::is_none")] + pub mobile_network_code: Option<&'a str>, + /// The name of the organization associated with the IP address. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub organization: Option<&'a str>, + /// The user type associated with the IP address. Possible values include + /// "business", "cafe", "cellular", "college", "government", "hosting", + /// "library", "military", "residential", "router", "school", + /// "search_engine_spider", and "traveler". + #[serde(default, skip_serializing_if = "Option::is_none")] + pub user_type: Option<&'a str>, + } + + impl Traits<'_> { + /// Returns true if all fields are None. + #[must_use] + pub fn is_empty(&self) -> bool { + *self == Self::default() + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..f0fcb4a6 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,374 @@ +#![deny(trivial_casts, trivial_numeric_casts, unused_import_braces)] +//! # MaxMind DB Reader +//! +//! This library reads the MaxMind DB format, including the GeoIP2 and GeoLite2 databases. +//! +//! ## Features +//! +//! This crate provides several optional features for performance and functionality: +//! +//! - **`mmap`** (default: disabled): Enable memory-mapped file access for +//! better performance in long-running applications +//! - **`simdutf8`** (default: disabled): Use SIMD instructions for faster +//! UTF-8 validation during string decoding +//! - **`unsafe-str-decode`** (default: disabled): Skip UTF-8 validation +//! entirely for maximum performance (~20% faster lookups) +//! +//! **Note**: `simdutf8` and `unsafe-str-decode` are mutually exclusive. +//! +//! ## Database Compatibility +//! +//! This library supports all MaxMind DB format databases: +//! - **GeoIP2** databases (City, Country, Enterprise, ISP, etc.) +//! - **GeoLite2** databases (free versions) +//! - Custom MaxMind DB format databases +//! +//! ## Thread Safety +//! +//! The `Reader` is `Send` and `Sync`, making it safe to share across threads. +//! This makes it ideal for web servers and other concurrent applications. +//! +//! ## Quick Start +//! +//! 
```rust +//! use maxminddb::{Reader, geoip2}; +//! use std::net::IpAddr; +//! +//! fn main() -> Result<(), Box<dyn std::error::Error>> { +//! // Open database file +//! # let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb")?; +//! # /* +//! let reader = Reader::open_readfile("/path/to/GeoIP2-City.mmdb")?; +//! # */ +//! +//! // Look up an IP address +//! let ip: IpAddr = "89.160.20.128".parse()?; +//! let result = reader.lookup(ip)?; +//! +//! if let Some(city) = result.decode::<geoip2::City>()? { +//! // Access nested structs directly - no Option unwrapping needed +//! println!("Country: {}", city.country.iso_code.unwrap_or("Unknown")); +//! } +//! +//! Ok(()) +//! } +//! ``` +//! +//! ## Selective Field Access +//! +//! Use `decode_path` to extract specific fields without deserializing the entire record: +//! +//! ```rust +//! use maxminddb::{Reader, PathElement}; +//! use std::net::IpAddr; +//! +//! let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +//! let ip: IpAddr = "89.160.20.128".parse().unwrap(); +//! +//! let result = reader.lookup(ip).unwrap(); +//! let country_code: Option<String> = result.decode_path(&[ +//! PathElement::Key("country"), +//! PathElement::Key("iso_code"), +//! ]).unwrap(); +//! +//! println!("Country: {:?}", country_code); +//! 
``` + +#[cfg(all(feature = "simdutf8", feature = "unsafe-str-decode"))] +compile_error!("features `simdutf8` and `unsafe-str-decode` are mutually exclusive"); + +mod decoder; +mod error; +pub mod geoip2; +mod metadata; +mod reader; +mod result; +mod within; + +// Re-export public types +pub use error::MaxMindDbError; +pub use metadata::Metadata; +pub use reader::Reader; +pub use result::{LookupResult, PathElement}; +pub use within::{Within, WithinOptions}; + +#[cfg(feature = "mmap")] +pub use memmap2::Mmap; + +#[cfg(test)] +mod reader_test; + +#[cfg(test)] +mod tests { + use super::*; + use std::net::IpAddr; + + #[test] + fn test_lookup_network() { + use std::collections::HashMap; + + struct TestCase { + ip: &'static str, + db_file: &'static str, + expected_network: &'static str, + expected_found: bool, + } + + let test_cases = [ + // IPv4 address in IPv6 database - not found, returns containing network + TestCase { + ip: "1.1.1.1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-32.mmdb", + expected_network: "1.0.0.0/8", + expected_found: false, + }, + // IPv6 exact match + TestCase { + ip: "::1:ffff:ffff", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", + expected_network: "::1:ffff:ffff/128", + expected_found: true, + }, + // IPv6 network match (not exact) + TestCase { + ip: "::2:0:1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv6-24.mmdb", + expected_network: "::2:0:0/122", + expected_found: true, + }, + // IPv4 exact match + TestCase { + ip: "1.1.1.1", + db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", + expected_network: "1.1.1.1/32", + expected_found: true, + }, + // IPv4 network match (not exact) + TestCase { + ip: "1.1.1.3", + db_file: "test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb", + expected_network: "1.1.1.2/31", + expected_found: true, + }, + // IPv4 in decoder test database + TestCase { + ip: "1.1.1.3", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "1.1.1.0/24", + 
expected_found: true, + }, + // IPv4-mapped IPv6 address - preserves IPv6 form + TestCase { + ip: "::ffff:1.1.1.128", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "::ffff:1.1.1.0/120", + expected_found: true, + }, + // IPv4-compatible IPv6 address - uses compressed IPv6 notation + TestCase { + ip: "::1.1.1.128", + db_file: "test-data/test-data/MaxMind-DB-test-decoder.mmdb", + expected_network: "::101:100/120", + expected_found: true, + }, + // No IPv4 search tree - IPv4 address returns ::/64 + TestCase { + ip: "200.0.2.1", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - IPv6 address in IPv4 range + TestCase { + ip: "::200.0.2.1", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - IPv6 address at boundary of IPv4 space + TestCase { + ip: "0:0:0:0:ffff:ffff:ffff:ffff", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "::/64", + expected_found: true, + }, + // No IPv4 search tree - high IPv6 address not found + TestCase { + ip: "ef00::", + db_file: "test-data/test-data/MaxMind-DB-no-ipv4-search-tree.mmdb", + expected_network: "8000::/1", + expected_found: false, + }, + ]; + + // Cache readers to avoid reopening the same file multiple times + let mut readers: HashMap<&str, Reader>> = HashMap::new(); + + for test in &test_cases { + let reader = readers + .entry(test.db_file) + .or_insert_with(|| Reader::open_readfile(test.db_file).unwrap()); + + let ip: IpAddr = test.ip.parse().unwrap(); + let result = reader.lookup(ip).unwrap(); + + assert_eq!( + result.has_data(), + test.expected_found, + "IP {} in {}: expected has_data={}, got has_data={}", + test.ip, + test.db_file, + test.expected_found, + result.has_data() + ); + + let network = result.network().unwrap(); + assert_eq!( + 
network.to_string(), + test.expected_network, + "IP {} in {}: expected network {}, got {}", + test.ip, + test.db_file, + test.expected_network, + network + ); + } + } + + #[test] + fn test_lookup_with_geoip_data() { + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + assert!(result.has_data(), "lookup should find known IP"); + + // Decode the data + let city: geoip2::City = result.decode().unwrap().unwrap(); + assert!(!city.city.is_empty(), "Expected city data"); + + // Check full network (not just prefix) + let network = result.network().unwrap(); + assert_eq!( + network.to_string(), + "89.160.20.128/25", + "Expected network 89.160.20.128/25" + ); + + // Check offset is available for caching + assert!( + result.offset().is_some(), + "Expected offset to be Some for found IP" + ); + } + + #[test] + fn test_decode_path() { + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + + // Navigate to country.iso_code + let iso_code: Option = result + .decode_path(&[PathElement::Key("country"), PathElement::Key("iso_code")]) + .unwrap(); + assert_eq!(iso_code, Some("SE".to_owned())); + + // Navigate to non-existent path + let missing: Option = result + .decode_path(&[PathElement::Key("nonexistent")]) + .unwrap(); + assert!(missing.is_none()); + } + + #[test] + fn test_ipv6_in_ipv4_database() { + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-ipv4-24.mmdb").unwrap(); + let ip: IpAddr = "2001::".parse().unwrap(); + + let result = reader.lookup(ip); + match result { + Err(MaxMindDbError::InvalidInput { message }) => { + assert!( + message.contains("IPv6") && message.contains("IPv4"), + "Expected error message about IPv6 in IPv4 database, got: {}", + message + ); + } + Err(e) => 
panic!( + "Expected InvalidInput error for IPv6 in IPv4 database, got: {:?}", + e + ), + Ok(_) => panic!("Expected error for IPv6 lookup in IPv4-only database"), + } + } + + #[test] + fn test_decode_path_comprehensive() { + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); + let ip: IpAddr = "::1.1.1.0".parse().unwrap(); + + let result = reader.lookup(ip).unwrap(); + assert!(result.has_data()); + + // Test simple path: uint16 + let u16_val: Option = result.decode_path(&[PathElement::Key("uint16")]).unwrap(); + assert_eq!(u16_val, Some(100)); + + // Test array access: first element + let arr_first: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(0)]) + .unwrap(); + assert_eq!(arr_first, Some(1)); + + // Test array access: last element (index 2) + let arr_last: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(2)]) + .unwrap(); + assert_eq!(arr_last, Some(3)); + + // Test array access: out of bounds (index 3) returns None + let arr_oob: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::Index(3)]) + .unwrap(); + assert!(arr_oob.is_none()); + + // Test IndexFromEnd: 0 means last element + let arr_last: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::IndexFromEnd(0)]) + .unwrap(); + assert_eq!(arr_last, Some(3)); + + // Test IndexFromEnd: 2 means first element (array has 3 elements) + let arr_first: Option = result + .decode_path(&[PathElement::Key("array"), PathElement::IndexFromEnd(2)]) + .unwrap(); + assert_eq!(arr_first, Some(1)); + + // Test nested path: map.mapX.arrayX[1] + let nested: Option = result + .decode_path(&[ + PathElement::Key("map"), + PathElement::Key("mapX"), + PathElement::Key("arrayX"), + PathElement::Index(1), + ]) + .unwrap(); + assert_eq!(nested, Some(8)); + + // Test non-existent key returns None + let missing: Option = result + .decode_path(&[PathElement::Key("does-not-exist"), 
PathElement::Index(1)]) + .unwrap(); + assert!(missing.is_none()); + + // Test utf8_string path + let utf8: Option = result + .decode_path(&[PathElement::Key("utf8_string")]) + .unwrap(); + assert_eq!(utf8, Some("unicode! ☯ - ♫".to_owned())); + } +} diff --git a/src/maxminddb/decoder.rs b/src/maxminddb/decoder.rs deleted file mode 100644 index 745079c8..00000000 --- a/src/maxminddb/decoder.rs +++ /dev/null @@ -1,430 +0,0 @@ -use log::debug; -use serde::de::{self, DeserializeSeed, MapAccess, SeqAccess, Visitor}; -use serde::forward_to_deserialize_any; -use std::convert::TryInto; - -use super::MaxMindDbError; - -fn to_usize(base: u8, bytes: &[u8]) -> usize { - bytes - .iter() - .fold(base as usize, |acc, &b| (acc << 8) | b as usize) -} - -enum Value<'a, 'de> { - Any { prev_ptr: usize }, - Bytes(&'de [u8]), - String(&'de str), - Bool(bool), - I32(i32), - U16(u16), - U32(u32), - U64(u64), - U128(u128), - F64(f64), - F32(f32), - Map(MapAccessor<'a, 'de>), - Array(ArrayAccess<'a, 'de>), -} - -#[derive(Debug)] -pub struct Decoder<'de> { - buf: &'de [u8], - current_ptr: usize, -} - -impl<'de> Decoder<'de> { - pub fn new(buf: &'de [u8], start_ptr: usize) -> Decoder<'de> { - Decoder { - buf, - current_ptr: start_ptr, - } - } - - #[inline(always)] - fn eat_byte(&mut self) -> u8 { - let b = self.buf[self.current_ptr]; - self.current_ptr += 1; - b - } - - #[inline(always)] - fn size_from_ctrl_byte(&mut self, ctrl_byte: u8, type_num: u8) -> usize { - let size = (ctrl_byte & 0x1f) as usize; - // extended - if type_num == 0 { - return size; - } - - let bytes_to_read = size.saturating_sub(28); - - let new_offset = self.current_ptr + bytes_to_read; - let size_bytes = &self.buf[self.current_ptr..new_offset]; - self.current_ptr = new_offset; - - match size { - s if s < 29 => s, - 29 => 29_usize + size_bytes[0] as usize, - 30 => 285_usize + to_usize(0, size_bytes), - _ => 65_821_usize + to_usize(0, size_bytes), - } - } - - #[inline(always)] - fn size_and_type(&mut self) -> (usize, u8) 
{ - let ctrl_byte = self.eat_byte(); - let mut type_num = ctrl_byte >> 5; - // Extended type - if type_num == 0 { - type_num = self.eat_byte() + 7; - } - let size = self.size_from_ctrl_byte(ctrl_byte, type_num); - (size, type_num) - } - - fn decode_any>(&mut self, visitor: V) -> DecodeResult { - match self.decode_any_value()? { - Value::Any { prev_ptr } => { - let res = self.decode_any(visitor); - self.current_ptr = prev_ptr; - res - } - Value::Bool(x) => visitor.visit_bool(x), - Value::Bytes(x) => visitor.visit_borrowed_bytes(x), - Value::String(x) => visitor.visit_borrowed_str(x), - Value::I32(x) => visitor.visit_i32(x), - Value::U16(x) => visitor.visit_u16(x), - Value::U32(x) => visitor.visit_u32(x), - Value::U64(x) => visitor.visit_u64(x), - Value::U128(x) => visitor.visit_u128(x), - Value::F64(x) => visitor.visit_f64(x), - Value::F32(x) => visitor.visit_f32(x), - Value::Map(x) => visitor.visit_map(x), - Value::Array(x) => visitor.visit_seq(x), - } - } - - #[inline(always)] - fn decode_any_value(&mut self) -> DecodeResult> { - let (size, type_num) = self.size_and_type(); - - Ok(match type_num { - 1 => { - let new_ptr = self.decode_pointer(size); - let prev_ptr = self.current_ptr; - self.current_ptr = new_ptr; - - Value::Any { prev_ptr } - } - 2 => Value::String(self.decode_string(size)?), - 3 => Value::F64(self.decode_double(size)?), - 4 => Value::Bytes(self.decode_bytes(size)?), - 5 => Value::U16(self.decode_uint16(size)?), - 6 => Value::U32(self.decode_uint32(size)?), - 7 => self.decode_map(size), - 8 => Value::I32(self.decode_int(size)?), - 9 => Value::U64(self.decode_uint64(size)?), - 10 => Value::U128(self.decode_uint128(size)?), - 11 => self.decode_array(size), - 14 => Value::Bool(self.decode_bool(size)?), - 15 => Value::F32(self.decode_float(size)?), - u => { - return Err(MaxMindDbError::InvalidDatabase(format!( - "Unknown data type: {u:?}" - ))) - } - }) - } - - fn decode_array(&mut self, size: usize) -> Value<'_, 'de> { - Value::Array(ArrayAccess { - 
de: self, - count: size, - }) - } - - fn decode_bool(&mut self, size: usize) -> DecodeResult { - match size { - 0 | 1 => Ok(size != 0), - s => Err(MaxMindDbError::InvalidDatabase(format!( - "bool of size {s:?}" - ))), - } - } - - fn decode_bytes(&mut self, size: usize) -> DecodeResult<&'de [u8]> { - let new_offset = self.current_ptr + size; - let u8_slice = &self.buf[self.current_ptr..new_offset]; - self.current_ptr = new_offset; - - Ok(u8_slice) - } - - fn decode_float(&mut self, size: usize) -> DecodeResult { - let new_offset = self.current_ptr + size; - let value: [u8; 4] = self.buf[self.current_ptr..new_offset] - .try_into() - .map_err(|_| { - MaxMindDbError::InvalidDatabase(format!( - "float of size {:?}", - new_offset - self.current_ptr - )) - })?; - self.current_ptr = new_offset; - let float_value = f32::from_be_bytes(value); - Ok(float_value) - } - - fn decode_double(&mut self, size: usize) -> DecodeResult { - let new_offset = self.current_ptr + size; - let value: [u8; 8] = self.buf[self.current_ptr..new_offset] - .try_into() - .map_err(|_| { - MaxMindDbError::InvalidDatabase(format!( - "double of size {:?}", - new_offset - self.current_ptr - )) - })?; - self.current_ptr = new_offset; - let float_value = f64::from_be_bytes(value); - Ok(float_value) - } - - fn decode_uint64(&mut self, size: usize) -> DecodeResult { - match size { - s if s <= 8 => { - let new_offset = self.current_ptr + size; - - let value = self.buf[self.current_ptr..new_offset] - .iter() - .fold(0_u64, |acc, &b| (acc << 8) | u64::from(b)); - self.current_ptr = new_offset; - Ok(value) - } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "u64 of size {s:?}" - ))), - } - } - - fn decode_uint128(&mut self, size: usize) -> DecodeResult { - match size { - s if s <= 16 => { - let new_offset = self.current_ptr + size; - - let value = self.buf[self.current_ptr..new_offset] - .iter() - .fold(0_u128, |acc, &b| (acc << 8) | u128::from(b)); - self.current_ptr = new_offset; - Ok(value) - } - s => 
Err(MaxMindDbError::InvalidDatabase(format!( - "u128 of size {s:?}" - ))), - } - } - - fn decode_uint32(&mut self, size: usize) -> DecodeResult { - match size { - s if s <= 4 => { - let new_offset = self.current_ptr + size; - - let value = self.buf[self.current_ptr..new_offset] - .iter() - .fold(0_u32, |acc, &b| (acc << 8) | u32::from(b)); - self.current_ptr = new_offset; - Ok(value) - } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "u32 of size {s:?}" - ))), - } - } - - fn decode_uint16(&mut self, size: usize) -> DecodeResult { - match size { - s if s <= 2 => { - let new_offset = self.current_ptr + size; - - let value = self.buf[self.current_ptr..new_offset] - .iter() - .fold(0_u16, |acc, &b| (acc << 8) | u16::from(b)); - self.current_ptr = new_offset; - Ok(value) - } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "u16 of size {s:?}" - ))), - } - } - - fn decode_int(&mut self, size: usize) -> DecodeResult { - match size { - s if s <= 4 => { - let new_offset = self.current_ptr + size; - - let value = self.buf[self.current_ptr..new_offset] - .iter() - .fold(0_i32, |acc, &b| (acc << 8) | i32::from(b)); - self.current_ptr = new_offset; - Ok(value) - } - s => Err(MaxMindDbError::InvalidDatabase(format!( - "int32 of size {s:?}" - ))), - } - } - - fn decode_map(&mut self, size: usize) -> Value<'_, 'de> { - Value::Map(MapAccessor { - de: self, - count: size * 2, - }) - } - - fn decode_pointer(&mut self, size: usize) -> usize { - let pointer_value_offset = [0, 0, 2048, 526_336, 0]; - let pointer_size = ((size >> 3) & 0x3) + 1; - let new_offset = self.current_ptr + pointer_size; - let pointer_bytes = &self.buf[self.current_ptr..new_offset]; - self.current_ptr = new_offset; - - let base = if pointer_size == 4 { - 0 - } else { - (size & 0x7) as u8 - }; - let unpacked = to_usize(base, pointer_bytes); - - unpacked + pointer_value_offset[pointer_size] - } - - #[cfg(feature = "unsafe-str-decode")] - fn decode_string(&mut self, size: usize) -> DecodeResult<&'de str> 
{ - use std::str::from_utf8_unchecked; - - let new_offset: usize = self.current_ptr + size; - let bytes = &self.buf[self.current_ptr..new_offset]; - self.current_ptr = new_offset; - // SAFETY: - // A corrupt maxminddb will cause undefined behaviour. - // If the caller has verified the integrity of their database and trusts their upstream - // provider, they can opt-into the performance gains provided by this unsafe function via - // the `unsafe-str-decode` feature flag. - // This can provide around 20% performance increase in the lookup benchmark. - let v = unsafe { from_utf8_unchecked(bytes) }; - Ok(v) - } - - #[cfg(not(feature = "unsafe-str-decode"))] - fn decode_string(&mut self, size: usize) -> DecodeResult<&'de str> { - #[cfg(feature = "simdutf8")] - use simdutf8::basic::from_utf8; - #[cfg(not(feature = "simdutf8"))] - use std::str::from_utf8; - - let new_offset: usize = self.current_ptr + size; - let bytes = &self.buf[self.current_ptr..new_offset]; - self.current_ptr = new_offset; - match from_utf8(bytes) { - Ok(v) => Ok(v), - Err(_) => Err(MaxMindDbError::InvalidDatabase( - "error decoding string".to_owned(), - )), - } - } -} - -pub type DecodeResult = Result; - -impl<'de: 'a, 'a> de::Deserializer<'de> for &'a mut Decoder<'de> { - type Error = MaxMindDbError; - - fn deserialize_any(self, visitor: V) -> DecodeResult - where - V: Visitor<'de>, - { - debug!("deserialize_any"); - - self.decode_any(visitor) - } - - fn deserialize_option(self, visitor: V) -> DecodeResult - where - V: Visitor<'de>, - { - debug!("deserialize_option"); - - visitor.visit_some(self) - } - - forward_to_deserialize_any! 
{ - bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string - bytes byte_buf unit unit_struct newtype_struct seq tuple - tuple_struct map struct enum identifier ignored_any - } -} - -struct ArrayAccess<'a, 'de: 'a> { - de: &'a mut Decoder<'de>, - count: usize, -} - -// `SeqAccess` is provided to the `Visitor` to give it the ability to iterate -// through elements of the sequence. -impl<'de> SeqAccess<'de> for ArrayAccess<'_, 'de> { - type Error = MaxMindDbError; - - fn next_element_seed(&mut self, seed: T) -> DecodeResult> - where - T: DeserializeSeed<'de>, - { - // Check if there are no more elements. - if self.count == 0 { - return Ok(None); - } - self.count -= 1; - - // Deserialize an array element. - seed.deserialize(&mut *self.de).map(Some) - } -} - -struct MapAccessor<'a, 'de: 'a> { - de: &'a mut Decoder<'de>, - count: usize, -} - -// `MapAccess` is provided to the `Visitor` to give it the ability to iterate -// through entries of the map. -impl<'de> MapAccess<'de> for MapAccessor<'_, 'de> { - type Error = MaxMindDbError; - - fn next_key_seed(&mut self, seed: K) -> DecodeResult> - where - K: DeserializeSeed<'de>, - { - // Check if there are no more entries. - if self.count == 0 { - return Ok(None); - } - self.count -= 1; - - // Deserialize a map key. - seed.deserialize(&mut *self.de).map(Some) - } - - fn next_value_seed(&mut self, seed: V) -> DecodeResult - where - V: DeserializeSeed<'de>, - { - // Check if there are no more entries. - if self.count == 0 { - return Err(MaxMindDbError::Decoding("no more entries".to_owned())); - } - self.count -= 1; - - // Deserialize a map value. - seed.deserialize(&mut *self.de) - } -} diff --git a/src/maxminddb/geoip2.rs b/src/maxminddb/geoip2.rs deleted file mode 100644 index edb44fe4..00000000 --- a/src/maxminddb/geoip2.rs +++ /dev/null @@ -1,390 +0,0 @@ -//! GeoIP2 and GeoLite2 database record structures -//! -//! This module provides strongly-typed Rust structures that correspond to the -//! 
various GeoIP2 and GeoLite2 database record formats. -//! -//! # Record Types -//! -//! - [`City`] - Complete city-level geolocation data (most comprehensive) -//! - [`Country`] - Country-level geolocation data -//! - [`Enterprise`] - Enterprise database with additional confidence scores -//! - [`Isp`] - Internet Service Provider information -//! - [`AnonymousIp`] - Anonymous proxy and VPN detection -//! - [`ConnectionType`] - Connection type classification -//! - [`Domain`] - Domain information -//! - [`Asn`] - Autonomous System Number data -//! - [`DensityIncome`] - Population density and income data -//! -//! # Usage Examples -//! -//! ```rust -//! use maxminddb::{Reader, geoip2}; -//! use std::net::IpAddr; -//! -//! # fn main() -> Result<(), maxminddb::MaxMindDbError> { -//! let reader = Reader::open_readfile( -//! "test-data/test-data/GeoIP2-City-Test.mmdb")?; -//! let ip: IpAddr = "89.160.20.128".parse().unwrap(); -//! -//! // City lookup (most common) -//! if let Some(city) = reader.lookup::(ip)? { -//! if let Some(city_names) = city.city.and_then(|c| c.names) { -//! if let Some(city_name) = city_names.get("en") { -//! println!("City: {}", city_name); -//! } -//! } -//! if let Some(country_code) = city.country.and_then(|c| c.iso_code) { -//! println!("Country: {}", country_code); -//! } -//! } -//! -//! // Country-only lookup (smaller/faster) -//! if let Some(country) = reader.lookup::(ip)? { -//! if let Some(country_names) = country.country.and_then(|c| c.names) { -//! if let Some(country_name) = country_names.get("en") { -//! println!("Country: {}", country_name); -//! } -//! } -//! } -//! # Ok(()) -//! # } -//! 
``` - -use serde::{Deserialize, Serialize}; - -/// GeoIP2 Country record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Country<'a> { - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub continent: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub registered_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub represented_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub traits: Option, -} - -/// GeoIP2 City record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct City<'a> { - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub city: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub continent: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub location: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub postal: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub registered_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub represented_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub subdivisions: Option>>, - #[serde(skip_serializing_if = "Option::is_none")] - pub traits: Option, -} - -/// GeoIP2 Enterprise record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Enterprise<'a> { - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub city: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub continent: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub location: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub postal: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub registered_country: Option>, - 
#[serde(skip_serializing_if = "Option::is_none")] - pub represented_country: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub subdivisions: Option>>, - #[serde(skip_serializing_if = "Option::is_none")] - pub traits: Option>, -} - -/// GeoIP2 ISP record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Isp<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub autonomous_system_number: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub autonomous_system_organization: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub isp: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub mobile_country_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub mobile_network_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub organization: Option<&'a str>, -} - -/// GeoIP2 Connection-Type record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct ConnectionType<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub connection_type: Option<&'a str>, -} - -/// GeoIP2 Anonymous Ip record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct AnonymousIp { - #[serde(skip_serializing_if = "Option::is_none")] - pub is_anonymous: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_anonymous_vpn: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_hosting_provider: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_public_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_residential_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_tor_exit_node: Option, -} - -/// GeoIP2 DensityIncome record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct DensityIncome { - #[serde(skip_serializing_if = "Option::is_none")] - pub average_income: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub 
population_density: Option, -} - -/// GeoIP2 Domain record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Domain<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub domain: Option<&'a str>, -} - -/// GeoIP2 Asn record -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Asn<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub autonomous_system_number: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub autonomous_system_organization: Option<&'a str>, -} - -/// Country model structs -pub mod country { - use serde::{Deserialize, Serialize}; - use std::collections::BTreeMap; - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Continent<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Country<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_in_european_union: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct RepresentedCountry<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_in_european_union: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - #[serde(rename = "type")] - #[serde(skip_serializing_if = "Option::is_none")] - pub representation_type: Option<&'a str>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Traits { - 
#[serde(skip_serializing_if = "Option::is_none")] - pub is_anonymous_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_anycast: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_satellite_provider: Option, - } -} - -/// City model structs -pub mod city { - use serde::{Deserialize, Serialize}; - use std::collections::BTreeMap; - - pub use super::country::{Continent, Country, RepresentedCountry, Traits}; - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct City<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Location<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub accuracy_radius: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub latitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub longitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub metro_code: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub time_zone: Option<&'a str>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Postal<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub code: Option<&'a str>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Subdivision<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - } -} - -/// Enterprise model structs -pub mod enterprise { - use serde::{Deserialize, Serialize}; - use std::collections::BTreeMap; - - pub use super::country::{Continent, RepresentedCountry}; - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct City<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub 
confidence: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(borrow)] - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Country<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub confidence: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_in_european_union: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Location<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub accuracy_radius: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub latitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub longitude: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub metro_code: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub time_zone: Option<&'a str>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Postal<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub confidence: Option, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Subdivision<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub confidence: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub geoname_id: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub iso_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub names: Option>, - } - - #[derive(Deserialize, Serialize, Clone, Debug)] - pub struct Traits<'a> { - #[serde(skip_serializing_if = "Option::is_none")] - pub autonomous_system_number: Option, - 
#[serde(skip_serializing_if = "Option::is_none")] - pub autonomous_system_organization: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub connection_type: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub domain: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_anonymous: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_anonymous_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_anonymous_vpn: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_anycast: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_hosting_provider: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub isp: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_public_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_residential_proxy: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_satellite_provider: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub is_tor_exit_node: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub mobile_country_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub mobile_network_code: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub organization: Option<&'a str>, - #[serde(skip_serializing_if = "Option::is_none")] - pub user_type: Option<&'a str>, - } -} diff --git a/src/maxminddb/lib.rs b/src/maxminddb/lib.rs deleted file mode 100644 index 959cb7e4..00000000 --- a/src/maxminddb/lib.rs +++ /dev/null @@ -1,802 +0,0 @@ -#![deny(trivial_casts, trivial_numeric_casts, unused_import_braces)] -//! # MaxMind DB Reader -//! -//! This library reads the MaxMind DB format, including the GeoIP2 and GeoLite2 databases. -//! -//! ## Features -//! -//! This crate provides several optional features for performance and functionality: -//! -//! 
- **`mmap`** (default: disabled): Enable memory-mapped file access for -//! better performance in long-running applications -//! - **`simdutf8`** (default: disabled): Use SIMD instructions for faster -//! UTF-8 validation during string decoding -//! - **`unsafe-str-decode`** (default: disabled): Skip UTF-8 validation -//! entirely for maximum performance (~20% faster lookups) -//! -//! **Note**: `simdutf8` and `unsafe-str-decode` are mutually exclusive. -//! -//! ## Database Compatibility -//! -//! This library supports all MaxMind DB format databases: -//! - **GeoIP2** databases (City, Country, Enterprise, ISP, etc.) -//! - **GeoLite2** databases (free versions) -//! - Custom MaxMind DB format databases -//! -//! ## Thread Safety -//! -//! The `Reader` is `Send` and `Sync`, making it safe to share across threads. -//! This makes it ideal for web servers and other concurrent applications. -//! -//! ## Quick Start -//! -//! ```rust -//! use maxminddb::{Reader, geoip2}; -//! use std::net::IpAddr; -//! -//! fn main() -> Result<(), Box> { -//! // Open database file -//! # let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb")?; -//! # /* -//! let reader = Reader::open_readfile("/path/to/GeoIP2-City.mmdb")?; -//! # */ -//! -//! // Look up an IP address -//! let ip: IpAddr = "89.160.20.128".parse()?; -//! if let Some(city) = reader.lookup::(ip)? { -//! if let Some(country) = city.country { -//! println!("Country: {}", country.iso_code.unwrap_or("Unknown")); -//! } -//! } -//! -//! Ok(()) -//! } -//! 
``` - -use std::cmp::Ordering; -use std::collections::BTreeMap; -use std::fmt::Display; -use std::fs; -use std::io; -use std::marker::PhantomData; -use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; -use std::path::Path; - -use ipnetwork::{IpNetwork, IpNetworkError}; -use serde::{de, Deserialize, Serialize}; -use thiserror::Error; - -#[cfg(feature = "mmap")] -pub use memmap2::Mmap; -#[cfg(feature = "mmap")] -use memmap2::MmapOptions; -#[cfg(feature = "mmap")] -use std::fs::File; - -#[cfg(all(feature = "simdutf8", feature = "unsafe-str-decode"))] -compile_error!("features `simdutf8` and `unsafe-str-decode` are mutually exclusive"); - -#[derive(Error, Debug)] -pub enum MaxMindDbError { - #[error("Invalid database: {0}")] - InvalidDatabase(String), - - #[error("I/O error: {0}")] - Io( - #[from] - #[source] - io::Error, - ), - - #[cfg(feature = "mmap")] - #[error("Memory map error: {0}")] - Mmap(#[source] io::Error), - - #[error("Decoding error: {0}")] - Decoding(String), - - #[error("Invalid network: {0}")] - InvalidNetwork( - #[from] - #[source] - IpNetworkError, - ), -} - -impl de::Error for MaxMindDbError { - fn custom(msg: T) -> Self { - MaxMindDbError::Decoding(format!("{msg}")) - } -} - -#[derive(Deserialize, Serialize, Clone, Debug)] -pub struct Metadata { - pub binary_format_major_version: u16, - pub binary_format_minor_version: u16, - pub build_epoch: u64, - pub database_type: String, - pub description: BTreeMap, - pub ip_version: u16, - pub languages: Vec, - pub node_count: u32, - pub record_size: u16, -} - -#[derive(Debug)] -struct WithinNode { - node: usize, - ip_int: IpInt, - prefix_len: usize, -} - -#[derive(Debug)] -pub struct Within<'de, T: Deserialize<'de>, S: AsRef<[u8]>> { - reader: &'de Reader, - node_count: usize, - stack: Vec, - phantom: PhantomData<&'de T>, -} - -#[derive(Debug)] -pub struct WithinItem { - pub ip_net: IpNetwork, - pub info: T, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum IpInt { - V4(u32), - V6(u128), -} - -impl IpInt { - 
fn new(ip_addr: IpAddr) -> Self { - match ip_addr { - IpAddr::V4(v4) => IpInt::V4(v4.into()), - IpAddr::V6(v6) => IpInt::V6(v6.into()), - } - } - - #[inline(always)] - fn get_bit(&self, index: usize) -> bool { - match self { - IpInt::V4(ip) => (ip >> (31 - index)) & 1 == 1, - IpInt::V6(ip) => (ip >> (127 - index)) & 1 == 1, - } - } - - fn bit_count(&self) -> usize { - match self { - IpInt::V4(_) => 32, - IpInt::V6(_) => 128, - } - } - - fn is_ipv4_in_ipv6(&self) -> bool { - match self { - IpInt::V4(_) => false, - IpInt::V6(ip) => *ip <= 0xFFFFFFFF, - } - } -} - -impl<'de, T: Deserialize<'de>, S: AsRef<[u8]>> Iterator for Within<'de, T, S> { - type Item = Result, MaxMindDbError>; - - fn next(&mut self) -> Option { - while let Some(current) = self.stack.pop() { - let bit_count = current.ip_int.bit_count(); - - // Skip networks that are aliases for the IPv4 network - if self.reader.ipv4_start != 0 - && current.node == self.reader.ipv4_start - && bit_count == 128 - && !current.ip_int.is_ipv4_in_ipv6() - { - continue; - } - - match current.node.cmp(&self.node_count) { - Ordering::Greater => { - // This is a data node, emit it and we're done (until the following next call) - let ip_net = - match bytes_and_prefix_to_net(¤t.ip_int, current.prefix_len as u8) { - Ok(ip_net) => ip_net, - Err(e) => return Some(Err(e)), - }; - - // Call the new helper method to decode data - return match self.reader.decode_data_at_pointer(current.node) { - Ok(info) => Some(Ok(WithinItem { ip_net, info })), - Err(e) => Some(Err(e)), - }; - } - Ordering::Equal => { - // Dead end, nothing to do - } - Ordering::Less => { - // In order traversal of our children - // right/1-bit - let mut right_ip_int = current.ip_int; - - if current.prefix_len < bit_count { - let bit = current.prefix_len; - match &mut right_ip_int { - IpInt::V4(ip) => *ip |= 1 << (31 - bit), - IpInt::V6(ip) => *ip |= 1 << (127 - bit), - }; - } - - let node = match self.reader.read_node(current.node, 1) { - Ok(node) => node, - Err(e) 
=> return Some(Err(e)), - }; - self.stack.push(WithinNode { - node, - ip_int: right_ip_int, - prefix_len: current.prefix_len + 1, - }); - // left/0-bit - let node = match self.reader.read_node(current.node, 0) { - Ok(node) => node, - Err(e) => return Some(Err(e)), - }; - self.stack.push(WithinNode { - node, - ip_int: current.ip_int, - prefix_len: current.prefix_len + 1, - }); - } - } - } - None - } -} - -/// A reader for the MaxMind DB format. The lifetime `'data` is tied to the -/// lifetime of the underlying buffer holding the contents of the database file. -/// -/// The `Reader` supports both file-based and memory-mapped access to MaxMind -/// DB files, including GeoIP2 and GeoLite2 databases. -/// -/// # Features -/// -/// - **`mmap`**: Enable memory-mapped file access for better performance -/// - **`simdutf8`**: Use SIMD-accelerated UTF-8 validation (faster string -/// decoding) -/// - **`unsafe-str-decode`**: Skip UTF-8 validation entirely (unsafe, but -/// ~20% faster) -#[derive(Debug)] -pub struct Reader> { - buf: S, - pub metadata: Metadata, - ipv4_start: usize, - pointer_base: usize, -} - -#[cfg(feature = "mmap")] -impl Reader { - /// Open a MaxMind DB database file by memory mapping it. - /// - /// # Example - /// - /// ``` - /// # #[cfg(feature = "mmap")] - /// # { - /// let reader = maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// # } - /// ``` - pub fn open_mmap>(database: P) -> Result, MaxMindDbError> { - let file_read = File::open(database)?; - let mmap = unsafe { MmapOptions::new().map(&file_read) }.map_err(MaxMindDbError::Mmap)?; - Reader::from_source(mmap) - } -} - -impl Reader> { - /// Open a MaxMind DB database file by loading it into memory. 
- /// - /// # Example - /// - /// ``` - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// ``` - pub fn open_readfile>(database: P) -> Result>, MaxMindDbError> { - let buf: Vec = fs::read(&database)?; // IO error converted via #[from] - Reader::from_source(buf) - } -} - -impl<'de, S: AsRef<[u8]>> Reader { - /// Open a MaxMind DB database from anything that implements AsRef<[u8]> - /// - /// # Example - /// - /// ``` - /// use std::fs; - /// let buf = fs::read("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// let reader = maxminddb::Reader::from_source(buf).unwrap(); - /// ``` - pub fn from_source(buf: S) -> Result, MaxMindDbError> { - let data_section_separator_size = 16; - - let metadata_start = find_metadata_start(buf.as_ref())?; - let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0); - let metadata = Metadata::deserialize(&mut type_decoder)?; - - let search_tree_size = (metadata.node_count as usize) * (metadata.record_size as usize) / 4; - - let mut reader = Reader { - buf, - pointer_base: search_tree_size + data_section_separator_size, - metadata, - ipv4_start: 0, - }; - reader.ipv4_start = reader.find_ipv4_start()?; - - Ok(reader) - } - - /// Lookup the socket address in the opened MaxMind DB. - /// Returns `Ok(None)` if the address is not found in the database. - /// - /// # Examples - /// - /// Basic city lookup: - /// ``` - /// # use maxminddb::geoip2; - /// # use std::net::IpAddr; - /// # use std::str::FromStr; - /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; - /// - /// let ip: IpAddr = FromStr::from_str("89.160.20.128").unwrap(); - /// match reader.lookup::(ip)? 
{ - /// Some(city) => { - /// if let Some(city_names) = city.city.and_then(|c| c.names) { - /// if let Some(name) = city_names.get("en") { - /// println!("City: {}", name); - /// } - /// } - /// if let Some(country) = city.country.and_then(|c| c.iso_code) { - /// println!("Country: {}", country); - /// } - /// } - /// None => println!("No data found for IP {}", ip), - /// } - /// # Ok(()) - /// # } - /// ``` - /// - /// Lookup with different record types: - /// ``` - /// # use maxminddb::geoip2; - /// # use std::net::IpAddr; - /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; - /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); - /// - /// // Different record types for the same IP - /// let city: Option = reader.lookup(ip)?; - /// let country: Option = reader.lookup(ip)?; - /// - /// println!("City data available: {}", city.is_some()); - /// println!("Country data available: {}", country.is_some()); - /// # Ok(()) - /// # } - /// ``` - pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> - where - T: Deserialize<'de>, - { - self.lookup_prefix(address) - .map(|(option_value, _prefix_len)| option_value) - } - - /// Lookup the socket address in the opened MaxMind DB, returning the found value (if any) - /// and the prefix length of the network associated with the lookup. - /// - /// Returns `Ok((None, prefix_len))` if the address is found in the tree but has no data record. - /// Returns `Err(...)` for database errors (IO, corruption, decoding). 
- /// - /// Example: - /// - /// ``` - /// # use maxminddb::geoip2; - /// # use std::net::IpAddr; - /// # use std::str::FromStr; - /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; - /// - /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); // Known IP - /// let ip_unknown: IpAddr = "10.0.0.1".parse().unwrap(); // Unknown IP - /// - /// let (city_option, prefix_len) = reader.lookup_prefix::(ip)?; - /// if let Some(city) = city_option { - /// println!("Found {:?} at prefix length {}", city.city.unwrap().names.unwrap().get("en").unwrap(), prefix_len); - /// } else { - /// // This case is less likely with lookup_prefix if the IP resolves in the tree - /// println!("IP found in tree but no data (prefix_len: {})", prefix_len); - /// } - /// - /// let (city_option_unknown, prefix_len_unknown) = reader.lookup_prefix::(ip_unknown)?; - /// assert!(city_option_unknown.is_none()); - /// println!("Unknown IP resolved to prefix_len: {}", prefix_len_unknown); - /// # Ok(()) - /// # } - /// ``` - pub fn lookup_prefix( - &'de self, - address: IpAddr, - ) -> Result<(Option, usize), MaxMindDbError> - where - T: Deserialize<'de>, - { - let ip_int = IpInt::new(address); - // find_address_in_tree returns Result<(usize, usize), MaxMindDbError> -> (pointer, prefix_len) - let (pointer, prefix_len) = self.find_address_in_tree(&ip_int)?; - - if pointer == 0 { - // If pointer is 0, it signifies no data record was associated during tree traversal. - // Return None for the data, but include the calculated prefix_len. 
- return Ok((None, prefix_len)); - } - - // If pointer > 0, attempt to resolve and decode data using the helper method - match self.decode_data_at_pointer(pointer) { - Ok(value) => Ok((Some(value), prefix_len)), - Err(e) => Err(e), - } - } - - /// Iterate over blocks of IP networks in the opened MaxMind DB - /// - /// This method returns an iterator that yields all IP network blocks that - /// fall within the specified CIDR range and have associated data in the - /// database. - /// - /// # Examples - /// - /// Iterate over all IPv4 networks: - /// ``` - /// use ipnetwork::IpNetwork; - /// use maxminddb::{geoip2, Within}; - /// - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// - /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap()); - /// let mut count = 0; - /// for result in reader.within::(ipv4_all).unwrap() { - /// let item = result.unwrap(); - /// let city_name = item.info.city.as_ref().and_then(|c| c.names.as_ref()).and_then(|n| n.get("en")); - /// println!("Network: {}, City: {:?}", item.ip_net, city_name); - /// count += 1; - /// if count >= 10 { break; } // Limit output for example - /// } - /// ``` - /// - /// Search within a specific subnet: - /// ``` - /// use ipnetwork::IpNetwork; - /// use maxminddb::geoip2; - /// - /// let reader = maxminddb::Reader::open_readfile( - /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - /// - /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap()); - /// match reader.within::(subnet) { - /// Ok(iter) => { - /// for result in iter { - /// match result { - /// Ok(item) => println!("Found: {}", item.ip_net), - /// Err(e) => eprintln!("Error processing item: {}", e), - /// } - /// } - /// } - /// Err(e) => eprintln!("Failed to create iterator: {}", e), - /// } - /// ``` - pub fn within(&'de self, cidr: IpNetwork) -> Result, MaxMindDbError> - where - T: Deserialize<'de>, - { - let ip_address = cidr.network(); - let prefix_len = 
cidr.prefix() as usize; - let ip_int = IpInt::new(ip_address); - let bit_count = ip_int.bit_count(); - - let mut node = self.start_node(bit_count); - let node_count = self.metadata.node_count as usize; - - let mut stack: Vec = Vec::with_capacity(bit_count - prefix_len); - - // Traverse down the tree to the level that matches the cidr mark - let mut i = 0_usize; - while i < prefix_len { - let bit = ip_int.get_bit(i); - node = self.read_node(node, bit as usize)?; - if node >= node_count { - // We've hit a dead end before we exhausted our prefix - break; - } - - i += 1; - } - - if node < node_count { - // Ok, now anything that's below node in the tree is "within", start with the node we - // traversed to as our to be processed stack. - stack.push(WithinNode { - node, - ip_int, - prefix_len, - }); - } - // else the stack will be empty and we'll be returning an iterator that visits nothing, - // which makes sense. - - let within: Within = Within { - reader: self, - node_count, - stack, - phantom: PhantomData, - }; - - Ok(within) - } - - fn find_address_in_tree(&self, ip_int: &IpInt) -> Result<(usize, usize), MaxMindDbError> { - let bit_count = ip_int.bit_count(); - let mut node = self.start_node(bit_count); - - let node_count = self.metadata.node_count as usize; - let mut prefix_len = bit_count; - - for i in 0..bit_count { - if node >= node_count { - prefix_len = i; - break; - } - let bit = ip_int.get_bit(i); - node = self.read_node(node, bit as usize)?; - } - match node_count { - // If node == node_count, it means we hit the placeholder "empty" node - // return 0 as the pointer value to signify "not found". 
- n if n == node => Ok((0, prefix_len)), - n if node > n => Ok((node, prefix_len)), - _ => Err(MaxMindDbError::InvalidDatabase( - "invalid node in search tree".to_owned(), - )), - } - } - - fn start_node(&self, length: usize) -> usize { - if length == 128 { - 0 - } else { - self.ipv4_start - } - } - - fn find_ipv4_start(&self) -> Result { - if self.metadata.ip_version != 6 { - return Ok(0); - } - - // We are looking up an IPv4 address in an IPv6 tree. Skip over the - // first 96 nodes. - let mut node: usize = 0_usize; - for _ in 0_u8..96 { - if node >= self.metadata.node_count as usize { - break; - } - node = self.read_node(node, 0)?; - } - Ok(node) - } - - #[inline(always)] - fn read_node(&self, node_number: usize, index: usize) -> Result { - let buf = self.buf.as_ref(); - let base_offset = node_number * (self.metadata.record_size as usize) / 4; - - let val = match self.metadata.record_size { - 24 => { - let offset = base_offset + index * 3; - to_usize(0, &buf[offset..offset + 3]) - } - 28 => { - let mut middle = buf[base_offset + 3]; - if index != 0 { - middle &= 0x0F - } else { - middle = (0xF0 & middle) >> 4 - } - let offset = base_offset + index * 4; - to_usize(middle, &buf[offset..offset + 3]) - } - 32 => { - let offset = base_offset + index * 4; - to_usize(0, &buf[offset..offset + 4]) - } - s => { - return Err(MaxMindDbError::InvalidDatabase(format!( - "unknown record size: \ - {s:?}" - ))) - } - }; - Ok(val) - } - - /// Resolves a pointer from the search tree to an offset in the data section. 
- fn resolve_data_pointer(&self, pointer: usize) -> Result { - let resolved = pointer - (self.metadata.node_count as usize) - 16; - - // Check bounds using pointer_base which marks the start of the data section - if resolved >= (self.buf.as_ref().len() - self.pointer_base) { - return Err(MaxMindDbError::InvalidDatabase( - "the MaxMind DB file's data pointer resolves to an invalid location".to_owned(), - )); - } - - Ok(resolved) - } - - /// Decodes data at the given pointer offset. - /// Assumes the pointer is valid and points to the data section. - fn decode_data_at_pointer(&'de self, pointer: usize) -> Result - where - T: Deserialize<'de>, - { - let resolved_offset = self.resolve_data_pointer(pointer)?; - let mut decoder = - decoder::Decoder::new(&self.buf.as_ref()[self.pointer_base..], resolved_offset); - T::deserialize(&mut decoder) - } -} - -// I haven't moved all patterns of this form to a generic function as -// the FromPrimitive trait is unstable -#[inline(always)] -fn to_usize(base: u8, bytes: &[u8]) -> usize { - bytes - .iter() - .fold(base as usize, |acc, &b| (acc << 8) | b as usize) -} - -#[inline] -fn bytes_and_prefix_to_net(bytes: &IpInt, prefix: u8) -> Result { - let (ip, prefix) = match bytes { - IpInt::V4(ip) => (IpAddr::V4(Ipv4Addr::from(*ip)), prefix), - IpInt::V6(ip) if bytes.is_ipv4_in_ipv6() => { - (IpAddr::V4(Ipv4Addr::from(*ip as u32)), prefix - 96) - } - IpInt::V6(ip) => (IpAddr::V6(Ipv6Addr::from(*ip)), prefix), - }; - IpNetwork::new(ip, prefix).map_err(MaxMindDbError::InvalidNetwork) -} - -fn find_metadata_start(buf: &[u8]) -> Result { - const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com"; - - memchr::memmem::rfind(buf, METADATA_START_MARKER) - .map(|x| x + METADATA_START_MARKER.len()) - .ok_or_else(|| { - MaxMindDbError::InvalidDatabase( - "Could not find MaxMind DB metadata in file.".to_owned(), - ) - }) -} - -mod decoder; -pub mod geoip2; - -#[cfg(test)] -mod reader_test; - -#[cfg(test)] -mod tests { - use 
super::MaxMindDbError; - use ipnetwork::IpNetworkError; - use std::io::{Error, ErrorKind}; - - #[test] - fn test_error_display() { - assert_eq!( - format!( - "{}", - MaxMindDbError::InvalidDatabase("something went wrong".to_owned()) - ), - "Invalid database: something went wrong".to_owned(), - ); - let io_err = Error::new(ErrorKind::NotFound, "file not found"); - assert_eq!( - format!("{}", MaxMindDbError::from(io_err)), - "I/O error: file not found".to_owned(), - ); - - #[cfg(feature = "mmap")] - { - let mmap_io_err = Error::new(ErrorKind::PermissionDenied, "mmap failed"); - assert_eq!( - format!("{}", MaxMindDbError::Mmap(mmap_io_err)), - "Memory map error: mmap failed".to_owned(), - ); - } - - assert_eq!( - format!("{}", MaxMindDbError::Decoding("unexpected type".to_owned())), - "Decoding error: unexpected type".to_owned(), - ); - - let net_err = IpNetworkError::InvalidPrefix; - assert_eq!( - format!("{}", MaxMindDbError::from(net_err)), - "Invalid network: invalid prefix".to_owned(), - ); - } - - #[test] - fn test_lookup_returns_none_for_unknown_address() { - use super::Reader; - use crate::geoip2; - use std::net::IpAddr; - use std::str::FromStr; - - let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = FromStr::from_str("10.0.0.1").unwrap(); - - let result_lookup = reader.lookup::(ip); - assert!( - matches!(result_lookup, Ok(None)), - "lookup should return Ok(None) for unknown IP" - ); - - let result_lookup_prefix = reader.lookup_prefix::(ip); - assert!( - matches!(result_lookup_prefix, Ok((None, 8))), - "lookup_prefix should return Ok((None, 8)) for unknown IP, got {:?}", - result_lookup_prefix - ); - } - - #[test] - fn test_lookup_returns_some_for_known_address() { - use super::Reader; - use crate::geoip2; - use std::net::IpAddr; - use std::str::FromStr; - - let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); - let ip: IpAddr = 
FromStr::from_str("89.160.20.128").unwrap(); - - let result_lookup = reader.lookup::(ip); - assert!( - matches!(result_lookup, Ok(Some(_))), - "lookup should return Ok(Some(_)) for known IP" - ); - assert!( - result_lookup.unwrap().unwrap().city.is_some(), - "Expected city data" - ); - - let result_lookup_prefix = reader.lookup_prefix::(ip); - assert!( - matches!(result_lookup_prefix, Ok((Some(_), _))), - "lookup_prefix should return Ok(Some(_)) for known IP" - ); - let (city_data, prefix_len) = result_lookup_prefix.unwrap(); - assert!( - city_data.unwrap().city.is_some(), - "Expected city data from prefix lookup" - ); - assert_eq!(prefix_len, 25, "Expected valid prefix length"); - } -} diff --git a/src/maxminddb/reader_test.rs b/src/maxminddb/reader_test.rs deleted file mode 100644 index 9eb37af9..00000000 --- a/src/maxminddb/reader_test.rs +++ /dev/null @@ -1,568 +0,0 @@ -use std::net::IpAddr; -use std::str::FromStr; - -use ipnetwork::IpNetwork; -use serde::Deserialize; -use serde_json::json; - -use crate::geoip2; -use crate::{MaxMindDbError, Reader, Within}; - -#[allow(clippy::float_cmp)] -#[test] -fn test_decoder() { - let _ = env_logger::try_init(); - - #[allow(non_snake_case)] - #[derive(Deserialize, Debug, Eq, PartialEq)] - struct MapXType { - arrayX: Vec, - utf8_stringX: String, - } - - #[allow(non_snake_case)] - #[derive(Deserialize, Debug, Eq, PartialEq)] - struct MapType { - mapX: MapXType, - } - - #[derive(Deserialize, Debug)] - struct TestType<'a> { - array: Vec, - boolean: bool, - bytes: &'a [u8], - double: f64, - float: f32, - int32: i32, - map: MapType, - uint16: u16, - uint32: u32, - uint64: u64, - uint128: u128, - utf8_string: String, - } - - let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb"); - if let Err(err) = r { - panic!("error opening mmdb: {err:?}"); - } - let r = r.unwrap(); - let ip: IpAddr = FromStr::from_str("1.1.1.0").unwrap(); - let result: TestType = r.lookup(ip).unwrap().unwrap(); - - 
assert_eq!(result.array, vec![1_u32, 2_u32, 3_u32]); - assert!(result.boolean); - assert_eq!(result.bytes, vec![0_u8, 0_u8, 0_u8, 42_u8]); - assert_eq!(result.double, 42.123_456); - assert_eq!(result.float, 1.1); - assert_eq!(result.int32, -268_435_456); - - assert_eq!( - result.map, - MapType { - mapX: MapXType { - arrayX: vec![7, 8, 9], - utf8_stringX: "hello".to_string(), - }, - } - ); - - assert_eq!(result.uint16, 100); - assert_eq!(result.uint32, 268_435_456); - assert_eq!(result.uint64, 1_152_921_504_606_846_976); - assert_eq!( - result.uint128, - 1_329_227_995_784_915_872_903_807_060_280_344_576 - ); - - assert_eq!( - result.utf8_string, - "unicode! \u{262f} - \u{266b}".to_string() - ); -} - -#[test] -fn test_pointers_in_metadata() { - let _ = env_logger::try_init(); - - let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-metadata-pointers.mmdb"); - if let Err(err) = r { - panic!("error opening mmdb: {err:?}"); - } - r.unwrap(); -} - -#[test] -fn test_broken_database() { - let _ = env_logger::try_init(); - - let r = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb") - .ok() - .unwrap(); - let ip: IpAddr = FromStr::from_str("2001:220::").unwrap(); - - #[derive(Deserialize, Debug)] - struct TestType {} - match r.lookup::(ip) { - Err(e) => assert!(matches!( - e, - MaxMindDbError::InvalidDatabase(_) // Check variant, message might vary slightly - )), - Ok(Some(_)) => panic!("Unexpected success with broken data"), - Ok(None) => panic!("Got None, expected InvalidDatabase"), - } -} - -#[test] -fn test_missing_database() { - let _ = env_logger::try_init(); - - let r = Reader::open_readfile("file-does-not-exist.mmdb"); - match r { - Ok(_) => panic!("Received Reader when opening non-existent file"), - Err(e) => assert!(matches!(e, MaxMindDbError::Io(_))), // Specific message might vary by OS/locale - } -} - -#[test] -fn test_non_database() { - let _ = env_logger::try_init(); - - let r = 
Reader::open_readfile("README.md"); - match r { - Ok(_) => panic!("Received Reader when opening a non-MMDB file"), - Err(e) => assert!( - matches!(&e, MaxMindDbError::InvalidDatabase(s) if s == "Could not find MaxMind DB metadata in file."), - "Expected InvalidDatabase error with specific message, but got: {:?}", - e - ), - } -} - -#[test] -fn test_reader() { - let _ = env_logger::try_init(); - - let sizes = [24_usize, 28, 32]; - for record_size in &sizes { - let versions = [4_usize, 6]; - for ip_version in &versions { - let filename = - format!("test-data/test-data/MaxMind-DB-test-ipv{ip_version}-{record_size}.mmdb"); - let reader = Reader::open_readfile(filename).ok().unwrap(); - - check_metadata(&reader, *ip_version, *record_size); - check_ip(&reader, *ip_version); - } - } -} - -/// Create Reader by explicitly reading the entire file into a buffer. -#[test] -fn test_reader_readfile() { - let _ = env_logger::try_init(); - - let sizes = [24_usize, 28, 32]; - for record_size in &sizes { - let versions = [4_usize, 6]; - for ip_version in &versions { - let filename = - format!("test-data/test-data/MaxMind-DB-test-ipv{ip_version}-{record_size}.mmdb"); - let reader = Reader::open_readfile(filename).ok().unwrap(); - - check_metadata(&reader, *ip_version, *record_size); - check_ip(&reader, *ip_version); - } - } -} - -#[test] -#[cfg(feature = "mmap")] -fn test_reader_mmap() { - let _ = env_logger::try_init(); - - let sizes = [24usize, 28, 32]; - for record_size in sizes.iter() { - let versions = [4usize, 6]; - for ip_version in versions.iter() { - let filename = format!( - "test-data/test-data/MaxMind-DB-test-ipv{}-{}.mmdb", - ip_version, record_size - ); - let reader = Reader::open_mmap(filename).ok().unwrap(); - - check_metadata(&reader, *ip_version, *record_size); - check_ip(&reader, *ip_version); - } - } -} - -#[test] -fn test_lookup_city() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; - - let reader = 
Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); - let city: geoip2::City = reader.lookup(ip).unwrap().unwrap(); - - let iso_code = city.country.and_then(|cy| cy.iso_code); - - assert_eq!(iso_code, Some("SE")); -} - -#[test] -fn test_lookup_country() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-Country-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); - let country: geoip2::Country = reader.lookup(ip).unwrap().unwrap(); - let country = country.country.unwrap(); - - assert_eq!(country.iso_code, Some("SE")); - assert_eq!(country.is_in_european_union, Some(true)); -} - -#[test] -fn test_lookup_connection_type() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-Connection-Type-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("96.1.20.112").unwrap(); - let connection_type: geoip2::ConnectionType = reader.lookup(ip).unwrap().unwrap(); - - assert_eq!(connection_type.connection_type, Some("Cable/DSL")); -} - -#[test] -fn test_lookup_annonymous_ip() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("81.2.69.123").unwrap(); - let anonymous_ip: geoip2::AnonymousIp = reader.lookup(ip).unwrap().unwrap(); - - assert_eq!(anonymous_ip.is_anonymous, Some(true)); - assert_eq!(anonymous_ip.is_public_proxy, Some(true)); - assert_eq!(anonymous_ip.is_anonymous_vpn, Some(true)); - assert_eq!(anonymous_ip.is_hosting_provider, Some(true)); - assert_eq!(anonymous_ip.is_tor_exit_node, Some(true)) -} - -#[test] -fn test_lookup_density_income() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-DensityIncome-Test.mmdb"; - - let reader = 
Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("5.83.124.123").unwrap(); - let density_income: geoip2::DensityIncome = reader.lookup(ip).unwrap().unwrap(); - - assert_eq!(density_income.average_income, Some(32323)); - assert_eq!(density_income.population_density, Some(1232)) -} - -#[test] -fn test_lookup_domain() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-Domain-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("66.92.80.123").unwrap(); - let domain: geoip2::Domain = reader.lookup(ip).unwrap().unwrap(); - - assert_eq!(domain.domain, Some("speakeasy.net")); -} - -#[test] -fn test_lookup_isp() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-ISP-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("12.87.118.123").unwrap(); - let isp: geoip2::Isp = reader.lookup(ip).unwrap().unwrap(); - - assert_eq!(isp.autonomous_system_number, Some(7018)); - assert_eq!(isp.isp, Some("AT&T Services")); - assert_eq!(isp.organization, Some("AT&T Worldnet Services")); -} - -#[test] -fn test_lookup_asn() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoLite2-ASN-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("1.128.0.123").unwrap(); - let asn: geoip2::Asn = reader.lookup(ip).unwrap().unwrap(); - - assert_eq!(asn.autonomous_system_number, Some(1221)); - assert_eq!(asn.autonomous_system_organization, Some("Telstra Pty Ltd")); -} - -#[test] -fn test_lookup_prefix() { - let _ = env_logger::try_init(); - let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; - let reader = Reader::open_readfile(filename).unwrap(); - - // --- IPv4 Check (Known) --- - let ip: IpAddr = "89.160.20.128".parse().unwrap(); - let result_v4 = reader.lookup_prefix::(ip); - assert!(result_v4.is_ok()); 
- let (city_opt_v4, prefix_len_v4) = result_v4.unwrap(); - assert!(city_opt_v4.is_some(), "Expected Some(City) for known IPv4"); - assert_eq!(prefix_len_v4, 25); - assert!(city_opt_v4.unwrap().country.is_some()); - - // --- IPv4 Check (Last Host, Known) --- - let ip_last: IpAddr = "89.160.20.254".parse().unwrap(); - let (city_opt_last, last_prefix_len) = reader.lookup_prefix::(ip_last).unwrap(); - assert!(city_opt_last.is_some(), "Expected Some(City) for last host"); - assert_eq!(last_prefix_len, 25); // Should be same network - - // --- IPv6 Check (Not Found in Data) --- - // This IP might resolve to a node in the tree, but that node might not point to data. - let ip_v6_not_found: IpAddr = "2c0f:ff00::1".parse().unwrap(); - let result_not_found = reader.lookup_prefix::(ip_v6_not_found); - assert!(result_not_found.is_ok()); - let (city_opt_nf, prefix_len_nf) = result_not_found.unwrap(); - assert!( - city_opt_nf.is_none(), - "Expected None data for non-existent IP 2c0f:ff00::1" - ); - assert_eq!( - prefix_len_nf, 6, - "Expected valid prefix length for not-found IPv6" - ); - - // --- IPv6 Check (Known Data) --- - let ip_v6_known: IpAddr = "2001:218:85a3:0:0:8a2e:370:7334".parse().unwrap(); - let result_known_v6 = reader.lookup_prefix::(ip_v6_known); - assert!(result_known_v6.is_ok()); - let (city_opt_v6, prefix_len_v6_known) = result_known_v6.unwrap(); - assert!(city_opt_v6.is_some(), "Expected Some(City) for known IPv6"); - assert_eq!( - prefix_len_v6_known, 32, - "Prefix length mismatch for known IPv6" - ); - assert!(city_opt_v6.unwrap().country.is_some()); -} - -#[test] -fn test_within_city() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - // --- Test iteration over entire DB ("::/0") --- - let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); - let mut iter_all: Within = reader.within(ip_net_all).unwrap(); - - // Get the first item - let 
first_item_result = iter_all.next(); - assert!( - first_item_result.is_some(), - "Iterator over ::/0 yielded no items" - ); - let _first_item = first_item_result.unwrap().unwrap(); - - // Count the remaining items to check total count - let mut n = 1; // Start at 1 since we already took the first item - for item_result in iter_all { - assert!(item_result.is_ok()); - n += 1; - } - assert_eq!(n, 243); - - // --- Test iteration over a specific smaller network --- - let specific = IpNetwork::V4("81.2.69.0/24".parse().unwrap()); - let mut iter_specific: Within = reader.within(specific).unwrap(); - - let expected = vec![ - // In order of iteration: - IpNetwork::V4("81.2.69.142/31".parse().unwrap()), - IpNetwork::V4("81.2.69.144/28".parse().unwrap()), - IpNetwork::V4("81.2.69.160/27".parse().unwrap()), - IpNetwork::V4("81.2.69.192/28".parse().unwrap()), - ]; - - let mut found_count = 0; - // Use into_iter() to consume the vector - for expected_net in expected.into_iter() { - let item_res = iter_specific.next(); - assert!( - item_res.is_some(), - "Expected more items in specific iterator" - ); - let item = item_res.unwrap().unwrap(); - assert_eq!( - item.ip_net, expected_net, - "Mismatch in specific network iteration" - ); - // Check associated data for one of them - if item.ip_net.prefix() == 31 { - // 81.2.69.142/31 - assert!(item.info.city.is_some()); - assert_eq!(item.info.city.unwrap().geoname_id, Some(2643743)); // London - } - found_count += 1; - } - assert!( - iter_specific.next().is_none(), - "Specific iterator should be exhausted after expected items" - ); - assert_eq!( - found_count, 4, - "Expected exactly 4 networks in 81.2.69.0/24" - ); -} - -fn check_metadata>(reader: &Reader, ip_version: usize, record_size: usize) { - let metadata = &reader.metadata; - - assert_eq!(metadata.binary_format_major_version, 2_u16); - assert_eq!(metadata.binary_format_minor_version, 0_u16); - assert!(metadata.build_epoch >= 1_397_457_605); - assert_eq!(metadata.database_type, 
"Test".to_string()); - - assert_eq!( - *metadata.description[&"en".to_string()], - "Test Database".to_string() - ); - assert_eq!( - *metadata.description[&"zh".to_string()], - "Test Database Chinese".to_string() - ); - - assert_eq!(metadata.ip_version, ip_version as u16); - assert_eq!(metadata.languages, vec!["en".to_string(), "zh".to_string()]); - - if ip_version == 4 { - assert_eq!(metadata.node_count, 164) - } else { - assert_eq!(metadata.node_count, 416) - } - - assert_eq!(metadata.record_size, record_size as u16) -} - -fn check_ip>(reader: &Reader, ip_version: usize) { - let subnets = match ip_version { - 6 => [ - "::1:ffff:ffff", - "::2:0:0", - "::2:0:0", - "::2:0:0", - "::2:0:0", - "::2:0:40", - "::2:0:40", - "::2:0:40", - "::2:0:50", - "::2:0:50", - "::2:0:50", - "::2:0:58", - "::2:0:58", - ], - _ => [ - "1.1.1.1", "1.1.1.2", "1.1.1.2", "1.1.1.4", "1.1.1.4", "1.1.1.4", "1.1.1.4", "1.1.1.8", - "1.1.1.8", "1.1.1.8", "1.1.1.16", "1.1.1.16", "1.1.1.16", - ], - }; - - #[derive(Deserialize, Debug, PartialEq)] - struct IpType { - ip: String, - } - - // Test lookups that are expected to succeed - for subnet in &subnets { - let ip: IpAddr = FromStr::from_str(subnet).unwrap(); - let result = reader.lookup::(ip); - - assert!( - result.is_ok(), - "Lookup failed unexpectedly for {}: {:?}", - subnet, - result.err() - ); - let value_option = result.unwrap(); - assert!( - value_option.is_some(), - "Lookup for {} returned None unexpectedly", - subnet - ); - let value = value_option.unwrap(); - - // The value stored is often the network address, not the specific IP looked up - // We need to parse the found IP and the subnet IP to check containment or equality. - // For the specific MaxMind-DB-test-ipv* files, the stored value IS the looked-up IP string. 
- assert_eq!(value.ip, *subnet); - } - - // Test lookups that are expected to return "not found" (Ok(None)) - let no_record = ["1.1.1.33", "255.254.253.123", "89fa::"]; - - for &address in &no_record { - if ip_version == 4 && address == "89fa::" { - continue; // Skip IPv6 address if testing IPv4 db - } - if ip_version == 6 && address != "89fa::" { - continue; // Skip IPv4 addresses if testing IPv6 db - } - - let ip: IpAddr = FromStr::from_str(address).unwrap(); - let result = reader.lookup::(ip); - - assert!( - matches!(result, Ok(None)), - "Expected Ok(None) for address {}, but got {:?}", - address, - result - ); - } -} - -#[test] -fn test_json_serialize() { - let _ = env_logger::try_init(); - - let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; - - let reader = Reader::open_readfile(filename).unwrap(); - - let ip: IpAddr = FromStr::from_str("89.160.20.112").unwrap(); - let city: geoip2::City = reader.lookup(ip).unwrap().unwrap(); - - let json_value = json!(city); - let json_string = json_value.to_string(); - - let expected_json_str = r#"{"city":{"geoname_id":2694762,"names":{"de":"Linköping","en":"Linköping","fr":"Linköping","ja":"リンシェーピング","zh-CN":"林雪平"}},"continent":{"code":"EU","geoname_id":6255148,"names":{"de":"Europa","en":"Europe","es":"Europa","fr":"Europe","ja":"ヨーロッパ","pt-BR":"Europa","ru":"Европа","zh-CN":"欧洲"}},"country":{"geoname_id":2661886,"is_in_european_union":true,"iso_code":"SE","names":{"de":"Schweden","en":"Sweden","es":"Suecia","fr":"Suède","ja":"スウェーデン王国","pt-BR":"Suécia","ru":"Швеция","zh-CN":"瑞典"}},"location":{"accuracy_radius":76,"latitude":58.4167,"longitude":15.6167,"time_zone":"Europe/Stockholm"},"registered_country":{"geoname_id":2921044,"is_in_european_union":true,"iso_code":"DE","names":{"de":"Deutschland","en":"Germany","es":"Alemania","fr":"Allemagne","ja":"ドイツ連邦共和国","pt-BR":"Alemanha","ru":"Германия","zh-CN":"德国"}},"subdivisions":[{"geoname_id":2685867,"iso_code":"E","names":{"en":"Östergötland County","fr":"Comté 
d'Östergötland"}}]}"#; - let expected_value: serde_json::Value = serde_json::from_str(expected_json_str).unwrap(); - - assert_eq!(json_value, expected_value); - assert_eq!(json_string, expected_json_str); -} diff --git a/src/metadata.rs b/src/metadata.rs new file mode 100644 index 00000000..2cb4bc29 --- /dev/null +++ b/src/metadata.rs @@ -0,0 +1,48 @@ +//! Database metadata types. + +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +/// Metadata about the MaxMind DB file. +#[derive(Deserialize, Serialize, Clone, Debug, PartialEq, Eq)] +pub struct Metadata { + /// Major version of the binary format (always 2). + pub binary_format_major_version: u16, + /// Minor version of the binary format (always 0). + pub binary_format_minor_version: u16, + /// Unix timestamp when the database was built. + pub build_epoch: u64, + /// Database type (e.g., "GeoIP2-City", "GeoLite2-Country"). + pub database_type: String, + /// Map of language codes to database descriptions. + pub description: BTreeMap, + /// IP version supported (4 or 6). + pub ip_version: u16, + /// Languages available in the database. + pub languages: Vec, + /// Number of nodes in the search tree. + pub node_count: u32, + /// Size of each record in bits (24, 28, or 32). + pub record_size: u16, +} + +impl Metadata { + /// Returns the database build time as a `SystemTime`. + /// + /// This converts the `build_epoch` Unix timestamp to a `SystemTime`. 
+ /// + /// # Example + /// + /// ``` + /// use maxminddb::Reader; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let build_time = reader.metadata.build_time(); + /// println!("Database built: {:?}", build_time); + /// ``` + #[must_use] + pub fn build_time(&self) -> std::time::SystemTime { + std::time::UNIX_EPOCH + std::time::Duration::from_secs(self.build_epoch) + } +} diff --git a/src/reader.rs b/src/reader.rs new file mode 100644 index 00000000..d255eb99 --- /dev/null +++ b/src/reader.rs @@ -0,0 +1,671 @@ +//! MaxMind DB reader implementation. + +use std::collections::HashSet; +use std::fs; +use std::net::IpAddr; +use std::path::Path; + +use ipnetwork::IpNetwork; +use serde::Deserialize; + +#[cfg(feature = "mmap")] +pub use memmap2::Mmap; +#[cfg(feature = "mmap")] +use memmap2::MmapOptions; +#[cfg(feature = "mmap")] +use std::fs::File; + +use crate::decoder; +use crate::error::MaxMindDbError; +use crate::metadata::Metadata; +use crate::result::LookupResult; +use crate::within::{IpInt, Within, WithinNode, WithinOptions}; + +/// Size of the data section separator (16 zero bytes). +const DATA_SECTION_SEPARATOR_SIZE: usize = 16; + +/// A reader for the MaxMind DB format. The lifetime `'data` is tied to the +/// lifetime of the underlying buffer holding the contents of the database file. +/// +/// The `Reader` supports both file-based and memory-mapped access to MaxMind +/// DB files, including GeoIP2 and GeoLite2 databases. +/// +/// # Features +/// +/// - **`mmap`**: Enable memory-mapped file access for better performance +/// - **`simdutf8`**: Use SIMD-accelerated UTF-8 validation (faster string +/// decoding) +/// - **`unsafe-str-decode`**: Skip UTF-8 validation entirely (unsafe, but +/// ~20% faster) +pub struct Reader> { + pub(crate) buf: S, + /// Database metadata. + pub metadata: Metadata, + pub(crate) ipv4_start: usize, + /// Bit depth at which ipv4_start was found (0-96). 
Used to calculate + /// correct prefix lengths for IPv4 lookups in IPv6 databases. + pub(crate) ipv4_start_bit_depth: usize, + pub(crate) pointer_base: usize, +} + +impl> std::fmt::Debug for Reader { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Reader") + .field("buf_len", &self.buf.as_ref().len()) + .field("metadata", &self.metadata) + .field("ipv4_start", &self.ipv4_start) + .field("ipv4_start_bit_depth", &self.ipv4_start_bit_depth) + .field("pointer_base", &self.pointer_base) + .finish_non_exhaustive() + } +} + +#[cfg(feature = "mmap")] +impl Reader { + /// Open a MaxMind DB database file by memory mapping it. + /// + /// # Safety + /// + /// The caller must ensure that the database file is not modified or + /// truncated while the `Reader` exists. Modifying or truncating the + /// file while it is memory-mapped will result in undefined behavior. + /// + /// # Example + /// + /// ``` + /// # #[cfg(feature = "mmap")] + /// # { + /// // SAFETY: The database file will not be modified while the reader exists. + /// let reader = unsafe { + /// maxminddb::Reader::open_mmap("test-data/test-data/GeoIP2-City-Test.mmdb") + /// }.unwrap(); + /// # } + /// ``` + pub unsafe fn open_mmap>(database: P) -> Result, MaxMindDbError> { + let file_read = File::open(database)?; + let mmap = MmapOptions::new() + .map(&file_read) + .map_err(MaxMindDbError::Mmap)?; + Reader::from_source(mmap) + } +} + +impl Reader> { + /// Open a MaxMind DB database file by loading it into memory. 
+ /// + /// # Example + /// + /// ``` + /// let reader = maxminddb::Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// ``` + pub fn open_readfile>(database: P) -> Result>, MaxMindDbError> { + let buf: Vec = fs::read(&database)?; // IO error converted via #[from] + Reader::from_source(buf) + } +} + +impl<'de, S: AsRef<[u8]>> Reader { + /// Open a MaxMind DB database from anything that implements AsRef<[u8]> + /// + /// # Example + /// + /// ``` + /// use std::fs; + /// let buf = fs::read("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let reader = maxminddb::Reader::from_source(buf).unwrap(); + /// ``` + pub fn from_source(buf: S) -> Result, MaxMindDbError> { + let data_section_separator_size = 16; + + let metadata_start = find_metadata_start(buf.as_ref())?; + let mut type_decoder = decoder::Decoder::new(&buf.as_ref()[metadata_start..], 0); + let metadata = Metadata::deserialize(&mut type_decoder)?; + + let search_tree_size = (metadata.node_count as usize) * (metadata.record_size as usize) / 4; + + let mut reader = Reader { + buf, + pointer_base: search_tree_size + data_section_separator_size, + metadata, + ipv4_start: 0, + ipv4_start_bit_depth: 0, + }; + let (ipv4_start, ipv4_start_bit_depth) = reader.find_ipv4_start()?; + reader.ipv4_start = ipv4_start; + reader.ipv4_start_bit_depth = ipv4_start_bit_depth; + + Ok(reader) + } + + /// Lookup an IP address in the database. 
+ /// + /// Returns a [`LookupResult`] that can be used to: + /// - Check if data exists with [`has_data()`](LookupResult::has_data) + /// - Get the network containing the IP with [`network()`](LookupResult::network) + /// - Decode the full record with [`decode()`](LookupResult::decode) + /// - Decode a specific path with [`decode_path()`](LookupResult::decode_path) + /// - Get a low-level decoder with [`decoder()`](LookupResult::decoder) + /// + /// # Examples + /// + /// Basic city lookup: + /// ``` + /// # use maxminddb::geoip2; + /// # use std::net::IpAddr; + /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { + /// let reader = maxminddb::Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; + /// + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// let result = reader.lookup(ip)?; + /// + /// if let Some(city) = result.decode::()? { + /// // Access nested structs directly - no Option unwrapping needed + /// if let Some(name) = city.city.names.english { + /// println!("City: {}", name); + /// } + /// } else { + /// println!("No data found for IP {}", ip); + /// } + /// # Ok(()) + /// # } + /// ``` + /// + /// Selective field access: + /// ``` + /// # use maxminddb::{Reader, PathElement}; + /// # use std::net::IpAddr; + /// # fn main() -> Result<(), maxminddb::MaxMindDbError> { + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb")?; + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// + /// let result = reader.lookup(ip)?; + /// let country_code: Option = result.decode_path(&[ + /// PathElement::Key("country"), + /// PathElement::Key("iso_code"), + /// ])?; + /// + /// println!("Country: {:?}", country_code); + /// # Ok(()) + /// # } + /// ``` + pub fn lookup(&'de self, address: IpAddr) -> Result, MaxMindDbError> { + // Check for IPv6 address in IPv4-only database + if matches!(address, IpAddr::V6(_)) && self.metadata.ip_version == 4 { + return 
Err(MaxMindDbError::invalid_input( + "cannot look up IPv6 address in IPv4-only database", + )); + } + + let ip_int = IpInt::new(address); + let (pointer, prefix_len) = self.find_address_in_tree(&ip_int)?; + + // For IPv4 addresses in IPv6 databases, adjust prefix_len to reflect + // the actual bit depth in the tree. The ipv4_start_bit_depth tells us + // how deep in the IPv6 tree we were when we found the IPv4 subtree. + let prefix_len = if matches!(address, IpAddr::V4(_)) && self.metadata.ip_version == 6 { + self.ipv4_start_bit_depth + prefix_len + } else { + prefix_len + }; + + if pointer == 0 { + // IP not found in database + Ok(LookupResult::new_not_found(self, prefix_len as u8, address)) + } else { + // Resolve the pointer to a data offset + let data_offset = self.resolve_data_pointer(pointer)?; + Ok(LookupResult::new_found( + self, + data_offset, + prefix_len as u8, + address, + )) + } + } + + /// Iterate over all networks in the database. + /// + /// This is a convenience method equivalent to calling [`within()`](Self::within) + /// with `0.0.0.0/0` for IPv4-only databases or `::/0` for IPv6 databases. + /// + /// # Arguments + /// + /// * `options` - Controls which networks are yielded. Use [`Default::default()`] + /// for standard behavior. 
+ /// + /// # Examples + /// + /// Iterate over all networks with default options: + /// ``` + /// use maxminddb::{geoip2, Reader}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// + /// let mut count = 0; + /// for result in reader.networks(Default::default()).unwrap() { + /// let lookup = result.unwrap(); + /// count += 1; + /// if count >= 10 { break; } + /// } + /// ``` + pub fn networks(&'de self, options: WithinOptions) -> Result, MaxMindDbError> { + let cidr = if self.metadata.ip_version == 6 { + IpNetwork::V6("::/0".parse().unwrap()) + } else { + IpNetwork::V4("0.0.0.0/0".parse().unwrap()) + }; + self.within(cidr, options) + } + + /// Iterate over IP networks within a CIDR range. + /// + /// Returns an iterator that yields [`LookupResult`] for each network in the + /// database that falls within the specified CIDR range. + /// + /// # Arguments + /// + /// * `cidr` - The CIDR range to iterate over. + /// * `options` - Controls which networks are yielded. Use [`Default::default()`] + /// for standard behavior (skip aliases, skip networks without data, include + /// empty values). 
+ /// + /// # Examples + /// + /// Iterate over all IPv4 networks: + /// ``` + /// use ipnetwork::IpNetwork; + /// use maxminddb::{geoip2, Reader}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// + /// let ipv4_all = IpNetwork::V4("0.0.0.0/0".parse().unwrap()); + /// let mut count = 0; + /// for result in reader.within(ipv4_all, Default::default()).unwrap() { + /// let lookup = result.unwrap(); + /// let network = lookup.network().unwrap(); + /// let city: geoip2::City = lookup.decode().unwrap().unwrap(); + /// let city_name = city.city.names.english; + /// println!("Network: {}, City: {:?}", network, city_name); + /// count += 1; + /// if count >= 10 { break; } // Limit output for example + /// } + /// ``` + /// + /// Search within a specific subnet: + /// ``` + /// use ipnetwork::IpNetwork; + /// use maxminddb::{geoip2, Reader}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// + /// let subnet = IpNetwork::V4("192.168.0.0/16".parse().unwrap()); + /// for result in reader.within(subnet, Default::default()).unwrap() { + /// match result { + /// Ok(lookup) => { + /// let network = lookup.network().unwrap(); + /// println!("Found: {}", network); + /// } + /// Err(e) => eprintln!("Error: {}", e), + /// } + /// } + /// ``` + /// + /// Include networks without data: + /// ``` + /// use ipnetwork::IpNetwork; + /// use maxminddb::{Reader, WithinOptions}; + /// + /// let reader = Reader::open_readfile( + /// "test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + /// + /// let opts = WithinOptions::default().include_networks_without_data(); + /// for result in reader.within("1.0.0.0/8".parse().unwrap(), opts).unwrap() { + /// let lookup = result.unwrap(); + /// if !lookup.has_data() { + /// println!("Network {} has no data", lookup.network().unwrap()); + /// } + /// } + /// ``` + pub fn within( + &'de self, + cidr: IpNetwork, + 
options: WithinOptions, + ) -> Result, MaxMindDbError> { + let ip_address = cidr.network(); + let prefix_len = cidr.prefix() as usize; + let ip_int = IpInt::new(ip_address); + let bit_count = ip_int.bit_count(); + + let mut node = self.start_node(bit_count); + let node_count = self.metadata.node_count as usize; + + let mut stack: Vec = Vec::with_capacity(bit_count - prefix_len); + + // Traverse down the tree to the level that matches the cidr mark + let mut depth = 0_usize; + for i in 0..prefix_len { + let bit = ip_int.get_bit(i); + node = self.read_node(node, bit as usize)?; + depth = i + 1; // We've now traversed i+1 bits (bits 0 through i) + + if node >= node_count { + // We've hit a data node or dead end before we exhausted our prefix. + // This means the requested CIDR is contained in a single record. + break; + } + } + + // Always push the node - it could be: + // - A data node (> node_count): will be yielded as a single record + // - The empty node (== node_count): will be skipped unless include_networks_without_data + // - An internal node (< node_count): will be traversed to find all contained records + stack.push(WithinNode { + node, + ip_int, + prefix_len: depth, + }); + + let within = Within { + reader: self, + node_count, + stack, + options, + }; + + Ok(within) + } + + fn find_address_in_tree(&self, ip_int: &IpInt) -> Result<(usize, usize), MaxMindDbError> { + let bit_count = ip_int.bit_count(); + let mut node = self.start_node(bit_count); + + let node_count = self.metadata.node_count as usize; + let mut prefix_len = bit_count; + + for i in 0..bit_count { + if node >= node_count { + prefix_len = i; + break; + } + let bit = ip_int.get_bit(i); + node = self.read_node(node, bit as usize)?; + } + match node_count { + // If node == node_count, it means we hit the placeholder "empty" node + // return 0 as the pointer value to signify "not found". 
+ _ if node == node_count => Ok((0, prefix_len)), + _ if node > node_count => Ok((node, prefix_len)), + _ => Err(MaxMindDbError::invalid_database( + "invalid node in search tree", + )), + } + } + + #[inline] + fn start_node(&self, length: usize) -> usize { + if length == 128 { + 0 + } else { + self.ipv4_start + } + } + + /// Find the IPv4 start node and the bit depth at which it was found. + /// Returns (node, depth) where depth is how far into the tree we traversed. + fn find_ipv4_start(&self) -> Result<(usize, usize), MaxMindDbError> { + if self.metadata.ip_version != 6 { + return Ok((0, 0)); + } + + // We are looking up an IPv4 address in an IPv6 tree. Skip over the + // first 96 nodes. + let mut node: usize = 0; + let mut depth: usize = 0; + for i in 0_u8..96 { + if node >= self.metadata.node_count as usize { + depth = i as usize; + break; + } + node = self.read_node(node, 0)?; + depth = (i + 1) as usize; + } + Ok((node, depth)) + } + + #[inline(always)] + pub(crate) fn read_node( + &self, + node_number: usize, + index: usize, + ) -> Result { + let buf = self.buf.as_ref(); + let base_offset = node_number * (self.metadata.record_size as usize) / 4; + + let val = match self.metadata.record_size { + 24 => { + let offset = base_offset + index * 3; + (buf[offset] as usize) << 16 + | (buf[offset + 1] as usize) << 8 + | buf[offset + 2] as usize + } + 28 => { + let middle = if index != 0 { + buf[base_offset + 3] & 0x0F + } else { + (buf[base_offset + 3] & 0xF0) >> 4 + }; + let offset = base_offset + index * 4; + (middle as usize) << 24 + | (buf[offset] as usize) << 16 + | (buf[offset + 1] as usize) << 8 + | buf[offset + 2] as usize + } + 32 => { + let offset = base_offset + index * 4; + (buf[offset] as usize) << 24 + | (buf[offset + 1] as usize) << 16 + | (buf[offset + 2] as usize) << 8 + | buf[offset + 3] as usize + } + s => { + return Err(MaxMindDbError::invalid_database(format!( + "unknown record size: {s}" + ))) + } + }; + Ok(val) + } + + /// Resolves a pointer 
from the search tree to an offset in the data section. + #[inline] + pub(crate) fn resolve_data_pointer(&self, pointer: usize) -> Result { + let resolved = pointer - (self.metadata.node_count as usize) - 16; + + // Check bounds using pointer_base which marks the start of the data section + if resolved >= (self.buf.as_ref().len() - self.pointer_base) { + return Err(MaxMindDbError::invalid_database( + "the MaxMind DB file's data pointer resolves to an invalid location", + )); + } + + Ok(resolved) + } + + /// Performs comprehensive validation of the MaxMind DB file. + /// + /// This method validates: + /// - Metadata section: format versions, required fields, and value constraints + /// - Search tree: traverses all networks to verify tree structure integrity + /// - Data section separator: validates the 16-byte separator between tree and data + /// - Data section: verifies all data records referenced by the search tree + /// + /// The verifier is stricter than the MaxMind DB specification and may return + /// errors on some databases that are still readable by normal operations. + /// This method is useful for: + /// - Validating database files after download or generation + /// - Debugging database corruption issues + /// - Ensuring database integrity in critical applications + /// + /// Note: Verification traverses the entire database and may be slow on large files. + /// The method is thread-safe and can be called on an active Reader. 
+ /// + /// # Example + /// + /// ``` + /// use maxminddb::Reader; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// reader.verify().expect("Database should be valid"); + /// ``` + pub fn verify(&self) -> Result<(), MaxMindDbError> { + self.verify_metadata()?; + self.verify_database() + } + + fn verify_metadata(&self) -> Result<(), MaxMindDbError> { + let m = &self.metadata; + + if m.binary_format_major_version != 2 { + return Err(MaxMindDbError::invalid_database(format!( + "binary_format_major_version - Expected: 2 Actual: {}", + m.binary_format_major_version + ))); + } + if m.binary_format_minor_version != 0 { + return Err(MaxMindDbError::invalid_database(format!( + "binary_format_minor_version - Expected: 0 Actual: {}", + m.binary_format_minor_version + ))); + } + if m.database_type.is_empty() { + return Err(MaxMindDbError::invalid_database( + "database_type - Expected: non-empty string Actual: \"\"", + )); + } + if m.description.is_empty() { + return Err(MaxMindDbError::invalid_database( + "description - Expected: non-empty map Actual: {}", + )); + } + if m.ip_version != 4 && m.ip_version != 6 { + return Err(MaxMindDbError::invalid_database(format!( + "ip_version - Expected: 4 or 6 Actual: {}", + m.ip_version + ))); + } + if m.record_size != 24 && m.record_size != 28 && m.record_size != 32 { + return Err(MaxMindDbError::invalid_database(format!( + "record_size - Expected: 24, 28, or 32 Actual: {}", + m.record_size + ))); + } + if m.node_count == 0 { + return Err(MaxMindDbError::invalid_database( + "node_count - Expected: positive integer Actual: 0", + )); + } + Ok(()) + } + + fn verify_database(&self) -> Result<(), MaxMindDbError> { + let offsets = self.verify_search_tree()?; + self.verify_data_section_separator()?; + self.verify_data_section(offsets) + } + + fn verify_search_tree(&self) -> Result, MaxMindDbError> { + let mut offsets = HashSet::new(); + let opts = 
WithinOptions::default().include_networks_without_data(); + + // Maximum number of networks we can expect in a valid database. + // A database with N nodes can have at most 2N data entries (each leaf node + // can have data). We add some margin for safety. + let max_iterations = (self.metadata.node_count as usize).saturating_mul(3); + let mut iteration_count = 0usize; + + for result in self.networks(opts)? { + let lookup = result?; + if let Some(offset) = lookup.offset() { + offsets.insert(offset); + } + + iteration_count += 1; + if iteration_count > max_iterations { + return Err(MaxMindDbError::invalid_database(format!( + "search tree appears to have a cycle or invalid structure (exceeded {max_iterations} iterations)" + ))); + } + } + Ok(offsets) + } + + fn verify_data_section_separator(&self) -> Result<(), MaxMindDbError> { + let separator_start = + self.metadata.node_count as usize * self.metadata.record_size as usize / 4; + let separator_end = separator_start + DATA_SECTION_SEPARATOR_SIZE; + + if separator_end > self.buf.as_ref().len() { + return Err(MaxMindDbError::invalid_database_at( + "data section separator extends past end of file", + separator_start, + )); + } + + let separator = &self.buf.as_ref()[separator_start..separator_end]; + + for &b in separator { + if b != 0 { + return Err(MaxMindDbError::invalid_database_at( + format!("unexpected byte in data separator: {separator:?}"), + separator_start, + )); + } + } + Ok(()) + } + + fn verify_data_section(&self, offsets: HashSet) -> Result<(), MaxMindDbError> { + let data_section = &self.buf.as_ref()[self.pointer_base..]; + + // Verify each offset from the search tree points to valid, decodable data + for &offset in &offsets { + if offset >= data_section.len() { + return Err(MaxMindDbError::invalid_database_at( + format!( + "search tree pointer is beyond data section (len: {})", + data_section.len() + ), + offset, + )); + } + + let mut dec = decoder::Decoder::new(data_section, offset); + + // Try to 
skip/decode the value to verify it's valid + if let Err(e) = dec.skip_value_for_verification() { + return Err(MaxMindDbError::invalid_database_at( + format!("decoding error: {e}"), + offset, + )); + } + } + + Ok(()) + } +} + +fn find_metadata_start(buf: &[u8]) -> Result { + const METADATA_START_MARKER: &[u8] = b"\xab\xcd\xefMaxMind.com"; + + memchr::memmem::rfind(buf, METADATA_START_MARKER) + .map(|x| x + METADATA_START_MARKER.len()) + .ok_or_else(|| { + MaxMindDbError::invalid_database("could not find MaxMind DB metadata in file") + }) +} diff --git a/src/reader_test.rs b/src/reader_test.rs new file mode 100644 index 00000000..f16785b1 --- /dev/null +++ b/src/reader_test.rs @@ -0,0 +1,1287 @@ +use std::net::IpAddr; + +use ipnetwork::IpNetwork; +use serde::Deserialize; +use serde_json::json; + +use crate::geoip2; +use crate::{MaxMindDbError, Reader, Within, WithinOptions}; + +#[allow(clippy::float_cmp)] +#[test] +fn test_decoder() { + let _ = env_logger::try_init(); + + #[allow(non_snake_case)] + #[derive(Deserialize, Debug, Eq, PartialEq)] + struct MapXType { + arrayX: Vec, + utf8_stringX: String, + } + + #[allow(non_snake_case)] + #[derive(Deserialize, Debug, Eq, PartialEq)] + struct MapType { + mapX: MapXType, + } + + #[derive(Deserialize, Debug)] + struct TestType<'a> { + array: Vec, + boolean: bool, + bytes: &'a [u8], + double: f64, + float: f32, + int32: i32, + map: MapType, + uint16: u16, + uint32: u32, + uint64: u64, + uint128: u128, + utf8_string: String, + } + + let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb") + .expect("error opening mmdb"); + let ip: IpAddr = "1.1.1.0".parse().unwrap(); + let lookup = r.lookup(ip).unwrap(); + assert!(lookup.has_data(), "Expected IP to be found"); + let result: TestType = lookup.decode().unwrap().unwrap(); + + assert_eq!(result.array, vec![1_u32, 2_u32, 3_u32]); + assert!(result.boolean); + assert_eq!(result.bytes, vec![0_u8, 0_u8, 0_u8, 42_u8]); + assert_eq!(result.double, 42.123_456); + 
assert_eq!(result.float, 1.1); + assert_eq!(result.int32, -268_435_456); + + assert_eq!( + result.map, + MapType { + mapX: MapXType { + arrayX: vec![7, 8, 9], + utf8_stringX: "hello".to_string(), + }, + } + ); + + assert_eq!(result.uint16, 100); + assert_eq!(result.uint32, 268_435_456); + assert_eq!(result.uint64, 1_152_921_504_606_846_976); + assert_eq!( + result.uint128, + 1_329_227_995_784_915_872_903_807_060_280_344_576 + ); + + assert_eq!( + result.utf8_string, + "unicode! \u{262f} - \u{266b}".to_string() + ); +} + +#[test] +fn test_pointers_in_metadata() { + let _ = env_logger::try_init(); + + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-metadata-pointers.mmdb") + .expect("error opening mmdb"); +} + +#[test] +fn test_broken_database() { + let _ = env_logger::try_init(); + + let r = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb") + .ok() + .unwrap(); + let ip: IpAddr = "2001:220::".parse().unwrap(); + + #[derive(Deserialize, Debug)] + struct TestType {} + + let lookup = r.lookup(ip).unwrap(); + if lookup.has_data() { + match lookup.decode::() { + Err(e) => assert!(matches!( + e, + MaxMindDbError::InvalidDatabase { .. } // Check variant, message might vary slightly + )), + Ok(_) => panic!("Unexpected success with broken data"), + } + } else { + panic!("Expected IP to be found (with broken data)"); + } +} + +#[test] +fn test_missing_database() { + let _ = env_logger::try_init(); + + let r = Reader::open_readfile("file-does-not-exist.mmdb"); + match r { + Ok(_) => panic!("Received Reader when opening non-existent file"), + Err(e) => assert!(matches!(e, MaxMindDbError::Io(_))), // Specific message might vary by OS/locale + } +} + +#[test] +fn test_non_database() { + let _ = env_logger::try_init(); + + let r = Reader::open_readfile("README.md"); + match r { + Ok(_) => panic!("Received Reader when opening a non-MMDB file"), + Err(e) => assert!( + matches!(&e, MaxMindDbError::InvalidDatabase { message, .. 
} if message == "could not find MaxMind DB metadata in file"), + "Expected InvalidDatabase error with specific message, but got: {:?}", + e + ), + } +} + +#[test] +fn test_reader() { + let _ = env_logger::try_init(); + + let sizes = [24_usize, 28, 32]; + for record_size in &sizes { + let versions = [4_usize, 6]; + for ip_version in &versions { + let filename = + format!("test-data/test-data/MaxMind-DB-test-ipv{ip_version}-{record_size}.mmdb"); + let reader = Reader::open_readfile(filename).ok().unwrap(); + + check_metadata(&reader, *ip_version, *record_size); + check_ip(&reader, *ip_version); + } + } +} + +/// Create Reader by explicitly reading the entire file into a buffer. +#[test] +fn test_reader_readfile() { + let _ = env_logger::try_init(); + + let sizes = [24_usize, 28, 32]; + for record_size in &sizes { + let versions = [4_usize, 6]; + for ip_version in &versions { + let filename = + format!("test-data/test-data/MaxMind-DB-test-ipv{ip_version}-{record_size}.mmdb"); + let reader = Reader::open_readfile(filename).ok().unwrap(); + + check_metadata(&reader, *ip_version, *record_size); + check_ip(&reader, *ip_version); + } + } +} + +#[test] +#[cfg(feature = "mmap")] +fn test_reader_mmap() { + let _ = env_logger::try_init(); + + let sizes = [24usize, 28, 32]; + for record_size in sizes.iter() { + let versions = [4usize, 6]; + for ip_version in versions.iter() { + let filename = format!( + "test-data/test-data/MaxMind-DB-test-ipv{}-{}.mmdb", + ip_version, record_size + ); + // SAFETY: The test database file will not be modified during the test. 
+ let reader = unsafe { Reader::open_mmap(filename) }.ok().unwrap(); + + check_metadata(&reader, *ip_version, *record_size); + check_ip(&reader, *ip_version); + } + } +} + +#[test] +fn test_lookup_city() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "89.160.20.112".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let city: geoip2::City = lookup.decode().unwrap().unwrap(); + + let iso_code = city.country.iso_code; + + assert_eq!(iso_code, Some("SE")); +} + +#[test] +fn test_lookup_country() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-Country-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "89.160.20.112".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let country: geoip2::Country = lookup.decode().unwrap().unwrap(); + + assert_eq!(country.country.iso_code, Some("SE")); + assert_eq!(country.country.is_in_european_union, Some(true)); +} + +#[test] +fn test_lookup_connection_type() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-Connection-Type-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "96.1.20.112".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let connection_type: geoip2::ConnectionType = lookup.decode().unwrap().unwrap(); + + assert_eq!(connection_type.connection_type, Some("Cable/DSL")); +} + +#[test] +fn test_lookup_annonymous_ip() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "81.2.69.123".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let 
anonymous_ip: geoip2::AnonymousIp = lookup.decode().unwrap().unwrap(); + + assert_eq!(anonymous_ip.is_anonymous, Some(true)); + assert_eq!(anonymous_ip.is_public_proxy, Some(true)); + assert_eq!(anonymous_ip.is_anonymous_vpn, Some(true)); + assert_eq!(anonymous_ip.is_hosting_provider, Some(true)); + assert_eq!(anonymous_ip.is_tor_exit_node, Some(true)) +} + +#[test] +fn test_lookup_density_income() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-DensityIncome-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "5.83.124.123".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let density_income: geoip2::DensityIncome = lookup.decode().unwrap().unwrap(); + + assert_eq!(density_income.average_income, Some(32323)); + assert_eq!(density_income.population_density, Some(1232)) +} + +#[test] +fn test_lookup_domain() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-Domain-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "66.92.80.123".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let domain: geoip2::Domain = lookup.decode().unwrap().unwrap(); + + assert_eq!(domain.domain, Some("speakeasy.net")); +} + +#[test] +fn test_lookup_isp() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-ISP-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "12.87.118.123".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let isp: geoip2::Isp = lookup.decode().unwrap().unwrap(); + + assert_eq!(isp.autonomous_system_number, Some(7018)); + assert_eq!(isp.isp, Some("AT&T Services")); + assert_eq!(isp.organization, Some("AT&T Worldnet Services")); +} + +#[test] +fn test_lookup_asn() { + let _ = env_logger::try_init(); + + let filename = 
"test-data/test-data/GeoLite2-ASN-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "1.128.0.123".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let asn: geoip2::Asn = lookup.decode().unwrap().unwrap(); + + assert_eq!(asn.autonomous_system_number, Some(1221)); + assert_eq!(asn.autonomous_system_organization, Some("Telstra Pty Ltd")); +} + +#[test] +fn test_lookup_network() { + let _ = env_logger::try_init(); + let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; + let reader = Reader::open_readfile(filename).unwrap(); + + // --- IPv4 Check (Known) --- + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data(), "Expected Some(City) for known IPv4"); + let network = lookup.network().unwrap(); + assert_eq!(network.prefix(), 25); + let city: geoip2::City = lookup.decode().unwrap().unwrap(); + assert!(!city.country.is_empty()); + + // --- IPv4 Check (Last Host, Known) --- + let ip_last: IpAddr = "89.160.20.254".parse().unwrap(); + let lookup_last = reader.lookup(ip_last).unwrap(); + assert!(lookup_last.has_data(), "Expected Some(City) for last host"); + assert_eq!(lookup_last.network().unwrap().prefix(), 25); // Should be same network + + // --- IPv6 Check (Not Found in Data) --- + // This IP might resolve to a node in the tree, but that node might not point to data. 
+ let ip_v6_not_found: IpAddr = "2c0f:ff00::1".parse().unwrap(); + let lookup_nf = reader.lookup(ip_v6_not_found).unwrap(); + assert!( + !lookup_nf.has_data(), + "Expected not found for non-existent IP 2c0f:ff00::1" + ); + assert_eq!( + lookup_nf.network().unwrap().prefix(), + 6, + "Expected valid prefix length for not-found IPv6" + ); + + // --- IPv6 Check (Known Data) --- + let ip_v6_known: IpAddr = "2001:218:85a3:0:0:8a2e:370:7334".parse().unwrap(); + let lookup_v6 = reader.lookup(ip_v6_known).unwrap(); + assert!(lookup_v6.has_data(), "Expected Some(City) for known IPv6"); + assert_eq!( + lookup_v6.network().unwrap().prefix(), + 32, + "Prefix length mismatch for known IPv6" + ); + let city_v6: geoip2::City = lookup_v6.decode().unwrap().unwrap(); + assert!(!city_v6.country.is_empty()); +} + +#[test] +fn test_within_city() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + // --- Test iteration over entire DB ("::/0") --- + let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); + let mut iter_all: Within<_> = reader.within(ip_net_all, Default::default()).unwrap(); + + // Get the first item + let first_item_result = iter_all.next(); + assert!( + first_item_result.is_some(), + "Iterator over ::/0 yielded no items" + ); + let _first_lookup = first_item_result.unwrap().unwrap(); + + // Count the remaining items to check total count + let mut n = 1; // Start at 1 since we already took the first item + for item_result in iter_all { + assert!(item_result.is_ok()); + n += 1; + } + assert_eq!(n, 243); + + // --- Test iteration over a specific smaller network --- + let specific = IpNetwork::V4("81.2.69.0/24".parse().unwrap()); + let mut iter_specific: Within<_> = reader.within(specific, Default::default()).unwrap(); + + let expected = vec![ + // In order of iteration: + IpNetwork::V4("81.2.69.142/31".parse().unwrap()), + 
IpNetwork::V4("81.2.69.144/28".parse().unwrap()), + IpNetwork::V4("81.2.69.160/27".parse().unwrap()), + IpNetwork::V4("81.2.69.192/28".parse().unwrap()), + ]; + + let mut found_count = 0; + // Use into_iter() to consume the vector + for expected_net in expected.into_iter() { + let item_res = iter_specific.next(); + assert!( + item_res.is_some(), + "Expected more items in specific iterator" + ); + let lookup = item_res.unwrap().unwrap(); + let network = lookup.network().unwrap(); + assert_eq!( + network, expected_net, + "Mismatch in specific network iteration" + ); + // Check associated data for one of them + if network.prefix() == 31 { + // 81.2.69.142/31 + let city: geoip2::City = lookup.decode().unwrap().unwrap(); + assert!(!city.city.is_empty()); + assert_eq!(city.city.geoname_id, Some(2643743)); // London + } + found_count += 1; + } + assert!( + iter_specific.next().is_none(), + "Specific iterator should be exhausted after expected items" + ); + assert_eq!( + found_count, 4, + "Expected exactly 4 networks in 81.2.69.0/24" + ); +} + +fn check_metadata>(reader: &Reader, ip_version: usize, record_size: usize) { + let metadata = &reader.metadata; + + assert_eq!(metadata.binary_format_major_version, 2_u16); + assert_eq!(metadata.binary_format_minor_version, 0_u16); + assert!(metadata.build_epoch >= 1_397_457_605); + assert_eq!(metadata.database_type, "Test".to_string()); + + assert_eq!( + *metadata.description[&"en".to_string()], + "Test Database".to_string() + ); + assert_eq!( + *metadata.description[&"zh".to_string()], + "Test Database Chinese".to_string() + ); + + assert_eq!(metadata.ip_version, ip_version as u16); + assert_eq!(metadata.languages, vec!["en".to_string(), "zh".to_string()]); + + if ip_version == 4 { + assert_eq!(metadata.node_count, 164) + } else { + assert_eq!(metadata.node_count, 416) + } + + assert_eq!(metadata.record_size, record_size as u16) +} + +fn check_ip>(reader: &Reader, ip_version: usize) { + let subnets = match ip_version { + 6 => [ + 
"::1:ffff:ffff", + "::2:0:0", + "::2:0:0", + "::2:0:0", + "::2:0:0", + "::2:0:40", + "::2:0:40", + "::2:0:40", + "::2:0:50", + "::2:0:50", + "::2:0:50", + "::2:0:58", + "::2:0:58", + ], + _ => [ + "1.1.1.1", "1.1.1.2", "1.1.1.2", "1.1.1.4", "1.1.1.4", "1.1.1.4", "1.1.1.4", "1.1.1.8", + "1.1.1.8", "1.1.1.8", "1.1.1.16", "1.1.1.16", "1.1.1.16", + ], + }; + + #[derive(Deserialize, Debug, PartialEq)] + struct IpType { + ip: String, + } + + // Test lookups that are expected to succeed + for subnet in &subnets { + let ip: IpAddr = subnet.parse().unwrap(); + let lookup = reader.lookup(ip); + + assert!( + lookup.is_ok(), + "Lookup failed unexpectedly for {}: {:?}", + subnet, + lookup.err() + ); + let lookup = lookup.unwrap(); + assert!( + lookup.has_data(), + "Lookup for {} returned not found unexpectedly", + subnet + ); + let value: IpType = lookup.decode().unwrap().unwrap(); + + // The value stored is often the network address, not the specific IP looked up + // We need to parse the found IP and the subnet IP to check containment or equality. + // For the specific MaxMind-DB-test-ipv* files, the stored value IS the looked-up IP string. 
+ assert_eq!(value.ip, *subnet); + } + + // Test lookups that are expected to return "not found" + let no_record = ["1.1.1.33", "255.254.253.123", "89fa::"]; + + for &address in &no_record { + if ip_version == 4 && address == "89fa::" { + continue; // Skip IPv6 address if testing IPv4 db + } + if ip_version == 6 && address != "89fa::" { + continue; // Skip IPv4 addresses if testing IPv6 db + } + + let ip: IpAddr = address.parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + + assert!( + !lookup.has_data(), + "Expected not found for address {}, but it was found", + address + ); + } +} + +#[test] +fn test_json_serialize() { + let _ = env_logger::try_init(); + + let filename = "test-data/test-data/GeoIP2-City-Test.mmdb"; + + let reader = Reader::open_readfile(filename).unwrap(); + + let ip: IpAddr = "89.160.20.112".parse().unwrap(); + let lookup = reader.lookup(ip).unwrap(); + assert!(lookup.has_data()); + let city: geoip2::City = lookup.decode().unwrap().unwrap(); + + let json_value = json!(city); + let json_string = json_value.to_string(); + + let expected_json_str = 
r#"{"city":{"geoname_id":2694762,"names":{"de":"Linköping","en":"Linköping","fr":"Linköping","ja":"リンシェーピング","zh-CN":"林雪平"}},"continent":{"code":"EU","geoname_id":6255148,"names":{"de":"Europa","en":"Europe","es":"Europa","fr":"Europe","ja":"ヨーロッパ","pt-BR":"Europa","ru":"Европа","zh-CN":"欧洲"}},"country":{"geoname_id":2661886,"is_in_european_union":true,"iso_code":"SE","names":{"de":"Schweden","en":"Sweden","es":"Suecia","fr":"Suède","ja":"スウェーデン王国","pt-BR":"Suécia","ru":"Швеция","zh-CN":"瑞典"}},"location":{"accuracy_radius":76,"latitude":58.4167,"longitude":15.6167,"time_zone":"Europe/Stockholm"},"registered_country":{"geoname_id":2921044,"is_in_european_union":true,"iso_code":"DE","names":{"de":"Deutschland","en":"Germany","es":"Alemania","fr":"Allemagne","ja":"ドイツ連邦共和国","pt-BR":"Alemanha","ru":"Германия","zh-CN":"德国"}},"subdivisions":[{"geoname_id":2685867,"iso_code":"E","names":{"en":"Östergötland County","fr":"Comté d'Östergötland"}}]}"#; + let expected_value: serde_json::Value = serde_json::from_str(expected_json_str).unwrap(); + + assert_eq!(json_value, expected_value); + assert_eq!(json_string, expected_json_str); +} + +// ============================================================================ +// Iteration Options Tests +// ============================================================================ + +/// Test networks() method iterates over entire database +#[test] +fn test_networks() { + let _ = env_logger::try_init(); + + // Test with different record sizes and IP versions + for record_size in &[24_u32, 28, 32] { + for ip_version in &[4_u32, 6] { + let filename = + format!("test-data/test-data/MaxMind-DB-test-ipv{ip_version}-{record_size}.mmdb"); + let reader = Reader::open_readfile(&filename).unwrap(); + + for result in reader.networks(Default::default()).unwrap() { + let lookup = result.unwrap(); + assert!( + lookup.has_data(), + "networks() should only yield found records by default" + ); + + #[derive(Deserialize)] + struct IpRecord { + ip: 
String, + } + let record: IpRecord = lookup.decode().unwrap().unwrap(); + let network = lookup.network().unwrap(); + assert_eq!( + record.ip, + network.ip().to_string(), + "record IP should match network IP" + ); + } + } + } +} + +/// Test that default options skip aliased networks +#[test] +fn test_default_skips_aliases() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + + // Without IncludeAliasedNetworks, iterating over ::/0 should yield IPv4 networks only once + let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); + + let expected_without_aliases = vec![ + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + "::1:ffff:ffff/128", + "::2:0:0/122", + "::2:0:40/124", + "::2:0:50/125", + "::2:0:58/127", + ]; + + let mut networks: Vec = Vec::new(); + for result in reader.within(ip_net_all, Default::default()).unwrap() { + let lookup = result.unwrap(); + networks.push(lookup.network().unwrap().to_string()); + } + + assert_eq!(networks, expected_without_aliases); +} + +/// Test IncludeAliasedNetworks option +#[test] +fn test_include_aliased_networks() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + + let ip_net_all = IpNetwork::V6("::/0".parse().unwrap()); + let opts = WithinOptions::default().include_aliased_networks(); + + // With IncludeAliasedNetworks, we should see IPv4 networks via various IPv6 prefixes + let expected_with_aliases = vec![ + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + "::1:ffff:ffff/128", + "::2:0:0/122", + "::2:0:40/124", + "::2:0:50/125", + "::2:0:58/127", + "::ffff:1.1.1.1/128", + "::ffff:1.1.1.2/127", + "::ffff:1.1.1.4/126", + "::ffff:1.1.1.8/125", + "::ffff:1.1.1.16/124", + "::ffff:1.1.1.32/128", + "2001:0:101:101::/64", + "2001:0:101:102::/63", + 
"2001:0:101:104::/62", + "2001:0:101:108::/61", + "2001:0:101:110::/60", + "2001:0:101:120::/64", + "2002:101:101::/48", + "2002:101:102::/47", + "2002:101:104::/46", + "2002:101:108::/45", + "2002:101:110::/44", + "2002:101:120::/48", + ]; + + let mut networks: Vec = Vec::new(); + for result in reader.within(ip_net_all, opts).unwrap() { + let lookup = result.unwrap(); + networks.push(lookup.network().unwrap().to_string()); + } + + assert_eq!(networks, expected_with_aliases); +} + +/// Test IncludeNetworksWithoutData option +#[test] +fn test_include_networks_without_data() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-mixed-24.mmdb").unwrap(); + + // Using 1.0.0.0/8 like the Go tests + let cidr: IpNetwork = "1.0.0.0/8".parse().unwrap(); + let opts = WithinOptions::default().include_networks_without_data(); + + let expected = vec![ + "1.0.0.0/16", + "1.1.0.0/24", + "1.1.1.0/32", + "1.1.1.1/32", + "1.1.1.2/31", + "1.1.1.4/30", + "1.1.1.8/29", + "1.1.1.16/28", + "1.1.1.32/32", + "1.1.1.33/32", + "1.1.1.34/31", + "1.1.1.36/30", + "1.1.1.40/29", + "1.1.1.48/28", + "1.1.1.64/26", + "1.1.1.128/25", + "1.1.2.0/23", + "1.1.4.0/22", + "1.1.8.0/21", + "1.1.16.0/20", + "1.1.32.0/19", + "1.1.64.0/18", + "1.1.128.0/17", + "1.2.0.0/15", + "1.4.0.0/14", + "1.8.0.0/13", + "1.16.0.0/12", + "1.32.0.0/11", + "1.64.0.0/10", + "1.128.0.0/9", + ]; + + let mut networks: Vec = Vec::new(); + let mut found_count = 0; + let mut not_found_count = 0; + + for result in reader.within(cidr, opts).unwrap() { + let lookup = result.unwrap(); + networks.push(lookup.network().unwrap().to_string()); + if lookup.has_data() { + found_count += 1; + } else { + not_found_count += 1; + } + } + + assert_eq!(networks, expected); + assert!( + not_found_count > 0, + "Should have some networks without data" + ); + assert!(found_count > 0, "Should have some networks with data"); +} + +/// Test SkipEmptyValues option +#[test] +fn 
test_skip_empty_values() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb").unwrap(); + + // Count networks without SkipEmptyValues + let mut count_without_skip = 0; + let mut empty_count = 0; + + for result in reader.networks(Default::default()).unwrap() { + let lookup = result.unwrap(); + count_without_skip += 1; + + if lookup.has_data() { + let data: std::collections::BTreeMap = + lookup.decode().unwrap().unwrap(); + if data.is_empty() { + empty_count += 1; + } + } + } + + // Count networks with SkipEmptyValues + let mut count_with_skip = 0; + let opts = WithinOptions::default().skip_empty_values(); + + for result in reader.networks(opts).unwrap() { + let lookup = result.unwrap(); + count_with_skip += 1; + + if lookup.has_data() { + let data: std::collections::BTreeMap = + lookup.decode().unwrap().unwrap(); + assert!( + !data.is_empty(), + "Should not see empty maps with skip_empty_values" + ); + } + } + + // Verify the option works + assert!( + empty_count > 0, + "Test database should have empty values, found {} empty out of {}", + empty_count, + count_without_skip + ); + assert_eq!( + count_without_skip - empty_count, + count_with_skip, + "SkipEmptyValues should skip exactly the empty values" + ); +} + +/// Test SkipEmptyValues with other options combined +#[test] +fn test_skip_empty_values_with_other_options() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/GeoIP2-Anonymous-IP-Test.mmdb").unwrap(); + + // Test with IncludeNetworksWithoutData - should still skip empty maps + let opts = WithinOptions::default() + .include_networks_without_data() + .skip_empty_values(); + + let mut count = 0; + for result in reader.networks(opts).unwrap() { + let lookup = result.unwrap(); + count += 1; + + if lookup.has_data() { + let data: std::collections::BTreeMap = + lookup.decode().unwrap().unwrap(); + assert!( + !data.is_empty(), + "Should not 
/// Test various NetworksWithin scenarios matching Go tests.
///
/// Every case names a CIDR to iterate (`network`), the flavour of test
/// database to open (`database`: `ipv4`, `ipv6` or `mixed`) and the exact,
/// ordered list of networks the iterator must yield. Each case is run
/// against the 24-, 28- and 32-bit record-size variants of that database.
#[test]
fn test_networks_within_scenarios() {
    let _ = env_logger::try_init();

    struct TestCase {
        network: &'static str,
        database: &'static str,
        expected: Vec<&'static str>,
    }

    let test_cases = vec![
        TestCase {
            network: "0.0.0.0/0",
            database: "ipv4",
            expected: vec![
                "1.1.1.1/32",
                "1.1.1.2/31",
                "1.1.1.4/30",
                "1.1.1.8/29",
                "1.1.1.16/28",
                "1.1.1.32/32",
            ],
        },
        TestCase {
            network: "1.1.1.1/30",
            database: "ipv4",
            expected: vec!["1.1.1.1/32", "1.1.1.2/31"],
        },
        TestCase {
            network: "1.1.1.2/31",
            database: "ipv4",
            expected: vec!["1.1.1.2/31"],
        },
        TestCase {
            network: "1.1.1.1/32",
            database: "ipv4",
            expected: vec!["1.1.1.1/32"],
        },
        TestCase {
            network: "1.1.1.2/32",
            database: "ipv4",
            expected: vec!["1.1.1.2/31"],
        },
        TestCase {
            network: "1.1.1.3/32",
            database: "ipv4",
            expected: vec!["1.1.1.2/31"],
        },
        TestCase {
            network: "1.1.1.19/32",
            database: "ipv4",
            expected: vec!["1.1.1.16/28"],
        },
        TestCase {
            network: "255.255.255.0/24",
            database: "ipv4",
            expected: vec![],
        },
        TestCase {
            network: "1.1.1.1/32",
            database: "mixed",
            expected: vec!["1.1.1.1/32"],
        },
        TestCase {
            network: "255.255.255.0/24",
            database: "mixed",
            expected: vec![],
        },
        TestCase {
            network: "::1:ffff:ffff/128",
            database: "ipv6",
            expected: vec!["::1:ffff:ffff/128"],
        },
        TestCase {
            network: "::/0",
            database: "ipv6",
            expected: vec![
                "::1:ffff:ffff/128",
                "::2:0:0/122",
                "::2:0:40/124",
                "::2:0:50/125",
                "::2:0:58/127",
            ],
        },
        TestCase {
            network: "::2:0:40/123",
            database: "ipv6",
            expected: vec!["::2:0:40/124", "::2:0:50/125", "::2:0:58/127"],
        },
        TestCase {
            network: "0:0:0:0:0:ffff:ffff:ff00/120",
            database: "ipv6",
            expected: vec![],
        },
        TestCase {
            network: "0.0.0.0/0",
            database: "mixed",
            expected: vec![
                "1.1.1.1/32",
                "1.1.1.2/31",
                "1.1.1.4/30",
                "1.1.1.8/29",
                "1.1.1.16/28",
                "1.1.1.32/32",
            ],
        },
        TestCase {
            network: "1.1.1.16/28",
            database: "mixed",
            expected: vec!["1.1.1.16/28"],
        },
        TestCase {
            network: "1.1.1.4/30",
            database: "ipv4",
            expected: vec!["1.1.1.4/30"],
        },
    ];

    for record_size in &[24_u32, 28, 32] {
        for test in &test_cases {
            let filename = format!(
                "test-data/test-data/MaxMind-DB-test-{}-{}.mmdb",
                test.database, record_size
            );
            let reader = Reader::open_readfile(&filename).unwrap();

            let cidr: IpNetwork = test.network.parse().unwrap();

            // Collect the yielded networks in iteration order; order is part
            // of what the assertion below checks.
            let mut networks: Vec<String> = Vec::new();
            for result in reader.within(cidr, Default::default()).unwrap() {
                let lookup = result.unwrap();
                networks.push(lookup.network().unwrap().to_string());
            }

            let expected: Vec<String> = test.expected.iter().map(|s| s.to_string()).collect();
            assert_eq!(
                networks, expected,
                "Mismatch for {} in {}-{}: expected {:?}, got {:?}",
                test.network, test.database, record_size, expected, networks
            );
        }
    }
}
"GeoIP2-Country-Test.mmdb", + "GeoIP2-Domain-Test.mmdb", + "GeoIP2-ISP-Test.mmdb", + "GeoIP2-Precision-Enterprise-Test.mmdb", + "MaxMind-DB-no-ipv4-search-tree.mmdb", + "MaxMind-DB-string-value-entries.mmdb", + "MaxMind-DB-test-decoder.mmdb", + "MaxMind-DB-test-ipv4-24.mmdb", + "MaxMind-DB-test-ipv4-28.mmdb", + "MaxMind-DB-test-ipv4-32.mmdb", + "MaxMind-DB-test-ipv6-24.mmdb", + "MaxMind-DB-test-ipv6-28.mmdb", + "MaxMind-DB-test-ipv6-32.mmdb", + "MaxMind-DB-test-mixed-24.mmdb", + "MaxMind-DB-test-mixed-28.mmdb", + "MaxMind-DB-test-mixed-32.mmdb", + "MaxMind-DB-test-nested.mmdb", + ]; + + for database in &databases { + let path = format!("test-data/test-data/{}", database); + let reader = Reader::open_readfile(&path) + .unwrap_or_else(|e| panic!("Failed to open {}: {}", database, e)); + + reader + .verify() + .unwrap_or_else(|e| panic!("verify() failed for {}: {}", database, e)); + } +} + +/// Test that verify() returns errors on broken databases (matching Go's TestVerifyOnBrokenDatabases) +#[test] +fn test_verify_broken_double_format() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/GeoIP2-City-Test-Broken-Double-Format.mmdb") + .unwrap(); + + let result = reader.verify(); + assert!( + result.is_err(), + "Expected verify() to return error for Broken-Double-Format, but it succeeded" + ); +} + +#[test] +fn test_verify_broken_pointers() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-broken-pointers-24.mmdb") + .unwrap(); + + let result = reader.verify(); + assert!( + result.is_err(), + "Expected verify() to return error for broken-pointers, but it succeeded" + ); +} + +#[test] +fn test_verify_broken_search_tree() { + let _ = env_logger::try_init(); + + let reader = + Reader::open_readfile("test-data/test-data/MaxMind-DB-test-broken-search-tree-24.mmdb") + .unwrap(); + + let result = reader.verify(); + assert!( + result.is_err(), + "Expected verify() to 
/// Test that size hints are properly returned for sequences and maps.
///
/// Two hand-written `Deserialize` wrappers record what the deserializer's
/// `SeqAccess::size_hint()` / `MapAccess::size_hint()` report *before* any
/// element is consumed, then drain the container so the hint can be compared
/// with the real element count.
#[test]
fn test_size_hints() {
    use serde::de::{Deserializer, MapAccess, SeqAccess, Visitor};
    use std::fmt;

    let _ = env_logger::try_init();

    // Wrapper that captures size_hint for sequences
    struct SeqSizeHint {
        hint: Option<usize>,
        // NOTE(review): element type inferred from the `vec![1, 2, 3]`
        // assertion below; assumes the test record stores uint32 values.
        values: Vec<u32>,
    }

    impl<'de> Deserialize<'de> for SeqSizeHint {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            struct V;
            impl<'de> Visitor<'de> for V {
                type Value = SeqSizeHint;
                fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    f.write_str("sequence")
                }
                fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
                    // Read the hint first: it must describe the full sequence.
                    let hint = seq.size_hint();
                    let mut values = Vec::new();
                    while let Some(v) = seq.next_element()? {
                        values.push(v);
                    }
                    Ok(SeqSizeHint { hint, values })
                }
            }
            deserializer.deserialize_seq(V)
        }
    }

    // Wrapper that captures size_hint for maps
    struct MapSizeHint {
        hint: Option<usize>,
        len: usize,
    }

    impl<'de> Deserialize<'de> for MapSizeHint {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            struct V;
            impl<'de> Visitor<'de> for V {
                type Value = MapSizeHint;
                fn expecting(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    f.write_str("map")
                }
                fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
                    let hint = map.size_hint();
                    let mut len = 0;
                    // Only the entry count matters, so values are skipped
                    // with IgnoredAny regardless of their type.
                    while map
                        .next_entry::<String, serde::de::IgnoredAny>()?
                        .is_some()
                    {
                        len += 1;
                    }
                    Ok(MapSizeHint { hint, len })
                }
            }
            deserializer.deserialize_map(V)
        }
    }

    #[derive(Deserialize)]
    struct TestType {
        array: SeqSizeHint,
        map: MapSizeHint,
    }

    let r = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap();
    let ip: IpAddr = "1.1.1.0".parse().unwrap();
    let lookup = r.lookup(ip).unwrap();
    assert!(lookup.has_data());
    let result: TestType = lookup.decode().unwrap().unwrap();

    // Verify array size hint matches actual length
    assert_eq!(result.array.hint, Some(3));
    assert_eq!(result.array.values, vec![1, 2, 3]);

    // Verify map size hint matches actual entry count
    assert_eq!(result.map.hint, Some(result.map.len));
    assert!(result.map.len > 0, "Map should have entries");
}
+#[test] +fn test_serde_flatten() { + use serde::de::IgnoredAny; + + let _ = env_logger::try_init(); + + #[derive(Deserialize, Debug)] + struct PartialCountry { + continent: Continent, + #[serde(flatten)] + _rest: std::collections::HashMap, + } + + #[derive(Deserialize, Debug)] + struct Continent { + code: String, + } + + let r = Reader::open_readfile("test-data/test-data/GeoIP2-Country-Test.mmdb").unwrap(); + let ip: IpAddr = "81.2.69.160".parse().unwrap(); + let lookup = r.lookup(ip).unwrap(); + assert!(lookup.has_data()); + + let result: PartialCountry = lookup.decode().unwrap().unwrap(); + assert_eq!(result.continent.code, "EU"); +} diff --git a/src/result.rs b/src/result.rs new file mode 100644 index 00000000..d3ccaaa6 --- /dev/null +++ b/src/result.rs @@ -0,0 +1,696 @@ +//! Lookup result types for deferred decoding. +//! +//! This module provides `LookupResult`, which enables lazy decoding of +//! MaxMind DB records. Instead of immediately deserializing data, you +//! get a lightweight handle that can be decoded later or navigated +//! selectively via paths. + +use std::net::IpAddr; + +use ipnetwork::IpNetwork; +use serde::Deserialize; + +use crate::decoder::{TYPE_ARRAY, TYPE_MAP}; +use crate::error::MaxMindDbError; +use crate::reader::Reader; + +/// The result of looking up an IP address in a MaxMind DB. +/// +/// This is a lightweight handle (~40 bytes) that stores the lookup result +/// without immediately decoding the data. 
You can: +/// +/// - Check if data exists with [`has_data()`](Self::has_data) +/// - Get the network containing the IP with [`network()`](Self::network) +/// - Decode the full record with [`decode()`](Self::decode) +/// - Decode a specific path with [`decode_path()`](Self::decode_path) +/// +/// # Example +/// +/// ``` +/// use maxminddb::{Reader, geoip2, PathElement}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// let ip: IpAddr = "89.160.20.128".parse().unwrap(); +/// +/// let result = reader.lookup(ip).unwrap(); +/// +/// if result.has_data() { +/// // Full decode +/// let city: geoip2::City = result.decode().unwrap().unwrap(); +/// +/// // Or selective decode via path +/// let country_code: Option = result.decode_path(&[ +/// PathElement::Key("country"), +/// PathElement::Key("iso_code"), +/// ]).unwrap(); +/// println!("Country: {:?}", country_code); +/// } +/// ``` +#[derive(Debug, Clone, Copy)] +pub struct LookupResult<'a, S: AsRef<[u8]>> { + reader: &'a Reader, + /// Offset into the data section, or None if not found. + data_offset: Option, + prefix_len: u8, + ip: IpAddr, +} + +impl<'a, S: AsRef<[u8]>> LookupResult<'a, S> { + /// Creates a new LookupResult for a found IP. + pub(crate) fn new_found( + reader: &'a Reader, + data_offset: usize, + prefix_len: u8, + ip: IpAddr, + ) -> Self { + LookupResult { + reader, + data_offset: Some(data_offset), + prefix_len, + ip, + } + } + + /// Creates a new LookupResult for an IP not in the database. + pub(crate) fn new_not_found(reader: &'a Reader, prefix_len: u8, ip: IpAddr) -> Self { + LookupResult { + reader, + data_offset: None, + prefix_len, + ip, + } + } + + /// Returns true if the database contains data for this IP address. + /// + /// Note that `false` means the database has no data for this IP, + /// which is different from an error during lookup. 
+ #[inline] + pub fn has_data(&self) -> bool { + self.data_offset.is_some() + } + + /// Returns the network containing the looked-up IP address. + /// + /// This is the most specific network in the database that contains + /// the IP, regardless of whether data was found. + /// + /// The returned network preserves the IP version of the original lookup: + /// - IPv4 lookups return IPv4 networks (unless prefix < 96, see below) + /// - IPv6 lookups return IPv6 networks (including IPv4-mapped addresses) + /// + /// Special case: If an IPv4 address is looked up in an IPv6 database but + /// the matching record is at a prefix length < 96 (e.g., a database with + /// no IPv4 subtree), an IPv6 network is returned since there's no valid + /// IPv4 representation. + pub fn network(&self) -> Result { + let (ip, prefix) = match self.ip { + IpAddr::V4(v4) => { + // For IPv4 lookups in IPv6 databases, prefix_len includes the + // 96-bit offset. Subtract it to get the IPv4 prefix. + // For IPv4 databases, prefix_len is already 0-32. + if self.prefix_len >= 96 { + // IPv6 database: subtract 96 to get IPv4 prefix + (IpAddr::V4(v4), self.prefix_len - 96) + } else if self.prefix_len > 32 { + // IPv6 database with record at prefix < 96 (e.g., ::/64). + // Return IPv6 network since there's no valid IPv4 representation. + use std::net::Ipv6Addr; + (IpAddr::V6(Ipv6Addr::UNSPECIFIED), self.prefix_len) + } else { + // IPv4 database: use prefix directly + (IpAddr::V4(v4), self.prefix_len) + } + } + IpAddr::V6(v6) => { + // For IPv6 lookups, preserve the IPv6 form (including IPv4-mapped) + (IpAddr::V6(v6), self.prefix_len) + } + }; + + // Mask the IP to the network address + let network_ip = mask_ip(ip, prefix); + IpNetwork::new(network_ip, prefix).map_err(MaxMindDbError::InvalidNetwork) + } + + /// Returns the data section offset if found, for use as a cache key. + /// + /// Multiple IP addresses often point to the same data record. 
This + /// offset can be used to deduplicate decoding or cache results. + /// + /// Returns `None` if the IP was not found. + #[inline] + pub fn offset(&self) -> Option { + self.data_offset + } + + /// Decodes the full record into the specified type. + /// + /// Returns: + /// - `Ok(Some(T))` if found and successfully decoded + /// - `Ok(None)` if the IP was not found in the database + /// - `Err(...)` if decoding fails + /// + /// # Example + /// + /// ``` + /// use maxminddb::{Reader, geoip2}; + /// use std::net::IpAddr; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// + /// let result = reader.lookup(ip).unwrap(); + /// if let Some(city) = result.decode::()? { + /// println!("Found city data"); + /// } + /// # Ok::<(), maxminddb::MaxMindDbError>(()) + /// ``` + pub fn decode(&self) -> Result, MaxMindDbError> + where + T: Deserialize<'a>, + { + let Some(offset) = self.data_offset else { + return Ok(None); + }; + + let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; + let mut decoder = super::decoder::Decoder::new(buf, offset); + T::deserialize(&mut decoder).map(Some) + } + + /// Decodes a value at a specific path within the record. + /// + /// Returns: + /// - `Ok(Some(T))` if the path exists and was successfully decoded + /// - `Ok(None)` if the path doesn't exist (key missing, index out of bounds) + /// - `Err(...)` if there's a type mismatch during navigation (e.g., `Key` on an array) + /// + /// If `has_data() == false`, returns `Ok(None)`. 
+ /// + /// # Path Elements + /// + /// - `PathElement::Key("name")` - Navigate into a map by key + /// - `PathElement::Index(0)` - Navigate into an array by index (0 = first element) + /// - `PathElement::IndexFromEnd(0)` - Navigate from the end (0 = last element) + /// + /// # Example + /// + /// ``` + /// use maxminddb::{Reader, PathElement}; + /// use std::net::IpAddr; + /// + /// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + /// let ip: IpAddr = "89.160.20.128".parse().unwrap(); + /// + /// let result = reader.lookup(ip).unwrap(); + /// + /// // Navigate to country.iso_code + /// let iso_code: Option = result.decode_path(&[ + /// PathElement::Key("country"), + /// PathElement::Key("iso_code"), + /// ]).unwrap(); + /// + /// // Navigate to subdivisions[0].names.en + /// let subdiv_name: Option = result.decode_path(&[ + /// PathElement::Key("subdivisions"), + /// PathElement::Index(0), + /// PathElement::Key("names"), + /// PathElement::Key("en"), + /// ]).unwrap(); + /// ``` + pub fn decode_path(&self, path: &[PathElement<'_>]) -> Result, MaxMindDbError> + where + T: Deserialize<'a>, + { + let Some(offset) = self.data_offset else { + return Ok(None); + }; + + let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; + let mut decoder = super::decoder::Decoder::new(buf, offset); + + // Navigate through the path, tracking position for error context + for (i, element) in path.iter().enumerate() { + // Closure to add path context to errors during navigation. + // Shows path up to and including the current element where the error occurred. 
+ let with_path = |e| add_path_context(e, &path[..=i]); + + match *element { + PathElement::Key(key) => { + let (_, type_num) = decoder.peek_type().map_err(with_path)?; + if type_num != TYPE_MAP { + return Err(MaxMindDbError::decoding_at_path( + format!("expected map for Key(\"{key}\"), got type {type_num}"), + decoder.offset(), + render_path(&path[..=i]), + )); + } + + // Consume the map header and get size + let size = decoder.consume_map_header().map_err(with_path)?; + + let mut found = false; + for _ in 0..size { + let k = decoder.read_string().map_err(with_path)?; + if k == key { + found = true; + break; + } else { + decoder.skip_value().map_err(with_path)?; + } + } + + if !found { + return Ok(None); + } + } + PathElement::Index(idx) => { + let (_, type_num) = decoder.peek_type().map_err(with_path)?; + if type_num != TYPE_ARRAY { + return Err(MaxMindDbError::decoding_at_path( + format!("expected array for Index({idx}), got type {type_num}"), + decoder.offset(), + render_path(&path[..=i]), + )); + } + + // Consume the array header and get size + let size = decoder.consume_array_header().map_err(with_path)?; + + if idx >= size { + return Ok(None); // Out of bounds + } + + // Skip to the target index + for _ in 0..idx { + decoder.skip_value().map_err(with_path)?; + } + } + PathElement::IndexFromEnd(idx) => { + let (_, type_num) = decoder.peek_type().map_err(with_path)?; + if type_num != TYPE_ARRAY { + return Err(MaxMindDbError::decoding_at_path( + format!("expected array for IndexFromEnd({idx}), got type {type_num}"), + decoder.offset(), + render_path(&path[..=i]), + )); + } + + // Consume the array header and get size + let size = decoder.consume_array_header().map_err(with_path)?; + + if idx >= size { + return Ok(None); // Out of bounds + } + + let actual_idx = size - 1 - idx; + + // Skip to the target index + for _ in 0..actual_idx { + decoder.skip_value().map_err(with_path)?; + } + } + } + } + + // Decode the value at the current position + 
T::deserialize(&mut decoder) + .map(Some) + .map_err(|e| add_path_context(e, path)) + } +} + +/// Adds path context to a Decoding error if it doesn't already have one. +fn add_path_context(err: MaxMindDbError, path: &[PathElement<'_>]) -> MaxMindDbError { + match err { + MaxMindDbError::Decoding { + message, + offset, + path: None, + } => MaxMindDbError::Decoding { + message, + offset, + path: Some(render_path(path)), + }, + _ => err, + } +} + +/// Renders path elements as a JSON-pointer-like string (e.g., "/city/names/0"). +fn render_path(path: &[PathElement<'_>]) -> String { + use std::fmt::Write; + let mut s = String::new(); + for elem in path { + s.push('/'); + match elem { + PathElement::Key(k) => s.push_str(k), + PathElement::Index(i) => write!(s, "{i}").unwrap(), + PathElement::IndexFromEnd(i) => write!(s, "{}", -((*i as isize) + 1)).unwrap(), + } + } + s +} + +/// A path element for navigating into nested data structures. +/// +/// Used with [`LookupResult::decode_path()`] to selectively decode +/// specific fields without parsing the entire record. +/// +/// # Creating Path Elements +/// +/// You can create path elements directly or use the [`path!`](crate::path) macro +/// for a more convenient syntax: +/// +/// ``` +/// use maxminddb::{path, PathElement}; +/// +/// // Direct construction +/// let path = [PathElement::Key("country"), PathElement::Key("iso_code")]; +/// +/// // Using the macro - string literals become Keys, integers become Indexes +/// let path = path!["country", "iso_code"]; +/// let path = path!["subdivisions", 0, "names"]; // Mixed keys and indexes +/// let path = path!["array", -1]; // Negative indexes count from the end +/// ``` +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum PathElement<'a> { + /// Navigate into a map by key. + Key(&'a str), + /// Navigate into an array by index (0-based from the start). 
+ /// + /// - `Index(0)` - first element + /// - `Index(1)` - second element + Index(usize), + /// Navigate into an array by index from the end. + /// + /// - `IndexFromEnd(0)` - last element + /// - `IndexFromEnd(1)` - second-to-last element + IndexFromEnd(usize), +} + +impl<'a> From<&'a str> for PathElement<'a> { + fn from(s: &'a str) -> Self { + PathElement::Key(s) + } +} + +impl From for PathElement<'_> { + /// Converts an integer to a path element. + /// + /// - Non-negative values become `Index(n)` + /// - Negative values become `IndexFromEnd(-n - 1)`, so `-1` is the last element + fn from(n: i32) -> Self { + if n >= 0 { + PathElement::Index(n as usize) + } else { + PathElement::IndexFromEnd((-n - 1) as usize) + } + } +} + +impl From for PathElement<'_> { + fn from(n: usize) -> Self { + PathElement::Index(n) + } +} + +impl From for PathElement<'_> { + /// Converts a signed integer to a path element. + /// + /// - Non-negative values become `Index(n)` + /// - Negative values become `IndexFromEnd(-n - 1)`, so `-1` is the last element + fn from(n: isize) -> Self { + if n >= 0 { + PathElement::Index(n as usize) + } else { + PathElement::IndexFromEnd((-n - 1) as usize) + } + } +} + +/// Creates a path for use with [`LookupResult::decode_path()`](crate::LookupResult::decode_path). +/// +/// This macro provides a convenient way to construct paths with mixed string keys +/// and integer indexes. 
+/// +/// # Syntax +/// +/// - String literals become [`PathElement::Key`] +/// - Non-negative integers become [`PathElement::Index`] +/// - Negative integers become [`PathElement::IndexFromEnd`] (e.g., `-1` is the last element) +/// +/// # Examples +/// +/// ``` +/// use maxminddb::{Reader, path}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// let ip: IpAddr = "89.160.20.128".parse().unwrap(); +/// let result = reader.lookup(ip).unwrap(); +/// +/// // Navigate to country.iso_code +/// let iso_code: Option = result.decode_path(&path!["country", "iso_code"]).unwrap(); +/// +/// // Navigate to subdivisions[0].names.en +/// let subdiv: Option = result.decode_path(&path!["subdivisions", 0, "names", "en"]).unwrap(); +/// ``` +/// +/// ``` +/// use maxminddb::{Reader, path}; +/// use std::net::IpAddr; +/// +/// let reader = Reader::open_readfile("test-data/test-data/MaxMind-DB-test-decoder.mmdb").unwrap(); +/// let ip: IpAddr = "::1.1.1.0".parse().unwrap(); +/// let result = reader.lookup(ip).unwrap(); +/// +/// // Access the last element of an array +/// let last: Option = result.decode_path(&path!["array", -1]).unwrap(); +/// assert_eq!(last, Some(3)); +/// +/// // Access the second-to-last element +/// let second_to_last: Option = result.decode_path(&path!["array", -2]).unwrap(); +/// assert_eq!(second_to_last, Some(2)); +/// ``` +#[macro_export] +macro_rules! path { + ($($elem:expr),* $(,)?) => { + [$($crate::PathElement::from($elem)),*] + }; +} + +/// Masks an IP address to its network address given a prefix length. 
+fn mask_ip(ip: IpAddr, prefix: u8) -> IpAddr { + match ip { + IpAddr::V4(v4) => { + if prefix >= 32 { + IpAddr::V4(v4) + } else { + let int: u32 = v4.into(); + let mask = if prefix == 0 { + 0 + } else { + !0u32 << (32 - prefix) + }; + IpAddr::V4((int & mask).into()) + } + } + IpAddr::V6(v6) => { + if prefix >= 128 { + IpAddr::V6(v6) + } else { + let int: u128 = v6.into(); + let mask = if prefix == 0 { + 0 + } else { + !0u128 << (128 - prefix) + }; + IpAddr::V6((int & mask).into()) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_mask_ipv4() { + let ip: IpAddr = "192.168.1.100".parse().unwrap(); + assert_eq!(mask_ip(ip, 24), "192.168.1.0".parse::().unwrap()); + assert_eq!(mask_ip(ip, 16), "192.168.0.0".parse::().unwrap()); + assert_eq!(mask_ip(ip, 32), "192.168.1.100".parse::().unwrap()); + assert_eq!(mask_ip(ip, 0), "0.0.0.0".parse::().unwrap()); + } + + #[test] + fn test_mask_ipv6() { + let ip: IpAddr = "2001:db8:85a3::8a2e:370:7334".parse().unwrap(); + assert_eq!( + mask_ip(ip, 64), + "2001:db8:85a3::".parse::().unwrap() + ); + assert_eq!(mask_ip(ip, 32), "2001:db8::".parse::().unwrap()); + } + + #[test] + fn test_path_element_debug() { + assert_eq!(format!("{:?}", PathElement::Key("test")), "Key(\"test\")"); + assert_eq!(format!("{:?}", PathElement::Index(5)), "Index(5)"); + assert_eq!( + format!("{:?}", PathElement::IndexFromEnd(0)), + "IndexFromEnd(0)" + ); + } + + #[test] + fn test_path_element_from_str() { + let elem: PathElement = "key".into(); + assert_eq!(elem, PathElement::Key("key")); + } + + #[test] + fn test_path_element_from_i32() { + // Positive values become Index + let elem: PathElement = PathElement::from(0i32); + assert_eq!(elem, PathElement::Index(0)); + + let elem: PathElement = PathElement::from(5i32); + assert_eq!(elem, PathElement::Index(5)); + + // Negative values become IndexFromEnd + // -1 → IndexFromEnd(0) (last element) + let elem: PathElement = PathElement::from(-1i32); + assert_eq!(elem, 
PathElement::IndexFromEnd(0)); + + // -2 → IndexFromEnd(1) (second-to-last) + let elem: PathElement = PathElement::from(-2i32); + assert_eq!(elem, PathElement::IndexFromEnd(1)); + + // -3 → IndexFromEnd(2) + let elem: PathElement = PathElement::from(-3i32); + assert_eq!(elem, PathElement::IndexFromEnd(2)); + } + + #[test] + fn test_path_element_from_usize() { + let elem: PathElement = PathElement::from(0usize); + assert_eq!(elem, PathElement::Index(0)); + + let elem: PathElement = PathElement::from(42usize); + assert_eq!(elem, PathElement::Index(42)); + } + + #[test] + fn test_path_element_from_isize() { + let elem: PathElement = PathElement::from(0isize); + assert_eq!(elem, PathElement::Index(0)); + + let elem: PathElement = PathElement::from(-1isize); + assert_eq!(elem, PathElement::IndexFromEnd(0)); + } + + #[test] + fn test_path_macro_keys_only() { + let p = path!["country", "iso_code"]; + assert_eq!(p.len(), 2); + assert_eq!(p[0], PathElement::Key("country")); + assert_eq!(p[1], PathElement::Key("iso_code")); + } + + #[test] + fn test_path_macro_mixed() { + let p = path!["subdivisions", 0, "names", "en"]; + assert_eq!(p.len(), 4); + assert_eq!(p[0], PathElement::Key("subdivisions")); + assert_eq!(p[1], PathElement::Index(0)); + assert_eq!(p[2], PathElement::Key("names")); + assert_eq!(p[3], PathElement::Key("en")); + } + + #[test] + fn test_path_macro_negative_indexes() { + let p = path!["array", -1]; + assert_eq!(p.len(), 2); + assert_eq!(p[0], PathElement::Key("array")); + assert_eq!(p[1], PathElement::IndexFromEnd(0)); // last element + + let p = path!["data", -2, "value"]; + assert_eq!(p[1], PathElement::IndexFromEnd(1)); // second-to-last + } + + #[test] + fn test_path_macro_trailing_comma() { + let p = path!["a", "b",]; + assert_eq!(p.len(), 2); + } + + #[test] + fn test_path_macro_empty() { + let p: [PathElement; 0] = path![]; + assert_eq!(p.len(), 0); + } + + #[test] + fn test_render_path() { + assert_eq!(render_path(&[]), ""); + 
assert_eq!(render_path(&[PathElement::Key("city")]), "/city"); + assert_eq!( + render_path(&[PathElement::Key("city"), PathElement::Key("names")]), + "/city/names" + ); + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::Index(0)]), + "/arr/0" + ); + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::Index(42)]), + "/arr/42" + ); + // IndexFromEnd(0) = last = -1, IndexFromEnd(1) = second-to-last = -2 + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::IndexFromEnd(0)]), + "/arr/-1" + ); + assert_eq!( + render_path(&[PathElement::Key("arr"), PathElement::IndexFromEnd(1)]), + "/arr/-2" + ); + } + + #[test] + fn test_decode_path_error_includes_path() { + use crate::Reader; + + let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); + let ip: IpAddr = "89.160.20.128".parse().unwrap(); + let result = reader.lookup(ip).unwrap(); + + // Try to navigate with Index on a map (root is a map, not array) + let err = result + .decode_path::(&[PathElement::Index(0)]) + .unwrap_err(); + let err_str = err.to_string(); + assert!( + err_str.contains("path: /0"), + "error should include path context: {err_str}" + ); + assert!( + err_str.contains("expected array"), + "error should mention expected type: {err_str}" + ); + + // Try to navigate deeper and fail at second element + let err = result + .decode_path::(&[PathElement::Key("city"), PathElement::Index(0)]) + .unwrap_err(); + let err_str = err.to_string(); + assert!( + err_str.contains("path: /city/0"), + "error should include full path to failure: {err_str}" + ); + } +} diff --git a/src/within.rs b/src/within.rs new file mode 100644 index 00000000..233bd2ae --- /dev/null +++ b/src/within.rs @@ -0,0 +1,276 @@ +//! Network iteration types. + +use std::cmp::Ordering; +use std::net::IpAddr; + +use crate::decoder; +use crate::error::MaxMindDbError; +use crate::reader::Reader; +use crate::result::LookupResult; + +/// Options for network iteration. 
/// Options for network iteration.
///
/// Controls which networks are yielded when iterating over the database
/// with [`Reader::within()`] or [`Reader::networks()`]. Every flag starts
/// out `false`; each builder method below switches exactly one flag on and
/// returns the updated copy.
///
/// # Example
///
/// ```
/// use maxminddb::WithinOptions;
///
/// // Default options (skip aliases, skip networks without data, include empty values)
/// let opts = WithinOptions::default();
///
/// // Include aliased networks (IPv4 networks via IPv6 aliases)
/// let opts = WithinOptions::default().include_aliased_networks();
///
/// // Skip empty values and include networks without data
/// let opts = WithinOptions::default()
///     .skip_empty_values()
///     .include_networks_without_data();
/// ```
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct WithinOptions {
    /// Include IPv4 networks multiple times when accessed via IPv6 aliases.
    include_aliased_networks: bool,
    /// Include networks that have no associated data record.
    include_networks_without_data: bool,
    /// Skip networks whose data is an empty map or empty array.
    skip_empty_values: bool,
}

impl WithinOptions {
    /// Include IPv4 networks multiple times when accessed via IPv6 aliases.
    ///
    /// In IPv6 databases, IPv4 networks are stored at `::0/96`. However, the
    /// same data is accessible through several IPv6 prefixes (e.g.,
    /// `::ffff:0:0/96` for IPv4-mapped IPv6). By default, these aliases are
    /// skipped to avoid yielding the same network multiple times.
    ///
    /// When enabled, the iterator will yield these aliased networks.
    #[must_use]
    pub fn include_aliased_networks(self) -> Self {
        Self {
            include_aliased_networks: true,
            ..self
        }
    }

    /// Include networks that have no associated data record.
    ///
    /// Some tree nodes point to "no data" (the node_count sentinel). By default
    /// these are skipped. When enabled, these networks are yielded and
    /// [`LookupResult::has_data()`] returns `false` for them.
    #[must_use]
    pub fn include_networks_without_data(self) -> Self {
        Self {
            include_networks_without_data: true,
            ..self
        }
    }

    /// Skip networks whose data is an empty map or empty array.
    ///
    /// Some databases store empty maps `{}` or empty arrays `[]` for records
    /// without meaningful data. This option filters them out.
    #[must_use]
    pub fn skip_empty_values(self) -> Self {
        Self {
            skip_empty_values: true,
            ..self
        }
    }
}
+/// +/// # Example +/// +/// ``` +/// use maxminddb::{Reader, WithinOptions, geoip2}; +/// +/// let reader = Reader::open_readfile("test-data/test-data/GeoIP2-City-Test.mmdb").unwrap(); +/// for result in reader.within("89.160.20.0/24".parse().unwrap(), Default::default()).unwrap() { +/// let lookup = result.unwrap(); +/// if let Some(city) = lookup.decode::().unwrap() { +/// println!("{}: {:?}", lookup.network().unwrap(), city.city.names.english); +/// } +/// } +/// ``` +#[derive(Debug)] +pub struct Within<'de, S: AsRef<[u8]>> { + pub(crate) reader: &'de Reader, + pub(crate) node_count: usize, + pub(crate) stack: Vec, + pub(crate) options: WithinOptions, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum IpInt { + V4(u32), + V6(u128), +} + +impl IpInt { + pub(crate) fn new(ip_addr: IpAddr) -> Self { + match ip_addr { + IpAddr::V4(v4) => IpInt::V4(v4.into()), + IpAddr::V6(v6) => IpInt::V6(v6.into()), + } + } + + #[inline(always)] + pub(crate) fn get_bit(&self, index: usize) -> bool { + match self { + IpInt::V4(ip) => (ip >> (31 - index)) & 1 == 1, + IpInt::V6(ip) => (ip >> (127 - index)) & 1 == 1, + } + } + + pub(crate) fn bit_count(&self) -> usize { + match self { + IpInt::V4(_) => 32, + IpInt::V6(_) => 128, + } + } + + pub(crate) fn is_ipv4_in_ipv6(&self) -> bool { + match self { + IpInt::V4(_) => false, + IpInt::V6(ip) => *ip <= 0xFFFFFFFF, + } + } +} + +impl<'de, S: AsRef<[u8]>> Iterator for Within<'de, S> { + type Item = Result, MaxMindDbError>; + + fn next(&mut self) -> Option { + while let Some(current) = self.stack.pop() { + let bit_count = current.ip_int.bit_count(); + + // Skip networks that are aliases for the IPv4 network (unless option is set) + if !self.options.include_aliased_networks + && self.reader.ipv4_start != 0 + && current.node == self.reader.ipv4_start + && bit_count == 128 + && !current.ip_int.is_ipv4_in_ipv6() + { + continue; + } + + match current.node.cmp(&self.node_count) { + Ordering::Greater => { + // This is a data 
node, emit it and we're done (until the following next call) + let ip_addr = ip_int_to_addr(¤t.ip_int); + + // Resolve the pointer to a data offset + let data_offset = match self.reader.resolve_data_pointer(current.node) { + Ok(offset) => offset, + Err(e) => return Some(Err(e)), + }; + + // Check if we should skip empty values + if self.options.skip_empty_values { + match self.is_empty_value_at(data_offset) { + Ok(true) => continue, // Skip empty value + Ok(false) => {} // Not empty, proceed + Err(e) => return Some(Err(e)), + } + } + + return Some(Ok(LookupResult::new_found( + self.reader, + data_offset, + current.prefix_len as u8, + ip_addr, + ))); + } + Ordering::Equal => { + // Dead end (no data) - include if option is set + if self.options.include_networks_without_data { + let ip_addr = ip_int_to_addr(¤t.ip_int); + return Some(Ok(LookupResult::new_not_found( + self.reader, + current.prefix_len as u8, + ip_addr, + ))); + } + // Otherwise skip (current behavior) + } + Ordering::Less => { + // In order traversal of our children + // right/1-bit + let mut right_ip_int = current.ip_int; + + if current.prefix_len < bit_count { + let bit = current.prefix_len; + match &mut right_ip_int { + IpInt::V4(ip) => *ip |= 1 << (31 - bit), + IpInt::V6(ip) => *ip |= 1 << (127 - bit), + }; + } + + let node = match self.reader.read_node(current.node, 1) { + Ok(node) => node, + Err(e) => return Some(Err(e)), + }; + self.stack.push(WithinNode { + node, + ip_int: right_ip_int, + prefix_len: current.prefix_len + 1, + }); + // left/0-bit + let node = match self.reader.read_node(current.node, 0) { + Ok(node) => node, + Err(e) => return Some(Err(e)), + }; + self.stack.push(WithinNode { + node, + ip_int: current.ip_int, + prefix_len: current.prefix_len + 1, + }); + } + } + } + None + } +} + +impl<'de, S: AsRef<[u8]>> Within<'de, S> { + /// Check if the value at the given data offset is an empty map or array. 
+ fn is_empty_value_at(&self, data_offset: usize) -> Result { + let buf = &self.reader.buf.as_ref()[self.reader.pointer_base..]; + let mut dec = decoder::Decoder::new(buf, data_offset); + let (size, type_num) = dec.peek_type()?; + match type_num { + decoder::TYPE_MAP | decoder::TYPE_ARRAY => Ok(size == 0), + _ => Ok(false), // Non-container types are never "empty" + } + } +} + +/// Convert IpInt to IpAddr +pub(crate) fn ip_int_to_addr(ip_int: &IpInt) -> IpAddr { + match ip_int { + IpInt::V4(ip) => IpAddr::V4((*ip).into()), + IpInt::V6(ip) => { + // Check if this is an IPv4-mapped IPv6 address + if *ip <= 0xFFFFFFFF { + IpAddr::V4((*ip as u32).into()) + } else { + IpAddr::V6((*ip).into()) + } + } + } +}