diff --git a/CHANGELOG.md b/CHANGELOG.md index e22afb0..345e9b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,13 @@ PUBLISHING PROCEDURE: 5. After publishing, the next PR author will add a new "## Unreleased" section --> +## Unreleased + +### Changed + +- Memory sizes now use base-1000 units throughout, which may slightly affect the automatic model-instance count heuristic. +- Improved the chunk-quality score reported by the `audit-chunks` and `dump-chunks` commands. The score now more reliably flags genuinely bad partitioner output. + ## 0.6.1 (2026-05-20) ### Changed diff --git a/README.md b/README.md index cfdb281..a2b05f8 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ Create `~/.monodex/monodex-config.json`: The `embeddingModel` section controls memory and CPU usage for embedding generation: -- **`modelInstances`**: Number of ONNX sessions. Each session uses approximately 700MB-1GB for the model weights and runtime, but the auto-detection heuristic plans for 2.5 GiB per instance to provide conservative headroom for memory fragmentation, peak usage during inference, and avoiding OOM on memory-constrained systems. Use `"auto"` to automatically size based on available system memory, or an integer ≥ 1 for explicit control. +- **`modelInstances`**: Number of ONNX sessions. Each session uses approximately 700MB-1GB for the model weights and runtime, but the auto-detection heuristic plans for 2.5 GB per instance to provide conservative headroom for memory fragmentation, peak usage during inference, and avoiding OOM on memory-constrained systems. Use `"auto"` to automatically size based on available system memory, or an integer ≥ 1 for explicit control. - **`threadsPerInstance`**: Threads per ONNX session for intra-op parallelism. Use `"auto"` to automatically size based on CPU cores, or an integer ≥ 1 for explicit control. 
**Catalog types:** @@ -364,7 +364,7 @@ monodex dump-chunks --file ./src/JsonFile.ts --target-size 4000 monodex audit-chunks --count 20 --folder /path/to/project ``` -**Chunk Quality Score**: 0-100%, higher is better. Scores below 95% may indicate chunking issues. Note: `dump-chunks` and `audit-chunks` use AST-only mode (fallback disabled) to accurately measure partitioner quality. +**Chunk Quality Score**: 0-100%, higher is better. The score is a maintainer heuristic, not a pass/fail metric. Scores below roughly 85% are worth inspecting; scores below roughly 60% usually indicate tiny chunks, oversized chunks, or severe over-splitting. Note: `dump-chunks` and `audit-chunks` use AST-only mode (fallback disabled) to accurately measure partitioner quality. ### Debug FTS Tokenization diff --git a/docs/design/chunker.md b/docs/design/chunker.md index be86c99..9f1a897 100644 --- a/docs/design/chunker.md +++ b/docs/design/chunker.md @@ -102,12 +102,12 @@ The result of the split-search is reflected in the breadcrumb attached to each c ### Quality scoring -`src/engine/partitioner/scoring.rs` computes a 0-100% score for a complete partitioning, used by `audit-chunks` to summarize chunker behavior across a sample of files. The score combines two badnesses: +`src/engine/partitioner/scoring.rs` computes a 0-100% score for a complete partitioning, used by `audit-chunks` and `dump-chunks` to summarize chunker behavior. The score is a maintainer triage heuristic, not a calibrated metric. It combines two badnesses: -- **Count badness.** Penalizes producing too many chunks relative to the ideal partition (total content size divided by max chunk size, rounded up). A file that should partition into 3 chunks but produces 7 has high count badness. -- **Micro badness.** Penalizes individual chunks being either too small (size below the threshold) or too large (size at or above max). For each chunk, a per-chunk badness is computed and averaged across the partition. 
+- **Size badness.** A per-chunk penalty that is zero across the healthy band `[SMALL_CHUNK_CHARS, TARGET_CHARS]` and nonzero only for chunks below `SMALL_CHUNK_CHARS` or above `TARGET_CHARS`. A single whole-file chunk at or below `TARGET_CHARS` is never penalized (it cannot be grown and must not be split, so a small whole-file chunk is not a runt). +- **Count badness.** A penalty that is forgiving of moderate over-splitting and only rises sharply as chunk count approaches the all-runt case. -The final score is `100 * (1 - count_badness)^α * (1 - micro_badness)^β` with both exponents currently set to 1. Scores below 95% are considered indicators of a chunking problem worth examining; the partitioner's quality is not a settled-once metric but something tuned over time, and these scoring weights are subject to revision. +The two badnesses combine multiplicatively with no exponents. Scores below roughly 85% are worth inspecting; scores below roughly 60% usually indicate tiny chunks, oversized chunks, or severe over-splitting. ### Development tools diff --git a/src/engine/partitioner/scoring.rs b/src/engine/partitioner/scoring.rs index d83d267..2631987 100644 --- a/src/engine/partitioner/scoring.rs +++ b/src/engine/partitioner/scoring.rs @@ -4,80 +4,76 @@ use super::types::{PartitionedChunk, SMALL_CHUNK_CHARS, TARGET_CHARS}; +/// Compute a 0-100% quality score for a partitioned file. +/// +/// The score is a maintainer triage heuristic for `audit-chunks` and `dump-chunks`, +/// not a calibrated metric. It measures two independent dimensions: +/// +/// - **Size badness**: penalizes chunks outside the healthy band `[SMALL_CHUNK_CHARS, TARGET_CHARS]`. +/// Chunks within the band have zero penalty. A single whole-file chunk at or below +/// `TARGET_CHARS` is never penalized (it cannot be grown and must not be split). +/// +/// - **Count badness**: penalizes producing far more chunks than the content requires. 
+/// Moderate over-splitting is forgiven; the penalty rises sharply as chunk count +/// approaches the all-runt case. +/// +/// The two badnesses combine multiplicatively with no exponents. Scores below roughly +/// 85% are worth inspecting; scores below roughly 60% usually indicate tiny chunks, +/// oversized chunks, or severe over-splitting. pub fn chunk_quality_score(chunks: &[PartitionedChunk], file_chars: usize) -> f64 { if chunks.is_empty() || file_chars == 0 { return 100.0; } - let max_chunk_size = TARGET_CHARS.min(file_chars); let chunk_count = chunks.len(); - - // Compute chunk sizes in characters let chunk_sizes: Vec = chunks.iter().map(|c| c.text.len()).collect(); - let total_chars: usize = chunk_sizes.iter().sum(); - // Ideal number of chunks - let ideal_chunk_count = total_chars.div_ceil(max_chunk_size); // ceil division - - // 1) Count badness: 0 at ideal chunk count, 1 at all 1-char chunks - let count_badness = if total_chars == ideal_chunk_count { - 0.0 - } else { - (chunk_count as f64 - ideal_chunk_count as f64) - / (total_chars as f64 - ideal_chunk_count as f64) - }; - - // Helper: chunk badness (0 at max size, 1 at 1 char) - // For oversized chunks, weight by how much work is unfinished - let chunk_badness = |size: usize| -> f64 { - if size >= max_chunk_size { - // Estimate: if we could split correctly, we'd get N chunks - // Weight the badness as if there were N unsplittable chunks - (size as f64 / max_chunk_size as f64).max(1.0) - } else { - ((max_chunk_size - size) as f64 / (max_chunk_size - 1) as f64).powi(2) - } - }; - - // 2) Micro-chunk badness relative to ideal partition - let ideal_last_chunk_size = - total_chars - max_chunk_size * (ideal_chunk_count.saturating_sub(1)); - let ideal_partition_badness = if ideal_chunk_count == 0 { - 0.0 - } else if ideal_chunk_count == 1 { - chunk_badness(ideal_last_chunk_size) - } else { - // All but last chunk are at max size (badness 0), last chunk may be smaller - chunk_badness(ideal_last_chunk_size) - 
}; - - let actual_partition_badness: f64 = chunk_sizes.iter().map(|&s| chunk_badness(s)).sum(); - - // Normalize by number of chunks, not total chars - // This gives an average badness per chunk, which is more meaningful - // Worst case: each chunk has badness 1.0 (either tiny or oversized with ratio 1.0) - let avg_badness = actual_partition_badness / chunk_count.max(1) as f64; - - // Also compute worst case normalized similarly - let ideal_avg_badness = ideal_partition_badness / ideal_chunk_count.max(1) as f64; - let worst_avg_badness = 1.0; // a chunk with badness 1.0 is the worst reasonable case - - let micro_badness = if worst_avg_badness == ideal_avg_badness { - 0.0 - } else { - (avg_badness - ideal_avg_badness) / (worst_avg_badness - ideal_avg_badness) - }; - - // Clamp for numerical safety - let count_badness = count_badness.clamp(0.0, 1.0); - let micro_badness = micro_badness.clamp(0.0, 1.0); - - // Final score: weight micro_badness (beta=1 gives linear penalty) - let alpha = 1.0; - let beta = 1.0; - let score = 100.0 * (1.0 - count_badness).powf(alpha) * (1.0 - micro_badness).powf(beta); + // Special case: a single whole-file chunk at or below TARGET_CHARS is never penalized. + // Such a chunk is the entire file; it cannot be grown and must not be split, + // so a small whole-file chunk is not a runt. + if chunk_count == 1 && chunk_sizes[0] <= TARGET_CHARS { + return 100.0; + } + + // Compute per-chunk size penalties. 
+ // 0 for chunks in [SMALL_CHUNK_CHARS, TARGET_CHARS] + // (SMALL_CHUNK_CHARS - size) / SMALL_CHUNK_CHARS for chunks below SMALL_CHUNK_CHARS + // ((size - TARGET_CHARS) / TARGET_CHARS).min(1.0) for chunks above TARGET_CHARS + let size_penalties: Vec = chunk_sizes + .iter() + .map(|&size| { + if (SMALL_CHUNK_CHARS..=TARGET_CHARS).contains(&size) { + 0.0 + } else if size < SMALL_CHUNK_CHARS { + (SMALL_CHUNK_CHARS - size) as f64 / SMALL_CHUNK_CHARS as f64 + } else { + // size > TARGET_CHARS + ((size - TARGET_CHARS) as f64 / TARGET_CHARS as f64).min(1.0) + } + }) + .collect(); + + // size_badness is the mean of per-chunk size penalties, in [0, 1] by construction. + let size_badness = size_penalties.iter().sum::() / chunk_count.max(1) as f64; + + // Compute count_badness. + // ideal = max(1, total_chars.div_ceil(TARGET_CHARS)) + // worst = max(ideal + 1, total_chars / SMALL_CHUNK_CHARS) + // surplus = chunk_count.saturating_sub(ideal) + // count_badness = (surplus / (worst - ideal)).min(1.0) + let ideal = total_chars.div_ceil(TARGET_CHARS).max(1); + let worst = (ideal + 1).max(total_chars / SMALL_CHUNK_CHARS); + let surplus = chunk_count.saturating_sub(ideal); + let count_badness = (surplus as f64 / (worst - ideal) as f64).min(1.0); + // Combine multiplicatively. + // Both badnesses are in [0, 1], so (1 - badness) is in [0, 1]. + // The product is in [0, 1], and 100 * product is in [0, 100]. + let score = 100.0 * (1.0 - size_badness) * (1.0 - count_badness); + + // Final clamp as a numerical safety net only; every intermediate value + // is already in range by construction. score.clamp(0.0, 100.0) } @@ -138,3 +134,141 @@ impl ChunkQualityReport { ) } } + +#[cfg(test)] +mod tests { + use super::*; + + /// Helper to create a minimal PartitionedChunk with the given text size. 
+ fn make_chunk(size: usize) -> PartitionedChunk { + PartitionedChunk { + source_uri: "test.ts".to_string(), + catalog: "test".to_string(), + content_hash: "hash".to_string(), + breadcrumb: "test.ts".to_string(), + text: "x".repeat(size), + start_line: 1, + end_line: 1, + chunk_type: "code".to_string(), + chunk_kind: "content".to_string(), + symbol_name: None, + split_part_ordinal: None, + split_part_count: None, + } + } + + #[test] + fn test_empty_input_scores_100() { + let chunks: Vec = vec![]; + let score = chunk_quality_score(&chunks, 0); + assert_eq!(score, 100.0); + } + + #[test] + fn test_all_target_sized_chunks_scores_100() { + // A partition of all TARGET_CHARS-sized chunks scores 100. + let chunks = vec![make_chunk(TARGET_CHARS), make_chunk(TARGET_CHARS)]; + let file_chars = 2 * TARGET_CHARS; + let score = chunk_quality_score(&chunks, file_chars); + assert_eq!(score, 100.0); + } + + #[test] + fn test_oversized_single_chunk_scores_0() { + // An oversized single chunk at twice TARGET_CHARS scores 0. + let chunks = vec![make_chunk(2 * TARGET_CHARS)]; + let file_chars = 2 * TARGET_CHARS; + let score = chunk_quality_score(&chunks, file_chars); + // size penalty = ((2*TARGET - TARGET) / TARGET).min(1.0) = 1.0 + // size_badness = 1.0 + // (1 - size_badness) = 0, so score = 0 + assert_eq!(score, 0.0); + } + + #[test] + fn test_many_runt_chunks_scores_near_0() { + // A file split into many sub-SMALL_CHUNK_CHARS runts scores near 0. 
+ let runt_size = 100; // well below SMALL_CHUNK_CHARS (500) + let num_runts = 20; + let chunks: Vec = (0..num_runts).map(|_| make_chunk(runt_size)).collect(); + let file_chars = runt_size * num_runts; + let score = chunk_quality_score(&chunks, file_chars); + // Each runt has size penalty = (500 - 100) / 500 = 0.8 + // size_badness = 0.8 + // ideal = max(1, 2000 / 6000) = 1 + // worst = max(2, 2000 / 500) = max(2, 4) = 4 + // surplus = 20 - 1 = 19 + // count_badness = min(1.0, 19 / 3) = 1.0 + // score = 100 * (1 - 0.8) * (1 - 1.0) = 100 * 0.2 * 0 = 0 + assert_eq!(score, 0.0); + } + + #[test] + fn test_single_small_whole_file_chunk_scores_100() { + // A single whole-file chunk below TARGET_CHARS scores 100. + let small_size = 1000; // below TARGET_CHARS (6000) + let chunks = vec![make_chunk(small_size)]; + let file_chars = small_size; + let score = chunk_quality_score(&chunks, file_chars); + assert_eq!(score, 100.0); + } + + #[test] + fn test_size_healthy_but_over_split_scores_high_but_below_100() { + // A size-healthy file split into roughly twice the ideal chunk count + // scores high but below 100 (count-penalized only). + // Use chunks in the healthy band [500, 6000]. + let chunk_size = 3000; // in healthy band + let num_chunks = 4; + let chunks: Vec = + (0..num_chunks).map(|_| make_chunk(chunk_size)).collect(); + let file_chars = chunk_size * num_chunks; // 12000 chars + + // ideal = max(1, 12000 / 6000) = 2 + // worst = max(3, 12000 / 500) = max(3, 24) = 24 + // surplus = 4 - 2 = 2 + // count_badness = 2 / 22 ≈ 0.091 + // size_badness = 0 (all chunks in healthy band) + // score = 100 * 1.0 * (1 - 0.091) ≈ 90.9 + let score = chunk_quality_score(&chunks, file_chars); + assert!(score > 85.0 && score < 100.0, "score was {}", score); + } + + #[test] + fn test_chunk_below_small_chunk_chars_has_penalty() { + // A single chunk below SMALL_CHUNK_CHARS (but not a whole file) should have + // a non-zero size penalty. 
But since it's the only chunk and <= TARGET_CHARS, + // it gets the special case and scores 100. + // So test with two chunks: one healthy, one small. + let chunks = vec![make_chunk(TARGET_CHARS), make_chunk(100)]; // 100 < SMALL_CHUNK_CHARS + let file_chars = TARGET_CHARS + 100; + let score = chunk_quality_score(&chunks, file_chars); + // First chunk: size penalty = 0 (in healthy band) + // Second chunk: size penalty = (500 - 100) / 500 = 0.8 + // size_badness = (0 + 0.8) / 2 = 0.4 + // ideal = max(1, ceil(6100 / 6000)) = 2 + // worst = max(3, floor(6100 / 500)) = max(3, 12) = 12 + // surplus = 2 - 2 = 0 + // count_badness = 0 + // score = 100 * (1 - 0.4) * (1 - 0) = 60 + assert!((score - 60.0).abs() < 0.1, "score was {}", score); + } + + #[test] + fn test_chunk_above_target_chars_has_penalty() { + // A chunk above TARGET_CHARS should have a size penalty. + let oversized = TARGET_CHARS + 1000; // 7000 + let chunks = vec![make_chunk(oversized)]; + let file_chars = oversized; + let score = chunk_quality_score(&chunks, file_chars); + // Single chunk but > TARGET_CHARS, so no special case. 
+ // size penalty = ((7000 - 6000) / 6000).min(1.0) = 1000/6000 ≈ 0.167 + // size_badness = 0.167 + // ideal = max(1, 7000 / 6000) = 2 + // worst = max(3, 7000 / 500) = max(3, 14) = 14 + // surplus = 1 - 2 = 0 (saturating_sub) + // count_badness = 0 + // score = 100 * (1 - 0.167) * 1 = 83.3 + assert!(score > 80.0 && score < 90.0, "score was {}", score); + } +} diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__colorize_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__colorize_summary.snap index 22ec24c..a275ef9 100644 --- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__colorize_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__colorize_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 86.1% +Score: 92.9% Total chunks: 3 Small chunks (<500 chars): 0 Chars: 1467-3324 (mean 2676) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__environment_config_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__environment_config_summary.snap index bd0d9cc..381843d 100644 --- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__environment_config_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__environment_config_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 88.9% +Score: 96.2% Total chunks: 7 Small chunks (<500 chars): 0 Chars: 2860-5357 (mean 4160) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__experiments_configuration_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__experiments_configuration_summary.snap index 2170de7..25242e9 100644 --- 
a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__experiments_configuration_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__experiments_configuration_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 86.5% +Score: 92.3% Total chunks: 3 Small chunks (<500 chars): 0 Chars: 1183-3644 (mean 2566) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__ipackagejson_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__ipackagejson_summary.snap index 4b9ae1a..75c7116 100644 --- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__ipackagejson_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__ipackagejson_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 84.4% +Score: 94.4% Total chunks: 3 Small chunks (<500 chars): 0 Chars: 2826-4030 (mean 3437) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__jsonfile_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__jsonfile_summary.snap index f5b60d4..828da65 100644 --- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__jsonfile_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__jsonfile_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 88.3% +Score: 94.6% Total chunks: 6 Small chunks (<500 chars): 0 Chars: 3051-4046 (mean 3453) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__module_minifier_plugin_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__module_minifier_plugin_summary.snap index 277f49a..a4288a5 100644 --- 
a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__module_minifier_plugin_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__module_minifier_plugin_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 80.0% +Score: 95.3% Total chunks: 6 Small chunks (<500 chars): 0 Chars: 1248-5807 (mean 3960) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_large_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_large_summary.snap index f7ef926..5e28fac 100644 --- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_large_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_large_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 79.4% +Score: 94.1% Total chunks: 3 Small chunks (<500 chars): 0 Chars: 1943-5151 (mean 3279) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_summary.snap index f7ef926..5e28fac 100644 --- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__parameter_form_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 79.4% +Score: 94.1% Total chunks: 3 Small chunks (<500 chars): 0 Chars: 1943-5151 (mean 3279) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__project_watcher_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__project_watcher_summary.snap index 6470f39..7b7e756 100644 
--- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__project_watcher_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__project_watcher_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 93.7% +Score: 93.9% Total chunks: 6 Small chunks (<500 chars): 0 Chars: 1613-3666 (mean 3122) diff --git a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__tunneled_summary.snap b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__tunneled_summary.snap index 6d87a59..9a97c2c 100644 --- a/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__tunneled_summary.snap +++ b/src/engine/partitioner/snapshots/monodex__engine__partitioner__tests__tunneled_summary.snap @@ -3,7 +3,7 @@ source: src/engine/partitioner/tests.rs expression: summary --- === QUALITY SCORE === -Score: 83.2% +Score: 93.8% Total chunks: 3 Small chunks (<500 chars): 0 Chars: 1920-3959 (mean 3043) diff --git a/src/engine/system_info.rs b/src/engine/system_info.rs index 5c0fb60..e257b75 100644 --- a/src/engine/system_info.rs +++ b/src/engine/system_info.rs @@ -7,7 +7,7 @@ use anyhow::Result; /// RAM used per ONNX model instance (in bytes) /// /// Based on empirical measurement, each ONNX session uses approximately 700MB-1GB -/// for the model weights plus runtime overhead. However, we use 2.5 GiB as the +/// for the model weights plus runtime overhead. However, we use 2.5 GB as the /// planning constant to provide conservative headroom for: /// - Memory fragmentation /// - Peak usage during inference @@ -16,14 +16,14 @@ use anyhow::Result; /// /// This conservative sizing ensures that the auto-detection heuristic errs on the /// side of using fewer instances rather than risking OOM failures. 
-pub const PER_INSTANCE_RAM: u64 = 2 * 1024 * 1024 * 1024 + 512 * 1024 * 1024; // 2.5 GiB +pub const PER_INSTANCE_RAM: u64 = 2_500_000_000; // 2.5 GB /// Baseline RAM to reserve for OS and other processes (in bytes) -/// We reserve the larger of 4 GiB or 25% of total RAM -const BASELINE_RESERVE_MIN: u64 = 4 * 1024 * 1024 * 1024; // 4 GiB +/// We reserve the larger of 4 GB or 25% of total RAM +const BASELINE_RESERVE_MIN: u64 = 4_000_000_000; // 4 GB /// Additional overhead for embedding process (tokenizer, buffers, etc.) -pub const EMBEDDING_OVERHEAD: u64 = 512 * 1024 * 1024; // 0.5 GiB +pub const EMBEDDING_OVERHEAD: u64 = 500_000_000; // 0.5 GB /// Resolved embedding configuration #[derive(Debug, Clone)] @@ -76,7 +76,7 @@ pub fn get_physical_core_count() -> usize { /// /// ```text /// effective_total_ram = min(total_memory, cgroup_memory_limit) [Linux only] -/// baseline_reserve = max(4 GiB, 25% of effective_total_ram) +/// baseline_reserve = max(4 GB, 25% of effective_total_ram) /// usable_ram = effective_total_ram - baseline_reserve /// ram_limited_instances = floor(usable_ram / PER_INSTANCE_RAM) /// @@ -182,8 +182,8 @@ fn get_effective_total_ram(sys: &sysinfo::System, total_memory: u64) -> (u64, bo /// Format bytes as human-readable string (e.g., "16.0 GB") pub fn format_bytes(bytes: u64) -> String { - const GB: u64 = 1024 * 1024 * 1024; - const MB: u64 = 1024 * 1024; + const GB: u64 = 1_000_000_000; + const MB: u64 = 1_000_000; if bytes >= GB { format!("{:.1} GB", bytes as f64 / GB as f64) @@ -225,8 +225,8 @@ mod tests { fn test_format_bytes() { assert_eq!(format_bytes(0), "0 bytes"); assert_eq!(format_bytes(1024), "1024 bytes"); - assert_eq!(format_bytes(1024 * 1024), "1.0 MB"); - assert_eq!(format_bytes(2 * 1024 * 1024 * 1024), "2.0 GB"); - assert_eq!(format_bytes(2_500_000_000), "2.3 GB"); + assert_eq!(format_bytes(1_000_000), "1.0 MB"); + assert_eq!(format_bytes(2_000_000_000), "2.0 GB"); + assert_eq!(format_bytes(2_500_000_000), "2.5 GB"); } }