Skip to content

Commit 7adda55

Browse files
fix: bytes scanned in query (#1464)
* fix: bytes scanned in query. Instead of using file_size from the manifest (which is the size of the JSON), we should use ingestion_size (which is the compressed size).
* update the compressed size for query bytes scanned
1 parent 16189eb commit 7adda55

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

src/query/stream_schema_provider.rs

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -327,7 +327,7 @@ impl StandardTableProvider {
327327
let mut partitioned_files = Vec::from_iter((0..target_partition).map(|_| Vec::new()));
328328
let mut column_statistics = HashMap::<String, Option<TypedStatistics>>::new();
329329
let mut count = 0;
330-
let mut total_file_size = 0u64;
330+
let mut total_compressed_size = 0u64;
331331
let mut file_count = 0u64;
332332
for (index, file) in manifest_files
333333
.into_iter()
@@ -339,13 +339,14 @@ impl StandardTableProvider {
339339
mut file_path,
340340
num_rows,
341341
columns,
342-
file_size,
343342
..
344343
} = file;
345344

346345
// Track billing metrics for files scanned in query
347346
file_count += 1;
348-
total_file_size += file_size;
347+
// Calculate actual compressed bytes that will be read from storage
348+
let compressed_bytes: u64 = columns.iter().map(|col| col.compressed_size).sum();
349+
total_compressed_size += compressed_bytes;
349350

350351
// object_store::path::Path doesn't automatically deal with Windows path separators
351352
// to do that, we are using from_absolute_path() which takes into consideration the underlying filesystem
@@ -406,7 +407,8 @@ impl StandardTableProvider {
406407
// Track billing metrics for query scan
407408
let current_date = chrono::Utc::now().date_naive().to_string();
408409
increment_files_scanned_in_query_by_date(file_count, &current_date);
409-
increment_bytes_scanned_in_query_by_date(total_file_size, &current_date);
410+
// Use compressed size as it represents actual bytes read from storage (S3/object store charges)
411+
increment_bytes_scanned_in_query_by_date(total_compressed_size, &current_date);
410412

411413
(partitioned_files, statistics)
412414
}

0 commit comments

Comments
 (0)