This repository has been archived by the owner on Feb 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 221
/
memory.rs
113 lines (107 loc) · 4.38 KB
/
memory.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
use crate::array::*;
use crate::bitmap::Bitmap;
use crate::datatypes::PhysicalType;
/// Returns the length of the slice backing an optional validity bitmap,
/// or `0` when the array carries no validity.
fn validity_size(validity: &Option<Bitmap>) -> usize {
    match validity {
        Some(bitmap) => {
            // Only the first component of `as_slice` (the backing slice) is measured.
            let (bytes, ..) = bitmap.as_slice();
            bytes.len()
        }
        None => 0,
    }
}
/// Estimates the size of a variable-length binary-like array (`$ty`) whose
/// offsets are of type `$o`: values length + offsets buffer + validity.
///
/// Panics if `$array` cannot be downcast to `$ty`.
macro_rules! dyn_binary {
    ($array:expr, $ty:ty, $o:ty) => {{
        let typed = $array.as_any().downcast_ref::<$ty>().unwrap();
        let values_bytes = typed.values().len();
        let offsets_bytes = typed.offsets().len() * std::mem::size_of::<$o>();
        values_bytes + offsets_bytes + validity_size(typed.validity())
    }};
}
/// Estimates the size of a dictionary array keyed by `$ty` as the sum of the
/// estimated sizes of its keys array and its values array.
///
/// Panics if `$array` cannot be downcast to `DictionaryArray<$ty>`.
macro_rules! dyn_dict {
    ($array:expr, $ty:ty) => {{
        let dict = $array
            .as_any()
            .downcast_ref::<DictionaryArray<$ty>>()
            .unwrap();
        let keys_bytes = estimated_bytes_size(dict.keys());
        let values_bytes = estimated_bytes_size(dict.values().as_ref());
        keys_bytes + values_bytes
    }};
}
/// Returns the total (heap) allocated size of the array in bytes.
/// # Implementation
/// This estimation is the sum of the size of its buffers, validity, including nested arrays.
/// Multiple arrays may share buffers and bitmaps. Therefore, the size of 2 arrays is not the
/// sum of the sizes computed from this function. In particular, [`StructArray`]'s size is an upper bound.
///
/// When an array is sliced, its allocated size remains constant because the buffer is unchanged.
/// However, this function will yield a smaller number. This is because this function returns
/// the visible size of the buffer, not its total capacity.
///
/// FFI buffers are included in this estimation.
/// # Panics
/// Panics if the concrete type of `array` does not match the array type that this
/// function associates with its physical type (every downcast is unwrapped).
pub fn estimated_bytes_size(array: &dyn Array) -> usize {
    use PhysicalType::*;
    match array.data_type().to_physical_type() {
        Null => 0,
        Boolean => {
            let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
            array.values().as_slice().0.len() + validity_size(array.validity())
        }
        Primitive(primitive) => with_match_primitive_type!(primitive, |$T| {
            let array = array
                .as_any()
                .downcast_ref::<PrimitiveArray<$T>>()
                .unwrap();
            array.values().len() * std::mem::size_of::<$T>() + validity_size(array.validity())
        }),
        Binary => dyn_binary!(array, BinaryArray<i32>, i32),
        FixedSizeBinary => {
            let array = array
                .as_any()
                .downcast_ref::<FixedSizeBinaryArray>()
                .unwrap();
            array.values().len() + validity_size(array.validity())
        }
        LargeBinary => dyn_binary!(array, BinaryArray<i64>, i64),
        Utf8 => dyn_binary!(array, Utf8Array<i32>, i32),
        LargeUtf8 => dyn_binary!(array, Utf8Array<i64>, i64),
        List => {
            let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();
            estimated_bytes_size(array.values().as_ref())
                + array.offsets().len() * std::mem::size_of::<i32>()
                + validity_size(array.validity())
        }
        FixedSizeList => {
            // A fixed-size list is a `FixedSizeListArray` (not a `ListArray<i64>`):
            // it has no offsets buffer, so only the child values and validity count.
            let array = array
                .as_any()
                .downcast_ref::<FixedSizeListArray>()
                .unwrap();
            estimated_bytes_size(array.values().as_ref()) + validity_size(array.validity())
        }
        LargeList => {
            let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
            estimated_bytes_size(array.values().as_ref())
                + array.offsets().len() * std::mem::size_of::<i64>()
                + validity_size(array.validity())
        }
        Struct => {
            let array = array.as_any().downcast_ref::<StructArray>().unwrap();
            // Sum of every child array plus the struct's own validity.
            array
                .values()
                .iter()
                .map(|x| x.as_ref())
                .map(estimated_bytes_size)
                .sum::<usize>()
                + validity_size(array.validity())
        }
        Union => {
            let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
            let types = array.types().len() * std::mem::size_of::<i8>();
            // Offsets are optional; they contribute nothing when absent.
            let offsets = array
                .offsets()
                .as_ref()
                .map(|x| x.len() * std::mem::size_of::<i32>())
                .unwrap_or_default();
            let fields = array
                .fields()
                .iter()
                .map(|x| x.as_ref())
                .map(estimated_bytes_size)
                .sum::<usize>();
            types + offsets + fields
        }
        Dictionary(key_type) => with_match_physical_dictionary_key_type!(key_type, |$T| {
            dyn_dict!(array, $T)
        }),
    }
}