This repository has been archived by the owner on Feb 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 221
/
utf8.rs
66 lines (56 loc) · 1.71 KB
/
utf8.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
use std::collections::VecDeque;
use std::io::{Read, Seek};
use arrow_format::ipc;
use crate::array::{Offset, Utf8Array};
use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::error::Result;
use super::super::deserialize::Node;
use super::super::read_basic::*;
/// Reads a [`Utf8Array`] from `reader`.
///
/// Pops the next entry of `field_nodes` and then reads, in this order, the
/// validity, the offsets (`length + 1` entries of `O`) and the values, whose
/// length is given by the last offset. `buffers`, `block_offset`,
/// `is_little_endian` and `compression` are forwarded unchanged to
/// `read_validity` / `read_buffer`.
///
/// # Errors
/// Returns the error produced while reading the validity or the values.
/// A failure while reading the offsets is *not* reported: a single
/// `O::default()` offset is used instead.
///
/// # Panics
/// Panics if `field_nodes` is empty.
pub fn read_utf8<O: Offset, R: Read + Seek>(
    field_nodes: &mut VecDeque<Node>,
    data_type: DataType,
    buffers: &mut VecDeque<&ipc::Schema::Buffer>,
    reader: &mut R,
    block_offset: u64,
    is_little_endian: bool,
    compression: Option<ipc::Message::BodyCompression>,
) -> Result<Utf8Array<O>> {
    let node = field_nodes.pop_front().unwrap();

    let validity = read_validity(
        buffers,
        node,
        reader,
        block_offset,
        is_little_endian,
        compression,
    )?;

    // One offset per element plus the trailing end offset.
    let offsets_len = 1 + node.length() as usize;
    let offsets: Buffer<O> = match read_buffer(
        buffers,
        offsets_len,
        reader,
        block_offset,
        is_little_endian,
        compression,
    ) {
        Ok(buffer) => buffer,
        // Older versions of the IPC format sometimes do not report an offset
        Err(_) => Buffer::<O>::from(&[O::default()]),
    };

    // The final offset is the total number of bytes held by the values buffer.
    let last_index = offsets.len() - 1;
    let values_len = offsets.as_slice()[last_index].to_usize();

    let values = read_buffer(
        buffers,
        values_len,
        reader,
        block_offset,
        is_little_endian,
        compression,
    )?;

    let array = Utf8Array::<O>::from_data(data_type, offsets, values, validity);
    Ok(array)
}
/// Skips a utf8 array without reading it.
///
/// Discards the next entry of `field_nodes` and the next three entries of
/// `buffers` (presumably the validity, offsets and values buffers that
/// `read_utf8` would have consumed).
///
/// # Panics
/// Panics if `field_nodes` is empty or `buffers` holds fewer than three entries.
pub fn skip_utf8(field_nodes: &mut VecDeque<Node>, buffers: &mut VecDeque<&ipc::Schema::Buffer>) {
    let _ = field_nodes.pop_front().unwrap();

    // Number of buffers dropped per skipped utf8 array.
    for _ in 0..3 {
        let _ = buffers.pop_front().unwrap();
    }
}