This repository has been archived by the owner on Feb 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 221
/
stream.rs
71 lines (58 loc) · 2.16 KB
/
stream.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
//! Contains `async` APIs to write to parquet.
use futures::AsyncWrite;
use parquet2::metadata::{KeyValue, SchemaDescriptor};
use parquet2::write::RowGroupIter;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use super::file::add_arrow_schema;
use super::{to_parquet_schema, WriteOptions};
/// An interface to write a parquet file to an [`AsyncWrite`].
///
/// Pairs a [`parquet2::write::FileStreamer`] with the [`Schema`] that was
/// supplied when the streamer was constructed.
pub struct FileStreamer<W: AsyncWrite + Unpin + Send> {
// Underlying parquet2 streamer; the header, row-group and footer writes are delegated to it.
writer: parquet2::write::FileStreamer<W>,
// Schema supplied at construction; exposed via `schema()` and handed to
// `add_arrow_schema` when the footer is written.
schema: Schema,
}
// Read-only accessors
impl<W> FileStreamer<W>
where
    W: AsyncWrite + Unpin + Send,
{
    /// Returns the [`WriteOptions`] reported by the underlying parquet writer.
    pub fn options(&self) -> &WriteOptions {
        let inner = &self.writer;
        inner.options()
    }

    /// Returns the parquet [`SchemaDescriptor`] reported by the underlying parquet writer.
    pub fn parquet_schema(&self) -> &SchemaDescriptor {
        let inner = &self.writer;
        inner.schema()
    }

    /// Returns the [`Schema`] this streamer was constructed with.
    pub fn schema(&self) -> &Schema {
        let Self { schema, .. } = self;
        schema
    }
}
impl<W> FileStreamer<W>
where
    W: AsyncWrite + Unpin + Send,
{
    /// Builds a new [`FileStreamer`] that writes to `writer`.
    ///
    /// # Errors
    /// Fails when a parquet schema cannot be derived from the given [`Schema`].
    pub fn try_new(writer: W, schema: Schema, options: WriteOptions) -> Result<Self> {
        // Derive the parquet schema first so a failure returns before anything else is built.
        let descriptor = to_parquet_schema(&schema)?;
        let created_by = String::from("Arrow2 - Native Rust implementation of Arrow");
        let inner =
            parquet2::write::FileStreamer::new(writer, descriptor, options, Some(created_by));
        Ok(Self {
            writer: inner,
            schema,
        })
    }

    /// Writes the header of the file.
    ///
    /// # Errors
    /// Propagates any error reported by the underlying parquet writer.
    pub async fn start(&mut self) -> Result<()> {
        self.writer.start().await?;
        Ok(())
    }

    /// Writes a row group to the file.
    ///
    /// Both `row_group` and `num_rows` are forwarded unchanged to the underlying
    /// parquet writer.
    ///
    /// # Errors
    /// Propagates any error reported by the underlying parquet writer.
    pub async fn write(
        &mut self,
        row_group: RowGroupIter<'_, ArrowError>,
        num_rows: usize,
    ) -> Result<()> {
        self.writer.write(row_group, num_rows).await?;
        Ok(())
    }

    /// Writes the footer of the parquet file, consuming the streamer.
    ///
    /// The supplied `key_value_metadata` is first passed through `add_arrow_schema`
    /// together with this streamer's [`Schema`]. On success, returns the total size
    /// of the file along with the wrapped writer.
    ///
    /// # Errors
    /// Propagates any error reported by the underlying parquet writer.
    pub async fn end(self, key_value_metadata: Option<Vec<KeyValue>>) -> Result<(u64, W)> {
        let metadata = add_arrow_schema(&self.schema, key_value_metadata);
        let finished = self.writer.end(metadata).await?;
        Ok(finished)
    }
}