Skip to content

Commit

Permalink
Add serde support
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 3, 2021
1 parent 91bfe3b commit a5f17b0
Show file tree
Hide file tree
Showing 7 changed files with 389 additions and 56 deletions.
2 changes: 2 additions & 0 deletions polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ default = ["docs",
"dtype-slim",
]
ndarray = ["polars-core/ndarray"]
# serde support for dataframes and series
serde = ["polars-core/serde"]
parquet = ["polars-io", "polars-core/parquet", "polars-lazy/parquet", "polars-io/parquet"]
lazy = ["polars-core/lazy", "polars-lazy"]
# commented out until UB is fixed
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use std::fmt::{Display, Formatter};

/// Empty struct used purely as a type-level tag for UTF-8 string data
/// (for example as the target of `cast::<Utf8Type>()`).
pub struct Utf8Type {}

/// Empty struct used as a type-level tag for list data.
///
/// Derives `Serialize`/`Deserialize` only when the `serde` feature is enabled.
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ListType {}

/// Empty struct used as a type-level tag for categorical data.
pub struct CategoricalType {}
Expand Down Expand Up @@ -410,7 +411,6 @@ impl PartialOrd for AnyValue<'_> {
}

#[derive(Clone, PartialEq, Eq, Debug, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DataType {
Boolean,
UInt8,
Expand Down
3 changes: 3 additions & 0 deletions polars/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ pub mod select;
mod upstream_traits;
use crate::prelude::sort::prepare_argsort;
use crate::POOL;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// A collection of [`Series`] stored as columns.
///
/// With the `serde` feature enabled the frame derives `Serialize` and
/// `Deserialize`, which in turn relies on the serde impls of `Series`.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct DataFrame {
// Column storage; crate-private, so external code cannot touch it directly.
pub(crate) columns: Vec<Series>,
}
Expand Down
101 changes: 101 additions & 0 deletions polars/polars-core/src/serde/chunked_array.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
use super::DeDataType;
use crate::prelude::*;
use serde::ser::SerializeStruct;
use serde::{Serialize, Serializer};
use std::cell::RefCell;

/// One-shot adapter that lets a by-value iterable be serialized as a sequence.
///
/// `Serialize::serialize` only receives `&self`, so the iterable is parked in a
/// `RefCell<Option<_>>` from which it is moved out at serialization time.
pub struct IterSer<I: IntoIterator>
where
    I::Item: Serialize,
{
    /// `Some` until the wrapped iterable has been taken out for serialization.
    iter: RefCell<Option<I>>,
}

impl<I: IntoIterator> IterSer<I>
where
    I::Item: Serialize,
{
    /// Wraps `iter` so it can later be moved out through a shared reference.
    fn new(iter: I) -> Self {
        let slot = RefCell::new(Some(iter));
        Self { iter: slot }
    }
}

impl<I> Serialize for IterSer<I>
where
I: IntoIterator,
<I as IntoIterator>::Item: Serialize,
{
fn serialize<S>(
&self,
serializer: S,
) -> std::result::Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where
S: Serializer,
{
let iter: I = self.iter.borrow_mut().take().unwrap();
serializer.collect_seq(iter.into_iter())
}
}

impl<T> Serialize for ChunkedArray<T>
where
T: PolarsNumericType,
T::Native: Serialize,
{
fn serialize<S>(
&self,
serializer: S,
) -> std::result::Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_struct("series", 3)?;
state.serialize_field("name", self.name())?;
let dtype: DeDataType = self.dtype().into();
state.serialize_field("datatype", &dtype)?;
state.serialize_field("values", &IterSer::new(self.into_iter()))?;
state.end()
}
}

macro_rules! impl_serialize {
($ca: ident) => {
impl Serialize for $ca {
fn serialize<S>(
&self,
serializer: S,
) -> std::result::Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_struct("series", 3)?;
state.serialize_field("name", self.name())?;
let dtype: DeDataType = self.dtype().into();
state.serialize_field("datatype", &dtype)?;
state.serialize_field("values", &IterSer::new(self.into_iter()))?;
state.end()
}
}
};
}

impl_serialize!(Utf8Chunked);
impl_serialize!(BooleanChunked);
impl_serialize!(ListChunked);

impl Serialize for CategoricalChunked {
fn serialize<S>(
&self,
serializer: S,
) -> std::result::Result<<S as Serializer>::Ok, <S as Serializer>::Error>
where
S: Serializer,
{
let ca = self.cast::<Utf8Type>().unwrap();
ca.serialize(serializer)
}
}
90 changes: 90 additions & 0 deletions polars/polars-core/src/serde/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,91 @@
use serde::{Deserialize, Serialize};
pub mod chunked_array;
pub mod series;
use crate::prelude::*;

/// Intermediate, serde-friendly mirror of [crate::datatypes::DataType].
///
/// Needed because [crate::datatypes::DataType] holds a `&'static str` and
/// deriving `Deserialize` on it directly would therefore require
/// `Deserialize<'static>`. This enum borrows the string for a lifetime `'a`
/// instead.
///
/// NOTE(review): variant names (and, for index-based formats, presumably their
/// order) are part of the serialized representation — confirm before changing.
#[derive(Serialize, Deserialize, Debug)]
enum DeDataType<'a> {
Boolean,
UInt8,
UInt16,
UInt32,
UInt64,
Int8,
Int16,
Int32,
Int64,
Float32,
Float64,
Utf8,
Date32,
Date64,
Time64(TimeUnit),
// Carries no inner type: the conversion from `DataType::List(_)` drops it.
List,
// Borrowed object type name.
Object(&'a str),
Null,
Categorical,
}

impl From<&DataType> for DeDataType<'_> {
fn from(dt: &DataType) -> Self {
match dt {
DataType::Int32 => DeDataType::Int32,
DataType::UInt32 => DeDataType::UInt32,
DataType::Int64 => DeDataType::Int64,
DataType::UInt64 => DeDataType::UInt64,
DataType::Date32 => DeDataType::Date32,
DataType::Date64 => DeDataType::Date64,
DataType::Float32 => DeDataType::Float32,
DataType::Float64 => DeDataType::Float64,
DataType::Utf8 => DeDataType::Utf8,
DataType::Boolean => DeDataType::Boolean,
DataType::Null => DeDataType::Null,
DataType::List(_) => DeDataType::List,
#[cfg(feature = "object")]
DataType::Object(s) => DeDataType::Object(s),
_ => unimplemented!(),
}
}
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::prelude::*;

    /// Round-trips a numeric and a UTF-8 chunked array (both containing a
    /// null) through JSON and checks that the resulting `Series` is equal.
    #[test]
    fn test_serde() {
        let ca = UInt32Chunked::new_from_opt_slice("foo", &[Some(1), None, Some(2)]);

        let json = serde_json::to_string(&ca).unwrap();
        let out = serde_json::from_str::<Series>(&json).unwrap();
        assert!(ca.into_series().series_equal_missing(&out));

        let ca = Utf8Chunked::new_from_opt_slice("foo", &[Some("foo"), None, Some("bar")]);

        let json = serde_json::to_string(&ca).unwrap();
        let out = serde_json::from_str::<Series>(&json).unwrap();
        assert!(ca.into_series().series_equal_missing(&out));
    }

    /// Round-trips a frame with an integer, a list and a nullable boolean
    /// column through JSON and checks that the result is equal.
    #[test]
    fn test_serde_df() {
        let s = Series::new("foo", &[1, 2, 3]);
        let s1 = Series::new("bar", &[Some(true), None, Some(false)]);
        let s_list = Series::new("list", &[s.clone(), s.clone(), s.clone()]);

        let df = DataFrame::new(vec![s, s_list, s1]).unwrap();
        let json = serde_json::to_string(&df).unwrap();
        let out = serde_json::from_str::<DataFrame>(&json).unwrap();
        assert!(df.frame_equal_missing(&out));
    }
}

0 comments on commit a5f17b0

Please sign in to comment.