Skip to content

Commit

Permalink
python: read_ipc_schema
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 17, 2021
1 parent 4e9503f commit 7c0e034
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 1 deletion.
1 change: 1 addition & 0 deletions polars/polars-core/src/export.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub use arrow;
1 change: 1 addition & 0 deletions polars/polars-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod datatypes;
#[cfg(feature = "docs")]
pub mod doc;
pub mod error;
pub mod export;
mod fmt;
pub mod frame;
pub mod functions;
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Feather/ IPC
:toctree: api/

read_ipc
read_ipc_schema
DataFrame.to_ipc

Parquet
Expand Down
25 changes: 25 additions & 0 deletions py-polars/polars/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@

from .convert import from_arrow

try:
from polars.polars import ipc_schema as _ipc_schema
except ImportError:
pass

try:
import connectorx as cx

Expand All @@ -49,6 +54,7 @@
"read_ipc",
"scan_csv",
"scan_parquet",
"read_ipc_schema",
]


Expand Down Expand Up @@ -399,6 +405,25 @@ def scan_parquet(
)


def read_ipc_schema(
file: Union[str, BinaryIO, Path, bytes]
) -> Dict[str, Type["pl.DataType"]]:
"""
Get a schema of the IPC file without reading data.
Parameters
----------
file
Path to a file or a file like object.
Returns
-------
Dictionary mapping column names to datatypes
"""
return _ipc_schema(file)


def read_ipc(
file: Union[str, BinaryIO, Path, bytes],
use_pyarrow: bool = True,
Expand Down
24 changes: 24 additions & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,30 @@ impl IntoPy<PyObject> for Wrap<AnyValue<'_>> {
}
}

impl ToPyObject for Wrap<DataType> {
fn to_object(&self, py: Python) -> PyObject {
let pl = PyModule::import(py, "polars").unwrap();

match &self.0 {
DataType::Int8 => pl.getattr("Int8").unwrap().into(),
DataType::Int16 => pl.getattr("Int16").unwrap().into(),
DataType::Int32 => pl.getattr("Int32").unwrap().into(),
DataType::Int64 => pl.getattr("Int64").unwrap().into(),
DataType::UInt8 => pl.getattr("UInt8").unwrap().into(),
DataType::UInt16 => pl.getattr("UInt16").unwrap().into(),
DataType::UInt32 => pl.getattr("UInt32").unwrap().into(),
DataType::UInt64 => pl.getattr("UInt64").unwrap().into(),
DataType::Float32 => pl.getattr("Float32").unwrap().into(),
DataType::Float64 => pl.getattr("Float64").unwrap().into(),
DataType::Boolean => pl.getattr("Boolean").unwrap().into(),
DataType::Utf8 => pl.getattr("Utf8").unwrap().into(),
DataType::List(_) => pl.getattr("List").unwrap().into(),
dt => panic!("{} not supported", dt)
}

}
}

impl ToPyObject for Wrap<AnyValue<'_>> {
fn to_object(&self, py: Python) -> PyObject {
self.clone().into_py(py)
Expand Down
30 changes: 29 additions & 1 deletion py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use crate::{
dsl,
},
series::PySeries,
file::EitherRustPythonFile
};

pub mod apply;
Expand All @@ -28,9 +29,13 @@ pub mod prelude;
pub mod series;
pub mod utils;

use crate::conversion::{get_df, get_pyseq};
use crate::conversion::{get_df, get_pyseq, Wrap};
use polars_core::export::arrow::io::ipc::read::read_file_metadata;
use crate::error::PyPolarsEr;
use mimalloc::MiMalloc;
use pyo3::types::PyDict;
use crate::prelude::DataType;
use crate::file::get_either_file;

#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
Expand Down Expand Up @@ -135,6 +140,28 @@ fn concat_df(dfs: &PyAny) -> PyResult<PyDataFrame> {
Ok(df.into())
}




#[pyfunction]
fn ipc_schema(py: Python, py_f: PyObject) -> PyResult<PyObject>{
let metadata = match get_either_file(py_f, false)? {
EitherRustPythonFile::Rust(mut r) => {
read_file_metadata(&mut r).map_err(PyPolarsEr::from)?
},
EitherRustPythonFile::Py(mut r) => {
read_file_metadata(&mut r).map_err(PyPolarsEr::from)?
}
};

let dict = PyDict::new(py);
for field in metadata.schema().fields() {
let dt: Wrap<DataType> = Wrap((&field.data_type).into());
dict.set_item(field.name(), dt.to_object(py))?;
}
Ok(dict.to_object(py))
}

#[pymodule]
fn polars(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<PySeries>().unwrap();
Expand All @@ -159,5 +186,6 @@ fn polars(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(concat_str)).unwrap();
m.add_wrapped(wrap_pyfunction!(concat_lst)).unwrap();
m.add_wrapped(wrap_pyfunction!(concat_df)).unwrap();
m.add_wrapped(wrap_pyfunction!(ipc_schema)).unwrap();
Ok(())
}
9 changes: 9 additions & 0 deletions py-polars/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,12 @@ def test_to_json():
assert (
df.to_json() == '{"columns":[{"name":"a","datatype":"Int64","values":[1,2,3]}]}'
)


def test_ipc_schema():
df = pl.DataFrame({"a": [1, 2], "b": ["a", None], "c": [True, False]})
f = io.BytesIO()
df.to_ipc(f)
f.seek(0)

assert pl.read_ipc_schema(f) == {"a": pl.Int64, "b": pl.Utf8, "c": pl.Boolean}

0 comments on commit 7c0e034

Please sign in to comment.