Skip to content

Commit

Permalink
Python: inner dtype and use pyo3 to get dtypes
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 13, 2021
1 parent 9187061 commit 82c3694
Show file tree
Hide file tree
Showing 11 changed files with 44 additions and 50 deletions.
2 changes: 1 addition & 1 deletion py-polars/.flake8
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
max-line-length = 180
# E203, W503: due to black fmt
ignore = E203,W503
exclude = legacy, docs
exclude = legacy, docs, venv

2 changes: 1 addition & 1 deletion py-polars/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pre-commit:
$(PYTHON_BIN)/black .
$(PYTHON_BIN)/blackdoc .
$(PYTHON_BIN)/mypy
$(PYTHON_BIN) flake8 .
$(PYTHON_BIN) flake8
make -C .. fmt_toml
$(PYTHON) -m cargo fmt --all

Expand Down
3 changes: 2 additions & 1 deletion py-polars/docs/source/reference/series.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ Attributes
:toctree: api/

Series.dtype
Series.inner_dtype
Series.name
Series.shape
Series.skew
Series.arr
Series.dt
Series.str

Expand Down
22 changes: 0 additions & 22 deletions py-polars/polars/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import ctypes
import typing as tp
from typing import Any, Dict, Type

try:
Expand Down Expand Up @@ -88,27 +87,6 @@ class Categorical(DataType):
pass


# Don't change the order of these!
DTYPES: tp.List[Type[DataType]] = [
Int8,
Int16,
Int32,
Int64,
UInt8,
UInt16,
UInt32,
UInt64,
Float32,
Float64,
Boolean,
Utf8,
List,
Date,
Datetime,
Time,
Object,
Categorical,
]
_DTYPE_TO_FFINAME: Dict[Type[DataType], str] = {
Int8: "i8",
Int16: "i16",
Expand Down
11 changes: 2 additions & 9 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,7 @@
_DOCUMENTING = True

from polars._html import NotebookFormatter
from polars.datatypes import (
DTYPES,
Boolean,
DataType,
Datetime,
UInt32,
py_type_to_dtype,
)
from polars.datatypes import Boolean, DataType, Datetime, UInt32, py_type_to_dtype
from polars.utils import _process_null_values

try:
Expand Down Expand Up @@ -1631,7 +1624,7 @@ def dtypes(self) -> tp.List[Type[DataType]]:
--------
schema : Return a dict of [column name, dtype]
"""
return [DTYPES[idx] for idx in self._df.dtypes()]
return self._df.dtypes()

@property
def schema(self) -> Dict[str, Type[DataType]]:
Expand Down
14 changes: 12 additions & 2 deletions py-polars/polars/internals/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
_DOCUMENTING = True

from polars.datatypes import (
DTYPES,
Boolean,
DataType,
Date,
Expand Down Expand Up @@ -639,7 +638,18 @@ def dtype(self) -> Type[DataType]:
<class 'polars.datatypes.Int64'>
"""
return DTYPES[self._s.dtype()]
return self._s.dtype()

@property
def inner_dtype(self) -> Optional[Type[DataType]]:
"""
Get the inner dtype of a List typed Series.
Returns
-------
DataType
The dtype reported by the underlying series' ``inner_dtype()``.
NOTE(review): the annotation is Optional, so this is presumably None
when the Series is not List typed -- confirm against the Rust binding.
"""
return self._s.inner_dtype()

def describe(self) -> "pli.DataFrame":
"""
Expand Down
5 changes: 5 additions & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ impl ToPyObject for Wrap<DataType> {
DataType::Boolean => pl.getattr("Boolean").unwrap().into(),
DataType::Utf8 => pl.getattr("Utf8").unwrap().into(),
DataType::List(_) => pl.getattr("List").unwrap().into(),
DataType::Date => pl.getattr("Date").unwrap().into(),
DataType::Datetime => pl.getattr("Datetime").unwrap().into(),
DataType::Object(_) => pl.getattr("Object").unwrap().into(),
DataType::Categorical => pl.getattr("Categorical").unwrap().into(),
DataType::Time => pl.getattr("Time").unwrap().into(),
dt => panic!("{} not supported", dt),
}
}
Expand Down
14 changes: 5 additions & 9 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use crate::apply::dataframe::{
apply_lambda_with_utf8_out_type,
};
use crate::conversion::{ObjectValue, Wrap};
use crate::datatypes::PyDataType;
use crate::file::get_mmap_bytes_reader;
use crate::lazy::dataframe::PyLazyFrame;
use crate::prelude::{dicts_to_rows, str_to_null_strategy};
Expand Down Expand Up @@ -481,15 +480,12 @@ impl PyDataFrame {
}

/// Get datatypes
pub fn dtypes(&self) -> Vec<u8> {
self.df
.dtypes()
pub fn dtypes(&self, py: Python) -> PyObject {
let iter = self
.df
.iter()
.map(|arrow_dtype| {
let dt: PyDataType = arrow_dtype.into();
dt as u8
})
.collect()
.map(|s| Wrap(s.dtype().clone()).to_object(py));
PyList::new(py, iter).to_object(py)
}

pub fn n_chunks(&self) -> PyResult<usize> {
Expand Down
2 changes: 1 addition & 1 deletion py-polars/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use pyo3::{FromPyObject, PyAny, PyResult};

// Don't change the order of these!
#[repr(u8)]
pub enum PyDataType {
pub(crate) enum PyDataType {
Int8,
Int16,
Int32,
Expand Down
13 changes: 9 additions & 4 deletions py-polars/src/series.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::apply::series::ApplyLambda;
use crate::arrow_interop::to_rust::array_to_rust;
use crate::dataframe::PyDataFrame;
use crate::datatypes::PyDataType;
use crate::error::PyPolarsEr;
use crate::list_construction::py_seq_to_list;
use crate::utils::{downsample_str_to_rule, reinterpret, str_to_polarstype};
Expand Down Expand Up @@ -360,9 +359,15 @@ impl PySeries {
self.series.rename(name);
}

pub fn dtype(&self) -> u8 {
let dt: PyDataType = self.series.dtype().into();
dt as u8
pub fn dtype(&self, py: Python) -> PyObject {
Wrap(self.series.dtype().clone()).to_object(py)
}

/// Return the inner dtype of this series' dtype as a Python object.
///
/// Yields `None` when the dtype's `inner_dtype()` reports no inner type;
/// otherwise the inner dtype is cloned, wrapped in `Wrap`, and converted
/// with `to_object` using the supplied `py` token.
pub fn inner_dtype(&self, py: Python) -> Option<PyObject> {
    // `?` short-circuits to `None` when there is no inner dtype.
    let inner = self.series.dtype().inner_dtype()?;
    Some(Wrap(inner.clone()).to_object(py))
}

pub fn mean(&self) -> Option<f64> {
Expand Down
6 changes: 6 additions & 0 deletions py-polars/tests/test_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ def test_contains() -> None:

out = pl.select(pl.lit(a).arr.contains(2)).to_series()
testing.assert_series_equal(out, expected)


def test_dtype() -> None:
    """A Series built from nested integer lists reports List and Int64 dtypes."""
    nested = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]])
    # Outer dtype is the List type; inner_dtype exposes the element type.
    assert nested.dtype == pl.List
    assert nested.inner_dtype == pl.Int64

0 comments on commit 82c3694

Please sign in to comment.