Skip to content

Commit

Permalink
python data type units (#3609)
Browse files: browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 7, 2022
1 parent 47b3b20 commit 9a7c042
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 16 deletions.
9 changes: 9 additions & 0 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,15 @@ impl Display for TimeUnit {
}

impl TimeUnit {
pub fn to_ascii(self) -> &'static str {
use TimeUnit::*;
match self {
Nanoseconds => "ns",
Microseconds => "us",
Milliseconds => "ms",
}
}

pub fn to_arrow(self) -> ArrowTimeUnit {
match self {
TimeUnit::Nanoseconds => ArrowTimeUnit::Nanosecond,
Expand Down
49 changes: 47 additions & 2 deletions py-polars/polars/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,58 @@ class Date(DataType):
class Datetime(DataType):
    """Calendar date and time type, carrying a time unit and an optional time zone."""

    def __init__(self, time_unit: str = "us", time_zone: Optional[str] = None):
        """
        Create a Datetime data type.

        Parameters
        ----------
        time_unit
            Any of {'ns', 'us', 'ms'}
        time_zone
            Timezone string as defined in pytz
        """
        self.tu = time_unit
        self.tz = time_zone

    def __eq__(self, other: Type[DataType]) -> bool:  # type: ignore
        # Two instances are equal only when both unit and zone agree.
        if isinstance(other, Datetime):
            return self.tu == other.tu and self.tz == other.tz
        # Otherwise an instance still matches the bare Datetime class
        # (or a subclass of it); anything else is unequal.
        return type(other) is type and issubclass(other, Datetime)

    def __hash__(self) -> int:
        # Every instance hashes like the class itself, so the hash stays
        # consistent with __eq__ treating an instance as equal to the class.
        return hash(Datetime)


class Duration(DataType):
    """Time duration/delta type, carrying a time unit."""

    def __init__(self, time_unit: str = "us"):
        """
        Create a Duration data type.

        Parameters
        ----------
        time_unit
            Any of {'ns', 'us', 'ms'}
        """
        self.tu = time_unit

    def __eq__(self, other: Type[DataType]) -> bool:  # type: ignore
        # Two instances are equal only when their time units agree.
        if isinstance(other, Duration):
            return self.tu == other.tu
        # Otherwise an instance still matches the bare Duration class
        # (or a subclass of it); anything else is unequal.
        return type(other) is type and issubclass(other, Duration)

    def __hash__(self) -> int:
        # Every instance hashes like the class itself, so the hash stays
        # consistent with __eq__ treating an instance as equal to the class.
        return hash(Duration)


class Time(DataType):
Expand Down
17 changes: 14 additions & 3 deletions py-polars/polars/internals/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,13 @@ def alias(self, name: str) -> "Expr":

def exclude(
self,
columns: Union[str, List[str], Type[DataType], Sequence[Type[DataType]]],
columns: Union[
str,
List[str],
Union[DataType, Type[DataType]],
DataType,
Sequence[Union[DataType, Type[DataType]]],
],
) -> "Expr":
"""
Exclude certain columns from a wildcard/regex selection.
Expand Down Expand Up @@ -369,11 +375,16 @@ def exclude(
if isinstance(columns, str):
columns = [columns]
return wrap_expr(self._pyexpr.exclude(columns))
elif not isinstance(columns, Sequence) and issubclass(columns, DataType):
elif not isinstance(columns, Sequence) or isinstance(columns, DataType):
columns = [columns]
return wrap_expr(self._pyexpr.exclude_dtype(columns))

if not all([isinstance(a, str) or issubclass(a, DataType) for a in columns]):
if not all(
[
isinstance(a, str) or (type(a) is type and issubclass(a, DataType))
for a in columns
]
):
raise ValueError("input should be all string or all DataType")

if isinstance(columns[0], str):
Expand Down
30 changes: 26 additions & 4 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::dataframe::PyDataFrame;
use crate::error::PyPolarsErr;
use crate::lazy::dataframe::PyLazyFrame;
use crate::prelude::*;
use crate::py_modules::POLARS;
use crate::series::PySeries;
use polars::chunked_array::object::PolarsObjectSafe;
use polars::frame::row::Row;
Expand Down Expand Up @@ -241,7 +242,7 @@ impl IntoPy<PyObject> for Wrap<AnyValue<'_>> {

impl ToPyObject for Wrap<DataType> {
fn to_object(&self, py: Python) -> PyObject {
let pl = PyModule::import(py, "polars").unwrap();
let pl = POLARS.as_ref(py);

match &self.0 {
DataType::Int8 => pl.getattr("Int8").unwrap().into(),
Expand All @@ -262,8 +263,17 @@ impl ToPyObject for Wrap<DataType> {
list_class.call1((inner,)).unwrap().into()
}
DataType::Date => pl.getattr("Date").unwrap().into(),
DataType::Datetime(_, _) => pl.getattr("Datetime").unwrap().into(),
DataType::Duration(_) => pl.getattr("Duration").unwrap().into(),
DataType::Datetime(tu, tz) => {
let datetime_class = pl.getattr("Datetime").unwrap();
datetime_class
.call1((tu.to_ascii(), tz.clone()))
.unwrap()
.into()
}
DataType::Duration(tu) => {
let duration_class = pl.getattr("Duration").unwrap();
duration_class.call1((tu.to_ascii(),)).unwrap().into()
}
DataType::Object(_) => pl.getattr("Object").unwrap().into(),
DataType::Categorical(_) => pl.getattr("Categorical").unwrap().into(),
DataType::Time => pl.getattr("Time").unwrap().into(),
Expand Down Expand Up @@ -351,8 +361,20 @@ impl FromPyObject<'_> for Wrap<DataType> {
dt => panic!("{} not expected as Python type for dtype conversion", dt),
}
}
"Duration" => {
let tu = ob.getattr("tu").unwrap();
let tu = tu.extract::<Wrap<TimeUnit>>()?.0;
DataType::Duration(tu)
}
"Datetime" => {
let tu = ob.getattr("tu").unwrap();
let tu = tu.extract::<Wrap<TimeUnit>>()?.0;
let tz = ob.getattr("tz").unwrap();
let tz = tz.extract()?;
DataType::Datetime(tu, tz)
}
"List" => {
let inner = ob.getattr("inner")?;
let inner = ob.getattr("inner").unwrap();
let inner = inner.extract::<Wrap<DataType>>()?;
DataType::List(Box::new(inner.0))
}
Expand Down
9 changes: 2 additions & 7 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,14 +403,9 @@ fn py_date_range(
every: &str,
closed: Wrap<ClosedWindow>,
name: &str,
tu: &str,
tu: Wrap<TimeUnit>,
) -> PySeries {
let tu = match tu {
"ns" => TimeUnit::Nanoseconds,
"ms" => TimeUnit::Milliseconds,
_ => panic!("{}", "expected one of {'ns', 'ms'}"),
};
polars::time::date_range_impl(name, start, stop, Duration::parse(every), closed.0, tu)
polars::time::date_range_impl(name, start, stop, Duration::parse(every), closed.0, tu.0)
.into_series()
.into()
}
Expand Down
25 changes: 25 additions & 0 deletions py-polars/tests/test_datelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -938,3 +938,28 @@ def test_duration_aggregations() -> None:
[timedelta(days=2), timedelta(days=2)],
],
}


def test_datetime_units() -> None:
    """Excluding ``pl.Datetime(unit)`` must not leave the column of that unit selected."""
    units = ["ns", "us", "ms"]
    start = datetime(2020, 1, 1)
    stop = datetime(2020, 5, 1)

    # One column per time unit, each named after the unit it was built with.
    df = pl.DataFrame(
        {unit: pl.date_range(start, stop, "1mo", time_unit=unit) for unit in units}
    )
    all_names = set(df.columns)

    for unit in units:
        allowed = all_names - {unit}
        selected = df.select([pl.all().exclude(pl.Datetime(unit))]).columns

        # Nothing outside the allowed names may survive the exclusion.
        assert len(set(selected) - allowed) == 0

0 comments on commit 9a7c042

Please sign in to comment.