Skip to content

Commit

Permalink
[python] add all supported dtypes
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 3, 2020
1 parent 1cac33f commit e84f7f7
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 10 deletions.
86 changes: 85 additions & 1 deletion py-polars/pypolars/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,62 @@ class Date64:
pass


class Time32Millisecond:
    """Marker type for the Arrow ``Time32`` data type with millisecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class Time32Second:
    """Marker type for the Arrow ``Time32`` data type with second unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class Time64Nanosecond:
    """Marker type for the Arrow ``Time64`` data type with nanosecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class Time64Microsecond:
    """Marker type for the Arrow ``Time64`` data type with microsecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class DurationNanosecond:
    """Marker type for the Arrow ``Duration`` data type with nanosecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class DurationMicrosecond:
    """Marker type for the Arrow ``Duration`` data type with microsecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class DurationMillisecond:
    """Marker type for the Arrow ``Duration`` data type with millisecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class DurationSecond:
    """Marker type for the Arrow ``Duration`` data type with second unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class IntervalDayTime:
    """Marker type for the Arrow ``Interval`` data type with day-time unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class IntervalYearMonth:
    """Marker type for the Arrow ``Interval`` data type with year-month unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class TimestampNanosecond:
    """Marker type for the Arrow ``Timestamp`` data type with nanosecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class TimestampMicrosecond:
    """Marker type for the Arrow ``Timestamp`` data type with microsecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class TimestampMillisecond:
    """Marker type for the Arrow ``Timestamp`` data type with millisecond unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


class TimestampSecond:
    """Marker type for the Arrow ``Timestamp`` data type with second unit.

    Carries no state or behaviour; the class object itself is the value that
    is listed in ``dtypes`` and used as a key in ``DTYPE_TO_FFINAME``.
    """


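# Don't change the order of these! NOTE(review): the Rust `DataType` enum in py-polars/src/datatypes.rs carries the same warning and lists its variants in this same order — presumably an entry's position here must match the enum's `u8` discriminant; confirm before reordering or inserting.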
dtypes = [
Int8,
Expand All @@ -78,6 +134,20 @@ class Date64:
LargeList,
Date32,
Date64,
Time32Millisecond,
Time32Second,
Time64Nanosecond,
Time64Microsecond,
DurationNanosecond,
DurationMicrosecond,
DurationMillisecond,
DurationSecond,
IntervalDayTime,
IntervalYearMonth,
TimestampNanosecond,
TimestampMicrosecond,
TimestampMillisecond,
TimestampSecond,
]
DTYPE_TO_FFINAME = {
Int8: "i8",
Expand All @@ -92,9 +162,23 @@ class Date64:
Float64: "f64",
Bool: "bool",
Utf8: "str",
LargeList: "largelist",
LargeList: "large_list",
Date32: "date32",
Date64: "date64",
Time32Millisecond: "time32_millisecond",
Time32Second: "time32_second",
Time64Nanosecond: "time64_nanosecond",
Time64Microsecond: "time64_microsecond",
DurationNanosecond: "duration_nanosecond",
DurationMicrosecond: "duration_microsecond",
DurationMillisecond: "duration_millisecond",
DurationSecond: "duration_second",
IntervalDayTime: "interval_daytime",
IntervalYearMonth: "interval_yearmonth",
TimestampNanosecond: "timestamp_nanosecond",
TimestampMicrosecond: "timestamp_microsecond",
TimestampMillisecond: "timestamp_millisecond",
TimestampSecond: "timestamp_second",
}


Expand Down
45 changes: 37 additions & 8 deletions py-polars/pypolars/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def from_parquet(
@staticmethod
def from_ipc(file: Union[str, BinaryIO]) -> DataFrame:
"""
Read into a DataFrame from Arrow IPC stream format.
Read into a DataFrame from Arrow IPC stream format. This is also called the feather format.
Parameters
----------
Expand All @@ -107,9 +107,25 @@ def from_ipc(file: Union[str, BinaryIO]) -> DataFrame:
self._df = PyDataFrame.from_ipc(file)
return self

@staticmethod
def from_feather(file: Union[str, BinaryIO]) -> DataFrame:
    """
    Read a feather file into a DataFrame.

    Thin alias: the call is forwarded unchanged to ``DataFrame.from_ipc``,
    so both methods accept exactly the same input.

    Parameters
    ----------
    file
        Path to a file or a file like object.

    Returns
    -------
    DataFrame
    """
    return DataFrame.from_ipc(file)

def to_csv(
self,
path: str,
file: Union[TextIO, str],
batch_size: int = 100000,
has_headers: bool = True,
delimiter: str = ",",
Expand All @@ -119,7 +135,7 @@ def to_csv(
Parameters
----------
path
file
write location
batch_size
Size of the write buffer. Increase to have faster io.
Expand All @@ -128,20 +144,33 @@ def to_csv(
delimiter
Space elements with this symbol.
"""
self._df.to_csv(path, batch_size, has_headers, ord(delimiter))
self._df.to_csv(file, batch_size, has_headers, ord(delimiter))

def to_ipc(self, file: Union[BinaryIO, str], batch_size: int) -> None:
    """
    Write to Arrow IPC binary stream, or a feather file.

    Parameters
    ----------
    file
        Write location: a path, or a binary file like object.
    batch_size
        Size of the write buffer. Increase to have faster io.
    """
    # All the work happens in the wrapped frame object; nothing is returned.
    self._df.to_ipc(file, batch_size)

def to_feather(self, file: Union[BinaryIO, str], batch_size: int) -> None:
    """
    Write to Arrow IPC binary stream, or a feather file.

    Thin alias: the call is forwarded unchanged to ``self.to_ipc``.

    Parameters
    ----------
    file
        Write location: a path, or a binary file like object.
    batch_size
        Size of the write buffer. Increase to have faster io.
    """
    self.to_ipc(file, batch_size)

def __str__(self) -> str:
return self._df.as_str()
Expand Down
30 changes: 29 additions & 1 deletion py-polars/src/datatypes.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use polars::datatypes::ArrowDataType;
use polars::datatypes::{ArrowDataType, IntervalUnit, TimeUnit};

// Don't change the order of these!
#[repr(u8)]
Expand All @@ -18,6 +18,20 @@ pub enum DataType {
LargeList,
Date32,
Date64,
Time32Millisecond,
Time32Second,
Time64Nanosecond,
Time64Microsecond,
DurationNanosecond,
DurationMicrosecond,
DurationMillisecond,
DurationSecond,
IntervalDayTime,
IntervalYearMonth,
TimestampNanosecond,
TimestampMicrosecond,
TimestampMillisecond,
TimestampSecond,
}

impl From<&ArrowDataType> for DataType {
Expand All @@ -39,6 +53,20 @@ impl From<&ArrowDataType> for DataType {
ArrowDataType::LargeList(_) => LargeList,
ArrowDataType::Date32(_) => Date32,
ArrowDataType::Date64(_) => Date64,
ArrowDataType::Time32(TimeUnit::Millisecond) => Time32Millisecond,
ArrowDataType::Time32(TimeUnit::Second) => Time32Second,
ArrowDataType::Time64(TimeUnit::Nanosecond) => Time64Nanosecond,
ArrowDataType::Time64(TimeUnit::Microsecond) => Time64Microsecond,
ArrowDataType::Interval(IntervalUnit::DayTime) => IntervalDayTime,
ArrowDataType::Interval(IntervalUnit::YearMonth) => IntervalYearMonth,
ArrowDataType::Duration(TimeUnit::Nanosecond) => DurationNanosecond,
ArrowDataType::Duration(TimeUnit::Microsecond) => DurationMicrosecond,
ArrowDataType::Duration(TimeUnit::Millisecond) => DurationMillisecond,
ArrowDataType::Duration(TimeUnit::Second) => DurationSecond,
ArrowDataType::Timestamp(TimeUnit::Nanosecond, _) => TimestampNanosecond,
ArrowDataType::Timestamp(TimeUnit::Microsecond, _) => TimestampMicrosecond,
ArrowDataType::Timestamp(TimeUnit::Millisecond, _) => TimestampMillisecond,
ArrowDataType::Timestamp(TimeUnit::Second, _) => TimestampSecond,
dt => panic!(format!("datatype: {:?} not supported", dt)),
}
}
Expand Down

0 comments on commit e84f7f7

Please sign in to comment.