Skip to content
This repository was archived by the owner on Nov 1, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 8 additions & 45 deletions torcharrow/icolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ def cast(self, dtype):
res._append_value(fun(i))
return res._finalize()
else:
raise TypeError('f"{astype}({dtype}) is not supported")')
raise TypeError('f"{astype} for {type(self).__name__} is not supported")')
raise TypeError(f"{dtype} for {type(self).__name__} is not supported")
raise TypeError(f"{self.dtype} for {type(self).__name__} is not supported")

# public simple observers -------------------------------------------------

Expand Down Expand Up @@ -705,7 +705,7 @@ def filter(
dtype: boolean, length: 2, null_count: 0
"""
if columns is not None:
raise TypeError(f"columns parameter for flat columns not supported")
raise TypeError("columns parameter for flat columns not supported")

if not isinstance(predicate, ty.Iterable) and not callable(predicate):
raise TypeError(
Expand Down Expand Up @@ -1006,8 +1006,6 @@ def fill_null(self, fill_value: ty.Union[dt.ScalarTypes, ty.Dict]):
"""
self._prototype_support_warning("fill_null")

if not isinstance(fill_value, Column._scalar_types):
raise TypeError(f"fill_null with {type(fill_value)} is not supported")
if isinstance(fill_value, Column._scalar_types):
res = Scope._EmptyColumn(self.dtype.constructor(nullable=False))
for m, i in self._items():
Expand All @@ -1017,7 +1015,9 @@ def fill_null(self, fill_value: ty.Union[dt.ScalarTypes, ty.Dict]):
res._append_value(fill_value)
return res._finalize()
else:
raise TypeError(f"fill_null with {type(fill_value)} is not supported")
raise TypeError(
f"fill_null with {type(fill_value).__name__} is not supported"
)

@trace
@expression
Expand Down Expand Up @@ -1050,7 +1050,7 @@ def drop_null(self, how: ty.Literal["any", "all", None] = None):

if how is not None:
# "any or "all" is only used for DataFrame
raise TypeError(f"how parameter for flat columns not supported")
raise TypeError("how parameter for flat columns not supported")

if dt.is_primitive(self.dtype):
res = Scope._EmptyColumn(self.dtype.constructor(nullable=False))
Expand All @@ -1076,7 +1076,7 @@ def drop_duplicates(
# TODO Add functionality for first and last
assert keep == "first"
if subset is not None:
raise TypeError(f"subset parameter for flat columns not supported")
raise TypeError("subset parameter for flat columns not supported")
res = Scope._EmptyColumn(self._dtype)
res._extend(list(OrderedDict.fromkeys(self)))
return res._finalize()
Expand Down Expand Up @@ -1417,40 +1417,3 @@ def _to_tensor_default(self):
def _count(self):
    """Return number of non-NA/null observations of the column/frame."""
    # Total length minus the tracked null count; no iteration needed.
    return len(self) - self.null_count

@trace
@expression
def _nlargest(
    self,
    n=5,
    columns: ty.Optional[ty.List[str]] = None,
    keep: ty.Literal["last", "first"] = "first",
):
    """Return new data holding the *n* largest elements.

    NOTE: keep="all" is not supported; 'columns' only applies to
    structured (DataFrame) columns and must be None here.
    """
    if columns is not None:
        raise TypeError(
            "computing n-largest on non-structured column can't have 'columns' parameter"
        )
    # Descending sort puts the largest values first; take the leading n.
    ordered = self.sort(ascending=False)
    return ordered.head(n)

@trace
@expression
def _nsmallest(self, n=5, columns: ty.Optional[ty.List[str]] = None, keep="first"):
    """Return new data holding the *n* smallest elements.

    NOTE: keep="all" is not supported; 'columns' only applies to
    structured (DataFrame) columns and must be None here.
    """
    if columns is not None:
        raise TypeError(
            "computing n-smallest on non-structured column can't have 'columns' parameter"
        )
    # Ascending sort puts the smallest values first; take the leading n.
    ordered = self.sort(ascending=True)
    return ordered.head(n)

@trace
@expression
def _nunique(self, drop_null=True):
    """Return the number of distinct values in the column."""
    if drop_null:
        # Exclude nulls (None) before counting distinct values.
        return len({value for value in self if value is not None})
    return len(set(self))
17 changes: 17 additions & 0 deletions torcharrow/test/test_list_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,23 @@ def base_test_fixed_size_list(self):
f"Unexpected failure reason: {str(ex.exception)}",
)

def base_test_cast(self):
    """Casting a fixed-size list column is unsupported and must raise.

    Builds a List(int64, fixed_size=2) column and verifies that cast()
    to either the item dtype or the list dtype itself raises TypeError
    naming the source dtype.
    """
    list_dtype = dt.List(item_dtype=dt.int64, fixed_size=2)
    c_list = ta.column(
        [[1, 2], [3, 4]],
        dtype=list_dtype,
        device=self.device,
    )

    int_dtype = dt.int64
    # TODO: Nested cast should be supported in the future
    for arg in (int_dtype, list_dtype):
        # assertRaisesRegex: the assertRaisesRegexp alias is deprecated
        # and was removed in Python 3.12.
        with self.assertRaisesRegex(
            expected_exception=TypeError,
            expected_regex=r"List\(int64, fixed_size=2\) for.*is not supported",
        ):
            c_list.cast(arg)


if __name__ == "__main__":
unittest.main()
3 changes: 3 additions & 0 deletions torcharrow/test/test_list_column_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def test_map_reduce_etc(self):
def test_fixed_size_list(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_fixed_size_list()

def test_cast(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_cast()


if __name__ == "__main__":
unittest.main()
52 changes: 52 additions & 0 deletions torcharrow/test/test_map_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,58 @@ def base_test_keys_values_get(self):
self.assertEqual(list(c.maps.values()), [[123], [45, 67], None])
self.assertEqual(list(c.maps.get("de", 0)), [0, 45, None])

def base_test_get_operator(self):
    """Integer indexing a map column returns the Python dict at that row."""
    source_rows = [
        {"helsinki": [-1.0, 21.0], "moscow": [-4.0, 24.0]},
        {},
        {"nowhere": [], "algiers": [11.0, 25, 2], "kinshasa": [22.0, 26.0]},
    ]
    col = ta.column(
        source_rows,
        device=self.device,
    )
    # Spot-check the first and last rows against the backing data.
    picked = (0, 2)
    self.assertEqual(
        [source_rows[i] for i in picked],
        [col[i] for i in picked],
    )

def base_test_slice_operation(self):
    """Slicing a map column matches slicing the backing Python list."""
    source_rows = [
        {"helsinki": [-1.0, 21.0], "moscow": [-4.0, 24.0]},
        {},
        {"nowhere": [], "algiers": [11.0, 25, 2], "kinshasa": [22.0, 26.0]},
        {"london": [], "new york": [500]},
    ]
    col = ta.column(
        source_rows,
        device=self.device,
    )

    # Strided slice: every other row.
    self.assertEqual(source_rows[0:4:2], list(col[0:4:2]))

    # Open-ended slice: everything except the first row.
    self.assertEqual(source_rows[1:], list(col[1:4:1]))

def base_test_equality_operators(self):
    """Two map columns built from identical data compare equal element-wise."""
    source_rows = [
        {"helsinki": [-1.0, 21.0], "moscow": [-4.0, 24.0]},
        {"boston": [-4.0]},
        {"nowhere": [], "algiers": [11.0, 25, 2], "kinshasa": [22.0, 26.0]},
        {"london": [], "new york": [500]},
    ]
    left = ta.column(
        source_rows,
        device=self.device,
    )
    right = ta.column(
        source_rows,
        device=self.device,
    )
    # Element-wise ==: every position True; element-wise !=: none True.
    self.assertTrue(all(left == right))
    self.assertFalse(any(left != right))


if __name__ == "__main__":
unittest.main()
9 changes: 9 additions & 0 deletions torcharrow/test/test_map_column_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ def test_infer(self):
def test_keys_values_get(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_keys_values_get()

def test_get_operator(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_get_operator()

def test_slice_operation(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_slice_operation()

def test_equality_operators(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_equality_operators()


if __name__ == "__main__":
unittest.main()
Loading