Skip to content
This repository was archived by the owner on Nov 1, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 8 additions & 45 deletions torcharrow/icolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ def cast(self, dtype):
res._append_value(fun(i))
return res._finalize()
else:
raise TypeError('f"{astype}({dtype}) is not supported")')
raise TypeError('f"{astype} for {type(self).__name__} is not supported")')
raise TypeError(f"{dtype} for {type(self).__name__} is not supported")
raise TypeError(f"{self.dtype} for {type(self).__name__} is not supported")

# public simple observers -------------------------------------------------

Expand Down Expand Up @@ -705,7 +705,7 @@ def filter(
dtype: boolean, length: 2, null_count: 0
"""
if columns is not None:
raise TypeError(f"columns parameter for flat columns not supported")
raise TypeError("columns parameter for flat columns not supported")

if not isinstance(predicate, ty.Iterable) and not callable(predicate):
raise TypeError(
Expand Down Expand Up @@ -1006,8 +1006,6 @@ def fill_null(self, fill_value: ty.Union[dt.ScalarTypes, ty.Dict]):
"""
self._prototype_support_warning("fill_null")

if not isinstance(fill_value, Column._scalar_types):
raise TypeError(f"fill_null with {type(fill_value)} is not supported")
if isinstance(fill_value, Column._scalar_types):
res = Scope._EmptyColumn(self.dtype.constructor(nullable=False))
for m, i in self._items():
Expand All @@ -1017,7 +1015,9 @@ def fill_null(self, fill_value: ty.Union[dt.ScalarTypes, ty.Dict]):
res._append_value(fill_value)
return res._finalize()
else:
raise TypeError(f"fill_null with {type(fill_value)} is not supported")
raise TypeError(
f"fill_null with {type(fill_value).__name__} is not supported"
)

@trace
@expression
Expand Down Expand Up @@ -1050,7 +1050,7 @@ def drop_null(self, how: ty.Literal["any", "all", None] = None):

if how is not None:
# "any or "all" is only used for DataFrame
raise TypeError(f"how parameter for flat columns not supported")
raise TypeError("how parameter for flat columns not supported")

if dt.is_primitive(self.dtype):
res = Scope._EmptyColumn(self.dtype.constructor(nullable=False))
Expand All @@ -1076,7 +1076,7 @@ def drop_duplicates(
# TODO Add functionality for first and last
assert keep == "first"
if subset is not None:
raise TypeError(f"subset parameter for flat columns not supported")
raise TypeError("subset parameter for flat columns not supported")
res = Scope._EmptyColumn(self._dtype)
res._extend(list(OrderedDict.fromkeys(self)))
return res._finalize()
Expand Down Expand Up @@ -1417,40 +1417,3 @@ def _to_tensor_default(self):
def _count(self):
    """Return number of non-NA/null observations of the column/frame."""
    # Total length minus the tracked null count; no iteration needed.
    return len(self) - self.null_count

@trace
@expression
def _nlargest(
    self,
    n=5,
    columns: ty.Optional[ty.List[str]] = None,
    keep: ty.Literal["last", "first"] = "first",
):
    """Return new data holding the *n* largest elements.

    NOTE: keep="all" is not supported; 'columns' only applies to
    structured (DataFrame) columns and must be None here.
    """
    if columns is not None:
        raise TypeError(
            "computing n-largest on non-structured column can't have 'columns' parameter"
        )
    # Descending sort puts the largest values first; take the leading n.
    ordered = self.sort(ascending=False)
    return ordered.head(n)

@trace
@expression
def _nsmallest(self, n=5, columns: ty.Optional[ty.List[str]] = None, keep="first"):
    """Return new data holding the *n* smallest elements.

    NOTE: keep="all" is not supported; 'columns' only applies to
    structured (DataFrame) columns and must be None here.
    """
    if columns is not None:
        raise TypeError(
            "computing n-smallest on non-structured column can't have 'columns' parameter"
        )
    # Ascending sort puts the smallest values first; take the leading n.
    ordered = self.sort(ascending=True)
    return ordered.head(n)

@trace
@expression
def _nunique(self, drop_null=True):
    """Return the number of distinct values in the column."""
    if drop_null:
        # Exclude nulls (None) before counting distinct values.
        return len({value for value in self if value is not None})
    return len(set(self))
17 changes: 17 additions & 0 deletions torcharrow/test/test_list_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,23 @@ def base_test_fixed_size_list(self):
f"Unexpected failure reason: {str(ex.exception)}",
)

def base_test_cast(self):
    """Casting a fixed-size list column is unsupported and must raise.

    Builds a List(int64, fixed_size=2) column and verifies that cast()
    to either the item dtype or the list dtype itself raises TypeError
    naming the source dtype.
    """
    list_dtype = dt.List(item_dtype=dt.int64, fixed_size=2)
    c_list = ta.column(
        [[1, 2], [3, 4]],
        dtype=list_dtype,
        device=self.device,
    )

    int_dtype = dt.int64
    # TODO: Nested cast should be supported in the future
    for arg in (int_dtype, list_dtype):
        # assertRaisesRegex: the assertRaisesRegexp alias is deprecated
        # and was removed in Python 3.12.
        with self.assertRaisesRegex(
            expected_exception=TypeError,
            expected_regex=r"List\(int64, fixed_size=2\) for.*is not supported",
        ):
            c_list.cast(arg)


if __name__ == "__main__":
unittest.main()
3 changes: 3 additions & 0 deletions torcharrow/test/test_list_column_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ def test_map_reduce_etc(self):
def test_fixed_size_list(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_fixed_size_list()

def test_cast(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_cast()


if __name__ == "__main__":
unittest.main()
52 changes: 52 additions & 0 deletions torcharrow/test/test_map_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,58 @@ def base_test_keys_values_get(self):
self.assertEqual(list(c.maps.values()), [[123], [45, 67], None])
self.assertEqual(list(c.maps.get("de", 0)), [0, 45, None])

def base_test_get_operator(self):
    """Integer indexing a map column returns the Python dict at that row."""
    source_rows = [
        {"helsinki": [-1.0, 21.0], "moscow": [-4.0, 24.0]},
        {},
        {"nowhere": [], "algiers": [11.0, 25, 2], "kinshasa": [22.0, 26.0]},
    ]
    col = ta.column(
        source_rows,
        device=self.device,
    )
    # Spot-check the first and last rows against the backing data.
    picked = (0, 2)
    self.assertEqual(
        [source_rows[i] for i in picked],
        [col[i] for i in picked],
    )

def base_test_slice_operation(self):
    """Slicing a map column matches slicing the backing Python list."""
    source_rows = [
        {"helsinki": [-1.0, 21.0], "moscow": [-4.0, 24.0]},
        {},
        {"nowhere": [], "algiers": [11.0, 25, 2], "kinshasa": [22.0, 26.0]},
        {"london": [], "new york": [500]},
    ]
    col = ta.column(
        source_rows,
        device=self.device,
    )

    # Strided slice: every other row.
    self.assertEqual(source_rows[0:4:2], list(col[0:4:2]))

    # Open-ended slice: everything except the first row.
    self.assertEqual(source_rows[1:], list(col[1:4:1]))

def base_test_equality_operators(self):
    """Two map columns built from identical data compare equal element-wise."""
    source_rows = [
        {"helsinki": [-1.0, 21.0], "moscow": [-4.0, 24.0]},
        {"boston": [-4.0]},
        {"nowhere": [], "algiers": [11.0, 25, 2], "kinshasa": [22.0, 26.0]},
        {"london": [], "new york": [500]},
    ]
    left = ta.column(
        source_rows,
        device=self.device,
    )
    right = ta.column(
        source_rows,
        device=self.device,
    )
    # Element-wise ==: every position True; element-wise !=: none True.
    self.assertTrue(all(left == right))
    self.assertFalse(any(left != right))


if __name__ == "__main__":
unittest.main()
9 changes: 9 additions & 0 deletions torcharrow/test/test_map_column_cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ def test_infer(self):
def test_keys_values_get(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_keys_values_get()

def test_get_operator(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_get_operator()

def test_slice_operation(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_slice_operation()

def test_equality_operators(self):
    # Delegate to the shared base test; the device is supplied by this
    # device-specific test class.
    self.base_test_equality_operators()


if __name__ == "__main__":
unittest.main()
Loading