modin-project · YarShev · Feb 5, 2024 · Feb 5, 2024
@@ -869,14 +869,7 @@ def split_pandas_df_into_partitions(
         put_func = cls._partition_class.put
         # even a full-axis slice can cost something (https://github.com/pandas-dev/pandas/issues/55202)
         # so we try not to do it if unnecessary.
-        # FIXME: it appears that this optimization doesn't work for Unidist correctly as it
-        # doesn't explicitly copy the data when putting it into storage (as the rest engines do)
-        # causing it to eventially share memory with a pandas object that was provided by user.
-        # Everything works fine if we do this column slicing as pandas then would set some flags
-        # to perform in COW mode apparently (and so it wouldn't crash our tests).
-        # @YarShev promised that this will be eventially fixed on Unidist's side, but for now there's
-        # this hacky condition
-        if col_chunksize >= len(df.columns) and Engine.get() != "Unidist":
+        if col_chunksize >= len(df.columns):
             col_parts = [df]
         else:
             col_parts = [

@@ -971,3 +971,15 @@ def make_frame(lib):
 def test_get(key):
     modin_df, pandas_df = create_test_dfs({"col0": [0, 1]})
     eval_general(modin_df, pandas_df, lambda df: df.get(key))
+
+
+def test_df_immutability():
+    """
+    Verify that modifications of the source data don't propagate to Modin's DataFrame objects.
+    """
+    src_data = pandas.DataFrame({"a": [1]})
+
+    md_df = pd.DataFrame(src_data)
+    src_data.iloc[0, 0] = 100
+
+    assert md_df._to_pandas().iloc[0, 0] == 1