rapidsai · rapids-bot · Jan 19, 2022 · Jan 18, 2022 · Jan 18, 2022 · Jan 18, 2022
@@ -200,12 +200,19 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
 
     update_struct_field_names(df, c_out_table.metadata.schema_info)
 
-    # update the decimal precision of each column
     if meta is not None:
-        for col, col_meta in zip(column_names, meta["columns"]):
+        # Index each column's metadata by name, because the
+        # order of `meta["columns"]` and `column_names` is not
+        # guaranteed to be deterministic or to always match.
+        meta_data_per_column = {}
+        for col_meta in meta["columns"]:
+            meta_data_per_column[col_meta['name']] = col_meta
+
+        # update the decimal precision of each column
+        for col in column_names:
             if is_decimal_dtype(df._data[col].dtype):
                 df._data[col].dtype.precision = (
-                    col_meta["metadata"]["precision"]
+                    meta_data_per_column[col]["metadata"]["precision"]
                 )
 
     # Set the index column

@@ -2373,3 +2373,21 @@ def test_parquet_writer_row_group_size(
         math.ceil(num_rows / size_rows), math.ceil(8 * num_rows / size_bytes)
     )
     assert expected_num_rows == row_groups
+
+
+def test_parquet_reader_decimal_columns():
+    # Read back a reordered subset of columns that includes the decimal one.
+    decimal_values = cudf.Series([1, 2, 3], dtype=cudf.Decimal64Dtype(10, 2))
+    source = cudf.DataFrame(
+        {
+            "col1": decimal_values,
+            "col2": [10, 11, 12],
+            "col3": [12, 13, 14],
+            "col4": ["a", "b", "c"],
+        }
+    )
+    pq_buffer = BytesIO()
+    source.to_parquet(pq_buffer)
+    actual = cudf.read_parquet(pq_buffer, columns=["col3", "col2", "col1"])
+    expected = pd.read_parquet(pq_buffer, columns=["col3", "col2", "col1"])
+    assert_eq(actual, expected)