From db9a456ef0f88812f341e5a2e401becc8303706a Mon Sep 17 00:00:00 2001
From: mrastgoo <mojdeh.rastgoo@gmail.com>
Date: Sat, 13 May 2023 16:09:19 +0200
Subject: [PATCH 1/6] filters parameter in pd.read_parquet

---
 pandas/io/parquet.py | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index e8670757e1669..e4c9dab3ebb5a 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -228,6 +228,7 @@ def read(
         self,
         path,
         columns=None,
+        filters=None,
         use_nullable_dtypes: bool = False,
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions = None,
@@ -257,7 +258,11 @@ def read(
         )
         try:
             pa_table = self.api.parquet.read_table(
-                path_or_handle, columns=columns, filesystem=filesystem, **kwargs
+                path_or_handle,
+                columns=columns,
+                filesystem=filesystem,
+                filters=filters,
+                **kwargs,
             )
             result = pa_table.to_pandas(**to_pandas_kwargs)
 
@@ -335,6 +340,7 @@ def read(
         self,
         path,
         columns=None,
+        filters=None,
         storage_options: StorageOptions = None,
         filesystem=None,
         **kwargs,
@@ -375,7 +381,7 @@ def read(
 
         try:
             parquet_file = self.api.ParquetFile(path, **parquet_kwargs)
-            return parquet_file.to_pandas(columns=columns, **kwargs)
+            return parquet_file.to_pandas(columns=columns, filters=filters, **kwargs)
         finally:
             if handles is not None:
                 handles.close()
@@ -483,6 +489,7 @@ def read_parquet(
     path: FilePath | ReadBuffer[bytes],
     engine: str = "auto",
     columns: list[str] | None = None,
+    filters: list[tuple] | list[list[tuple]] | None = None,
     storage_options: StorageOptions = None,
     use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
@@ -517,6 +524,21 @@ def read_parquet(
         if you wish to use its implementation.
     columns : list, default=None
         If not None, only these columns will be read from the file.
+    filters : List[Tuple] or List[List[Tuple]], default None
+        To filter out data.
+        Filter syntax: [[(column, op, val), ...],...]
+        where op is [==, =, >, >=, <, <=, !=, in, not in]
+        The innermost tuples are transposed into a set of filters applied
+        through an `AND` operation.
+        The outer list combines these sets of filters through an `OR`
+        operation.
+        A single list of tuples can also be used, meaning that no `OR`
+        operation between set of filters is to be conducted.
+
+        Using this argument will NOT result in row-wise filtering of the final
+        partitions unless ``engine="pyarrow"`` is also specified.  For
+        other engines, filtering is only performed at the partition level, that is,
+        to prevent the loading of some row-groups and/or files.
 
     {storage_options}
 
@@ -555,6 +577,8 @@ def read_parquet(
     Returns
     -------
     DataFrame
+
+
     """
     impl = get_engine(engine)
 
@@ -575,6 +599,7 @@ def read_parquet(
     return impl.read(
         path,
         columns=columns,
+        filters=filters,
         storage_options=storage_options,
         use_nullable_dtypes=use_nullable_dtypes,
         dtype_backend=dtype_backend,

From 4624beedd1b61e854a2338b29bcf602a75136cb3 Mon Sep 17 00:00:00 2001
From: mrastgoo <mojdeh.rastgoo@gmail.com>
Date: Sat, 13 May 2023 17:38:56 +0200
Subject: [PATCH 2/6] linter

---
 pandas/io/parquet.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index e4c9dab3ebb5a..671d8d4333f69 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -578,7 +578,6 @@ def read_parquet(
     -------
     DataFrame
 
-
     """
     impl = get_engine(engine)
 

From 9b4439d9abb1fb3dd08670e9a2901b78faf8be0a Mon Sep 17 00:00:00 2001
From: mrastgoo <mojdeh.rastgoo@gmail.com>
Date: Sun, 14 May 2023 10:44:59 +0200
Subject: [PATCH 3/6] docstring validation

---
 pandas/io/parquet.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 671d8d4333f69..986bbf686839d 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -577,7 +577,6 @@ def read_parquet(
     Returns
     -------
     DataFrame
-
     """
     impl = get_engine(engine)
 

From 4e94179e49d4d33f5c2a8fa18a67d65581e24aa0 Mon Sep 17 00:00:00 2001
From: mrastgoo <mojdeh.rastgoo@gmail.com>
Date: Wed, 5 Jul 2023 21:54:13 +0200
Subject: [PATCH 4/6] test for filter args in pd.read_parquet

---
 pandas/tests/io/test_parquet.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 35bf75d3928f8..490f783cbb5a5 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -424,6 +424,20 @@ def test_read_columns(self, engine):
         check_round_trip(
             df, engine, expected=expected, read_kwargs={"columns": ["string"]}
         )
+        
+    def test_read_filters(self, engine, tmp_path):
+        df = pd.DataFrame({"int": list(range(4)), "part": list("aabb"),})
+
+        expected = pd.DataFrame({"int": [0, 1]})
+        check_round_trip(
+            df,
+            engine,
+            path=tmp_path,
+            expected=expected,
+            write_kwargs={"partition_cols": ["part"]},
+            read_kwargs={"filters": [("part", "==", "a")], "columns":["int"]},
+            repeat=1,
+        )
 
     def test_write_index(self, engine, using_copy_on_write, request):
         check_names = engine != "fastparquet"

From 36cbfe2b3b943bbdae272092cee03e8fc05eec39 Mon Sep 17 00:00:00 2001
From: mrastgoo <mojdeh.rastgoo@gmail.com>
Date: Wed, 5 Jul 2023 23:59:05 +0200
Subject: [PATCH 5/6] black

---
 pandas/tests/io/test_parquet.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 490f783cbb5a5..a38c6be12ed89 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -424,9 +424,14 @@ def test_read_columns(self, engine):
         check_round_trip(
             df, engine, expected=expected, read_kwargs={"columns": ["string"]}
         )
-        
+
     def test_read_filters(self, engine, tmp_path):
-        df = pd.DataFrame({"int": list(range(4)), "part": list("aabb"),})
+        df = pd.DataFrame(
+            {
+                "int": list(range(4)),
+                "part": list("aabb"),
+            }
+        )
 
         expected = pd.DataFrame({"int": [0, 1]})
         check_round_trip(
@@ -435,7 +440,7 @@ def test_read_filters(self, engine, tmp_path):
             path=tmp_path,
             expected=expected,
             write_kwargs={"partition_cols": ["part"]},
-            read_kwargs={"filters": [("part", "==", "a")], "columns":["int"]},
+            read_kwargs={"filters": [("part", "==", "a")], "columns": ["int"]},
             repeat=1,
         )
 

From d324e1b9db9f33918416abb30ef7a72aaec480d2 Mon Sep 17 00:00:00 2001
From: mrastgoo <mojdeh.rastgoo@gmail.com>
Date: Wed, 2 Aug 2023 09:53:39 +0200
Subject: [PATCH 6/6] addressing reviews

---
 doc/source/whatsnew/v2.1.0.rst |  1 +
 pandas/io/parquet.py           | 36 ++++++++++++++++++----------------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index 6c91c4b512f41..4aa6630fe909f 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -669,6 +669,7 @@ I/O
 ^^^
 - :meth:`DataFrame.to_orc` now raising ``ValueError`` when non-default :class:`Index` is given (:issue:`51828`)
 - :meth:`DataFrame.to_sql` now raising ``ValueError`` when the name param is left empty while using SQLAlchemy to connect (:issue:`52675`)
+- Added ``filters`` parameter to :func:`read_parquet` to filter out data, compatible with both engines (:issue:`53212`)
 - Bug in :func:`json_normalize`, fix json_normalize cannot parse metadata fields list type (:issue:`37782`)
 - Bug in :func:`read_csv` where it would error when ``parse_dates`` was set to a list or dictionary with ``engine="pyarrow"`` (:issue:`47961`)
 - Bug in :func:`read_csv`, with ``engine="pyarrow"`` erroring when specifying a ``dtype`` with ``index_col`` (:issue:`53229`)
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index 141fc421a2ed6..90d59b0dfcfc8 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -489,11 +489,11 @@ def read_parquet(
     path: FilePath | ReadBuffer[bytes],
     engine: str = "auto",
     columns: list[str] | None = None,
-    filters: list[tuple] | list[list[tuple]] | None = None,
     storage_options: StorageOptions | None = None,
     use_nullable_dtypes: bool | lib.NoDefault = lib.no_default,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
+    filters: list[tuple] | list[list[tuple]] | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -524,22 +524,6 @@ def read_parquet(
         if you wish to use its implementation.
     columns : list, default=None
         If not None, only these columns will be read from the file.
-    filters : List[Tuple] or List[List[Tuple]], default None
-        To filter out data.
-        Filter syntax: [[(column, op, val), ...],...]
-        where op is [==, =, >, >=, <, <=, !=, in, not in]
-        The innermost tuples are transposed into a set of filters applied
-        through an `AND` operation.
-        The outer list combines these sets of filters through an `OR`
-        operation.
-        A single list of tuples can also be used, meaning that no `OR`
-        operation between set of filters is to be conducted.
-
-        Using this argument will NOT result in row-wise filtering of the final
-        partitions unless ``engine="pyarrow"`` is also specified.  For
-        other engines, filtering is only performed at the partition level, that is,
-        to prevent the loading of some row-groups and/or files.
-
     {storage_options}
 
         .. versionadded:: 1.3.0
@@ -572,6 +556,24 @@ def read_parquet(
 
         .. versionadded:: 2.1.0
 
+    filters : List[Tuple] or List[List[Tuple]], default None
+        To filter out data.
+        Filter syntax: [[(column, op, val), ...],...]
+        where op is [==, =, >, >=, <, <=, !=, in, not in]
+        The innermost tuples are transposed into a set of filters applied
+        through an `AND` operation.
+        The outer list combines these sets of filters through an `OR`
+        operation.
+        A single list of tuples can also be used, meaning that no `OR`
+        operation between sets of filters is to be conducted.
+
+        Using this argument will NOT result in row-wise filtering of the final
+        partitions unless ``engine="pyarrow"`` is also specified.  For
+        other engines, filtering is only performed at the partition level, that is,
+        to prevent the loading of some row-groups and/or files.
+
+        .. versionadded:: 2.1.0
+
     **kwargs
         Any additional kwargs are passed to the engine.