Skip to content

Commit

Permalink
apacheGH-36973: [CI][Python] Archery linter integrated with flake8==6.1.0 (apache#36976)
Browse files Browse the repository at this point in the history

### Rationale for this change

Fix archery lint issues with new version of flake8 dependency.

### What changes are included in this PR?

* Lint errors.

### Are these changes tested?

Yes.

```
$ flake8 --version
6.1.0 (mccabe: 0.7.0, pycodestyle: 2.11.0, pyflakes: 3.1.0) CPython 3.11.4 on Darwin
$ archery lint --python
INFO:archery:Running Python formatter (autopep8)
INFO:archery:Running Python linter (flake8)
INFO:archery:Running Cython linter (cython-lint)
$
```

### Are there any user-facing changes?

No.
* Closes: apache#36973

Authored-by: Dane Pitkin <dane@voltrondata.com>
Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com>
  • Loading branch information
danepitkin authored and loicalleyne committed Nov 13, 2023
1 parent 69ea006 commit 6450fcc
Show file tree
Hide file tree
Showing 8 changed files with 20 additions and 21 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ repos:
entry: --entrypoint /bin/hadolint hadolint/hadolint:latest -
exclude: ^dev/.*$
- repo: https://github.com/pycqa/flake8
rev: 5.0.3
rev: 6.1.0
hooks:
- id: flake8
name: Python Format
Expand Down
5 changes: 2 additions & 3 deletions dev/archery/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@
jinja_req = 'jinja2>=2.11'

extras = {
'lint': [
'numpydoc==1.1.0', 'autopep8', 'flake8', 'cython-lint', 'cmake_format==0.6.13'
],
'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8==6.1.0', 'cython-lint',
'cmake_format==0.6.13'],
'benchmark': ['pandas'],
'docker': ['ruamel.yaml', 'python-dotenv'],
'release': ['pygithub', jinja_req, 'jira', 'semver', 'gitpython'],
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def test_asarray():
np_arr = np.asarray([_ for _ in arr])
assert np_arr.tolist() == [0, 1, 2, 3]
assert np_arr.dtype == np.dtype('O')
assert type(np_arr[0]) == pa.lib.Int64Value
assert isinstance(np_arr[0], pa.lib.Int64Value)

# Calling with the arrow array gives back an array with 'int64' dtype
np_arr = np.asarray(arr)
Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -1936,7 +1936,7 @@ def test_write_quoting_style():
except Exception as e:
# This will trigger when we try to write a comma (,)
# without quotes, which is invalid
assert type(e) == res
assert isinstance(e, res)
break
assert buf.getvalue() == res
buf.seek(0)
Expand Down
8 changes: 4 additions & 4 deletions python/pyarrow/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5021,8 +5021,8 @@ def test_dataset_filter(tempdir, dstype):

# Ensure chained filtering works.
result = ds1.filter(pc.field("colA") < 3).filter(pc.field("col2") == "a")
assert type(result) == (ds.FileSystemDataset if dstype ==
"fs" else ds.InMemoryDataset)
expected = ds.FileSystemDataset if dstype == "fs" else ds.InMemoryDataset
assert isinstance(result, expected)

assert result.to_table() == pa.table({
"colA": [1],
Expand Down Expand Up @@ -5181,9 +5181,9 @@ def test_read_table_nested_columns(tempdir, format):
"a.dotted.field": [1, 2],
"interaction": [
{"type": None, "element": "button",
"values": [1, 2], "structs":[{"foo": "bar"}, None]},
"values": [1, 2], "structs": [{"foo": "bar"}, None]},
{"type": "scroll", "element": "window",
"values": [None, 3, 4], "structs":[{"fizz": "buzz"}]}
"values": [None, 3, 4], "structs": [{"fizz": "buzz"}]}
]})
ds.write_dataset(table, tempdir / "table", format=format)
ds1 = ds.dataset(tempdir / "table", format=format)
Expand Down
16 changes: 8 additions & 8 deletions python/pyarrow/tests/test_extension_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,8 @@ def test_ext_scalar_from_array():
assert len(scalars_a) == 4

assert ty1.__arrow_ext_scalar_class__() == UuidScalarType
assert type(a[0]) == UuidScalarType
assert type(scalars_a[0]) == UuidScalarType
assert isinstance(a[0], UuidScalarType)
assert isinstance(scalars_a[0], UuidScalarType)

for s, val in zip(scalars_a, data):
assert isinstance(s, pa.ExtensionScalar)
Expand Down Expand Up @@ -737,7 +737,7 @@ def __arrow_ext_deserialize__(cls, storage_type, serialized):

def __eq__(self, other):
if isinstance(other, pa.BaseExtensionType):
return (type(self) == type(other) and
return (isinstance(self, type(other)) and
self.freq == other.freq)
else:
return NotImplemented
Expand Down Expand Up @@ -799,15 +799,15 @@ def test_generic_ext_type_ipc(registered_period_type):
arr = pa.ExtensionArray.from_storage(period_type, storage)
batch = pa.RecordBatch.from_arrays([arr], ["ext"])
# check the built array has exactly the expected clss
assert type(arr) == period_class
assert isinstance(arr, period_class)

buf = ipc_write_batch(batch)
del batch
batch = ipc_read_batch(buf)

result = batch.column(0)
# check the deserialized array class is the expected one
assert type(result) == period_class
assert isinstance(result, period_class)
assert result.type.extension_name == "test.period"
assert arr.storage.to_pylist() == [1, 2, 3, 4]

Expand All @@ -830,7 +830,7 @@ def test_generic_ext_type_ipc(registered_period_type):
result = batch.column(0)
assert isinstance(result.type, PeriodType)
assert result.type.freq == 'H'
assert type(result) == period_class
assert isinstance(result, period_class)


def test_generic_ext_type_ipc_unknown(registered_period_type):
Expand Down Expand Up @@ -1261,15 +1261,15 @@ def test_tensor_type_ipc(tensor_type):

# check the built array has exactly the expected clss
tensor_class = tensor_type.__arrow_ext_class__()
assert type(arr) == tensor_class
assert isinstance(arr, tensor_class)

buf = ipc_write_batch(batch)
del batch
batch = ipc_read_batch(buf)

result = batch.column(0)
# check the deserialized array class is the expected one
assert type(result) == tensor_class
assert isinstance(result, tensor_class)
assert result.type.extension_name == "arrow.fixed_shape_tensor"
assert arr.storage.to_pylist() == [[1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6]]

Expand Down
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_flight.py
Original file line number Diff line number Diff line change
Expand Up @@ -1495,7 +1495,7 @@ def test_tls_override_hostname():
"""Check that incorrectly overriding the hostname fails."""
certs = example_tls_certs()

with ConstantFlightServer(tls_certificates=certs["certificates"]) as s,\
with ConstantFlightServer(tls_certificates=certs["certificates"]) as s, \
flight.connect(('localhost', s.port),
tls_root_certs=certs["root_cert"],
override_hostname="fakehostname") as client:
Expand Down
4 changes: 2 additions & 2 deletions python/pyarrow/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ def test_recordbatch_basics():
('c0', [0, 1, 2, 3, 4]),
('c1', [-10, -5, 0, None, 10])
])
assert type(pydict) == dict
assert isinstance(pydict, dict)

with pytest.raises(IndexError):
# bounds checking
Expand Down Expand Up @@ -949,7 +949,7 @@ def test_table_basics():
('a', [0, 1, 2, 3, 4]),
('b', [-10, -5, 0, 5, 10])
])
assert type(pydict) == dict
assert isinstance(pydict, dict)

columns = []
for col in table.itercolumns():
Expand Down

0 comments on commit 6450fcc

Please sign in to comment.