From aa17f91e5818cef082ff8dbdc1b2526befad07c4 Mon Sep 17 00:00:00 2001 From: David Linke Date: Mon, 26 Jun 2023 18:04:56 +0200 Subject: [PATCH] Code improvements based on ruff linter --- src/voc4cat/__init__.py | 1 - src/voc4cat/merge_vocab.py | 12 ++-- src/voc4cat/util.py | 20 +++--- src/voc4cat/wrapper.py | 134 +++++++++++++++++------------------- tests/conftest.py | 2 +- tests/test_merge_vocab.py | 1 - tests/test_util.py | 23 +++---- tests/test_wrapper.py | 137 ++++++++++++++++++------------------- 8 files changed, 151 insertions(+), 179 deletions(-) diff --git a/src/voc4cat/__init__.py b/src/voc4cat/__init__.py index a9c0388..6487d9e 100644 --- a/src/voc4cat/__init__.py +++ b/src/voc4cat/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from importlib.metadata import PackageNotFoundError, version try: diff --git a/src/voc4cat/merge_vocab.py b/src/voc4cat/merge_vocab.py index 8e0e8b3..43fb7d6 100644 --- a/src/voc4cat/merge_vocab.py +++ b/src/voc4cat/merge_vocab.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # This script is mainly useful for CI. import os import shutil @@ -16,14 +15,14 @@ def main(ttl_inbox, vocab): retcode = 0 for p in os.listdir(ttl_inbox): new = ttl_inbox / Path(p) - if not new.suffix == ".ttl" or new.is_dir(): + if new.suffix != ".ttl" or new.is_dir(): print(f'Skipping "{new}"') continue if os.path.exists(vocab / Path(new).name): exists = vocab / Path(new).name cmd = ["git", "merge-file", "--theirs", str(exists), str(exists), str(new)] - print("Running cmd: {0}".format(" ".join(cmd))) - outp = subprocess.run(cmd, capture_output=True) + print("Running cmd: {}".format(" ".join(cmd))) + outp = subprocess.run(cmd, capture_output=True) # noqa: S603 print(outp.stdout) if retcode := outp.returncode != 0: break @@ -37,13 +36,12 @@ def main_cli(args=None): if args is None: # script run via entrypoint args = sys.argv[1:] - if len(args) != 2: + if len(args) != 2: # noqa: PLR2004 print("Usage: python merge_vocab.py ") return 1 outbox, vocab = args if os.path.exists(outbox) and os.path.exists(vocab): - retcode = main(Path(outbox), Path(vocab)) - return retcode + return main(Path(outbox), Path(vocab)) print(f'This script requires both folders to exist: "{outbox}" and "{vocab}"') return 1 diff --git a/src/voc4cat/util.py b/src/voc4cat/util.py index b924ff7..4dfb6e7 100644 --- a/src/voc4cat/util.py +++ b/src/voc4cat/util.py @@ -1,11 +1,10 @@ -# -*- coding: utf-8 -*- from operator import itemgetter from warnings import warn import networkx as nx -def _get_edges(text_with_level, base_level): # noqa: WPS231 +def _get_edges(text_with_level, base_level): edges = [] level_parent_map = {} for concept, level in text_with_level: @@ -32,7 +31,7 @@ def get_concept_and_level_from_indented_line(line, sep): level = len(split_line) - 1 concept = split_line[level] if sep is not None and len(sep) > 1 and concept.startswith(sep[0]): - warn(f'Line "{concept}": Incomplete separator "{sep}"?') + warn(f'Line "{concept}": Incomplete separator "{sep}"?', stacklevel=1) return concept, level @@ -47,9 +46,8 @@ def dag_from_indented_text(text, sep=" "): if concept not in nodes: nodes.append(concept) if text_with_level and (level - 1) > text_with_level[-1][1]: - raise ValueError( - f'Indentation inreases by more than one level for "{concept}".' - ) + msg = f'Indentation increases by more than one level for "{concept}".' + raise ValueError(msg) text_with_level.append((concept, level)) if text_with_level: @@ -57,9 +55,8 @@ def dag_from_indented_text(text, sep=" "): # Check if first line is at base level. concept, level = text_with_level[0] if base_level != level: - raise ValueError( - f'First line "{concept}" must be at lowest indentation level.' - ) + msg = f'First line "{concept}" must be at lowest indentation level.' + raise ValueError(msg) else: base_level = 0 @@ -161,9 +158,8 @@ def dag_from_narrower(narrower): for child in children: # check for undefined children if child not in nodes: - raise ValueError( - f'Concept "{child}" needs to defined if used as narrower concept.' - ) + msg = f'Concept "{child}" needs to defined if used as narrower concept.' + raise ValueError(msg) edges.append((concept, child)) dag = nx.DiGraph() diff --git a/src/voc4cat/wrapper.py b/src/voc4cat/wrapper.py index 88d3220..14c6264 100644 --- a/src/voc4cat/wrapper.py +++ b/src/voc4cat/wrapper.py @@ -1,8 +1,6 @@ -# -*- coding: utf-8 -*- """A wrapper to extend VocExcel with more commands.""" import argparse -import datetime import glob import os import sys @@ -30,7 +28,6 @@ ORGANISATIONS["NFDI4Cat"] = URIRef("http://example.org/nfdi4cat/") ORGANISATIONS["LIKAT"] = URIRef("https://www.catalysis.de/") ORGANISATIONS_INVERSE.update({v: k for k, v in ORGANISATIONS.items()}) -NOW = datetime.datetime.now().strftime("%Y%m%dT%H%M%S") def is_file_available(fname, ftype): @@ -56,8 +53,7 @@ def has_file_in_more_than_one_format(dir_): if len(file_names) == len(unique_file_names): return False seen = set() - duplicates = [x for x in file_names if x in seen or seen.add(x)] - return duplicates + return [x for x in file_names if x in seen or seen.add(x)] def is_supported_template(wb): @@ -70,7 +66,8 @@ def may_overwrite(no_warn, xlf, outfile, func): warn( f'Option "--{func.__name__.replace("_", "-")}" ' f"will overwrite the existing file {outfile}\n" - "Run again with --no-warn option to overwrite the file." + "Run again with --no-warn option to overwrite the file.", + stacklevel=1, ) return False return True @@ -98,19 +95,18 @@ def make_ids(fpath, outfile, search_prefix, start_id): # Load in data_only mode to get cell values not formulas. wb = openpyxl.load_workbook(fpath, data_only=True) is_supported_template(wb) - VOC_BASE_IRI = wb["Concept Scheme"].cell(row=2, column=2).value - if VOC_BASE_IRI is None: - VOC_BASE_IRI = "https://example.org/" - wb["Concept Scheme"].cell(row=2, column=2).value = VOC_BASE_IRI + voc_base_iri = wb["Concept Scheme"].cell(row=2, column=2).value + if voc_base_iri is None: + voc_base_iri = "https://example.org/" + wb["Concept Scheme"].cell(row=2, column=2).value = voc_base_iri try: start_id = int(start_id) except ValueError: start_id = -1 if start_id <= 0: - raise ValueError( - 'For option --make-ids the "start_id" must be an integer greater than 0.' - ) + msg = 'For option --make-ids the "start_id" must be an integer greater than 0.' + raise ValueError(msg) id_gen = count(int(start_id)) replaced_iris = {} @@ -126,7 +122,7 @@ def make_ids(fpath, outfile, search_prefix, start_id): if iri in replaced_iris: iri_new = replaced_iris[iri] else: - iri_new = VOC_BASE_IRI + f"{next(id_gen):07d}" + iri_new = voc_base_iri + f"{next(id_gen):07d}" print(f"[{sheet}] Replaced CURI {iri} by {iri_new}") replaced_iris[iri] = iri_new row[0].value = iri_new @@ -207,10 +203,8 @@ def hierarchy_from_indent(fpath, outfile, sep): merged = [] for old, new in zip(old_data, new_data, strict=True): if (old and new) and (old != new): - raise ValueError( - f"Cannot merge rows for {iri}. " - "Resolve differences manually." - ) + msg = f"Cannot merge rows for {iri}. Resolve differences manually." + raise ValueError(msg) merged.append(old if old else new) row_by_iri[iri][lang] = merged else: @@ -218,13 +212,13 @@ def hierarchy_from_indent(fpath, outfile, sep): ws.cell(row_no, col_no).value for col_no in range(2, col_last) ] max_row = row_no + + # stop processing a sheet after 3 empty rows + elif subsequent_empty_rows < 2: # noqa: PLR2004 + subsequent_empty_rows += 1 else: - # stop processing a sheet after 3 empty rows - if subsequent_empty_rows < 2: - subsequent_empty_rows += 1 - else: - subsequent_empty_rows = 0 - break + subsequent_empty_rows = 0 + break term_dag = dag_from_indented_text("\n".join(concepts_indented)) children_by_iri = dag_to_narrower(term_dag) @@ -271,17 +265,14 @@ def hierarchy_to_indent(fpath, outfile, sep): subsequent_empty_rows = 0 row_by_iri = defaultdict(dict) col_last = 9 - # read all IRI, preferred labels, childrenURIs from the sheet + # read all IRI, preferred labels, children_uris from the sheet for rows_total, row in enumerate( # pragma: no branch ws.iter_rows(min_row=3, max_col=col_last, values_only=True) ): if row[0] and row[1]: iri = row[0] lang = row[2] - if not row[6]: - childrenURIs = [] - else: - childrenURIs = [c.strip() for c in row[6].split(",")] + children_uris = [] if not row[6] else [c.strip() for c in row[6].split(",")] # We need to check if ChildrenIRI, Provenance & Source Vocab URL # are consistent across languages since SKOS has no support for # per language statements. (SKOS-XL would add this) @@ -292,24 +283,23 @@ def hierarchy_to_indent(fpath, outfile, sep): merged = [] for old, new in zip(old_data, new_data): if (old and new) and (old != new): - raise ValueError( - f"Merge conflict for concept {iri}. " - f'New: "{new}" - Before: "{old}"' - ) + msg = f'Merge conflict for concept {iri}. New: "{new}" - Before: "{old}"' + raise ValueError(msg) merged.append(old if old else new) row_by_iri[iri][lang] = [row[col] for col in range(1, 6)] + merged else: row_by_iri[iri][lang] = [row[col] for col in range(1, col_last)] - concept_children_dict[iri] = childrenURIs + concept_children_dict[iri] = children_uris + # stop processing a sheet after 3 empty rows + elif subsequent_empty_rows < 2: # noqa: PLR2004 + subsequent_empty_rows += 1 else: - # stop processing a sheet after 3 empty rows - if subsequent_empty_rows < 2: - subsequent_empty_rows += 1 - else: - subsequent_empty_rows = 0 - rows_total = rows_total - 2 - break + subsequent_empty_rows = 0 + rows_total -= 2 # noqa: PLW2901 + break + else: + pass term_dag = dag_from_narrower(concept_children_dict) concept_levels = dag_to_node_levels(term_dag) @@ -334,17 +324,17 @@ def hierarchy_to_indent(fpath, outfile, sep): ws.cell(row, 3).value = lang if (iri, lang) in iri_written: # case 2 - for col, stored_value in zip_longest( + for col, _ in zip_longest( range(4, col_last + 1), row_by_iri[iri][lang][2:] ): ws.cell(row, column=col).value = None row += 1 continue - else: - for col, stored_value in zip_longest( - range(4, col_last + 1), row_by_iri[iri][lang][2:] - ): - ws.cell(row, column=col).value = stored_value + + for col, stored_value in zip_longest( + range(4, col_last + 1), row_by_iri[iri][lang][2:] + ): + ws.cell(row, column=col).value = stored_value # clear children IRI column G ws.cell(row, column=7).value = None row += 1 @@ -406,38 +396,38 @@ def check(fpath, outfile): color = PatternFill("solid", start_color="00FFCC00") # orange subsequent_empty_rows = 0 - seen_conceptIRIs = [] + seen_concept_iris = [] failed_check = False for row in ws.iter_rows(min_row=3, max_col=3): # pragma: no branch if row[0].value and row[1].value: - conceptIRI, _, lang = [ + concept_iri, _, lang = ( c.value.strip() if c.value is not None else "" for c in row - ] + ) - new_conceptIRI = f'"{conceptIRI}"@{lang.lower()}' - if new_conceptIRI in seen_conceptIRIs: + new_concept_iri = f'"{concept_iri}"@{lang.lower()}' + if new_concept_iri in seen_concept_iris: failed_check = True print( - f'ERROR: Same Concept IRI "{conceptIRI}" used more than once for ' + f'ERROR: Same Concept IRI "{concept_iri}" used more than once for ' f'language "{lang}"' ) # colorize problematic cells row[0].fill = color row[2].fill = color - seen_in_row = 3 + seen_conceptIRIs.index(new_conceptIRI) + seen_in_row = 3 + seen_concept_iris.index(new_concept_iri) ws[f"A{seen_in_row}"].fill = color ws[f"C{seen_in_row}"].fill = color else: - seen_conceptIRIs.append(new_conceptIRI) + seen_concept_iris.append(new_concept_iri) subsequent_empty_rows = 0 + + # stop processing a sheet after 3 empty rows + elif subsequent_empty_rows < 2: # noqa: PLR2004 + subsequent_empty_rows += 1 else: - # stop processing a sheet after 3 empty rows - if subsequent_empty_rows < 2: - subsequent_empty_rows += 1 - else: - subsequent_empty_rows = 0 - break + subsequent_empty_rows = 0 + break if failed_check: wb.save(outfile) @@ -466,7 +456,7 @@ def main_cli(args=None): if args is None: # voc4cat run via entrypoint args = sys.argv[1:] - has_args = True if args else False + has_args = bool(args) parser = argparse.ArgumentParser( prog="voc4cat", formatter_class=argparse.ArgumentDefaultsHelpFormatter @@ -610,9 +600,8 @@ def main_cli(args=None): if args_wrapper.indent_separator is not None: sep = args_wrapper.indent_separator if not len(sep): - raise ValueError( - "Setting the indent separator to zero length is not allowed." - ) + msg = "Setting the indent separator to zero length is not allowed." + raise ValueError(msg) else: # Excel's default indent / openpyxl.styles.Alignment(indent=0) sep = None @@ -627,11 +616,10 @@ def main_cli(args=None): outfile = args_wrapper.file_to_preprocess else: outfile = Path(outdir) / Path(f"{fname}.{fsuffix}") - elif args_wrapper.file_to_preprocess.is_dir(): + elif args_wrapper.file_to_preprocess.is_dir(): # pragma: no cover # processing all files in directory is not supported for now. - raise NotImplementedError( - "Processing all files in directory not implemented for this option." - ) + msg = "Processing all files in directory not implemented for this option." + raise NotImplementedError(msg) else: print(f"File not found: {args_wrapper.file_to_preprocess}.") return 1 @@ -708,7 +696,7 @@ def main_cli(args=None): err += run_ontospy(infile.with_suffix(".ttl"), doc_path) else: print( - "Expected xlsx-file or directory but got: {0}".format( + "Expected xlsx-file or directory but got: {}".format( args_wrapper.file_to_preprocess ) ) @@ -716,7 +704,7 @@ def main_cli(args=None): elif args_wrapper and args_wrapper.file_to_preprocess: if os.path.isdir(args_wrapper.file_to_preprocess): dir_ = args_wrapper.file_to_preprocess - if duplicates := has_file_in_more_than_one_format(dir_): # noqa: WPS332 + if duplicates := has_file_in_more_than_one_format(dir_): print( "Files may only be present in one format. Found more than one " "format for:\n " + "\n ".join(duplicates) @@ -757,7 +745,7 @@ def main_cli(args=None): outfile = Path(f"{fprefix}.ttl") else: outfile = Path(outdir) / Path(f"{fname}.ttl") - locargs = ["--outputfile", str(outfile)] + locargs + locargs = ["--outputfile", str(outfile), *locargs] err += run_vocexcel(locargs) if turtle_files: @@ -772,7 +760,7 @@ def main_cli(args=None): outfile = Path(f"{fprefix}.xlsx") else: outfile = Path(outdir) / Path(f"{fname}.xlsx") - locargs = ["--outputfile", str(outfile)] + locargs + locargs = ["--outputfile", str(outfile), *locargs] err += run_vocexcel(locargs) if ( diff --git a/tests/conftest.py b/tests/conftest.py index f234a14..5621c17 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,7 +2,7 @@ import pytest -@pytest.fixture +@pytest.fixture() def datadir(): """DATADIR as a LocalPath""" from pathlib import Path diff --git a/tests/test_merge_vocab.py b/tests/test_merge_vocab.py index 0a822e4..22932b8 100644 --- a/tests/test_merge_vocab.py +++ b/tests/test_merge_vocab.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import shutil from unittest import mock diff --git a/tests/test_util.py b/tests/test_util.py index d4d58b7..ecd86d8 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- import pytest - from voc4cat.util import ( dag_from_indented_text, dag_from_narrower, @@ -231,11 +229,10 @@ def test_redefinition_within_text(): def test_undefined_child(): narower = {"a1": [], "a2": ["c"]} - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, match='Concept "c" needs to defined if used as narrower concept.' + ): dag_from_narrower(narower) - assert 'Concept "c" needs to defined if used as narrower concept.' in str( - excinfo.value # noqa: WPS441 - ) def test_empty_text(): @@ -260,20 +257,18 @@ def test_none_as_sep(): def test_bad_dedent(): text = " x1\nx2" - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, match='First line "x1" must be at lowest indentation level.' + ): dag_from_indented_text(text) - assert 'First line "x1" must be at lowest indentation level.' in str( - excinfo.value # noqa: WPS441 - ) def test_bad_indent(): text = "x1\n--x2" - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, match='Indentation increases by more than one level for "x2"' + ): dag_from_indented_text(text, sep="-") - assert 'Indentation inreases by more than one level for "x2"' in str( - excinfo.value # noqa: WPS441 - ) def test_non_matching_indent_warning(): diff --git a/tests/test_wrapper.py b/tests/test_wrapper.py index 776b022..a69662a 100644 --- a/tests/test_wrapper.py +++ b/tests/test_wrapper.py @@ -1,11 +1,9 @@ -# -*- coding: utf-8 -*- import os import shutil from pathlib import Path import pytest from openpyxl.reader.excel import load_workbook - from voc4cat.wrapper import main_cli, run_ontospy CS_SIMPLE = "concept-scheme-simple.xlsx" @@ -89,16 +87,15 @@ def test_make_ids_no_voc_base_iri(datadir, tmp_path): def test_make_ids_invalid_id(datadir): - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, + match='For option --make-ids the "start_id" must be an integer greater than 0.', + ): main_cli(["--make-ids", "ex", "###", "--no-warn", str(datadir / CS_SIMPLE)]) - assert ( - 'For option --make-ids the "start_id" must be an integer greater than 0.' - in str(excinfo.value) # noqa: WPS441 - ) @pytest.mark.parametrize( - "indir, outdir", + ("indir", "outdir"), [(True, ""), (True, "out"), (False, ""), (False, "out")], ids=[ "in:dir, out:default", @@ -110,18 +107,18 @@ def test_make_ids_invalid_id(datadir): def test_make_ids_variants(datadir, tmp_path, indir, outdir): # fmt: off expected_concepts = [ - ("ex:test/0001001", "term1", "en", "def for term1", "en", "AltLbl for term1", "ex:test/0001002, ex:test/0001003",), # noqa:E501 - ("ex:test/0001002", "term2", "en", "def for term2", "en", "AltLbl for term2", None,), # noqa:E501 - ("ex:test/0001003", "term3", "en", "def for term3", "en", "AltLbl for term3", "ex:test/0001004",), # noqa:E501 - ("ex:test/0001004", "term4", "en", "def for term4", "en", "AltLbl for term4", None, ), # noqa:E501 - ("ex:test/0001005", "term5", "en", "def for term5", "en", "AltLbl for term5", None, ), # noqa:E501 - ("ex:test/0001006", "term6", "en", "def for term6", "en", "AltLbl for term6", None,), # noqa:E501 + ("ex:test/0001001", "term1", "en", "def for term1", "en", "AltLbl for term1", "ex:test/0001002, ex:test/0001003",), + ("ex:test/0001002", "term2", "en", "def for term2", "en", "AltLbl for term2", None,), + ("ex:test/0001003", "term3", "en", "def for term3", "en", "AltLbl for term3", "ex:test/0001004",), + ("ex:test/0001004", "term4", "en", "def for term4", "en", "AltLbl for term4", None, ), + ("ex:test/0001005", "term5", "en", "def for term5", "en", "AltLbl for term5", None, ), + ("ex:test/0001006", "term6", "en", "def for term6", "en", "AltLbl for term6", None,), ] expected_collections = [ - ("ex:test/0001007", "con", "def for con", "ex:test/0001001, ex:test/0001002, ex:test/0001003, ex:test/0001004",), # noqa:E501 + ("ex:test/0001007", "con", "def for con", "ex:test/0001001, ex:test/0001002, ex:test/0001003, ex:test/0001004",), ] expected_additional = [ - ("ex:test/0001001", "ex:test/0001002", "ex:test/0001003", "ex:test/0001004", "ex:test/0001005", "ex:test/0001006", ), # noqa:E501 + ("ex:test/0001001", "ex:test/0001002", "ex:test/0001003", "ex:test/0001004", "ex:test/0001005", "ex:test/0001006", ), ] # fmt: on shutil.copy(datadir / CS_SIMPLE, tmp_path) @@ -131,10 +128,7 @@ def test_make_ids_variants(datadir, tmp_path, indir, outdir): + (["--output-directory", outdir] if outdir else []) + ([str(tmp_path)] if indir else [str(tmp_path / CS_SIMPLE)]) ) - if outdir: - xlsxfile = tmp_path / outdir / CS_SIMPLE - else: - xlsxfile = tmp_path / CS_SIMPLE + xlsxfile = tmp_path / outdir / CS_SIMPLE if outdir else tmp_path / CS_SIMPLE wb = load_workbook(filename=xlsxfile, read_only=True, data_only=True) ws = wb["Concepts"] for row, expected_row in zip( @@ -156,17 +150,23 @@ def test_make_ids_variants(datadir, tmp_path, indir, outdir): def test_make_ids_multilang(tmp_path, datadir): # fmt: off expected_concepts = [ - ("ex:test/0001001", "term1", "en", "def for term1", "en", "AltLbl for term1", "ex:test/0001002, ex:test/0001003",), # noqa:E501 - ("ex:test/0001002", "term2", "en", "def for term2", "en", "AltLbl for term2", "ex:test/0001004",), # noqa:E501 - ("ex:test/0001003", "term3", "en", "def for term3", "en", "AltLbl for term3", "ex:test/0001004",), # noqa:E501 - ("ex:test/0001004", "term4", "en", "def for term4", "en", "AltLbl for term4", None, ), # noqa:E501 - ('ex:test/0001004', 'Begr4', 'de', 'Def für Begr4', 'de', 'AltLbl für Begr4', None, ), # noqa:E501 - ('ex:test/0001001', 'Begr1', 'de', 'Def für Begr1', 'de', 'AltLbl für Begr1', None, ), # noqa:E501 + ("ex:test/0001001", "term1", "en", "def for term1", "en", "AltLbl for term1", "ex:test/0001002, ex:test/0001003",), + ("ex:test/0001002", "term2", "en", "def for term2", "en", "AltLbl for term2", "ex:test/0001004",), + ("ex:test/0001003", "term3", "en", "def for term3", "en", "AltLbl for term3", "ex:test/0001004",), + ("ex:test/0001004", "term4", "en", "def for term4", "en", "AltLbl for term4", None, ), + ("ex:test/0001004", "Begr4", "de", "Def für Begr4", "de", "AltLbl für Begr4", None, ), + ("ex:test/0001001", "Begr1", "de", "Def für Begr1", "de", "AltLbl für Begr1", None, ), ] # fmt: on main_cli( - ["--make-ids", "ex", "1001", "--output-directory"] - + [str(tmp_path), str(datadir / CS_CYCLES_MULTI_LANG)] + [ + "--make-ids", + "ex", + "1001", + "--output-directory", + str(tmp_path), + str(datadir / CS_CYCLES_MULTI_LANG), + ] ) xlsxfile = tmp_path / CS_CYCLES_MULTI_LANG wb = load_workbook(filename=xlsxfile, read_only=True, data_only=True) @@ -187,17 +187,17 @@ def test_hierarchy_from_indent_on_dir(tmp_path, capsys): @pytest.mark.parametrize( - "xlsxfile, indent", + ("xlsxfile", "indent"), [(CS_CYCLES_INDENT_IRI, None), (CS_CYCLES_INDENT_DOT, "..")], ids=["indent:Excel", "indent:dots"], ) def test_hierarchy_from_indent(datadir, tmp_path, xlsxfile, indent): # fmt: off expected = [ # data in children-IRI-representation - ('ex:test/term1', 'term1', 'en', 'def for term1', 'en', 'AltLbl for term1', 'ex:test/term2, ex:test/term3', 'Prov for term1', 'ex:XYZ/term1'), # noqa:E501 - ('ex:test/term2', 'term2', 'en', 'def for term2', 'en', 'AltLbl for term2', 'ex:test/term4', 'Prov for term2', 'ex:XYZ/term2'), # noqa:E501 - ('ex:test/term3', 'term3', 'en', 'def for term3', 'en', 'AltLbl for term3', 'ex:test/term4', 'Prov for term3', 'ex:XYZ/term3'), # noqa:E501 - ('ex:test/term4', 'term4', 'en', 'def for term4', 'en', 'AltLbl for term4', None, 'Prov for term4', 'ex:XYZ/term4'), # noqa:E501 + ("ex:test/term1", "term1", "en", "def for term1", "en", "AltLbl for term1", "ex:test/term2, ex:test/term3", "Prov for term1", "ex:XYZ/term1"), + ("ex:test/term2", "term2", "en", "def for term2", "en", "AltLbl for term2", "ex:test/term4", "Prov for term2", "ex:XYZ/term2"), + ("ex:test/term3", "term3", "en", "def for term3", "en", "AltLbl for term3", "ex:test/term4", "Prov for term3", "ex:XYZ/term3"), + ("ex:test/term4", "term4", "en", "def for term4", "en", "AltLbl for term4", None, "Prov for term4", "ex:XYZ/term4"), (None, None, None, None, None, None, None, None, None) ] # fmt: on @@ -222,7 +222,7 @@ def test_hierarchy_from_indent(datadir, tmp_path, xlsxfile, indent): os.chdir(tmp_path) wb = load_workbook(filename=xlsxfile, read_only=True, data_only=True) ws = wb["Concepts"] - for row, expected_row in zip(ws.iter_rows(min_row=3, values_only=True), expected): + for row, _expected_row in zip(ws.iter_rows(min_row=3, values_only=True), expected): assert len(row) == expected_len assert row in expected # We intentionally don't check the row position here! @@ -230,12 +230,12 @@ def test_hierarchy_from_indent(datadir, tmp_path, xlsxfile, indent): def test_hierarchy_from_indent_multilang(datadir, tmp_path): # fmt: off expected = [ # data in children-IRI-representation - ('ex:test/term1', 'term1', 'en', 'def for term1', 'en', 'AltLbl for term1', 'ex:test/term2, ex:test/term3', 'Prov for term1', 'ex:XYZ/term1'), # noqa:E501 - ('ex:test/term1', 'Begr1', 'de', 'Def für Begr1', 'de', 'AltLbl für Begr1', 'ex:test/term2, ex:test/term3', 'Prov for term1', 'ex:XYZ/term1'), # noqa:E501 - ('ex:test/term2', 'term2', 'en', 'def for term2', 'en', 'AltLbl for term2', 'ex:test/term4', 'Prov for term2', 'ex:XYZ/term2'), # noqa:E501 - ('ex:test/term3', 'term3', 'en', 'def for term3', 'en', 'AltLbl for term3', 'ex:test/term4', 'Prov for term3', 'ex:XYZ/term3'), # noqa:E501 - ('ex:test/term4', 'term4', 'en', 'def for term4', 'en', 'AltLbl for term4', None, 'Prov for term4', 'ex:XYZ/term4'), # noqa:E501 - ('ex:test/term4', 'Begr4', 'de', 'Def für Begr4', 'de', 'AltLbl für Begr4', None, 'Prov for term4', 'ex:XYZ/term4'), # noqa:E501 + ("ex:test/term1", "term1", "en", "def for term1", "en", "AltLbl for term1", "ex:test/term2, ex:test/term3", "Prov for term1", "ex:XYZ/term1"), + ("ex:test/term1", "Begr1", "de", "Def für Begr1", "de", "AltLbl für Begr1", "ex:test/term2, ex:test/term3", "Prov for term1", "ex:XYZ/term1"), + ("ex:test/term2", "term2", "en", "def for term2", "en", "AltLbl for term2", "ex:test/term4", "Prov for term2", "ex:XYZ/term2"), + ("ex:test/term3", "term3", "en", "def for term3", "en", "AltLbl for term3", "ex:test/term4", "Prov for term3", "ex:XYZ/term3"), + ("ex:test/term4", "term4", "en", "def for term4", "en", "AltLbl for term4", None, "Prov for term4", "ex:XYZ/term4"), + ("ex:test/term4", "Begr4", "de", "Def für Begr4", "de", "AltLbl für Begr4", None, "Prov for term4", "ex:XYZ/term4"), (None, None, None, None, None, None, None, None, None) ] # fmt: on @@ -254,7 +254,7 @@ def test_hierarchy_from_indent_multilang(datadir, tmp_path): filename=CS_CYCLES_MULTI_LANG_IND, read_only=True, data_only=True ) ws = wb["Concepts"] - for row, expected_row in zip(ws.iter_rows(min_row=3, values_only=True), expected): + for row, _expected_row in zip(ws.iter_rows(min_row=3, values_only=True), expected): assert len(row) == expected_len assert row in expected # We intentionally don't check the row position here! @@ -270,11 +270,10 @@ def test_hierarchy_from_indent_merge(datadir, tmp_path): new_filename = "indent_merge_problem.xlsx" wb.save(new_filename) wb.close() - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, match=f"Cannot merge rows for {iri}. Resolve differences manually." + ): main_cli(["--hierarchy-from-indent", "--no-warn", str(tmp_path / new_filename)]) - assert f"Cannot merge rows for {iri}. Resolve differences manually." in str( - excinfo.value # noqa: WPS441 - ) @pytest.mark.parametrize( @@ -285,13 +284,13 @@ def test_hierarchy_from_indent_merge(datadir, tmp_path): def test_hierarchy_to_indent(datadir, tmp_path, indent): # fmt: off expected_rows = [ # data in children-IRI-representation - ('ex:test/term1', 'term1', 'en', 'def for term1', 'en', 'AltLbl for term1', None, 'Prov for term1', 'ex:XYZ/term1'), # noqa:E501 - ('ex:test/term3', '..term3', 'en', 'def for term3', 'en', 'AltLbl for term3', None, 'Prov for term3', 'ex:XYZ/term3'), # noqa:E501 - ('ex:test/term4', '....term4', 'en', 'def for term4', 'en', 'AltLbl for term4', None, 'Prov for term4', 'ex:XYZ/term4'), # noqa:E501 - ('ex:test/term2', 'term2', 'en', 'def for term2', 'en', 'AltLbl for term2', None, 'Prov for term2', 'ex:XYZ/term2'), # noqa:E501 - ('ex:test/term4', '..term4', 'en', None, None, None, None, None, None), - ('ex:test/term1', 'term1', 'en', None, None, None, None, None, None), - ('ex:test/term2', '..term2', 'en', None, None, None, None, None, None), + ("ex:test/term1", "term1", "en", "def for term1", "en", "AltLbl for term1", None, "Prov for term1", "ex:XYZ/term1"), + ("ex:test/term3", "..term3", "en", "def for term3", "en", "AltLbl for term3", None, "Prov for term3", "ex:XYZ/term3"), + ("ex:test/term4", "....term4", "en", "def for term4", "en", "AltLbl for term4", None, "Prov for term4", "ex:XYZ/term4"), + ("ex:test/term2", "term2", "en", "def for term2", "en", "AltLbl for term2", None, "Prov for term2", "ex:XYZ/term2"), + ("ex:test/term4", "..term4", "en", None, None, None, None, None, None), + ("ex:test/term1", "term1", "en", None, None, None, None, None, None), + ("ex:test/term2", "..term2", "en", None, None, None, None, None, None), (None, None, None, None, None, None, None, None, None), ] # fmt: on @@ -335,15 +334,15 @@ def test_hierarchy_to_indent(datadir, tmp_path, indent): def test_hierarchy_to_indent_multilanguage(datadir, tmp_path): # fmt: off expected_rows = [ # data in children-IRI-representation - ('ex:test/term1', 'term1', 'en', 'def for term1', 'en', 'AltLbl for term1', None, 'Prov for term1', 'ex:XYZ/term1'), # noqa:E501 - ('ex:test/term1', 'Begr1', 'de', 'Def für Begr1', 'de', 'AltLbl für Begr1', None, 'Prov for term1', 'ex:XYZ/term1'), # noqa:E501 - ('ex:test/term3', '..term3', 'en', 'def for term3', 'en', 'AltLbl for term3', None, 'Prov for term3', 'ex:XYZ/term3'), # noqa:E501 - ('ex:test/term4', '....term4', 'en', 'def for term4', 'en', 'AltLbl for term4', None, 'Prov for term4', 'ex:XYZ/term4'), # noqa:E501 - ('ex:test/term4', '....Begr4', 'de', 'Def für Begr4', 'de', 'AltLbl für Begr4', None, 'Prov for term4', 'ex:XYZ/term4'), # noqa:E501 - ('ex:test/term2', 'term2', 'en', 'def for term2', 'en', 'AltLbl for term2', None, 'Prov for term2', 'ex:XYZ/term2'), # noqa:E501 - ('ex:test/term4', '..term4', 'en', None, None, None, None, None, None), - ('ex:test/term1', 'term1', 'en', None, None, None, None, None, None), - ('ex:test/term2', '..term2', 'en', None, None, None, None, None, None), + ("ex:test/term1", "term1", "en", "def for term1", "en", "AltLbl for term1", None, "Prov for term1", "ex:XYZ/term1"), + ("ex:test/term1", "Begr1", "de", "Def für Begr1", "de", "AltLbl für Begr1", None, "Prov for term1", "ex:XYZ/term1"), + ("ex:test/term3", "..term3", "en", "def for term3", "en", "AltLbl for term3", None, "Prov for term3", "ex:XYZ/term3"), + ("ex:test/term4", "....term4", "en", "def for term4", "en", "AltLbl for term4", None, "Prov for term4", "ex:XYZ/term4"), + ("ex:test/term4", "....Begr4", "de", "Def für Begr4", "de", "AltLbl für Begr4", None, "Prov for term4", "ex:XYZ/term4"), + ("ex:test/term2", "term2", "en", "def for term2", "en", "AltLbl for term2", None, "Prov for term2", "ex:XYZ/term2"), + ("ex:test/term4", "..term4", "en", None, None, None, None, None, None), + ("ex:test/term1", "term1", "en", None, None, None, None, None, None), + ("ex:test/term2", "..term2", "en", None, None, None, None, None, None), (None, None, None, None, None, None, None, None, None), ] # fmt: on @@ -385,9 +384,8 @@ def test_hierarchy_to_indent_merge(datadir, tmp_path): new_filename = "indent_merge_problem.xlsx" wb.save(new_filename) wb.close() - with pytest.raises(ValueError) as excinfo: + with pytest.raises(ValueError, match=f"Merge conflict for concept {iri}"): main_cli(["--hierarchy-to-indent", "--no-warn", str(tmp_path / new_filename)]) - assert f"Merge conflict for concept {iri}" in str(excinfo.value) # noqa: WPS441 @pytest.mark.parametrize( @@ -456,7 +454,7 @@ def test_run_ontospy_checks(tmp_path, capsys): @pytest.mark.parametrize( - "test_file,err,msg", + ("test_file", "err", "msg"), [ (CS_CYCLES, 0, "All checks passed successfully."), ( @@ -468,7 +466,7 @@ def test_run_ontospy_checks(tmp_path, capsys): ], ids=["no error", "with error"], ) -def test_check(datadir, tmp_path, capsys, test_file, err, msg): +def test_check(datadir, tmp_path, capsys, test_file, err, msg): # noqa: PLR0913 dst = tmp_path / test_file shutil.copy(datadir / test_file, dst) exit_code = main_cli(["--check", "--no-warn", str(dst)]) @@ -496,11 +494,10 @@ def test_nonexisting_file(datadir, capsys): def test_no_separator(datadir): os.chdir(datadir) - with pytest.raises(ValueError) as excinfo: + with pytest.raises( + ValueError, match="Setting the indent separator to zero length is not allowed." + ): main_cli(["--indent-separator", "", CS_CYCLES]) - assert "Setting the indent separator to zero length is not allowed." in str( - excinfo.value # noqa: WPS441 - ) def test_duplicates(datadir, tmp_path, capsys): @@ -541,7 +538,7 @@ def test_run_vocexcel(datadir, tmp_path, test_file): @pytest.mark.parametrize( - "outputdir,testfile", + ("outputdir", "testfile"), [ ("out", CS_CYCLES), ("", CS_CYCLES),