From 0e5422569a4429e8139f64c2fcd678b7c208364e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Behmo?= Date: Fri, 22 Jan 2021 10:35:53 +0100 Subject: [PATCH] Fix grouping of safelisted annotations Annotation grouping was based on the filename and line number. Unfortunately, this fails when the annotation comes from the safelist, where the filename is the safelist, and the line number is 0. This was causing issues in pii annotation parsing on edx-platform. To address this, we group annotations by line number and the optional extra['object_id'] field. --- CHANGELOG.rst | 7 ++++++- code_annotations/__init__.py | 2 +- code_annotations/base.py | 11 ++++++++--- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b345578..4b0120b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -11,7 +11,12 @@ Change Log .. There should always be an "Unreleased" section for changes pending release. -[1.0.0] - 2021-01-25 +[1.0.1] - 2021-01-22 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +* Fix grouping of safelisted annotations + +[1.0.0] - 2021-01-21 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * BREAKING CHANGE: Improvement of some error messages diff --git a/code_annotations/__init__.py b/code_annotations/__init__.py index 176b190..29ca68b 100644 --- a/code_annotations/__init__.py +++ b/code_annotations/__init__.py @@ -2,4 +2,4 @@ Extensible tools for parsing annotations in codebases. """ -__version__ = '1.0.0' +__version__ = '1.0.1' diff --git a/code_annotations/base.py b/code_annotations/base.py index 8d5b290..8b97277 100644 --- a/code_annotations/base.py +++ b/code_annotations/base.py @@ -447,23 +447,28 @@ def iter_groups(self, annotations): """ Iterate on groups of annotations. - Annotations are considered as a group when they all have the same `line_number`, which should point to the - beginning of the annotation group. 
+ Annotations are considered as a group when they all have the same `line_number` and optional + `extra['object_id']`. The line number points to the beginning of the annotation group. The `object_id` is set + mostly for annotations parsed from a safelist. Yield: annotations (annotation list) """ current_group = [] current_line_number = None + current_object_id = None for annotation in annotations: line_number = annotation["line_number"] + object_id = annotation.get("extra", {}).get("object_id") line_number_changed = line_number != current_line_number - if line_number_changed: + object_id_changed = object_id != current_object_id + if line_number_changed or object_id_changed: if current_group: yield current_group current_group.clear() current_group.append(annotation) current_line_number = line_number + current_object_id = object_id if current_group: yield current_group