microsoft · vpvpvpvp · Feb 8, 2023 · Feb 8, 2023 · Feb 8, 2023 · Feb 8, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,16 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 
+### Added
+#### Image redactor
+* Added abstract class `QRRecognizer` for QR code recognizers
+* Added `OpenCVQRRecongnizer` which uses OpenCV to recognize QR codes
+* Added `QRImageAnalyzerEngine` which uses `QRRecognizer` for QR code recognition and `AnalyzerEngine` to analyze its contents for PII entities
+
+### Changed
+#### Image redactor
+* Modified `ImagePiiVerifyEngine` and `ImageRedactorEngine` to allow using `QRImageAnalyzerEngine` as an alternative to `ImageAnalyzerEngine`
+
 ## [2.2.32] - 25.01.2023
 ### Changed
 #### General

diff --git a/docs/assets/qr-image-redactor-design.png b/docs/assets/qr-image-redactor-design.png
diff --git a/presidio-image-redactor/Dockerfile b/presidio-image-redactor/Dockerfile
@@ -6,7 +6,7 @@ ENV PIP_NO_CACHE_DIR=1
 WORKDIR /usr/bin/${NAME}
 
 RUN apt-get update \
-  && apt-get install tesseract-ocr -y \
+  && apt-get install tesseract-ocr ffmpeg libsm6 libxext6 -y \
   && rm -rf /var/lib/apt/lists/* \
   && tesseract -v
 

diff --git a/presidio-image-redactor/Pipfile b/presidio-image-redactor/Pipfile
@@ -12,6 +12,8 @@ pydicom = ">=2.3.0"
 pypng = ">=0.20220715.0"
 matplotlib = "==3.6.2"
 typing-extensions = "*"
+opencv-python = ">=4.5.0"
+importlib-resources = "*"
 
 [dev-packages]
 pytest = "*"

diff --git a/presidio-image-redactor/Pipfile.lock b/presidio-image-redactor/Pipfile.lock
diff --git a/presidio-image-redactor/README.MD b/presidio-image-redactor/README.MD
@@ -16,6 +16,10 @@ Process for standard images:
 
 ![Image Redactor Design](../docs/assets/image-redactor-design.png)
 
+Process for images with QR codes:
+
+![QRImage Redactor Design](../docs/assets/qr-image-redactor-design.png)
+
 Process for DICOM files:
 
 ![DICOM image Redactor Design](../docs/assets/dicom-image-redactor-design.png)
@@ -117,6 +121,30 @@ curl -XPOST "http://localhost:3000/redact" -H "content-type: multipart/form-data
 Python script example can be found under:
 /presidio/e2e-tests/tests/test_image_redactor.py
 
+## Getting started (images with QR codes)
+
+`QRImageAnalyzerEngine` is used by `ImageRedactorEngine` to redact QR codes.
+
+```python
+from PIL import Image
+from presidio_image_redactor import ImageRedactorEngine
+from presidio_image_redactor import QRImageAnalyzerEngine
+
+# Get the image to redact using PIL lib (pillow)
+image = Image.open("presidio-image-redactor/tests/integration/resources/qr.png")
+
+# Initialize the engine
+engine = ImageRedactorEngine(image_analyzer_engine=QRImageAnalyzerEngine())
+
+# Redact the image with pink color
+redacted_image = engine.redact(image, (255, 192, 203))
+
+# save the redacted image 
+redacted_image.save("new_image.png")
+# uncomment to open the image for viewing
+# redacted_image.show()
+```
+
 ## Getting started (DICOM images)
 
 This module only redacts pixel data and does not scrub text PHI which may exist in the DICOM metadata.

diff --git a/presidio-image-redactor/presidio_image_redactor/__init__.py b/presidio-image-redactor/presidio_image_redactor/__init__.py
@@ -5,11 +5,13 @@
 from .tesseract_ocr import TesseractOCR
 from .bbox import BboxProcessor
 from .image_analyzer_engine import ImageAnalyzerEngine
+from .qr_image_analyzer_engine import QRImageAnalyzerEngine
 from .image_redactor_engine import ImageRedactorEngine
 from .image_pii_verify_engine import ImagePiiVerifyEngine
 from .dicom_image_redactor_engine import DicomImageRedactorEngine
 from .dicom_image_pii_verify_engine import DicomImagePiiVerifyEngine
 
+
 # Set up default logging (with NullHandler)
 logging.getLogger("presidio-image-redactor").addHandler(logging.NullHandler())
 
@@ -18,6 +20,7 @@
     "TesseractOCR",
     "BboxProcessor",
     "ImageAnalyzerEngine",
+    "QRImageAnalyzerEngine",
     "ImageRedactorEngine",
     "ImagePiiVerifyEngine",
     "DicomImageRedactorEngine",

diff --git a/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py b/presidio-image-redactor/presidio_image_redactor/image_pii_verify_engine.py
@@ -1,9 +1,10 @@
 from PIL import Image, ImageChops
 from presidio_image_redactor.image_analyzer_engine import ImageAnalyzerEngine
+from presidio_image_redactor import QRImageAnalyzerEngine
 import matplotlib
 import io
 from matplotlib import pyplot as plt
-from typing import Optional
+from typing import Optional, Union
 
 
 def fig2img(fig):
@@ -19,7 +20,10 @@ def fig2img(fig):
 class ImagePiiVerifyEngine:
     """ImagePiiVerifyEngine class only supporting Pii verification currently."""
 
-    def __init__(self, image_analyzer_engine: Optional[ImageAnalyzerEngine] = None):
+    def __init__(
+        self,
+        image_analyzer_engine: Union[ImageAnalyzerEngine, QRImageAnalyzerEngine] = None,
+    ):
         if not image_analyzer_engine:
             image_analyzer_engine = ImageAnalyzerEngine()
         self.image_analyzer_engine = image_analyzer_engine
@@ -42,9 +46,12 @@ def verify(
 
         image = ImageChops.duplicate(image)
         image_x, image_y = image.size
-        bboxes = self.image_analyzer_engine.analyze(
-            image, ocr_kwargs, **text_analyzer_kwargs
-        )
+        if isinstance(self.image_analyzer_engine, QRImageAnalyzerEngine):
+            bboxes = self.image_analyzer_engine.analyze(image, **text_analyzer_kwargs)
+        else:
+            bboxes = self.image_analyzer_engine.analyze(
+                image, ocr_kwargs, **text_analyzer_kwargs
+            )
         fig, ax = plt.subplots()
         image_r = 70
         fig.set_size_inches(image_x / image_r, image_y / image_r)

diff --git a/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/image_redactor_engine.py
@@ -2,7 +2,11 @@
 
 from PIL import Image, ImageDraw, ImageChops
 
-from presidio_image_redactor import ImageAnalyzerEngine, BboxProcessor
+from presidio_image_redactor import (
+    ImageAnalyzerEngine,
+    QRImageAnalyzerEngine,
+    BboxProcessor,
+)
 
 
 class ImageRedactorEngine:
@@ -11,7 +15,10 @@ class ImageRedactorEngine:
     :param image_analyzer_engine: Engine which performs OCR + PII detection.
     """
 
-    def __init__(self, image_analyzer_engine: ImageAnalyzerEngine = None):
+    def __init__(
+        self,
+        image_analyzer_engine: Union[ImageAnalyzerEngine, QRImageAnalyzerEngine] = None,
+    ):
         if not image_analyzer_engine:
             self.image_analyzer_engine = ImageAnalyzerEngine()
         else:
@@ -42,9 +49,12 @@ def redact(
 
         image = ImageChops.duplicate(image)
 
-        bboxes = self.image_analyzer_engine.analyze(
-            image, ocr_kwargs, **text_analyzer_kwargs
-        )
+        if isinstance(self.image_analyzer_engine, QRImageAnalyzerEngine):
+            bboxes = self.image_analyzer_engine.analyze(image, **text_analyzer_kwargs)
+        else:
+            bboxes = self.image_analyzer_engine.analyze(
+                image, ocr_kwargs, **text_analyzer_kwargs
+            )
         draw = ImageDraw.Draw(image)
 
         for box in bboxes:

diff --git a/presidio-image-redactor/presidio_image_redactor/qr_image_analyzer_engine.py b/presidio-image-redactor/presidio_image_redactor/qr_image_analyzer_engine.py
@@ -0,0 +1,62 @@
+from typing import List, Optional
+
+from presidio_analyzer import AnalyzerEngine
+
+from presidio_image_redactor.entities import ImageRecognizerResult
+from presidio_image_redactor.qr_recognizer import QRRecognizer
+from presidio_image_redactor.qr_recognizer import OpenCVQRRecongnizer
+
+
+class QRImageAnalyzerEngine:
+    """QRImageAnalyzerEngine class.
+
+    :param analyzer_engine: The Presidio AnalyzerEngine instance
+        to be used to detect PII in text
+    :param qr: the QRRecognizer object to detect and decode text in QR codes
+    """
+
+    def __init__(
+        self,
+        analyzer_engine: Optional[AnalyzerEngine] = None,
+        qr: Optional[QRRecognizer] = None,
+    ):
+        if not analyzer_engine:
+            analyzer_engine = AnalyzerEngine()
+        self.analyzer_engine = analyzer_engine
+
+        if not qr:
+            qr = OpenCVQRRecongnizer()
+        self.qr = qr
+
+    def analyze(
+        self, image: object, **text_analyzer_kwargs
+    ) -> List[ImageRecognizerResult]:
+        """Analyse method to analyse the given image.
+
+        :param image: PIL Image/numpy array to be processed.
+        :param text_analyzer_kwargs: Additional values for the analyze method
+        in AnalyzerEngine.
+
+        :return: List of the extract entities with image bounding boxes.
+        """
+        bboxes = []
+
+        qr_result = self.qr.recognize(image)
+        for qr_code in qr_result:
+            analyzer_result = self.analyzer_engine.analyze(
+                text=qr_code.text, language="en", **text_analyzer_kwargs
+            )
+            for res in analyzer_result:
+                bboxes.append(
+                    ImageRecognizerResult(
+                        res.entity_type,
+                        res.start,
+                        res.end,
+                        res.score,
+                        qr_code.bbox[0],
+                        qr_code.bbox[1],
+                        qr_code.bbox[2],
+                        qr_code.bbox[3],
+                    )
+                )
+        return bboxes