From dcc221f426414ecc5c7dadcebeb36c1271a59071 Mon Sep 17 00:00:00 2001
From: ZachCafego
Date: Fri, 30 Jun 2023 14:18:02 -0400
Subject: [PATCH 1/4] Added rollup functionality and unit test

---
 .../clip_component/clip_component.py        | 101 ++++++++++--------
 python/ClipDetection/tests/data/rollup.csv  |   7 ++
 python/ClipDetection/tests/test_clip.py     |  16 ++-
 3 files changed, 81 insertions(+), 43 deletions(-)
 create mode 100644 python/ClipDetection/tests/data/rollup.csv

diff --git a/python/ClipDetection/clip_component/clip_component.py b/python/ClipDetection/clip_component/clip_component.py
index 9a6df34f..2e049239 100644
--- a/python/ClipDetection/clip_component/clip_component.py
+++ b/python/ClipDetection/clip_component/clip_component.py
@@ -28,6 +28,7 @@
 import os
 import csv
 from pkg_resources import resource_filename
+from typing import Iterable, Mapping, TypedDict
 
 from PIL import Image
 import cv2
@@ -54,14 +55,17 @@ def __init__(self):
         self._wrapper = ClipWrapper()
 
     def get_detections_from_image_reader(self, image_job, image_reader):
+        num_detections = 0
         try:
             logger.info("received image job: %s", image_job)
             image = image_reader.get_image()
-            detections = self._wrapper.get_classifications(image, image_job.job_properties)
-            logger.info(f"Job complete. Found {len(detections)} detections.")
-            return detections
+            detections = self._wrapper.get_classifications((image,), image_job.job_properties)
+            for detection in detections:
+                yield detection
+                num_detections += 1
+            logger.info(f"Job complete. Found {num_detections} detection{'s' if num_detections != 1 else ''}.")
 
-        except Exception:
+        except Exception as e:
             logger.exception(f"Failed to complete job {image_job.job_name} due to the following exception:")
             raise
 
@@ -83,50 +87,64 @@ def __init__(self):
         self._inferencing_server = None
         self._triton_server_url = None
 
-    def get_classifications(self, image, job_properties):
-        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+    def get_classifications(self, images, job_properties: Mapping[str, str]) -> mpf.ImageLocation:
         kwargs = self._parse_properties(job_properties)
-        image_width, image_height = image.size
-
         self._check_template_list(kwargs['template_path'], kwargs['num_templates'])
         self._check_class_list(kwargs['classification_path'], kwargs['classification_list'])
 
-        image = ImagePreprocessor(kwargs['enable_cropping']).preprocess(image).to(device)
+        self._preprocessor = ImagePreprocessor(kwargs['enable_cropping'])
 
-        if kwargs['enable_triton']:
-            if self._inferencing_server is None or kwargs['triton_server'] != self._triton_server_url:
-                self._inferencing_server = CLIPInferencingServer(kwargs['triton_server'])
-                self._triton_server_url = kwargs['triton_server']
-
-            results = self._inferencing_server.get_responses(image)
-            image_tensors= torch.Tensor(np.copy(results)).to(device=device)
-            image_features = torch.mean(image_tensors, 0)
-        else:
+        for image in images:
+            image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
+            image_width, image_height = image.size
+
+            image = self._preprocessor.preprocess(image).to(device)
+
+            if kwargs['enable_triton']:
+                if self._inferencing_server is None or kwargs['triton_server'] != self._triton_server_url:
+                    self._inferencing_server = CLIPInferencingServer(kwargs['triton_server'])
+                    self._triton_server_url = kwargs['triton_server']
+
+                results = self._inferencing_server.get_responses(image)
+                image_tensors = torch.Tensor(np.copy(results)).to(device=device)
+                image_features = torch.mean(image_tensors, 0)
+            else:
+                with torch.no_grad():
+                    image_features = self._model.encode_image(image).float()
+                    image_features = torch.mean(image_features, 0).unsqueeze(0)
+
             with torch.no_grad():
-                image_features = self._model.encode_image(image).float()
-                image_features = torch.mean(image_features, 0).unsqueeze(0)
-
-        with torch.no_grad():
-            image_features /= image_features.norm(dim=-1, keepdim=True)
-
-            similarity = (100.0 * image_features @ self._text_features).softmax(dim=-1).to(device)
-            similarity = torch.mean(similarity, 0)
-            values, indices = similarity.topk(kwargs['num_classifications'])
-
-            classification_list = '; '.join([self._class_mapping[list(self._class_mapping.keys())[int(index)]] for index in indices])
-            classification_confidence_list = '; '.join([str(value.item()) for value in values])
-
-            detection_properties = {
-                "CLASSIFICATION": classification_list.split('; ')[0],
-                "CLASSIFICATION CONFIDENCE LIST": classification_confidence_list,
-                "CLASSIFICATION LIST": classification_list
-            }
-
-            if kwargs['include_features']:
-                detection_properties['FEATURE'] = base64.b64encode(image_features.cpu().numpy()).decode()
+                image_features /= image_features.norm(dim=-1, keepdim=True)
+
+                similarity = (100.0 * image_features @ self._text_features).softmax(dim=-1).to(device)
+                similarity = torch.mean(similarity, 0)
+                values, indices = similarity.topk(len(self._class_mapping))
+
+                classification_list = []
+                classification_confidence_list = []
+                count = 0
+                for value, index in zip(values, indices):
+                    if count >= kwargs['num_classifications']:
+                        break
+                    class_name = self._class_mapping[list(self._class_mapping.keys())[int(index)]]
+                    if class_name not in classification_list:
+                        classification_list.append(class_name)
+                        classification_confidence_list.append(str(value.item()))
+                        count += 1
+
+                classification_list = '; '.join(classification_list)
+                classification_confidence_list = '; '.join(classification_confidence_list)
+
+                detection_properties = {
+                    "CLASSIFICATION": classification_list.split('; ')[0],
+                    "CLASSIFICATION CONFIDENCE LIST": classification_confidence_list,
+                    "CLASSIFICATION LIST": classification_list
+                }
+
+                if kwargs['include_features']:
+                    detection_properties['FEATURE'] = base64.b64encode(image_features.cpu().numpy()).decode()
 
-        return [
-            mpf.ImageLocation(
+            yield mpf.ImageLocation(
                 x_left_upper = 0,
                 y_left_upper = 0,
                 width = image_width,
@@ -134,7 +152,6 @@ def get_classifications(self, image, job_properties):
                 confidence = float(classification_confidence_list.split('; ')[0]),
                 detection_properties = detection_properties
             )
-        ]
 
     def _parse_properties(self, job_properties):
         classification_list = self._get_prop(job_properties, "CLASSIFICATION_LIST", 'coco', ['coco', 'imagenet'])
diff --git a/python/ClipDetection/tests/data/rollup.csv b/python/ClipDetection/tests/data/rollup.csv
new file mode 100644
index 00000000..d251e69c
--- /dev/null
+++ b/python/ClipDetection/tests/data/rollup.csv
@@ -0,0 +1,7 @@
+dog,indoor animal
+cat,indoor animal
+lion,wild animal
+sedan,vehicle
+truck,vehicle
+guitar,musical instrument
+house,building
\ No newline at end of file
diff --git a/python/ClipDetection/tests/test_clip.py b/python/ClipDetection/tests/test_clip.py
index f29a6bc8..359617e1 100644
--- a/python/ClipDetection/tests/test_clip.py
+++ b/python/ClipDetection/tests/test_clip.py
@@ -63,7 +63,7 @@ def test_image_file(self):
 
     def test_image_file_custom(self):
         job = mpf.ImageJob(
-            job_name='test-image',
+            job_name='test-image-custom',
             data_uri=self._get_test_file('riot.jpg'),
             job_properties=dict(
                 NUMBER_OF_CLASSIFICATIONS = 4,
@@ -77,6 +77,20 @@ def test_image_file_custom(self):
         self.assertEqual(job.job_properties["NUMBER_OF_CLASSIFICATIONS"], len(self._output_to_list(result.detection_properties["CLASSIFICATION LIST"])))
         self.assertTrue("violent scene" in self._output_to_list(result.detection_properties["CLASSIFICATION LIST"]))
         self.assertEqual("violent scene", result.detection_properties["CLASSIFICATION"])
+
+    def test_image_file_rollup(self):
+        job = mpf.ImageJob(
+            job_name='test-image-rollup',
+            data_uri=self._get_test_file('dog.jpg'),
+            job_properties=dict(
+                NUMBER_OF_CLASSIFICATIONS = 4,
+                CLASSIFICATION_PATH = self._get_test_file("rollup.csv")
+            ),
+            media_properties={},
+            feed_forward_location=None
+        )
+        result = list(ClipComponent().get_detections_from_image(job))[0]
+        self.assertEqual("indoor animal", result.detection_properties["CLASSIFICATION"])
 
     @staticmethod
     def _get_test_file(filename):

From dbdae02f0e0ca5e78cd9c8063ff84f5e62aa0d8e Mon Sep 17 00:00:00 2001
From: jrobble
Date: Thu, 6 Jul 2023 16:56:03 -0400
Subject: [PATCH 2/4] Make test run faster.

---
 python/ClipDetection/tests/test_clip.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/ClipDetection/tests/test_clip.py b/python/ClipDetection/tests/test_clip.py
index 359617e1..50171345 100644
--- a/python/ClipDetection/tests/test_clip.py
+++ b/python/ClipDetection/tests/test_clip.py
@@ -84,7 +84,9 @@ def test_image_file_rollup(self):
             data_uri=self._get_test_file('dog.jpg'),
             job_properties=dict(
                 NUMBER_OF_CLASSIFICATIONS = 4,
-                CLASSIFICATION_PATH = self._get_test_file("rollup.csv")
+                NUMBER_OF_TEMPLATES = 1,
+                CLASSIFICATION_PATH = self._get_test_file("rollup.csv"),
+                ENABLE_CROPPING='False'
             ),
             media_properties={},
             feed_forward_location=None

From 7ec3d1d3cabfc13fc0d562634a30cc0278069597 Mon Sep 17 00:00:00 2001
From: jrobble
Date: Thu, 6 Jul 2023 21:42:20 -0400
Subject: [PATCH 3/4] Expand env. vars.

---
 python/ClipDetection/clip_component/clip_component.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/ClipDetection/clip_component/clip_component.py b/python/ClipDetection/clip_component/clip_component.py
index 2e049239..8dd03324 100644
--- a/python/ClipDetection/clip_component/clip_component.py
+++ b/python/ClipDetection/clip_component/clip_component.py
@@ -155,13 +155,13 @@ def get_classifications(self, images, job_properties: Mapping[str, str]) -> mpf.
 
     def _parse_properties(self, job_properties):
         classification_list = self._get_prop(job_properties, "CLASSIFICATION_LIST", 'coco', ['coco', 'imagenet'])
-        classification_path = self._get_prop(job_properties, "CLASSIFICATION_PATH", '')
+        classification_path = os.path.expandvars(self._get_prop(job_properties, "CLASSIFICATION_PATH", ''))
         enable_cropping = self._get_prop(job_properties, "ENABLE_CROPPING", True)
         enable_triton = self._get_prop(job_properties, "ENABLE_TRITON", False)
         include_features = self._get_prop(job_properties, "INCLUDE_FEATURES", False)
         num_classifications = self._get_prop(job_properties, "NUMBER_OF_CLASSIFICATIONS", 1)
         num_templates = self._get_prop(job_properties, "NUMBER_OF_TEMPLATES", 80, [1, 7, 80])
-        template_path = self._get_prop(job_properties, "TEMPLATE_PATH", '')
+        template_path = os.path.expandvars(self._get_prop(job_properties, "TEMPLATE_PATH", ''))
         triton_server = self._get_prop(job_properties, "TRITON_SERVER", 'clip-detection-server:8001')
 
         return dict(
@@ -418,4 +418,4 @@ def _get_crops(imgs):
 
     return crops
 
-EXPORT_MPF_COMPONENT = ClipComponent
\ No newline at end of file
+EXPORT_MPF_COMPONENT = ClipComponent

From c5d5a03deb6b6baa7659309bb8022ea327340248 Mon Sep 17 00:00:00 2001
From: ZachCafego
Date: Fri, 7 Jul 2023 11:12:25 -0400
Subject: [PATCH 4/4] Addressed first round of changes.

---
 python/ClipDetection/clip_component/clip_component.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/ClipDetection/clip_component/clip_component.py b/python/ClipDetection/clip_component/clip_component.py
index 8dd03324..85d5b669 100644
--- a/python/ClipDetection/clip_component/clip_component.py
+++ b/python/ClipDetection/clip_component/clip_component.py
@@ -28,7 +28,7 @@
 import os
 import csv
 from pkg_resources import resource_filename
-from typing import Iterable, Mapping, TypedDict
+from typing import Mapping, Iterable
 
 from PIL import Image
 import cv2
@@ -66,7 +66,7 @@ def get_detections_from_image_reader(self, image_job, image_reader):
             logger.info(f"Job complete. Found {num_detections} detection{'s' if num_detections != 1 else ''}.")
 
         except Exception as e:
-            logger.exception(f"Failed to complete job {image_job.job_name} due to the following exception:")
+            logger.exception(f'Job failed due to: {e}')
             raise
 
 class ClipWrapper(object):
@@ -75,6 +75,7 @@ def __init__(self):
         model, _ = clip.load('ViT-B/32', device=device, download_root='/models')
         logger.info("Model loaded.")
         self._model = model
+        self._preprocessor = None
 
         self._classification_path = ''
         self._template_path = ''
@@ -88,7 +88,7 @@ def __init__(self):
         self._inferencing_server = None
         self._triton_server_url = None
 
-    def get_classifications(self, images, job_properties: Mapping[str, str]) -> mpf.ImageLocation:
+    def get_classifications(self, images, job_properties: Mapping[str, str]) -> Iterable[mpf.ImageLocation]:
         kwargs = self._parse_properties(job_properties)
         self._check_template_list(kwargs['template_path'], kwargs['num_templates'])
         self._check_class_list(kwargs['classification_path'], kwargs['classification_list'])
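
Note (added in review, not part of the patch series): the sketch below is a minimal, self-contained illustration of the rollup behavior these patches introduce. The function names (load_rollup_mapping, rollup_top_classes) are hypothetical, and the single-column fallback (a class rolling up to itself) is an assumption not shown in the diff. The dedup loop mirrors the one added to ClipWrapper.get_classifications in PATCH 1/4, and the CSV format matches tests/data/rollup.csv.

    import csv

    def load_rollup_mapping(csv_path):
        # Each CSV row maps a fine-grained class to its rollup category,
        # e.g. "dog,indoor animal". A row with no second column is assumed
        # to roll up to itself (an assumption, not shown in the patch).
        mapping = {}
        with open(csv_path, newline='') as f:
            for row in csv.reader(f):
                if row:
                    mapping[row[0]] = row[1] if len(row) > 1 else row[0]
        return mapping

    def rollup_top_classes(ranked, mapping, num_classifications):
        # 'ranked' is a list of (class_name, confidence) pairs sorted by
        # descending confidence over the full class list, standing in for
        # the CLIP similarity topk. Duplicate rollup categories are skipped,
        # keeping the confidence of the highest-scoring member, mirroring
        # the loop added in PATCH 1/4.
        names, confidences = [], []
        for class_name, confidence in ranked:
            if len(names) >= num_classifications:
                break
            rolled = mapping.get(class_name, class_name)
            if rolled not in names:
                names.append(rolled)
                confidences.append(confidence)
        return names, confidences

    if __name__ == '__main__':
        mapping = load_rollup_mapping('rollup.csv')
        ranked = [('dog', 0.61), ('cat', 0.22), ('lion', 0.09), ('sedan', 0.05)]
        print(rollup_top_classes(ranked, mapping, 2))
        # -> (['indoor animal', 'wild animal'], [0.61, 0.09])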