openvino/coreml: wip refactor text recognition

koush · Apr 22, 2024 · 1294fc2 · 1294fc2
1 parent 2fb6331
commit 1294fc2
Show file tree

Hide file tree

Showing 10 changed files with 514 additions and 23 deletions.
diff --git a/plugins/coreml/src/coreml/__init__.py b/plugins/coreml/src/coreml/__init__.py
@@ -14,6 +14,11 @@
 
 from common import yolo
 from coreml.recognition import CoreMLRecognition
+
+try:
+    from coreml.text_recognition import CoreMLTextRecognition
+except Exception:  # text recognition is optional; None leaves it unregistered
+    CoreMLTextRecognition = None
 from predict import Prediction, PredictPlugin
 from predict.rectangle import Rectangle
 
@@ -131,25 +136,43 @@ def __init__(self, nativeId: str | None = None):
 
     async def prepareRecognitionModels(self):
         try:
+            devices = [  # child devices passed to onDevicesChanged below
+                {
+                    "nativeId": "recognition",
+                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                    "interfaces": [
+                        scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                    ],
+                    "name": "CoreML Recognition",
+                },
+            ]
+
+            if CoreMLTextRecognition:  # None when its import failed at module load
+                devices.append(
+                    {
+                        "nativeId": "textrecognition",
+                        "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                        "interfaces": [
+                            scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                        ],
+                        "name": "CoreML Text Recognition",
+                    },
+                )
+
             await scrypted_sdk.deviceManager.onDevicesChanged(
                 {
-                    "devices": [
-                        {
-                            "nativeId": "recognition",
-                            "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
-                            "interfaces": [
-                                scrypted_sdk.ScryptedInterface.ObjectDetection.value,
-                            ],
-                            "name": "CoreML Recognition",
-                        }
-                    ]
+                    "devices": devices,
                 }
             )
         except:
             pass
 
     async def getDevice(self, nativeId: str) -> Any:
-        return CoreMLRecognition(nativeId)
+        if nativeId == "recognition":
+            return CoreMLRecognition(nativeId)
+        if nativeId == "textrecognition" and CoreMLTextRecognition:  # None if import failed
+            return CoreMLTextRecognition(nativeId)
+        raise Exception("unknown device")
 
     async def getSettings(self) -> list[Setting]:
         model = self.storage.getItem("model") or "Default"
@@ -174,7 +197,7 @@ def get_input_details(self) -> Tuple[int, int, int]:
 
     def get_input_size(self) -> Tuple[float, float]:
         return (self.inputwidth, self.inputheight)
-    
+
     async def detect_batch(self, inputs: List[Any]) -> List[Any]:
         out_dicts = await asyncio.get_event_loop().run_in_executor(
             predictExecutor, lambda: self.model.predict(inputs)

diff --git a/plugins/coreml/src/coreml/text_recognition.py b/plugins/coreml/src/coreml/text_recognition.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import os
+
+import coremltools as ct
+
+from predict.text_recognize import TextRecognition
+
+
+class CoreMLTextRecognition(TextRecognition):
+    """Text recognition device whose models are loaded and run with coremltools."""
+
+    def __init__(self, nativeId: str | None = None):
+        super().__init__(nativeId=nativeId)
+
+    def downloadModel(self, model: str):
+        """Download the ``.mlpackage`` files for ``model`` and load them.
+
+        Returns a ``(mlmodel, inputName)`` tuple; ``inputName`` is the name of
+        the first input listed in the model spec.
+        """
+        model_version = "v7"
+        mlmodel = "model"
+
+        files = [
+            f"{model}/{model}.mlpackage/Data/com.apple.CoreML/weights/weight.bin",
+            f"{model}/{model}.mlpackage/Data/com.apple.CoreML/{mlmodel}.mlmodel",
+            f"{model}/{model}.mlpackage/Manifest.json",
+        ]
+
+        for relative in files:
+            local = self.downloadFile(
+                f"https://github.com/koush/coreml-models/raw/main/{relative}",
+                f"{model_version}/{relative}",
+            )
+
+        # Only the last download is used: Manifest.json is listed last, so
+        # (assuming downloadFile returns the saved file's local path) its
+        # directory is the .mlpackage folder handed to MLModel.
+        package_dir = os.path.dirname(local)
+        loaded = ct.models.MLModel(package_dir)
+        input_name = loaded.get_spec().description.input[0].name
+        return loaded, input_name
+
+    def predictDetectModel(self, input):
+        """Run the detect model on ``input`` and return its first output value."""
+        # self.detectModel is unpacked as a (model, inputName) pair, the shape
+        # downloadModel returns; where it is assigned is not visible here.
+        loaded, input_name = self.detectModel
+        outputs = loaded.predict({input_name: input})
+        return list(outputs.values())[0]
diff --git a/plugins/coreml/src/requirements.optional.txt b/plugins/coreml/src/requirements.optional.txt
@@ -0,0 +1 @@
+opencv-python
diff --git a/plugins/openvino/src/ov/__init__.py b/plugins/openvino/src/ov/__init__.py
@@ -18,6 +18,10 @@
 from predict.rectangle import Rectangle
 
 from .recognition import OpenVINORecognition
+try:
+    from .text_recognition import OpenVINOTextRecognition
+except Exception:  # text recognition is optional; None leaves it unregistered
+    OpenVINOTextRecognition = None
 
 predictExecutor = concurrent.futures.ThreadPoolExecutor(1, "OpenVINO-Predict")
 
@@ -326,22 +330,40 @@ def torelative(value: float):
 
     async def prepareRecognitionModels(self):
         try:
+            devices = [  # child devices passed to onDevicesChanged below
+                {
+                    "nativeId": "recognition",
+                    "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                    "interfaces": [
+                        scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                    ],
+                    "name": "OpenVINO Recognition",
+                },
+            ]
+
+            if OpenVINOTextRecognition:  # None when its import failed at module load
+                devices.append(
+                    {
+                        "nativeId": "textrecognition",
+                        "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
+                        "interfaces": [
+                            scrypted_sdk.ScryptedInterface.ObjectDetection.value,
+                        ],
+                        "name": "OpenVINO Text Recognition",
+                    },
+                )
+
             await scrypted_sdk.deviceManager.onDevicesChanged(
                 {
-                    "devices": [
-                        {
-                            "nativeId": "recognition",
-                            "type": scrypted_sdk.ScryptedDeviceType.Builtin.value,
-                            "interfaces": [
-                                scrypted_sdk.ScryptedInterface.ObjectDetection.value,
-                            ],
-                            "name": "OpenVINO Recognition",
-                        }
-                    ]
+                    "devices": devices,
                 }
             )
         except:
             pass
 
     async def getDevice(self, nativeId: str) -> Any:
-        return OpenVINORecognition(self, nativeId)
+        if nativeId == "recognition":
+            return OpenVINORecognition(self, nativeId)
+        elif nativeId == "textrecognition" and OpenVINOTextRecognition:  # None if import failed
+            return OpenVINOTextRecognition(self, nativeId)
+        raise Exception("unknown device")
diff --git a/plugins/openvino/src/ov/text_recognition.py b/plugins/openvino/src/ov/text_recognition.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+import openvino.runtime as ov
+
+from predict.text_recognize import TextRecognition
+
+
+class OpenVINOTextRecognition(TextRecognition):
+    """Text recognition device whose models are compiled and run with OpenVINO."""
+
+    def __init__(self, plugin, nativeId: str | None = None):
+        # The owning plugin supplies precision, core and mode for downloadModel;
+        # it is assigned before the base initializer runs.
+        self.plugin = plugin
+
+        super().__init__(nativeId=nativeId)
+
+    def downloadModel(self, model: str):
+        """Download the ``.xml``/``.bin`` pair for ``model`` and compile it.
+
+        The files are selected by the plugin's precision, and the compiled
+        model is built with the plugin's core and mode.
+        """
+        ovmodel = "best"
+        precision = self.plugin.precision
+        model_version = "v5"
+        xml_path = self.downloadFile(
+            f"https://raw.githubusercontent.com/koush/openvino-models/main/{model}/{precision}/{ovmodel}.xml",
+            f"{model_version}/{model}/{precision}/{ovmodel}.xml",
+        )
+        bin_path = self.downloadFile(
+            f"https://raw.githubusercontent.com/koush/openvino-models/main/{model}/{precision}/{ovmodel}.bin",
+            f"{model_version}/{model}/{precision}/{ovmodel}.bin",
+        )
+        print(xml_path, bin_path)
+        return self.plugin.core.compile_model(xml_path, self.plugin.mode)
+
+    def predictDetectModel(self, input):
+        """Run one inference on ``input`` and return the first output tensor's data."""
+        request = self.detectModel.create_infer_request()
+        request.set_input_tensor(ov.Tensor(array=input))
+        request.start_async()
+        request.wait()
+        return request.output_tensors[0].data
diff --git a/plugins/openvino/src/requirements.optional.txt b/plugins/openvino/src/requirements.optional.txt
@@ -0,0 +1 @@
+opencv-python
diff --git a/plugins/openvino/src/requirements.txt b/plugins/openvino/src/requirements.txt
@@ -4,3 +4,4 @@ openvino==2024.0.0
 # pillow-simd confirmed not building with arm64 linux or apple silicon
 Pillow>=5.4.1; sys_platform != 'linux' or platform_machine != 'x86_64'
 pillow-simd; sys_platform == 'linux' and platform_machine == 'x86_64'
+