Skip to content

Commit

Permalink
predict: move face/text recognition into separate models
Browse files: browse the repository at this point in the history
  • Loading branch information
koush committed Apr 23, 2024
1 parent ff2d1d5 commit 4d98ccf
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 12 deletions.
6 changes: 3 additions & 3 deletions plugins/coreml/src/coreml/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from scrypted_sdk import Setting, SettingValue

from common import yolo
from coreml.recognition import CoreMLRecognition
from coreml.face_recognition import CoreMLFaceRecognition

try:
from coreml.text_recognition import CoreMLTextRecognition
Expand Down Expand Up @@ -143,7 +143,7 @@ async def prepareRecognitionModels(self):
"interfaces": [
scrypted_sdk.ScryptedInterface.ObjectDetection.value,
],
"name": "CoreML Recognition",
"name": "CoreML Face Recognition",
},
]

Expand All @@ -169,7 +169,7 @@ async def prepareRecognitionModels(self):

async def getDevice(self, nativeId: str) -> Any:
if nativeId == "recognition":
return CoreMLRecognition(nativeId)
return CoreMLFaceRecognition(nativeId)
if nativeId == "textrecognition":
return CoreMLTextRecognition(nativeId)
raise Exception("unknown device")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# from Foundation import NSData, NSMakeSize

# import Vision
from predict.recognize import RecognizeDetection
from predict.face_recognize import FaceRecognizeDetection


def euclidean_distance(arr1, arr2):
Expand All @@ -26,7 +26,7 @@ def cosine_similarity(vector_a, vector_b):

predictExecutor = concurrent.futures.ThreadPoolExecutor(8, "Vision-Predict")

class CoreMLRecognition(RecognizeDetection):
class CoreMLFaceRecognition(FaceRecognizeDetection):
def __init__(self, nativeId: str | None = None):
super().__init__(nativeId=nativeId)

Expand Down
6 changes: 3 additions & 3 deletions plugins/openvino/src/ov/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from predict import Prediction, PredictPlugin
from predict.rectangle import Rectangle

from .recognition import OpenVINORecognition
from .face_recognition import OpenVINOFaceRecognition
try:
from .text_recognition import OpenVINOTextRecognition
except:
Expand Down Expand Up @@ -337,7 +337,7 @@ async def prepareRecognitionModels(self):
"interfaces": [
scrypted_sdk.ScryptedInterface.ObjectDetection.value,
],
"name": "OpenVINO Recognition",
"name": "OpenVINO Face Recognition",
},
]

Expand All @@ -363,7 +363,7 @@ async def prepareRecognitionModels(self):

async def getDevice(self, nativeId: str) -> Any:
if nativeId == "recognition":
return OpenVINORecognition(self, nativeId)
return OpenVINOFaceRecognition(self, nativeId)
elif nativeId == "textrecognition":
return OpenVINOTextRecognition(self, nativeId)
raise Exception("unknown device")
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np

from predict.recognize import RecognizeDetection
from predict.face_recognize import FaceRecognizeDetection


def euclidean_distance(arr1, arr2):
Expand All @@ -19,7 +19,7 @@ def cosine_similarity(vector_a, vector_b):
similarity = dot_product / (norm_a * norm_b)
return similarity

class OpenVINORecognition(RecognizeDetection):
class OpenVINOFaceRecognition(FaceRecognizeDetection):
def __init__(self, plugin, nativeId: str | None = None):
self.plugin = plugin

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def cosine_similarity(vector_a, vector_b):

predictExecutor = concurrent.futures.ThreadPoolExecutor(1, "Recognize")

class RecognizeDetection(PredictPlugin):
class FaceRecognizeDetection(PredictPlugin):
def __init__(self, nativeId: str | None = None):
super().__init__(nativeId=nativeId)

Expand Down Expand Up @@ -154,6 +154,10 @@ async def run_detection_image(
ret = await super().run_detection_image(image, detection_session)

detections = ret["detections"]

# filter out any non-face detections, because this uses an old model that also detects plates and text
detections = [d for d in detections if d["className"] == "face"]

# non max suppression on detections
for i in range(len(detections)):
d1 = detections[i]
Expand Down
2 changes: 1 addition & 1 deletion plugins/tensorflow-lite/src/predict/text_recognize.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async def detect_once(
estimate_num_chars = False
ratio_h = ratio_w = 1
text_threshold = 0.4
link_threshold = 0.7
link_threshold = 0.9
low_text = 0.4
poly = False

Expand Down

0 comments on commit 4d98ccf

Please sign in to comment.