diff --git a/cpp/KeywordTagging/KeywordTagging.cpp b/cpp/KeywordTagging/KeywordTagging.cpp index 7f466574..d715eee7 100644 --- a/cpp/KeywordTagging/KeywordTagging.cpp +++ b/cpp/KeywordTagging/KeywordTagging.cpp @@ -379,10 +379,6 @@ bool KeywordTagging::Close() { return true; } -string KeywordTagging::GetDetectionType() { - return "TEXT"; -} - vector KeywordTagging::GetDetections(const MPFGenericJob &job) { LOG4CXX_DEBUG(hw_logger_, "Processing \"" + job.data_uri + "\"."); diff --git a/cpp/KeywordTagging/KeywordTagging.h b/cpp/KeywordTagging/KeywordTagging.h index 4aa07f60..a5a932d3 100644 --- a/cpp/KeywordTagging/KeywordTagging.h +++ b/cpp/KeywordTagging/KeywordTagging.h @@ -52,8 +52,6 @@ class KeywordTagging : public MPFDetectionComponent { bool Supports(MPFDetectionDataType data_type) override; - std::string GetDetectionType() override; - private: log4cxx::LoggerPtr hw_logger_; diff --git a/cpp/KeywordTagging/plugin-files/descriptor/descriptor.json b/cpp/KeywordTagging/plugin-files/descriptor/descriptor.json index 6fcd6686..492559a3 100644 --- a/cpp/KeywordTagging/plugin-files/descriptor/descriptor.json +++ b/cpp/KeywordTagging/plugin-files/descriptor/descriptor.json @@ -15,6 +15,7 @@ "name": "KEYWORDTAGGING", "description": "Performs keyword tagging.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.cpp b/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.cpp index bc3eaf1b..e1b1804c 100644 --- a/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.cpp +++ b/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.cpp @@ -44,11 +44,6 @@ using namespace alpr; using log4cxx::Logger; -//----------------------------------------------------------------------------- -/* virtual */ std::string LicensePlateTextDetection::GetDetectionType() { - return "TEXT"; -} - //----------------------------------------------------------------------------- /* virtual */ bool LicensePlateTextDetection::Init() { //Set locale diff --git a/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.h b/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.h index 8b10d09d..70fa2e44 100644 --- a/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.h +++ b/cpp/OalprLicensePlateTextDetection/LicensePlateTextDetection.h @@ -73,8 +73,6 @@ class LicensePlateTextDetection : public MPF::COMPONENT::MPFImageAndVideoDetecti */ std::vector GetDetections(const MPF::COMPONENT::MPFVideoJob &job) override; - std::string GetDetectionType() override; - private: std::vector GetDetectionsFromVideoCapture( const MPF::COMPONENT::MPFVideoJob &job, diff --git a/cpp/OalprLicensePlateTextDetection/plugin-files/descriptor/descriptor.json b/cpp/OalprLicensePlateTextDetection/plugin-files/descriptor/descriptor.json index ff6ab357..5371ab91 100644 --- a/cpp/OalprLicensePlateTextDetection/plugin-files/descriptor/descriptor.json +++ b/cpp/OalprLicensePlateTextDetection/plugin-files/descriptor/descriptor.json @@ -15,6 +15,7 @@ "name": "OALPR", "description": "Detects license plate text in images and videos using the Open Automatic License Plate Recognition (OALPR) Library.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/cpp/OcvDnnDetection/OcvDnnDetection.cpp b/cpp/OcvDnnDetection/OcvDnnDetection.cpp index b2309bb7..2b621952 100644 --- a/cpp/OcvDnnDetection/OcvDnnDetection.cpp +++ b/cpp/OcvDnnDetection/OcvDnnDetection.cpp @@ -47,10 +47,6 @@ using namespace MPF::COMPONENT; -//----------------------------------------------------------------------------- -std::string OcvDnnDetection::GetDetectionType() { - return "CLASS"; -} //----------------------------------------------------------------------------- bool OcvDnnDetection::Init() { diff --git a/cpp/OcvDnnDetection/OcvDnnDetection.h b/cpp/OcvDnnDetection/OcvDnnDetection.h index f0a41e21..8a093972 100644 --- a/cpp/OcvDnnDetection/OcvDnnDetection.h +++ b/cpp/OcvDnnDetection/OcvDnnDetection.h @@ -65,9 +65,6 @@ class OcvDnnDetection : public MPF::COMPONENT::MPFImageAndVideoDetectionComponen std::vector GetDetections(const MPF::COMPONENT::MPFImageJob &job) override; - std::string GetDetectionType() override; - - private: log4cxx::LoggerPtr logger_; diff --git a/cpp/OcvDnnDetection/plugin-files/descriptor/descriptor.json b/cpp/OcvDnnDetection/plugin-files/descriptor/descriptor.json index 0f7919b2..0b8f2529 100644 --- a/cpp/OcvDnnDetection/plugin-files/descriptor/descriptor.json +++ b/cpp/OcvDnnDetection/plugin-files/descriptor/descriptor.json @@ -14,6 +14,7 @@ "name": "DNNCV", "description": "Performs object classification using the OpenCV Deep Neural Networks module.", "actionType": "DETECTION", + "trackType": "CLASS", "requiresCollection": { "states": [] }, diff --git a/cpp/OcvFaceDetection/OcvFaceDetection.cpp b/cpp/OcvFaceDetection/OcvFaceDetection.cpp index 361c35a5..cfc7a561 100644 --- a/cpp/OcvFaceDetection/OcvFaceDetection.cpp +++ b/cpp/OcvFaceDetection/OcvFaceDetection.cpp @@ -65,11 +65,6 @@ using namespace MPF; using namespace COMPONENT; - -string OcvFaceDetection::GetDetectionType() { - return "FACE"; -} - void OcvFaceDetection::SetModes(bool display_window, bool print_debug_info) { imshow_on = display_window; diff --git a/cpp/OcvFaceDetection/OcvFaceDetection.h b/cpp/OcvFaceDetection/OcvFaceDetection.h index 7b3b39cd..a8de45b1 100644 --- a/cpp/OcvFaceDetection/OcvFaceDetection.h +++ b/cpp/OcvFaceDetection/OcvFaceDetection.h @@ -132,8 +132,6 @@ public : bool Init() override; bool Close() override; - std::string GetDetectionType() override; - std::vector GetDetections(const MPF::COMPONENT::MPFVideoJob &job) override; std::vector GetDetections(const MPF::COMPONENT::MPFImageJob &job) override; diff --git a/cpp/OcvFaceDetection/plugin-files/descriptor/descriptor.json b/cpp/OcvFaceDetection/plugin-files/descriptor/descriptor.json index eb3e7243..04fa78c2 100644 --- a/cpp/OcvFaceDetection/plugin-files/descriptor/descriptor.json +++ b/cpp/OcvFaceDetection/plugin-files/descriptor/descriptor.json @@ -14,6 +14,7 @@ "name": "FACECV", "description": "Detects faces in images and videos using the Open Computer Vision (OCV) library.", "actionType": "DETECTION", + "trackType": "FACE", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/cpp/OcvYoloDetection/OcvYoloDetection.cpp b/cpp/OcvYoloDetection/OcvYoloDetection.cpp index 0150ba4a..a14d4751 100644 --- a/cpp/OcvYoloDetection/OcvYoloDetection.cpp +++ b/cpp/OcvYoloDetection/OcvYoloDetection.cpp @@ -245,11 +245,6 @@ bool OcvYoloDetection::Close() { } -std::string OcvYoloDetection::GetDetectionType() { - return "CLASS"; -} - - void OcvYoloDetection::InitYoloNetwork(const Properties &jobProperties, const Config &config) { auto modelName = GetProperty(jobProperties, "MODEL_NAME", "tiny yolo"); auto modelsDirPath = GetProperty(jobProperties, "MODELS_DIR_PATH", "."); diff --git a/cpp/OcvYoloDetection/OcvYoloDetection.h b/cpp/OcvYoloDetection/OcvYoloDetection.h index 3a19dcc0..58254741 100644 --- a/cpp/OcvYoloDetection/OcvYoloDetection.h +++ b/cpp/OcvYoloDetection/OcvYoloDetection.h @@ -49,8 +49,6 @@ class OcvYoloDetection : public MPF::COMPONENT::MPFImageAndVideoDetectionCompone bool Close() override; - std::string GetDetectionType() override; - std::vector GetDetections( const MPF::COMPONENT::MPFVideoJob &job) override; diff --git a/cpp/OcvYoloDetection/plugin-files/descriptor/descriptor.json b/cpp/OcvYoloDetection/plugin-files/descriptor/descriptor.json index 87823423..e3798d57 100644 --- a/cpp/OcvYoloDetection/plugin-files/descriptor/descriptor.json +++ b/cpp/OcvYoloDetection/plugin-files/descriptor/descriptor.json @@ -14,6 +14,7 @@ "name": "OCVYOLO", "description": "Detects objects in images and videos using the Open Computer Vision (OCV) library using Yolo.", "actionType": "DETECTION", + "trackType": "CLASS", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/cpp/SceneChangeDetection/SceneChangeDetection.cpp b/cpp/SceneChangeDetection/SceneChangeDetection.cpp index 685b87a5..13d1a517 100644 --- a/cpp/SceneChangeDetection/SceneChangeDetection.cpp +++ b/cpp/SceneChangeDetection/SceneChangeDetection.cpp @@ -41,10 +41,6 @@ using namespace MPF::COMPONENT; using namespace cv; -std::string SceneChangeDetection::GetDetectionType() { - return "SCENE"; -} - bool SceneChangeDetection::Init() { // Determine where the executable is running. diff --git a/cpp/SceneChangeDetection/SceneChangeDetection.h b/cpp/SceneChangeDetection/SceneChangeDetection.h index 46761186..3b081a38 100644 --- a/cpp/SceneChangeDetection/SceneChangeDetection.h +++ b/cpp/SceneChangeDetection/SceneChangeDetection.h @@ -49,8 +49,6 @@ class SceneChangeDetection : public MPF::COMPONENT::MPFVideoDetectionComponentAd std::vector GetDetections( const MPF::COMPONENT::MPFVideoJob &job) override; - std::string GetDetectionType() override; - private: log4cxx::LoggerPtr logger_; cv::Mat dilateKernel; diff --git a/cpp/SceneChangeDetection/plugin-files/descriptor/descriptor.json b/cpp/SceneChangeDetection/plugin-files/descriptor/descriptor.json index 61a37254..179c7b7e 100644 --- a/cpp/SceneChangeDetection/plugin-files/descriptor/descriptor.json +++ b/cpp/SceneChangeDetection/plugin-files/descriptor/descriptor.json @@ -12,6 +12,7 @@ "name" : "SCENECHANGE", "description" : "Segments a video into scenes based on 4 detectors.", "actionType" : "DETECTION", + "trackType": "SCENE", "outputChangedCounter" : 1, "requiresCollection" : { "states" : [] diff --git a/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.cpp b/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.cpp index 9ecdc534..b81207cd 100755 --- a/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.cpp +++ b/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.cpp @@ -127,10 +127,6 @@ bool TesseractOCRTextDetection::Close() { } -string TesseractOCRTextDetection::GetDetectionType() { - return "TEXT"; -} - bool TesseractOCRTextDetection::Supports(MPFDetectionDataType data_type) { return data_type == MPFDetectionDataType::IMAGE || data_type == MPFDetectionDataType::VIDEO diff --git a/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.h b/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.h index bbf7912b..a9c15d0a 100755 --- a/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.h +++ b/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.h @@ -72,8 +72,6 @@ namespace MPF { std::vector GetDetections(const MPFAudioJob &job) override; - std::string GetDetectionType() override; - bool Supports(MPFDetectionDataType data_type) override; private: diff --git a/cpp/TesseractOCRTextDetection/plugin-files/descriptor/descriptor.json b/cpp/TesseractOCRTextDetection/plugin-files/descriptor/descriptor.json index 8d8a8a8c..a8b54800 100755 --- a/cpp/TesseractOCRTextDetection/plugin-files/descriptor/descriptor.json +++ b/cpp/TesseractOCRTextDetection/plugin-files/descriptor/descriptor.json @@ -15,6 +15,7 @@ "name": "TESSERACTOCR", "description": "Performs Tesseract optical character recognition.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/cpp/TrtisDetection/TrtisDetection.cpp b/cpp/TrtisDetection/TrtisDetection.cpp index 9970eee4..d2b2e12f 100644 --- a/cpp/TrtisDetection/TrtisDetection.cpp +++ b/cpp/TrtisDetection/TrtisDetection.cpp @@ -202,10 +202,6 @@ TrtisIpIrv2CocoJobConfig::TrtisIpIrv2CocoJobConfig(const MPFJob &job, maxSpaceGapPxSq = maxSpaceGap * maxSpaceGap * frameDiagSq; } -/******************************************************************************/ -string TrtisDetection::GetDetectionType() { - return "FEATURE"; -} /******************************************************************************/ bool TrtisDetection::Close() { diff --git a/cpp/TrtisDetection/TrtisDetection.h b/cpp/TrtisDetection/TrtisDetection.h index 7c8e9e14..cfc30a97 100644 --- a/cpp/TrtisDetection/TrtisDetection.h +++ b/cpp/TrtisDetection/TrtisDetection.h @@ -122,7 +122,6 @@ namespace MPF{ bool Close() override; vector GetDetections(const MPFVideoJob &job) override; vector GetDetections(const MPFImageJob &job) override; - string GetDetectionType() override; private: diff --git a/cpp/TrtisDetection/plugin-files/descriptor/descriptor.json b/cpp/TrtisDetection/plugin-files/descriptor/descriptor.json index fae11ed0..36f3be21 100644 --- a/cpp/TrtisDetection/plugin-files/descriptor/descriptor.json +++ b/cpp/TrtisDetection/plugin-files/descriptor/descriptor.json @@ -14,6 +14,7 @@ "name": "TRTIS", "description": "Returns a model-specific inference result from an input image.", "actionType": "DETECTION", + "trackType": "FEATURE", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/java/SphinxSpeechDetection/plugin-files/descriptor/descriptor.json b/java/SphinxSpeechDetection/plugin-files/descriptor/descriptor.json index 8fa7148e..29f7449f 100644 --- a/java/SphinxSpeechDetection/plugin-files/descriptor/descriptor.json +++ b/java/SphinxSpeechDetection/plugin-files/descriptor/descriptor.json @@ -14,6 +14,7 @@ "name": "SPHINX", "description": "Detects and transcribes English language speech in audio and video files.", "actionType": "DETECTION", + "trackType": "SPEECH", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/java/SphinxSpeechDetection/src/main/java/org/mitre/mpf/detection/sphinx/speech/SphinxSpeechDetectionComponent.java b/java/SphinxSpeechDetection/src/main/java/org/mitre/mpf/detection/sphinx/speech/SphinxSpeechDetectionComponent.java index 1e27f77e..d7693a53 100644 --- a/java/SphinxSpeechDetection/src/main/java/org/mitre/mpf/detection/sphinx/speech/SphinxSpeechDetectionComponent.java +++ b/java/SphinxSpeechDetection/src/main/java/org/mitre/mpf/detection/sphinx/speech/SphinxSpeechDetectionComponent.java @@ -55,10 +55,6 @@ public SphinxSpeechDetectionComponent() { speechProcessor = new SphinxSpeechDetectionProcessor(); } - @Override - public String getDetectionType() { - return "SPEECH"; - } @Override public List getDetections(MPFAudioJob job) throws MPFComponentDetectionError { diff --git a/java/TikaImageDetection/plugin-files/descriptor/descriptor.json b/java/TikaImageDetection/plugin-files/descriptor/descriptor.json index 44184f57..ef2e33d0 100755 --- a/java/TikaImageDetection/plugin-files/descriptor/descriptor.json +++ b/java/TikaImageDetection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "TIKAIMAGE", "description": "The Apache Tika image detection component.", "actionType": "DETECTION", + "trackType": "MEDIA", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/java/TikaImageDetection/src/main/java/org/mitre/mpf/detection/tika/TikaImageDetectionComponent.java b/java/TikaImageDetection/src/main/java/org/mitre/mpf/detection/tika/TikaImageDetectionComponent.java index b670e796..d9ed30f5 100755 --- a/java/TikaImageDetection/src/main/java/org/mitre/mpf/detection/tika/TikaImageDetectionComponent.java +++ b/java/TikaImageDetection/src/main/java/org/mitre/mpf/detection/tika/TikaImageDetectionComponent.java @@ -219,11 +219,6 @@ public boolean supports(MPFDataType mpfDataType) { return MPFDataType.UNKNOWN == mpfDataType; } - @Override - public String getDetectionType() { - return "MEDIA"; - } - @Override public List getDetections(MPFImageJob job) throws MPFComponentDetectionError { throw new MPFComponentDetectionError(MPFDetectionError.MPF_UNSUPPORTED_DATA_TYPE, diff --git a/java/TikaTextDetection/plugin-files/descriptor/descriptor.json b/java/TikaTextDetection/plugin-files/descriptor/descriptor.json index 819dcc73..f2ef831f 100755 --- a/java/TikaTextDetection/plugin-files/descriptor/descriptor.json +++ b/java/TikaTextDetection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "TIKATEXT", "description": "The Apache Tika text detection component.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/java/TikaTextDetection/src/main/java/org/mitre/mpf/detection/tika/TikaTextDetectionComponent.java b/java/TikaTextDetection/src/main/java/org/mitre/mpf/detection/tika/TikaTextDetectionComponent.java index 24a350a6..ad03bfcc 100755 --- a/java/TikaTextDetection/src/main/java/org/mitre/mpf/detection/tika/TikaTextDetectionComponent.java +++ b/java/TikaTextDetection/src/main/java/org/mitre/mpf/detection/tika/TikaTextDetectionComponent.java @@ -285,10 +285,6 @@ public boolean supports(MPFDataType mpfDataType) { return MPFDataType.UNKNOWN.equals(mpfDataType); } - public String getDetectionType() { - return "TEXT"; - } - public List getDetections(MPFImageJob job) throws MPFComponentDetectionError { throw new MPFComponentDetectionError(MPFDetectionError.MPF_UNSUPPORTED_DATA_TYPE, "Image detection not supported."); } diff --git a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py index 0f254013..0a0dd764 100644 --- a/python/ArgosTranslation/argos_translation_component/argos_translation_component.py +++ b/python/ArgosTranslation/argos_translation_component/argos_translation_component.py @@ -40,7 +40,6 @@ class ArgosTranslationComponent: - detection_type = 'TRANSLATION' def get_detections_from_video(self, job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]: logger.info(f'Received video job.') diff --git a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json index 1958a0a6..06040cbb 100644 --- a/python/ArgosTranslation/plugin-files/descriptor/descriptor.json +++ b/python/ArgosTranslation/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "ARGOSTRANSLATION", "description": "Uses Argos Translate to perform translation.", "actionType": "DETECTION", + "trackType": "TRANSLATION", "requiresCollection": { "states": [] }, diff --git a/python/AzureFormDetection/acs_form_detection_component/acs_form_detection_component.py b/python/AzureFormDetection/acs_form_detection_component/acs_form_detection_component.py index 2c6864b5..cc262ae7 100644 --- a/python/AzureFormDetection/acs_form_detection_component/acs_form_detection_component.py +++ b/python/AzureFormDetection/acs_form_detection_component/acs_form_detection_component.py @@ -47,7 +47,6 @@ class AcsFormDetectionComponent(mpf_util.ImageReaderMixin, object): - detection_type = 'TEXT' def get_detections_from_generic(self, generic_job): try: diff --git a/python/AzureFormDetection/plugin-files/descriptor/descriptor.json b/python/AzureFormDetection/plugin-files/descriptor/descriptor.json index 3e5e559e..aa9e103a 100644 --- a/python/AzureFormDetection/plugin-files/descriptor/descriptor.json +++ b/python/AzureFormDetection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "AZUREFORM", "description": "Uses Azure Cognitive Services to perform form recognition in documents and images.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/python/AzureOcrTextDetection/acs_ocr_component/acs_ocr_component.py b/python/AzureOcrTextDetection/acs_ocr_component/acs_ocr_component.py index bdc45a5d..570da41c 100644 --- a/python/AzureOcrTextDetection/acs_ocr_component/acs_ocr_component.py +++ b/python/AzureOcrTextDetection/acs_ocr_component/acs_ocr_component.py @@ -44,7 +44,6 @@ class AcsOcrComponent(mpf_util.ImageReaderMixin, mpf_util.VideoCaptureMixin): - detection_type = 'TEXT' def get_detections_from_image_reader(self, image_job, image_reader): try: diff --git a/python/AzureOcrTextDetection/plugin-files/descriptor/descriptor.json b/python/AzureOcrTextDetection/plugin-files/descriptor/descriptor.json index 4460ae9d..4ed087ba 100644 --- a/python/AzureOcrTextDetection/plugin-files/descriptor/descriptor.json +++ b/python/AzureOcrTextDetection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "AZUREOCR", "description": "Uses Azure Cognitive Services to perform optical character recognition.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/python/AzureReadTextDetection/acs_read_detection_component/acs_read_detection_component.py b/python/AzureReadTextDetection/acs_read_detection_component/acs_read_detection_component.py index 09bb9db4..b986afec 100644 --- a/python/AzureReadTextDetection/acs_read_detection_component/acs_read_detection_component.py +++ b/python/AzureReadTextDetection/acs_read_detection_component/acs_read_detection_component.py @@ -44,7 +44,6 @@ class AcsReadDetectionComponent(mpf_util.VideoCaptureMixin, mpf_util.ImageReaderMixin, object): - detection_type = 'TEXT' @staticmethod def get_detections_from_generic(generic_job): diff --git a/python/AzureReadTextDetection/plugin-files/descriptor/descriptor.json b/python/AzureReadTextDetection/plugin-files/descriptor/descriptor.json index ca4bb340..9fe4e1bd 100644 --- a/python/AzureReadTextDetection/plugin-files/descriptor/descriptor.json +++ b/python/AzureReadTextDetection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "AZUREREAD", "description": "Uses Azure Cognitive Services to perform optical character recognition using the Azure Read API.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/python/AzureSpeechDetection/README.md b/python/AzureSpeechDetection/README.md index 95c196e6..3201aa20 100644 --- a/python/AzureSpeechDetection/README.md +++ b/python/AzureSpeechDetection/README.md @@ -36,8 +36,7 @@ Returned `AudioTrack` objects have the following members in their `detection_pro | Property Key | Description | |--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `LONG_SPEAKER_ID` | A unique speaker identifier, of the form "`--<#>`, where `` and `` are integers indicating the segment range (in frame counts for video jobs, milliseconds for audio jobs) for sub-jobs when a job has been segmented by the Workflow Manager. The final `#` portion of the ID is a 1-indexed counter for speaker identity within the indicated segment range. When jobs are not segmented, or not submitted through the Workflow Manager at all, `stop_offset` may instead be `EOF`, indicating that the job extends to the end of the file. | -| `SPEAKER_ID` | A dummy field set to "0". | +| `SPEAKER_ID` | A unique speaker identifier, of the form "`--<#>`, where `` and `` are integers indicating the segment range (in frame counts for video jobs, milliseconds for audio jobs) for sub-jobs when a job has been segmented by the Workflow Manager. The final `#` portion of the ID is a 1-indexed counter for speaker identity within the indicated segment range. When jobs are not segmented, or not submitted through the Workflow Manager at all, `stop_offset` may instead be `EOF`, indicating that the job extends to the end of the file. | | `GENDER` | Only present if supplied by an upstream component. The gender of the speaker. | | `GENDER_CONFIDENCE` | Only present if supplied by an upstream component. The confidence of the gender classification. | | `TRANSCRIPT` | The text of the utterance transcript. Words are space-separated. | @@ -59,7 +58,7 @@ AudioTracks also have the `start_time` and `stop_time` of their associated utter # Language Identifiers -The following are the BCP-47 codes and their corresponding languages which Azure Speech-to-Text supports. +The following are the BCP-47 codes and their corresponding languages which Azure Speech-to-Text supports. | Language | BCP-47 | Language | BCP-47 | @@ -167,4 +166,4 @@ If the language code supplied by a feed-forward track is not handled in `acs_spe | `UZB` | Uzbek | `uz-UZ` | | `VIE` | Vietnamese | `vi-VN` | | `YUE` | Chinese (Cantonese) | `zh-HK`* | -| `ZUL` | Zulu | `zu-ZA` | \ No newline at end of file +| `ZUL` | Zulu | `zu-ZA` | diff --git a/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py b/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py index e151265d..c0b0ada3 100644 --- a/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py +++ b/python/AzureSpeechDetection/acs_speech_component/acs_speech_component.py @@ -40,7 +40,6 @@ class AcsSpeechComponent(object): - detection_type = 'SPEECH' def __init__(self): logger.info('Creating instance of AcsSpeechDetectionProcessor') @@ -53,9 +52,6 @@ def get_detections_from_job( ) -> List[mpf.AudioTrack]: try: job_config = AzureJobConfig(job) - except mpf_util.TriggerMismatch as e: - logger.info(f"Feed-forward track does not meet trigger condition: {e}") - raise except mpf_util.NoInBoundsSpeechSegments as e: logger.warning(f"Feed-forward track does not contain in-bounds segments: {e}") raise @@ -70,21 +66,13 @@ def get_detections_from_job( logger.exception(f'Exception raised while processing audio: {e}') raise - # Remove this block to drop LONG_SPEAKER_ID - for track in audio_tracks: - track.detection_properties['LONG_SPEAKER_ID'] = track.detection_properties['SPEAKER_ID'] - track.detection_properties['SPEAKER_ID'] = '0' - logger.info('Processing complete. Found %d tracks.' % len(audio_tracks)) return audio_tracks def get_detections_from_audio(self, job: mpf.AudioJob) -> List[mpf.AudioTrack]: logger.info('Received audio job') + return self.get_detections_from_job(job) - try: - return self.get_detections_from_job(job) - except mpf_util.TriggerMismatch: - return [job.feed_forward_track] def get_detections_from_video( self, @@ -101,10 +89,7 @@ def get_detections_from_video( ) fpms = float(job.media_properties['FPS']) / 1000.0 - try: - audio_tracks = self.get_detections_from_job(job) - except mpf_util.TriggerMismatch: - return [job.feed_forward_track] + audio_tracks = self.get_detections_from_job(job) try: # Convert audio tracks to video tracks with placeholder frame locs diff --git a/python/AzureSpeechDetection/acs_speech_component/job_parsing.py b/python/AzureSpeechDetection/acs_speech_component/job_parsing.py index 7d6517c5..59a8eda7 100644 --- a/python/AzureSpeechDetection/acs_speech_component/job_parsing.py +++ b/python/AzureSpeechDetection/acs_speech_component/job_parsing.py @@ -55,7 +55,7 @@ def __init__(self, job: Union[mpf.AudioJob, mpf.VideoJob]): self.cleanup: bool super().__init__(job) - if self.is_triggered_job: + if self.speaker: self.diarize = False @staticmethod diff --git a/python/AzureSpeechDetection/plugin-files/descriptor/descriptor.json b/python/AzureSpeechDetection/plugin-files/descriptor/descriptor.json index 2bde0933..a9217152 100644 --- a/python/AzureSpeechDetection/plugin-files/descriptor/descriptor.json +++ b/python/AzureSpeechDetection/plugin-files/descriptor/descriptor.json @@ -9,7 +9,8 @@ "name": "AZURESPEECH", "description": "Uses Azure Cognitive Services to perform speech-to-text.", "actionType": "DETECTION", - "outputChangedCounter" : 1, + "trackType": "SPEECH", + "outputChangedCounter" : 2, "requiresCollection": { "states": [] }, diff --git a/python/AzureSpeechDetection/tests/test_acs_speech.py b/python/AzureSpeechDetection/tests/test_acs_speech.py index 99fb3586..2d899120 100644 --- a/python/AzureSpeechDetection/tests/test_acs_speech.py +++ b/python/AzureSpeechDetection/tests/test_acs_speech.py @@ -188,7 +188,7 @@ def test_diarization(self): # There should be two speakers with diarization, one without len_raw, len_dia = [ len(set([ - track.detection_properties['LONG_SPEAKER_ID'] + track.detection_properties['SPEAKER_ID'] for track in result ])) for result in results diff --git a/python/AzureTranslation/acs_translation_component/acs_translation_component.py b/python/AzureTranslation/acs_translation_component/acs_translation_component.py index 740009fb..c1d3f679 100644 --- a/python/AzureTranslation/acs_translation_component/acs_translation_component.py +++ b/python/AzureTranslation/acs_translation_component/acs_translation_component.py @@ -48,7 +48,6 @@ class AcsTranslationComponent: - detection_type = 'TRANSLATION' @staticmethod def get_detections_from_video(job: mpf.VideoJob) -> Sequence[mpf.VideoTrack]: diff --git a/python/AzureTranslation/plugin-files/descriptor/descriptor.json b/python/AzureTranslation/plugin-files/descriptor/descriptor.json index 6a35ea9d..f74bdf78 100644 --- a/python/AzureTranslation/plugin-files/descriptor/descriptor.json +++ b/python/AzureTranslation/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "AZURETRANSLATION", "description": "Uses Azure Cognitive Services to perform translation.", "actionType": "DETECTION", + "trackType": "TRANSLATION", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/python/ClipDetection/clip_component/clip_component.py b/python/ClipDetection/clip_component/clip_component.py index 85d5b669..bcea3132 100644 --- a/python/ClipDetection/clip_component/clip_component.py +++ b/python/ClipDetection/clip_component/clip_component.py @@ -49,7 +49,6 @@ device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') class ClipComponent(mpf_util.ImageReaderMixin): - detection_type = 'CLASS' def __init__(self): self._wrapper = ClipWrapper() diff --git a/python/ClipDetection/plugin-files/descriptor/descriptor.json b/python/ClipDetection/plugin-files/descriptor/descriptor.json index 97fe74dd..65d0c1d7 100644 --- a/python/ClipDetection/plugin-files/descriptor/descriptor.json +++ b/python/ClipDetection/plugin-files/descriptor/descriptor.json @@ -8,8 +8,8 @@ "algorithm": { "name": "CLIP", "description": "CLIP classification.", - "detectionType": "CLASS", "actionType": "DETECTION", + "trackType": "CLASS", "outputChangedCounter": 1, "requiresCollection": { "states": [] diff --git a/python/EastTextDetection/east_component/east_component.py b/python/EastTextDetection/east_component/east_component.py index ca667f25..18af8618 100644 --- a/python/EastTextDetection/east_component/east_component.py +++ b/python/EastTextDetection/east_component/east_component.py @@ -37,7 +37,6 @@ class EastComponent(mpf_util.ImageReaderMixin, mpf_util.VideoCaptureMixin, object): - detection_type = 'TEXT REGION' def __init__(self): logger.info('Creating instance of EastComponent') diff --git a/python/EastTextDetection/plugin-files/descriptor/descriptor.json b/python/EastTextDetection/plugin-files/descriptor/descriptor.json index ebb53733..5540e976 100644 --- a/python/EastTextDetection/plugin-files/descriptor/descriptor.json +++ b/python/EastTextDetection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "EAST", "description": "EAST scene text detection.", "actionType": "DETECTION", + "trackType": "TEXT REGION", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/python/NlpTextCorrection/nlp_correction_component/nlp_correction_component.py b/python/NlpTextCorrection/nlp_correction_component/nlp_correction_component.py index 892daa23..a41d33d9 100644 --- a/python/NlpTextCorrection/nlp_correction_component/nlp_correction_component.py +++ b/python/NlpTextCorrection/nlp_correction_component/nlp_correction_component.py @@ -41,7 +41,6 @@ class NlpCorrectionComponent(object): - detection_type = 'TEXT' def __init__(self): self.initialized = False diff --git a/python/NlpTextCorrection/plugin-files/descriptor/descriptor.json b/python/NlpTextCorrection/plugin-files/descriptor/descriptor.json index 445b9865..717466ac 100644 --- a/python/NlpTextCorrection/plugin-files/descriptor/descriptor.json +++ b/python/NlpTextCorrection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "NLPTEXTCORRECTION", "description": "Uses the Hunspell library to correct text output.", "actionType": "DETECTION", + "trackType": "TEXT", "outputChangedCounter" : 1, "requiresCollection": { "states": [] diff --git a/python/WhisperSpeechDetection/README.md b/python/WhisperSpeechDetection/README.md index 40fa79e6..08fa5891 100644 --- a/python/WhisperSpeechDetection/README.md +++ b/python/WhisperSpeechDetection/README.md @@ -8,9 +8,13 @@ This component uses the OpenAI Whisper model. This component identifies the language spoken in audio and video clips. # Input Properties -- `WHISPER_MODEL_SIZE`: Size of the Whisper model. Whisper has `tiny`, `base`, `small`, `medium`, and `large` models available for multilingual models. English-only models are available in `tiny`, `base`, `small`, and `medium`. +- `WHISPER_MODEL_SIZE`: Size of the Whisper model. Whisper has `tiny`, `base`, `small`, `medium`, and `large` models available for multilingual models. English-only models are available in `tiny`, `base`, `small`, and `medium`. - `WHISPER_MODEL_LANG`: Whisper has English-only models and multilingual models. Set to `en` for English-only models and `multi` for multilingual models. -- `WHISPER_MODE`: Determines whether Whisper will perform language detection, speech-to-text transcription, or speech translation. English-only models can only transcribe English audio. Set to `LANGUAGE_DETECTION` for spoken language detection, `TRANSCRIPTION` for speech-to-text transcription, and `SPEECH_TRANSLATION` for speech translation. +- `WHISPER_MODE`: Determines whether Whisper will perform language detection, speech-to-text + transcription, or speech translation. If multiple languages are spoken in a single piece of media, + language detection will detect only one of them. English-only models can only transcribe English + audio. Set to `LANGUAGE_DETECTION` for spoken language detection, `TRANSCRIPTION` for + speech-to-text transcription, and `SPEECH_TRANSLATION` for speech translation. - `AUDIO_LANGUAGE`: Optional property that indicates the language to use for audio translation or transcription. If left as an empty string, Whisper will automatically detect a single language from the first 30 seconds of audio. # Output Properties @@ -28,8 +32,8 @@ Size | Provided Language | Result for Spanish Part | Result for English Part ------|-------------------|-------------------------|------------------------- base | Auto-detected | Correctly transcribed | Gibberish large | Auto-detected | Correctly transcribed | Translated to Spanish -base | English | Translated to English | Correctly transcribed -large | English | Translated to English | Correctly transcribed +base | English | Translated to English | Correctly transcribed +large | English | Translated to English | Correctly transcribed ### Translate ### @@ -106,4 +110,4 @@ All translations are to English. | `uk` | `ukr` | Ukrainian | | `ur` | `urd` | Urdu | | `vi` | `vie` | Vietnamese | -| `cy` | `cym` | Welsh | \ No newline at end of file +| `cy` | `cym` | Welsh | diff --git a/python/WhisperSpeechDetection/plugin-files/descriptor/descriptor.json b/python/WhisperSpeechDetection/plugin-files/descriptor/descriptor.json index 1c0cae20..2080f4ed 100644 --- a/python/WhisperSpeechDetection/plugin-files/descriptor/descriptor.json +++ b/python/WhisperSpeechDetection/plugin-files/descriptor/descriptor.json @@ -9,6 +9,7 @@ "name": "WHISPERSPEECH", "description": "Uses OpenAI's Whisper model to perform language detection in speech.", "actionType": "DETECTION", + "trackType": "SPEECH", "requiresCollection": { "states": [] }, diff --git a/python/WhisperSpeechDetection/whisper_speech_detection_component/whisper_speech_detection_component.py b/python/WhisperSpeechDetection/whisper_speech_detection_component/whisper_speech_detection_component.py index cbd67dbe..e7089dbd 100644 --- a/python/WhisperSpeechDetection/whisper_speech_detection_component/whisper_speech_detection_component.py +++ b/python/WhisperSpeechDetection/whisper_speech_detection_component/whisper_speech_detection_component.py @@ -39,7 +39,6 @@ warnings.filterwarnings('ignore', category=ResourceWarning, module='multilingual.tiktoken') class WhisperSpeechDetectionComponent: - detection_type = 'SPEECH' def __init__(self): logger.info('Creating instance of WhisperSpeechDetectionComponent')