openmpf · hhuangMITRE · May 10, 2021 · Apr 11, 2021 · Apr 26, 2021 · May 4, 2021
diff --git a/cpp/TesseractOCRTextDetection/README.md b/cpp/TesseractOCRTextDetection/README.md
@@ -3,8 +3,11 @@
 This repository contains source code and model data for the OpenMPF Tesseract
 OCR text detection component.
 
-The component extracts text found in an image, reported as a single track
-detection. PDF documents can also be processed with one track detection per
+The component extracts text found in an image, video, or generic document.
+Image results are reported as a single track
+detection [per specified language setting](#detecting-multiple-languages).
+Video results are reported as one track detection per frame and language setting.
+PDF documents can also be processed with one track detection per
 page. The first page corresponds to the detection property `PAGE_NUM=1`. For
 debugging purposes, images converted from documents are stored in a temporary
 job directory under `plugin/TesseractOCR/tmp-[job-id]-[random tag]`. This

diff --git a/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.cpp b/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.cpp
diff --git a/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.h b/cpp/TesseractOCRTextDetection/TesseractOCRTextDetection.h
@@ -60,7 +60,7 @@ namespace MPF {
 
         class TessApiWrapper;
 
-        class TesseractOCRTextDetection : public MPFImageDetectionComponentAdapter {
+        class TesseractOCRTextDetection : public MPFDetectionComponent {
 
         public:
             bool Init() override;
@@ -71,6 +71,10 @@ namespace MPF {
 
             std::vector<MPFGenericTrack> GetDetections(const MPFGenericJob &job) override;
 
+            std::vector<MPFVideoTrack> GetDetections(const MPFVideoJob &job) override;
+
+            std::vector<MPFAudioTrack> GetDetections(const MPFAudioJob &job) override;
+
             std::string GetDetectionType() override;
 
             bool Supports(MPFDetectionDataType data_type) override;
@@ -201,7 +205,13 @@ namespace MPF {
                 }
             };
 
-            bool process_ocr_text(Properties &detection_properties, const MPFImageJob &job, const OCR_output &ocr_out,
+            std::vector<MPFImageLocation> process_image_job(const MPFJob &job,
+                                                            TesseractOCRTextDetection::OCR_filter_settings &ocr_fset,
+                                                            cv::Mat &image_data,
+                                                            const std::string &run_dir,
+                                                            MPFDetectionError &job_status);
+
+            bool process_ocr_text(Properties &detection_properties, const MPFJob &job, const OCR_output &ocr_out,
                     const TesseractOCRTextDetection::OCR_filter_settings &ocr_fset,
                     int page_num = -1);
 
@@ -222,8 +232,8 @@ namespace MPF {
             static void process_parallel_image_runs(OCR_job_inputs &inputs, Image_results &results);
             static void process_serial_image_runs(OCR_job_inputs &inputs, Image_results &results);
 
-            void preprocess_image(const MPFImageJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);
-            void rescale_image(const MPFImageJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);
+            void preprocess_image(const MPFJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);
+            void rescale_image(const MPFJob &job, cv::Mat &input_image, const OCR_filter_settings &ocr_fset);
 
             static void process_tesseract_lang_model(OCR_job_inputs &input, OCR_results  &result);
 
@@ -238,13 +248,13 @@ namespace MPF {
             static std::string process_osd_lang(const std::string &script_type,
                                                 const OCR_filter_settings &ocr_fset);
 
-            void get_OSD(OSBestResult &best_result, cv::Mat &imi, const MPFImageJob &job,
+            void get_OSD(OSBestResult &best_result, cv::Mat &imi, const MPFJob &job,
                          OCR_filter_settings &ocr_fset,
                          Properties &detection_properties,
                          std::string &tessdata_script_dir, std::set<std::string> &missing_languages);
 
             bool get_OSD_rotation(OSResults *results, cv::Mat &imi_scaled, cv::Mat &imi_original,
-                                  int &rotation, const MPFImageJob &job, OCR_filter_settings &ocr_fset);
+                                  int &rotation, const MPFJob &job, OCR_filter_settings &ocr_fset);
 
             static std::string return_valid_tessdir(const std::string &job_name,
                                                     const std::string &lang_str,
@@ -265,8 +275,7 @@ namespace MPF {
 
             void check_default_languages(const OCR_filter_settings &ocr_fset,
                                          const std::string &job_name,
-                                         const std::string &run_dir,
-                                         MPFDetectionError &job_status);
+                                         const std::string &run_dir);
         };
 
         // The primary reason this class exists is that tesseract::TessBaseAPI segfaults when copying or moving.

diff --git a/cpp/TesseractOCRTextDetection/sample_tesseract_ocr_detector.cpp b/cpp/TesseractOCRTextDetection/sample_tesseract_ocr_detector.cpp
@@ -45,10 +45,10 @@ using std::to_string;
 void print_usage(char *argv[]) {
 
     std::cout << "Usage: " << argv[0] <<
-                 " <-i | -g> [--osd] [--oem TESSERACT_OEM] <IMAGE_URI | GENERIC_URI> [TESSERACT_LANGUAGE]" <<
+                 " <-i | -v | -g> [--osd] [--oem TESSERACT_OEM] <IMAGE_URI | VIDEO_URI <START_FRAME> <END_FRAME> | GENERIC_URI>  [TESSERACT_LANGUAGE]" <<
                  std::endl << std::endl;
     std::cout << "Notes: " << std::endl << std::endl;
-    std::cout << " -i | -g : Specifies whether to process an image (-i <IMAGE_URI>) or generic document (-g <GENERIC_URI>)." <<
+    std::cout << " <-i | -v | -g>  : Specifies whether to process an image (-i <IMAGE_URI>), video (-v <VIDEO_URI>  <START_FRAME> <END_FRAME>), or generic document (-g <GENERIC_URI>)." <<
                  std::endl << std::endl;
     std::cout << " --osd   : When provided, runs the job with automatic orientation and script detection (OSD). " <<
                  std::endl;
@@ -102,8 +102,8 @@ bool check_options(const std::string &next_option,  const int &argc, char *argv[
     if (next_option == "--osd") {
         algorithm_properties["ENABLE_OSD_AUTOMATION"] = "true";
         uri_index++;
-    } else if (next_option == "--oem" || argc - uri_index > 2) {
-        std::cout << "Updating OEM MODE " << argv[uri_index + 1];
+    } else if (next_option == "--oem" && argc - uri_index > 2) {
+        std::cout << "Updating OEM MODE " << argv[uri_index + 1] << std::endl;
         algorithm_properties["TESSERACT_OEM"] = argv[uri_index + 1];
         uri_index += 2;
     } else {
@@ -131,18 +131,30 @@ int main(int argc, char *argv[]) {
         algorithm_properties["SHARPEN"] = "1.0";
         algorithm_properties["ENABLE_OSD_AUTOMATION"] = "false";
 
-        int uri_index = 2;
+        int uri_index = 2, video_params = 0, start_frame = 0, end_frame = 1;
+
         std::string next_option = std::string(argv[uri_index]);
         if (check_options(next_option, argc, argv, algorithm_properties, uri_index)) {
             next_option = std::string(argv[uri_index]);
             check_options(next_option, argc, argv, algorithm_properties, uri_index);
         }
 
-        if (argc - uri_index == 1) {
+        if (media_option == "-v") {
+            video_params = 2;
+            if (argc - uri_index < 3) {
+                print_usage(argv);
+                return 0;
+
+            }
+            start_frame = std::stoi(argv[uri_index+1]);
+            end_frame = std::stoi(argv[uri_index+2]);
+        }
+
+        if (argc - uri_index - video_params == 1) {
             uri = argv[uri_index];
-        } else if (argc - uri_index == 2) {
+        } else if (argc - uri_index - video_params == 2) {
             uri = argv[uri_index];
-            algorithm_properties["TESSERACT_LANGUAGE"] = argv[uri_index + 1];
+            algorithm_properties["TESSERACT_LANGUAGE"] = argv[uri_index + video_params + 1];
         } else {
              print_usage(argv);
              return 0;
@@ -176,6 +188,22 @@ int main(int argc, char *argv[]) {
                 print_detection_properties(locations[i].detection_properties, locations[i].confidence);
             }
         }
+        else if (media_option == "-v") {
+            // Run uri as a video data file.
+            std::cout << "Running job on video data uri: " << uri << std::endl;
+            MPFVideoJob job(job_name, uri, start_frame, end_frame, algorithm_properties, media_properties);
+            int count = 0;
+            for (auto track: im.GetDetections(job)) {
+                std::cout << "Track number: " << count << std::endl;
+                std::map<int, MPFImageLocation> locations = track.frame_locations;
+                std::cout << "Number of image locations: " << locations.size() << std::endl << std::endl;
+                for (const auto &location: locations) {
+                    std::cout << "Frame number: " << location.first << std::endl;
+                    print_detection_properties(location.second.detection_properties, location.second.confidence);
+                }
+                count ++;
+            }
+        }
         else {
             print_usage(argv);
         }

diff --git a/cpp/TesseractOCRTextDetection/test/data/NOTICE b/cpp/TesseractOCRTextDetection/test/data/NOTICE
@@ -82,6 +82,19 @@ Custom generated pdf for testing document text extraction.
 # test-backslash.png
 Custom generated image for testing escaped backslash tagging.
 
+# test-video-detection.avi
+Short clip of three separate image frames for testing video detection capability.
+Contains public domain text from the following sources:
+
+    https://en.wikipedia.org/wiki/Diazepam
+    (Japanese Translation)
+    Public Domain
+
+    http://www.un.org/en/universal-declaration-human-rights/
+    English text from the Universal
+    Declaration of Human Rights.
+    Public Domain
+
 # text-demo.png
 Text extracted from open source project https://github.com/tesseract-ocr/tesseract.
 

diff --git a/cpp/TesseractOCRTextDetection/test/data/test-video-detection.avi b/cpp/TesseractOCRTextDetection/test/data/test-video-detection.avi
diff --git a/cpp/TesseractOCRTextDetection/test/test_tesseract_ocr_detection.cpp b/cpp/TesseractOCRTextDetection/test/test_tesseract_ocr_detection.cpp
@@ -100,6 +100,25 @@ MPFGenericJob createPDFJob(const std::string &uri, const std::map<std::string, s
     return job;
 }
 
+/**
+ * Builds an MPF video job named "OCR_test" for the given media and frame range.
+ *
+ * @param uri - Path to existing input media.
+ * @param start - Video start frame.
+ * @param end - Video end frame.
+ * @param custom - Custom job algorithm properties, applied through setAlgorithmProperties.
+ *
+ * @return - An MPF video job with the specified algorithm properties and default-constructed media properties.
+ */
+MPFVideoJob createVideoJob(const std::string &uri, const int &start, const int &end,
+                             const std::map<std::string, std::string> &custom = {}) {
+    std::string test_job_name("OCR_test");
+    Properties job_props;
+    setAlgorithmProperties(job_props, custom);
+    Properties media_props;
+    return MPFVideoJob(test_job_name, uri, start, end, job_props, media_props);
+}
+
 /**
  * Helper function for running given image job. Checks if job results is not empty.
  *
@@ -136,6 +155,27 @@ void runDocumentDetection(const std::string &doc_path, TesseractOCRTextDetection
     ASSERT_FALSE(generic_tracks.empty());
 }
 
+
+/**
+ * Runs a video job over the given frame range and stores the resulting tracks.
+ * Raises a fatal test failure (ASSERT_FALSE) when the job returns no tracks.
+ *
+ * @param vid_path - Path of the video to process.
+ * @param ocr - TesseractOCRTextDetection component used to run the job.
+ * @param video_tracks - Output vector, overwritten with the detection tracks of the job.
+ * @param start - Video start frame.
+ * @param end - Video end frame.
+ * @param custom - Mapping of input job properties.
+ */
+void runVideoDetection(const std::string &vid_path, TesseractOCRTextDetection &ocr,
+                       std::vector<MPFVideoTrack> &video_tracks,
+                       const int &start, const int &end,
+                       const std::map<std::string, std::string> &custom = {}) {
+    video_tracks = ocr.GetDetections(createVideoJob(vid_path, start, end, custom));
+    ASSERT_FALSE(video_tracks.empty());
+}
+
+
 /**
  * Helper function for checking if running given image job will return no results.
  *
@@ -485,6 +525,40 @@ TEST(TESSERACTOCR, CustomModelTest) {
     ASSERT_TRUE(ocr.Close());
 }
 
+TEST(TESSERACTOCR, VideoProcessingTest) {
+
+    // Ensure video processing works as expected (start frame 0, end frame 2, OSD automation enabled).
+
+    const std::string video_path = "data/test-video-detection.avi";
+    TesseractOCRTextDetection ocr;
+    ocr.SetRunDirectory("../plugin");
+    ASSERT_TRUE(ocr.Init());
+
+    const std::map<std::string, std::string> custom_properties = {{"TESSERACT_LANGUAGE",    "eng"},
+                                                                  {"ENABLE_OSD_AUTOMATION", "TRUE"}};
+
+    std::vector<MPFVideoTrack> track_results;
+    ASSERT_NO_FATAL_FAILURE(runVideoDetection(video_path, ocr, track_results, 0, 2, custom_properties));
+    // Flatten every track's frame locations into one list; iterate by const reference to avoid deep copies.
+    std::vector<MPFImageLocation> results;
+    for (const auto &track_result : track_results) {
+        for (const auto &frame_entry : track_result.frame_locations) {
+            results.push_back(frame_entry.second);
+        }
+    }
+
+    assertInImage(video_path, "Testing Text Detection", results, "TEXT", 0);
+    assertInImage(video_path, "eng", results, "TEXT_LANGUAGE", 0);
+
+    assertInImage(video_path, "Japanese", results, "OSD_PRIMARY_SCRIPT", 1);
+    assertInImage(video_path, "Japanese", results, "MISSING_LANGUAGE_MODELS", 1);
+
+    assertInImage(video_path, "All human beings", results, "TEXT", 2);
+    assertInImage(video_path, "Latin", results, "TEXT_LANGUAGE", 2);
+
+    ASSERT_TRUE(ocr.Close());
+}
+
 TEST(TESSERACTOCR, ImageProcessingTest) {
 
     // Ensure contrast and unstructured image processing settings are enabled.
@@ -546,8 +620,6 @@ TEST(TESSERACTOCR, ImageProcessingTest) {
     ASSERT_TRUE(ocr.Close());
 }
 
-
-
 TEST(TESSERACTOCR, ModelTest) {
 
     // Ensure user can specify custom model directory locations.