Merge pull request #243 from jihyeonyi/predict_video

Add example script to predict and reconstruct a video on a client machine, from an existing project `Deployment`
openvinotoolkit · Jun 22, 2023 · 50cd8eb · 50cd8eb
2 parents 364fa49 + b800133
commit 50cd8eb
Show file tree

Hide file tree

Showing 10 changed files with 415 additions and 0 deletions.
diff --git a/examples/README.md b/examples/README.md
@@ -123,3 +123,23 @@ The example scripts `upload_and_predict_from_numpy.py` and
 `upload_and_predict_media_from_folder.py` show how to upload either a single media
 item directly from memory, or upload an entire folder of media items and
 get predictions for the media from the cluster.
+
+## Predict a video on local environment
+Once you have downloaded (deployed) a model from the server, you can run predictions in your local environment.
+The example script `predict_video_locally.py` shows how to reconstruct a video with overlaid predictions without uploading the file to the server.
+
+This code sample shows how to get a deployment from the server.
+
+> ```python
+> # Get the server configuration from .env file
+> server_config = get_server_details_from_env()
+>
+> # Set up the Geti instance with the server configuration details
+> geti = Geti(server_config=server_config)
+>
+> # Create deployment for the project, and prepare it for running inference
+> deployment = geti.deploy_project(PROJECT_NAME)
+>
+> # Save the deployment to a local folder
+> deployment.save(PATH_TO_DEPLOYMENT)
+> ```
diff --git a/examples/predict_video_locally.py b/examples/predict_video_locally.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+
+import argparse
+import logging
+
+from geti_sdk.demos import predict_video_from_deployment
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Predict video on local machine.")
+    parser.add_argument("video_path", type=str, help="File path to video")
+    parser.add_argument(
+        "deployment_path",
+        type=str,
+        help="Path to the folder containing the deployment data",
+    )
+    parser.add_argument(
+        "--device",
+        choices=["CPU", "GPU"],
+        default="CPU",
+        help="Device (CPU or GPU) to load the model to. Defaults to 'CPU'",
+    )
+    parser.add_argument(
+        "--drop_audio",
+        action="store_true",
+        help="Option to drop audio. defaults to 'False'(preserving audio)",
+    )
+    parser.add_argument(
+        "--log_level",
+        choices=["warning", "info"],
+        default="warning",
+        help="Logging level. Defaults to 'warning'",
+    )
+
+    args = parser.parse_args()
+
+    level_config = {"warning": logging.WARNING, "info": logging.INFO}
+    log_level = level_config[args.log_level.lower()]
+    logging.basicConfig(level=log_level)
+
+    video_path = args.video_path
+    deployment_path = args.deployment_path
+    device = args.device
+    preserve_audio = not args.drop_audio
+
+    # Reconstruct video with overlaid predictions on local machine.
+    predict_video_from_deployment(
+        video_path, deployment_path, device=device, preserve_audio=preserve_audio
+    )
diff --git a/geti_sdk/demos/__init__.py b/geti_sdk/demos/__init__.py
@@ -41,6 +41,7 @@
     ensure_trained_anomaly_project,
     ensure_trained_example_project,
 )
+from .predict_video import predict_video_from_deployment
 
 __all__ = [
     "DEFAULT_DATA_PATH",
@@ -58,4 +59,5 @@
     "get_mvtec_dataset",
     "set_directory_permissions",
     "get_person_car_bike_video",
+    "predict_video_from_deployment",
 ]
diff --git a/geti_sdk/demos/predict_video.py b/geti_sdk/demos/predict_video.py
@@ -0,0 +1,186 @@
+# Copyright (C) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+
+import logging
+import os
+import shutil
+import subprocess
+import tempfile
+import time
+from typing import List, Optional, Union
+
+import cv2
+import imageio_ffmpeg
+from tqdm.auto import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
+
+from geti_sdk.data_models import Prediction
+from geti_sdk.deployment import Deployment
+from geti_sdk.utils import show_image_with_annotation_scene
+
+
+def predict_video_from_deployment(
+    video_path: Union[str, os.PathLike],
+    deployment: Union[Deployment, str, os.PathLike],
+    device: str = "CPU",
+    preserve_audio: Optional[bool] = True,
+) -> Optional[str]:
+    """
+    Create a video reconstruction with overlaid model predictions.
+    This function runs inference on the local machine for every frame in the video.
+    The inference results are overlaid on the frames and the output video path will be returned.
+
+    :param video_path: File path to video
+    :param deployment: Path to the folder containing the Deployment data, or Deployment instance
+    :param device: Device (CPU or GPU) to load the model to. Defaults to 'CPU'
+    :param preserve_audio: True to preserve all audio in the original input video. Defaults to True.
+        If ffmpeg could not be found, this option is ignored and no audio would be preserved.
+    :return: The file path of the output video if generated successfully. Otherwise None.
+    """
+    retval: Optional[str] = None
+
+    # prepare deployment for running inference
+    if isinstance(deployment, (str, os.PathLike)):
+        deployment = Deployment.from_folder(deployment)
+    elif not isinstance(deployment, Deployment):
+        raise ValueError(f"Unable to read deployment {deployment}")
+
+    logging.info("Load inference models")
+    deployment.load_inference_models(device=device)
+
+    # Open the video capture, this prepares the video to be ready for reading
+    cap = cv2.VideoCapture(video_path)
+
+    if cap is None or not cap.isOpened():
+        raise ValueError(f"Unable to read video from {video_path}")
+
+    # Extract original video properties
+    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+    video_duration = num_frames / fps
+    logging.info(
+        f"Input video contains {num_frames:.1f} frames, "
+        f"for a total duration of {video_duration:.1f} seconds"
+    )
+
+    t_start = time.time()
+
+    predictions: List[Prediction] = []
+    logging.info("Running video prediction... ")
+    with logging_redirect_tqdm(tqdm_class=tqdm), tqdm(
+        total=num_frames, desc="Predicting"
+    ) as progress_bar:
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if ret is True:
+                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                prediction = deployment.infer(rgb_frame)
+                predictions.append((rgb_frame, prediction))
+                progress_bar.update(1)
+            else:
+                break
+
+    cap.release()
+
+    if len(predictions) == num_frames:
+        t_prediction = time.time() - t_start
+        logging.info(
+            f"Prediction completed successfully in {t_prediction:.1f} seconds. "
+        )
+
+        # Determine the output video path
+        fname, ext = os.path.splitext(video_path)
+        output_video_path = os.path.abspath(fname + "_reconstructed" + ext)
+
+        # Create a video writer to be able to save the reconstructed video
+        out_video = cv2.VideoWriter(
+            filename=output_video_path,
+            fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
+            fps=fps,
+            frameSize=(frame_width, frame_height),
+        )
+
+        count = 0
+        logging.info("Running video reconstruction... ")
+        with logging_redirect_tqdm(tqdm_class=tqdm), tqdm(
+            total=num_frames, desc="Reconstructing"
+        ) as progress_bar:
+            for rgb_frame, prediction in predictions:
+                output_frame = show_image_with_annotation_scene(
+                    image=rgb_frame, annotation_scene=prediction, show_results=False
+                )
+                out_video.write(output_frame)
+                count += 1
+                progress_bar.update(1)
+        out_video.release()
+
+        if preserve_audio is True:
+            try:
+                # audio = ffmpeg.input(video_path).audio
+                # video = ffmpeg.input(output_video_path).video
+                # # ffmpeg can't use same input/output video file.
+                # temp = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
+                # out = ffmpeg.output(video, audio, temp.name)
+                # out.run(overwrite_output=True, quiet=True)
+                # shutil.move(temp.name, output_video_path)
+                FFMPEG = imageio_ffmpeg.get_ffmpeg_exe()
+                logging.info("Restoring all audio in the original input video")
+                # ffmpeg can't use same input/output video file.
+                with tempfile.NamedTemporaryFile(suffix=ext) as temp:
+                    cmd = [
+                        FFMPEG,
+                        "-i",
+                        output_video_path,  # 1st input
+                        "-i",
+                        video_path,  # 2nd input
+                        "-map",
+                        "0:v",  # video from the 1st input(output_video_path)
+                        "-map",
+                        "1:a?",  # audio from the 2nd input(video_path). '?' to ignore if no audio exists.
+                        "-c",
+                        "copy",
+                        "-y",
+                        temp.name,
+                    ]
+                    p = subprocess.run(
+                        cmd,
+                        stdin=subprocess.PIPE,
+                        stdout=subprocess.PIPE,
+                        stderr=subprocess.PIPE,
+                    )
+                    if p.returncode == 0:
+                        shutil.copy(temp.name, output_video_path)
+                    else:
+                        logging.warning(
+                            "Error occurred while processing audio. No audio would be preserved."
+                        )
+
+            except RuntimeError:  # FFMPEG binary could not be found.
+                logging.warning(
+                    "ffmpeg could not be found on your system. No audio would be preserved."
+                )
+
+        retval = output_video_path
+        t_reconstruction = time.time() - t_prediction - t_start
+        logging.info(
+            f"Reconstruction completed successfully in {t_reconstruction:.1f} seconds."
+        )
+        logging.info(f"Output video saved to `{output_video_path}`")
+    else:
+        logging.warning("Prediction process failed. ")
+
+    return retval
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -18,3 +18,4 @@ joblib>=1.1.1
 protobuf>=3.20.2
 ovmsclient>=2022.3
 orjson==3.8.8
+imageio-ffmpeg==0.4.8
diff --git a/tests/data/dice/deployment.zip b/tests/data/dice/deployment.zip
diff --git a/tests/data/dice/dice_rolling_with_sound.mp4 b/tests/data/dice/dice_rolling_with_sound.mp4
diff --git a/tests/fixtures/data.py b/tests/fixtures/data.py
@@ -13,6 +13,7 @@
 # and limitations under the License.
 import json
 import os
+import shutil
 from typing import Callable, List
 
 import pytest
@@ -198,3 +199,31 @@ def fxt_classification_to_detection_annotation_readers(
     `classification_to_detection` project, for the `blocks` dataset.
     """
     yield [fxt_annotation_reader_grouped, fxt_annotation_reader]
+
+
+@pytest.fixture(scope="session")
+def fxt_dice_dataset(fxt_base_test_path) -> str:
+    """
+    This fixture returns the path to the 'dice' dataset for video prediction
+    """
+    yield os.path.join(fxt_base_test_path, "data", "dice")
+
+
+@pytest.fixture(scope="session")
+def fxt_deployment_path_dice(fxt_dice_dataset) -> str:
+    """
+    This fixture returns the path to the deployment folder for video prediction
+    """
+    deployment_path = os.path.join(fxt_dice_dataset, "deployment")
+    deployment_file = os.path.join(fxt_dice_dataset, "deployment.zip")
+    shutil.unpack_archive(deployment_file, deployment_path)
+    yield deployment_path
+    shutil.rmtree(deployment_path)
+
+
+@pytest.fixture(scope="session")
+def fxt_video_path_dice(fxt_dice_dataset) -> str:
+    """
+    This fixture returns the path to a sample video for video prediction
+    """
+    yield os.path.join(fxt_dice_dataset, "dice_rolling_with_sound.mp4")