diff --git a/python/LlavaDetection/Dockerfile b/python/LlavaDetection/Dockerfile index 8b4e3b00..128ee267 100644 --- a/python/LlavaDetection/Dockerfile +++ b/python/LlavaDetection/Dockerfile @@ -28,7 +28,7 @@ ARG BUILD_REGISTRY ARG BUILD_TAG=latest -FROM openmpf/openmpf_python_executor_ssb:${BUILD_TAG} +FROM ${BUILD_REGISTRY}openmpf_python_executor_ssb:${BUILD_TAG} RUN --mount=type=tmpfs,target=/var/cache/apt \ --mount=type=tmpfs,target=/var/lib/apt/lists \ diff --git a/python/LlavaDetection/llava_component/llava_component.py b/python/LlavaDetection/llava_component/llava_component.py index 4792c896..434b9f23 100644 --- a/python/LlavaDetection/llava_component/llava_component.py +++ b/python/LlavaDetection/llava_component/llava_component.py @@ -80,7 +80,7 @@ def get_detections_from_video(self, video_job: mpf.VideoJob) -> Iterable[mpf.Vid return tracks - def _get_frame_detections(self, media, config, is_video_job=False): + def _get_frame_detections(self, reader, config, is_video_job=False): self._update_prompts(config.prompt_config_path) self._check_client(config.ollama_server) @@ -90,11 +90,11 @@ def _get_frame_detections(self, media, config, is_video_job=False): video_process_timer = Timer() video_decode_timer.start() - for idx, frame in enumerate(media): + for idx, frame in enumerate(reader): video_decode_timer.pause() frame_count += 1 - width, height, _ = frame.shape + height, width, _ = frame.shape detection_properties = dict() self._get_ollama_response(self.frame_prompts, frame, detection_properties, video_process_timer) @@ -107,6 +107,9 @@ def _get_frame_detections(self, media, config, is_video_job=False): video_decode_timer.start() + for track in tracks: + reader.reverse_transform(track) + if is_video_job: return tracks, video_process_timer, video_decode_timer, frame_count return tracks