Skip to content

Commit

Permalink
Fix GStreamer backend with manual pipelines
Browse files Browse the repository at this point in the history
- Fix broken seeking in audio/video playback
- Fix broken audio playback
- Fix unreliable seeking (read out from the last acquired buffer)
- Estimate frame count if it is not available directly
- Return -1 for frame count and fps if they are not available.
- Return 0 for fps if the video has variable frame rate
- Enable and fix tests
  • Loading branch information
kecsap committed Nov 2, 2023
1 parent 7c9231f commit 7f53fc4
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 53 deletions.
144 changes: 99 additions & 45 deletions modules/videoio/src/cap_gstreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ template<> inline void GSafePtr_release<GstBuffer>(GstBuffer** pPtr) { if (pPtr)
template<> inline void GSafePtr_release<GstSample>(GstSample** pPtr) { if (pPtr) { gst_sample_unref(*pPtr); *pPtr = NULL; } }
template<> inline void GSafePtr_release<GstBus>(GstBus** pPtr) { if (pPtr) { gst_object_unref(G_OBJECT(*pPtr)); *pPtr = NULL; } }
template<> inline void GSafePtr_release<GstMessage>(GstMessage** pPtr) { if (pPtr) { gst_message_unref(*pPtr); *pPtr = NULL; } }
template<> inline void GSafePtr_release<GstQuery>(GstQuery** pPtr) { if (pPtr) { gst_query_unref(*pPtr); *pPtr = NULL; } }
template<> inline void GSafePtr_release<GMainLoop>(GMainLoop** pPtr) { if (pPtr) { g_main_loop_unref(*pPtr); *pPtr = NULL; } }

template<> inline void GSafePtr_release<GstEncodingVideoProfile>(GstEncodingVideoProfile** pPtr) { if (pPtr) { gst_encoding_profile_unref(*pPtr); *pPtr = NULL; } }
Expand Down Expand Up @@ -367,6 +368,7 @@ class GStreamerCapture CV_FINAL : public IVideoCapture
gint audioBitPerFrame;
gint audioSampleSize;
std::string audioFormat;
guint64 timestamp;

Mat audioFrame;
std::deque<uint8_t> bufferAudioData;
Expand Down Expand Up @@ -433,7 +435,8 @@ GStreamerCapture::GStreamerCapture() :
audioSamplesPerSecond(44100),
audioBitPerFrame(0),
audioSampleSize(0),
audioFormat("S16LE")
audioFormat("S16LE"),
timestamp(0)
, va_type(VIDEO_ACCELERATION_NONE)
, hw_device(-1)
{}
Expand Down Expand Up @@ -680,6 +683,11 @@ bool GStreamerCapture::grabVideoFrame()
stopFlag = true;
emulatedFrameNumber++;
}
if (usedVideoSample)
{
auto *buffer = gst_sample_get_buffer((GstSample*)usedVideoSample);
timestamp = GST_BUFFER_PTS(buffer);
}
returnFlag = true;
}
}
Expand Down Expand Up @@ -792,6 +800,7 @@ bool GStreamerCapture::grabAudioFrame()
CV_LOG_ERROR(NULL, "GStreamer: Failed. Buffer is empty");
return false;
}
timestamp = GST_BUFFER_PTS(buf);
if (!gst_buffer_map(buf, &map_info, GST_MAP_READ))
{
CV_LOG_ERROR(NULL, "GStreamer: Failed to map GStreamer buffer to system memory");
Expand Down Expand Up @@ -1389,6 +1398,7 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
GSafePtr<char> uri;
GSafePtr<GstBus> bus;

GSafePtr<GstElement> queue;
GSafePtr<GstElement> uridecodebin;
GSafePtr<GstElement> color;
GSafePtr<GstElement> convert;
Expand Down Expand Up @@ -1493,6 +1503,7 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
if (strstr(name, "opencvsink") != NULL || strstr(name, "appsink") != NULL)
{
sink.attach(GST_ELEMENT(gst_object_ref(element)));
audiosink.attach(GST_ELEMENT(gst_object_ref(element)));
}
else if (strstr(name, COLOR_ELEM_NAME) != NULL)
{
Expand Down Expand Up @@ -1534,14 +1545,16 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam

if (videoStream >= 0)
{
queue.reset(gst_element_factory_make("queue", NULL));
CV_Assert(queue);
sink.reset(gst_element_factory_make("appsink", NULL));
CV_Assert(sink);
// videoconvert (in 0.10: ffmpegcolorspace, in 1.x autovideoconvert)
//automatically selects the correct colorspace conversion based on caps.
color.reset(gst_element_factory_make(COLOR_ELEM, NULL));
CV_Assert(color);

gst_bin_add_many(GST_BIN(pipeline.get()), uridecodebin.get(), color.get(), sink.get(), NULL);
gst_bin_add_many(GST_BIN(pipeline.get()), queue.get(), uridecodebin.get(), color.get(), sink.get(), NULL);

if (element_from_uri)
{
Expand All @@ -1566,14 +1579,16 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
}
if (audioStream >= 0)
{
queue.reset(gst_element_factory_make("queue", NULL));
CV_Assert(queue);
convert.reset(gst_element_factory_make("audioconvert", NULL));
resample.reset(gst_element_factory_make("audioresample", NULL));
audiosink.reset(gst_element_factory_make("appsink", NULL));
CV_Assert(convert);
CV_Assert(resample);
CV_Assert(audiosink);

gst_bin_add_many (GST_BIN (pipeline.get()), uridecodebin.get(), convert.get(), resample.get(), audiosink.get(), NULL);
gst_bin_add_many (GST_BIN (pipeline.get()), queue.get(), uridecodebin.get(), convert.get(), resample.get(), audiosink.get(), NULL);
if (!gst_element_link_many (convert.get(), resample.get(), audiosink.get(), NULL))
{
CV_WARN("GStreamer(audio): cannot link convert -> resample -> sink");
Expand Down Expand Up @@ -1646,21 +1661,25 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
}
if (manualpipeline)
{
GSafePtr<GstCaps> peer_caps;
GSafePtr<GstPad> sink_pad;
sink_pad.attach(gst_element_get_static_pad(sink, "sink"));
peer_caps.attach(gst_pad_peer_query_caps(sink_pad, NULL));
if (!gst_caps_can_intersect(caps, peer_caps))
if (videoStream >= 0)
{
caps.attach(gst_caps_from_string("video/x-raw, format=(string){UYVY,YUY2,YVYU,NV12,NV21,YV12,I420,BGRA,RGBA,BGRx,RGBx,GRAY16_LE,GRAY16_BE}"));
CV_Assert(caps);
GSafePtr<GstCaps> peer_caps;
GSafePtr<GstPad> sink_pad;
sink_pad.attach(gst_element_get_static_pad(sink, "sink"));
peer_caps.attach(gst_pad_peer_query_caps(sink_pad, NULL));
if (!gst_caps_can_intersect(caps, peer_caps))
{
caps.attach(gst_caps_from_string("video/x-raw, format=(string){UYVY,YUY2,YVYU,NV12,NV21,YV12,I420,BGRA,RGBA,BGRx,RGBx,GRAY16_LE,GRAY16_BE}"));
CV_Assert(caps);
}
}
}
if (videoStream >= 0)
{
gst_app_sink_set_caps(GST_APP_SINK(sink.get()), caps);
caps.release();
}

{
GST_DEBUG_BIN_TO_DOT_FILE(GST_BIN(pipeline.get()), GST_DEBUG_GRAPH_SHOW_ALL, "pipeline-init");

Expand Down Expand Up @@ -1688,18 +1707,6 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
GSafePtr<GstCaps> buffer_caps;
buffer_caps.attach(gst_pad_get_current_caps(pad));

GstFormat format;

format = GST_FORMAT_DEFAULT;
if(!gst_element_query_duration(sink, format, &duration))
{
handleMessage(pipeline);
CV_WARN("unable to query duration of stream");
duration = -1;
}

handleMessage(pipeline);

const GstStructure *structure = gst_caps_get_structure(buffer_caps, 0); // no lifetime transfer
if (!gst_structure_get_int (structure, "width", &width) ||
!gst_structure_get_int (structure, "height", &height))
Expand All @@ -1708,13 +1715,55 @@ bool GStreamerCapture::open(const String &filename_, const cv::VideoCaptureParam
}

gint num = 0, denom=1;
bool fps_query_success = true;

if (!gst_structure_get_fraction(structure, "framerate", &num, &denom))
{
CV_WARN("cannot query video fps");
fps_query_success = false;
}

fps = (double)num/(double)denom;

// If num == 0 and denom == 1 -> variable frame rate video.
if (fps_query_success && !(num == 0 && denom == 1))
{
GSafePtr<GstQuery> query;
query.attach(gst_query_new_duration(GST_FORMAT_DEFAULT));

gboolean res = gst_element_query(pipeline.get(), query);

if (res)
{
gst_query_parse_duration(query, NULL, &duration);
}
else if (fps != 0)
{
GSafePtr<GstQuery> query2;
query2.attach(gst_query_new_duration(GST_FORMAT_TIME));
gboolean res2 = gst_element_query(pipeline.get(), query2);

if (res2)
{
gst_query_parse_duration(query2, NULL, &duration);
duration = static_cast<gint64>((float)duration / GST_SECOND * fps);
CV_WARN("frame count is estimated by duration and fps");
}
else
{
CV_WARN("unable to query duration of stream");
duration = -1;
}
}
else
{
CV_WARN("unable to query frame count of stream and fps are not available to estimate it");
duration = -1;
}
}

handleMessage(pipeline);

{
GstFormat format_;
gint64 value_ = -1;
Expand Down Expand Up @@ -1814,20 +1863,7 @@ double GStreamerCapture::getProperty(int propId) const
switch(propId)
{
case CV_CAP_PROP_POS_MSEC:
CV_LOG_ONCE_WARNING(NULL, "OpenCV | GStreamer: CAP_PROP_POS_MSEC property result may be unrealiable: "
"https://github.com/opencv/opencv/issues/19025");
if (audioStream != -1)
{
return usedVideoSampleTimeNS * 1e-6;
}
format = GST_FORMAT_TIME;
status = gst_element_query_position(sink.get(), CV_GST_FORMAT(format), &value);
if(!status) {
handleMessage(pipeline);
CV_WARN("GStreamer: unable to query position of stream");
return 0;
}
return value * 1e-6; // nano seconds to milli seconds
return double(timestamp) / GST_MSECOND;
case CV_CAP_PROP_POS_FRAMES:
if (!isPosFramesSupported)
{
Expand Down Expand Up @@ -1859,7 +1895,7 @@ double GStreamerCapture::getProperty(int propId) const
case CV_CAP_PROP_FPS:
return fps;
case CV_CAP_PROP_FRAME_COUNT:
return duration;
return (double)duration;
case CV_CAP_PROP_BRIGHTNESS:
case CV_CAP_PROP_CONTRAST:
case CV_CAP_PROP_SATURATION:
Expand Down Expand Up @@ -1936,20 +1972,25 @@ bool GStreamerCapture::setProperty(int propId, double value)
return false;
}

bool wasPlaying = this->isPipelinePlaying();
if (wasPlaying)
bool needRestart = this->isPipelinePlaying() && (propId == CV_CAP_PROP_FRAME_WIDTH || propId == CV_CAP_PROP_FRAME_HEIGHT || propId == CV_CAP_PROP_FPS);
if (needRestart) {
this->stopPipeline();
}

switch(propId)
{
case CV_CAP_PROP_POS_MSEC:
{
if(!gst_element_seek_simple(GST_ELEMENT(pipeline.get()), GST_FORMAT_TIME,
flags, (gint64) (value * GST_MSECOND))) {
handleMessage(pipeline);
CV_WARN("GStreamer: unable to seek");
}
else
{
// Optimistically caching the target timestamp before reading the first frame from the new position since
// the timestamp in GStreamer can be reliable extracted from the read frames.
timestamp = (gint64)value;
if (isPosFramesEmulated)
{
if (value == 0)
Expand All @@ -1963,7 +2004,8 @@ bool GStreamerCapture::setProperty(int propId, double value)
}
}
}
break;
return true;
}
case CV_CAP_PROP_POS_FRAMES:
{
if (!isPosFramesSupported)
Expand All @@ -1977,24 +2019,34 @@ bool GStreamerCapture::setProperty(int propId, double value)
return true;
}
}
return false;
CV_WARN("unable to seek");
return false;
}
// Certain mov and mp4 files seek incorrectly if the pipeline is not stopped before.
if (this->isPipelinePlaying()) {
this->stopPipeline();
}

if(!gst_element_seek_simple(GST_ELEMENT(pipeline.get()), GST_FORMAT_DEFAULT,
flags, (gint64) value)) {
handleMessage(pipeline);
CV_WARN("GStreamer: unable to seek");
break;
return false;
}
// wait for status update
gst_element_get_state(pipeline, NULL, NULL, GST_CLOCK_TIME_NONE);
return true;
}
case CV_CAP_PROP_POS_AVI_RATIO:
{
// https://stackoverflow.com/questions/31290315
// GStreamer docs: GST_FORMAT_PERCENT (5) – percentage of stream (few, if any, elements implement this as of May 2009)
CV_WARN("GStreamer: seeking by file percent are not supported by most GStreamer elements");
if(!gst_element_seek_simple(GST_ELEMENT(pipeline.get()), GST_FORMAT_PERCENT,
flags, (gint64) (value * GST_FORMAT_PERCENT_MAX))) {
handleMessage(pipeline);
CV_WARN("GStreamer: unable to seek");
return false;
}
else
{
Expand All @@ -2011,7 +2063,8 @@ bool GStreamerCapture::setProperty(int propId, double value)
}
}
}
break;
return true;
}
case CV_CAP_PROP_FRAME_WIDTH:
if(value > 0)
setFilter("width", G_TYPE_INT, (int) value, 0);
Expand Down Expand Up @@ -2099,8 +2152,9 @@ bool GStreamerCapture::setProperty(int propId, double value)
CV_WARN("GStreamer: unhandled property");
}

if (wasPlaying)
if (needRestart) {
this->startPipeline();
}

return false;
}
Expand Down Expand Up @@ -2572,7 +2626,7 @@ bool CvVideoWriter_GStreamer::open( const std::string &filename, int fourcc,
if (stateret == GST_STATE_CHANGE_FAILURE)
{
handleMessage(pipeline);
CV_WARN("GStreamer: cannot put pipeline to play\n");
CV_WARN("GStreamer: cannot put pipeline to play");
pipeline.release();
return false;
}
Expand Down
3 changes: 2 additions & 1 deletion modules/videoio/test/test_audio.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ class MediaTestFixture : public AudioBaseTest, public testing::TestWithParam <pa
double audio_shift = cap.get(CAP_PROP_AUDIO_SHIFT_NSEC);
double video0_timestamp = cap.get(CAP_PROP_POS_MSEC) * 1e-3;
audio0_timestamp = video0_timestamp + audio_shift * 1e-9;

std::cout << "video0 timestamp: " << video0_timestamp << " audio0 timestamp: " << audio0_timestamp << " (audio shift nanoseconds: " << audio_shift << " , seconds: " << audio_shift * 1e-9 << ")" << std::endl;
}
ASSERT_TRUE(cap.retrieve(videoFrame));
Expand Down Expand Up @@ -228,7 +229,7 @@ class MediaTestFixture : public AudioBaseTest, public testing::TestWithParam <pa
EXPECT_NEAR(
cap.get(CAP_PROP_AUDIO_POS) / samplePerSecond + audio0_timestamp,
cap.get(CAP_PROP_POS_MSEC) * 1e-3,
(1.0 / fps) * 0.3)
(1.0 / fps) * 0.6)
<< "CAP_PROP_AUDIO_POS=" << cap.get(CAP_PROP_AUDIO_POS) << " CAP_PROP_POS_MSEC=" << cap.get(CAP_PROP_POS_MSEC);
}
if (frame != 0 && frame != numberOfFrames-1 && audioData[0].size() != (size_t)numberOfSamples)
Expand Down

0 comments on commit 7f53fc4

Please sign in to comment.