Decoding bugfixes and cleanup.

- Hack to allow working sample rate / interleave conversion on new FFmpeg (and possibly LibAV) versions. - Still using deprecated avcodec audio_resample(); needs to be written around libswresample/libavresample at some point. - Proper timecode calculation for files that do not begin at zero timecode (mainly MPEG TS). - No output when decoding a packet is not an error (could be delayed output, e.g. B frames). - Cleanup.
performous · Feb 2, 2013 · 13ba270 · 13ba270
1 parent 8211175
commit 13ba270
Show file tree

Hide file tree

Showing 4 changed files with 63 additions and 85 deletions.
diff --git a/game/audio.cc b/game/audio.cc
@@ -139,7 +139,7 @@ class Music {
 		FFmpeg mpeg;
 		float fadeLevel;
 		float pitchFactor;
-		Track(std::string const& filename, unsigned int sr): mpeg(false, true, filename, sr), fadeLevel(1.0f), pitchFactor(0.0f) {}
+		Track(std::string const& filename, unsigned int sr): mpeg(filename, sr), fadeLevel(1.0f), pitchFactor(0.0f) {}
 	};
 	typedef boost::ptr_map<std::string, Track> Tracks;
 	Tracks tracks; ///< Audio decoders
@@ -239,7 +239,7 @@ struct Sample {
 	FFmpeg mpeg;
 	bool eof;
   public:
-	Sample(std::string const& filename, unsigned sr) : m_pos(), mpeg(false, true, filename, sr), eof(true) { }
+	Sample(std::string const& filename, unsigned sr) : m_pos(), mpeg(filename, sr), eof(true) { }
 	void operator()(float* begin, float* end) {
 		if(eof) {
 			// No more data to play in this sample

diff --git a/game/ffmpeg.cc b/game/ffmpeg.cc
@@ -3,6 +3,7 @@
 #include "config.hh"
 #include "util.hh"
 #include "xtime.hh"
+#include <boost/smart_ptr/shared_ptr.hpp>
 #include <iostream>
 #include <stdexcept>
 
@@ -20,36 +21,19 @@ extern "C" {
 
 /*static*/ boost::mutex FFmpeg::s_avcodec_mutex;
 
-FFmpeg::FFmpeg(bool decodeVideo, bool decodeAudio, std::string const& _filename, unsigned int rate):
-  width(), height(), m_filename(_filename), m_rate(rate), m_quit(), m_running(), m_eof(),
-  m_seekTarget(getNaN()), m_position(), m_streamId(-1), m_mediaType(),
-  m_formatContext(), m_codecContext(), m_codec(), m_resampleContext(), m_swsContext(),
+FFmpeg::FFmpeg(std::string const& _filename, unsigned int rate):
+  width(), height(), m_filename(_filename), m_rate(rate), m_quit(),
+  m_seekTarget(getNaN()), m_position(), m_duration(), m_streamId(-1),
+  m_mediaType(rate ? AVMEDIA_TYPE_AUDIO : AVMEDIA_TYPE_VIDEO),
+  m_formatContext(), m_codecContext(), m_resampleContext(), m_swsContext(),
   m_thread(new boost::thread(boost::ref(*this)))
-{
-	if (decodeVideo) m_mediaType = AVMEDIA_TYPE_VIDEO;
-	else if (decodeAudio) m_mediaType = AVMEDIA_TYPE_AUDIO;
-	else throw std::logic_error("Can only decode one track");
-}
+{}
 
 FFmpeg::~FFmpeg() {
 	m_quit = true;
 	videoQueue.reset();
 	audioQueue.quit();
 	m_thread->join();
-	// TODO: use RAII for freeing resources (to prevent memory leaks)
-	boost::mutex::scoped_lock l(s_avcodec_mutex); // avcodec_close is not thread-safe
-	if (m_resampleContext) audio_resample_close(m_resampleContext);
-	if (m_codecContext) avcodec_close(m_codecContext);
-#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(53, 17, 0)
-	if (m_formatContext) avformat_close_input(&m_formatContext);
-#else
-	if (m_formatContext) av_close_input_file(m_formatContext);
-#endif
-}
-
-double FFmpeg::duration() const {
-	double d = m_running ? m_formatContext->duration / double(AV_TIME_BASE) : getNaN();
-	return d >= 0.0 ? d : getInf();
 }
 
 void FFmpeg::open() {
@@ -60,11 +44,12 @@ void FFmpeg::open() {
 	if (avformat_find_stream_info(m_formatContext, NULL) < 0) throw std::runtime_error("Cannot find stream information");
 	m_formatContext->flags |= AVFMT_FLAG_GENPTS;
 	// Find a track and open the codec
-	m_streamId = av_find_best_stream(m_formatContext, (AVMediaType)m_mediaType, -1, -1, &m_codec, 0);
+	AVCodec* codec = NULL;
+	m_streamId = av_find_best_stream(m_formatContext, (AVMediaType)m_mediaType, -1, -1, &codec, 0);
 	if (m_streamId < 0) throw std::runtime_error("No suitable track found");
 
 	AVCodecContext* cc = m_formatContext->streams[m_streamId]->codec;
-	if (avcodec_open2(cc, m_codec, NULL) < 0) throw std::runtime_error("Cannot open audio codec");
+	if (avcodec_open2(cc, codec, NULL) < 0) throw std::runtime_error("Cannot open codec");
 	cc->workaround_bugs = FF_BUG_AUTODETECT;
 	m_codecContext = cc;
 
@@ -90,29 +75,34 @@ void FFmpeg::open() {
 
 void FFmpeg::operator()() {
 	try { open(); } catch (std::exception const& e) { std::clog << "ffmpeg/error: Failed to open " << m_filename << ": " << e.what() << std::endl; m_quit = true; return; }
-	m_running = true;
-	audioQueue.setDuration(duration());
+	m_duration = m_formatContext->duration / double(AV_TIME_BASE);
+	audioQueue.setDuration(m_duration);
 	int errors = 0;
 	while (!m_quit) {
 		try {
 			if (audioQueue.wantSeek()) m_seekTarget = 0.0;
 			if (m_seekTarget == m_seekTarget) seek_internal();
 			decodePacket();
-			m_eof = false;
 			errors = 0;
 		} catch (eof_error&) {
-			m_eof = true;
 			videoQueue.push(new VideoFrame()); // EOF marker
 			boost::thread::sleep(now() + 0.1);
 		} catch (std::exception& e) {
 			std::clog << "ffmpeg/error: " << m_filename << ": " << e.what() << std::endl;
 			if (++errors > 2) { std::clog << "ffmpeg/error: FFMPEG terminating due to multiple errors" << std::endl; m_quit = true; }
 		}
 	}
-	m_running = false;
-	m_eof = true;
 	audioQueue.setEof();
 	videoQueue.push(new VideoFrame()); // EOF marker
+	// TODO: use RAII for freeing resources (to prevent memory leaks)
+	boost::mutex::scoped_lock l(s_avcodec_mutex); // avcodec_close is not thread-safe
+	if (m_resampleContext) audio_resample_close(m_resampleContext);
+	if (m_codecContext) avcodec_close(m_codecContext);
+#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(53, 17, 0)
+	if (m_formatContext) avformat_close_input(&m_formatContext);
+#else
+	if (m_formatContext) av_close_input_file(m_formatContext);
+#endif
 }
 
 void FFmpeg::seek(double time, bool wait) {
@@ -125,7 +115,7 @@ void FFmpeg::seek_internal() {
 	videoQueue.reset();
 	audioQueue.reset();
 	int flags = 0;
-	if (m_seekTarget < position()) flags |= AVSEEK_FLAG_BACKWARD;
+	if (m_seekTarget < m_position) flags |= AVSEEK_FLAG_BACKWARD;
 	av_seek_frame(m_formatContext, -1, m_seekTarget * AV_TIME_BASE, flags);
 	m_seekTarget = getNaN(); // Signal that seeking is done
 }
@@ -139,36 +129,27 @@ void FFmpeg::decodePacket() {
 		~ReadFramePacket() { av_free_packet(this); }
 	};
 
-	struct AVFrameWrapper {
-		AVFrame* m_frame;
-		AVFrameWrapper(): m_frame(avcodec_alloc_frame()) {
-			if (!m_frame) throw std::runtime_error("Unable to allocate AVFrame");
-		}
-		~AVFrameWrapper() { av_free(m_frame); }
-		operator AVFrame*() { return m_frame; }
-		AVFrame* operator->() { return m_frame; }
-	};
-
 	// Read an AVPacket and decode it into AVFrames
 	ReadFramePacket packet(m_formatContext);
 	int packetSize = packet.size;
 	while (packetSize) {
 		if (packetSize < 0) throw std::logic_error("negative packet size?!");
 		if (m_quit || m_seekTarget == m_seekTarget) return;
 		if (packet.stream_index != m_streamId) return;
-		AVFrameWrapper frame;
+		boost::shared_ptr<AVFrame> frame(avcodec_alloc_frame(), &av_free);
 		int frameFinished = 0;
 		int decodeSize = (m_mediaType == AVMEDIA_TYPE_VIDEO ?
-		  avcodec_decode_video2(m_codecContext, frame, &frameFinished, &packet) :
-		  avcodec_decode_audio4(m_codecContext, frame, &frameFinished, &packet));
-		if (decodeSize < 0) throw std::runtime_error("cannot decode avframe");
+		  avcodec_decode_video2(m_codecContext, frame.get(), &frameFinished, &packet) :
+		  avcodec_decode_audio4(m_codecContext, frame.get(), &frameFinished, &packet));
+		if (decodeSize < 0) return; // Packet didn't produce any output (could be waiting for B frames or something)
 		packetSize -= decodeSize; // Move forward within the packet
 		if (!frameFinished) continue;
 		// Update current position if timecode is available
 		if (int64_t(frame->pkt_pts) != int64_t(AV_NOPTS_VALUE)) {
-			m_position = double(frame->pkt_pts) * av_q2d(m_formatContext->streams[m_streamId]->time_base);
+			m_position = double(frame->pkt_pts) * av_q2d(m_formatContext->streams[m_streamId]->time_base)
+			  - double(m_formatContext->start_time) / AV_TIME_BASE;
 		}
-		if (m_mediaType == AVMEDIA_TYPE_VIDEO) processVideo(frame); else processAudio(frame);
+		if (m_mediaType == AVMEDIA_TYPE_VIDEO) processVideo(frame.get()); else processAudio(frame.get());
 	}
 }
 
@@ -189,9 +170,28 @@ void FFmpeg::processVideo(AVFrame* frame) {
 }
 
 void FFmpeg::processAudio(AVFrame* frame) {
+	void* data = frame->data[0];
+	// New FFmpeg versions use non-interleaved audio decoding and samples may be in float format.
+	// Do a conversion here, allowing us to use the old (deprecated) avcodec audio_resample().
+	std::vector<int16_t> input;
+	unsigned inFrames = frame->nb_samples;
+	if (frame->data[1]) {
+		unsigned channels = m_codecContext->channels;
+		input.reserve(channels * inFrames);
+		for (unsigned i = 0; i < inFrames; ++i) {
+			for (unsigned ch = 0; ch < channels; ++ch) {
+				data = frame->data[ch];
+				input.push_back(m_codecContext->sample_fmt == AV_SAMPLE_FMT_FLTP ?
+				  da::conv_to_s16(reinterpret_cast<float*>(data)[i]) :
+				  reinterpret_cast<int16_t*>(data)[i]
+				);
+			}
+		}
+		data = &input[0];
+	}
 	// Resample to output sample rate, then push to audio queue and increment timecode
 	std::vector<int16_t> resampled(AVCODEC_MAX_AUDIO_FRAME_SIZE);
-	int frames = audio_resample(m_resampleContext, &resampled[0], (short*)frame->data[0], frame->nb_samples);
+	int frames = audio_resample(m_resampleContext, &resampled[0], reinterpret_cast<short*>(data), inFrames);
 	resampled.resize(frames * AUDIO_CHANNELS);
 	audioQueue.push(resampled, m_position);  // May block
 	m_position += double(frames)/m_formatContext->streams[m_streamId]->codec->sample_rate;

diff --git a/game/ffmpeg.hh b/game/ffmpeg.hh
@@ -3,7 +3,7 @@
 #include "util.hh"
 #include "libda/sample.hpp"
 #include <boost/circular_buffer.hpp>
-#include <boost/ptr_container/ptr_set.hpp>
+#include <boost/ptr_container/ptr_deque.hpp>
 #include <boost/scoped_ptr.hpp>
 #include <boost/thread/condition.hpp>
 #include <boost/thread/mutex.hpp>
@@ -47,64 +47,47 @@ struct VideoFrame {
 	}
 };
 
-static bool operator<(VideoFrame const& a, VideoFrame const& b) {
-	return a.timestamp < b.timestamp;
-}
-
 /// video queue: first in first out
 class VideoFifo {
   public:
-	VideoFifo(): m_available(), m_timestamp(), m_eof() {}
+	VideoFifo(): m_timestamp(), m_eof() {}
 	/// trys to pop a VideoFrame from queue
 	bool tryPop(VideoFrame& f) {
 		boost::mutex::scoped_lock l(m_mutex);
-		if (!m_queue.empty() && m_queue.begin()->data.empty()) { m_eof = true; return false; }
-		statsUpdate();
-		if (m_available == 0) return false; // Nothing to deliver
+		if (m_queue.empty()) return false; // Nothing to deliver
+		if (m_queue.begin()->data.empty()) { m_eof = true; return false; }
 		f.swap(*m_queue.begin());
-		m_queue.erase(m_queue.begin());
+		m_queue.pop_front();
 		m_cond.notify_all();
 		m_timestamp = f.timestamp;
-		statsUpdate();
 		return true;
 	}
 	/// pushes VideoFrames to queue
 	void push(VideoFrame* f) {
 		boost::mutex::scoped_lock l(m_mutex);
 		while (m_queue.size() > m_max) m_cond.wait(l);
 		if (m_queue.empty()) m_timestamp = f->timestamp;
-		m_queue.insert(f);
-		statsUpdate();
-	}
-	/// updates stats
-	void statsUpdate() {
-		m_available = std::max(0, int(m_queue.size()) - int(m_min));
-		if (m_available == 0 && !m_queue.empty() && m_queue.rbegin()->data.empty()) m_available = m_queue.size() - 1;
+		m_queue.push_back(f);
 	}
 	/// resets video queue
 	void reset() {
 		boost::mutex::scoped_lock l(m_mutex);
 		m_queue.clear();
 		m_cond.notify_all();
-		statsUpdate();
 		m_eof = false;
 	}
 	/// returns current position
 	double position() const { return m_timestamp; }
-	/// returns m_available / m_max
-	double percentage() const { return double(m_available) / m_max; }
 	/// simple eof check
 	double eof() const { return m_eof; }
 
   private:
-	boost::ptr_set<VideoFrame> m_queue;
+	boost::ptr_deque<VideoFrame> m_queue;
 	mutable boost::mutex m_mutex;
 	boost::condition m_cond;
-	volatile unsigned m_available;
 	double m_timestamp;
 	bool m_eof;
-	static const unsigned m_min = 16; // H.264 may have 16 consecutive B frames
-	static const unsigned m_max = 50;
+	static const unsigned m_max = 20;
 };
 
 class AudioBuffer {
@@ -195,7 +178,6 @@ class AudioBuffer {
 
 // ffmpeg forward declarations
 extern "C" {
-  struct AVCodec;
   struct AVCodecContext;
   struct AVFormatContext;
   struct AVFrame;
@@ -206,8 +188,8 @@ extern "C" {
 /// ffmpeg class
 class FFmpeg {
   public:
-	/// constructor
-	FFmpeg(bool decodeVideo, bool decodeAudio, std::string const& file, unsigned int rate = 48000);
+	/// Decode file; if no rate is specified, decode video, otherwise decode audio.
+	FFmpeg(std::string const& file, unsigned int rate = 0);
 	~FFmpeg();
 	void operator()(); ///< Thread runs here, don't call directly
 	unsigned width, ///< width of video
@@ -220,8 +202,6 @@ class FFmpeg {
 	void seek(double time, bool wait = true);
 	/// duration
 	double duration() const;
-	/// return current position
-	double position() { return videoQueue.position(); /* FIXME: remove */ }
 	bool terminating() const { return m_quit; }
 
 	class eof_error: public std::exception {};
@@ -234,16 +214,14 @@ class FFmpeg {
 	std::string m_filename;
 	unsigned int m_rate;
 	volatile bool m_quit;
-	volatile bool m_running;
-	volatile bool m_eof;
 	volatile double m_seekTarget;
 	double m_position;
+	double m_duration;
 	// libav-specific variables
 	int m_streamId;
 	int m_mediaType;  // enum AVMediaType
 	AVFormatContext* m_formatContext;
 	AVCodecContext* m_codecContext;
-	AVCodec* m_codec;
 	ReSampleContext* m_resampleContext;
 	SwsContext* m_swsContext;
 	// Make sure the thread starts only after initializing everything else

diff --git a/game/video.cc b/game/video.cc
@@ -3,7 +3,7 @@
 #include "util.hh"
 #include <cmath>
 
-Video::Video(std::string const& _videoFile, double videoGap): m_mpeg(true, false, _videoFile), m_videoGap(videoGap), m_surfaceTime(), m_lastTime(), m_alpha(-0.5, 1.5) {}
+Video::Video(std::string const& _videoFile, double videoGap): m_mpeg(_videoFile), m_videoGap(videoGap), m_surfaceTime(), m_lastTime(), m_alpha(-0.5, 1.5) {}
 
 void Video::prepare(double time) {
 	time += m_videoGap;