Skip to content

Commit

Permalink
Compute and set VP9 Level if it is not already set
Browse files Browse the repository at this point in the history
The VP9 level is computed when the container is missing a codec config
or if the level is missing from the codec config.

This fixes VP9 in ISO-BMFF files generated by FFmpeg v4.0.2 or earlier,
which do not have the level set in the codec config.

Fixes #469.

Change-Id: I685bfd48be16ee6b2209da1c3173f7d6bb02b36a
  • Loading branch information
kqyang committed Sep 13, 2018
1 parent 0709db4 commit d0978b3
Show file tree
Hide file tree
Showing 18 changed files with 182 additions and 16 deletions.
Binary file not shown.
Binary file not shown.
Expand Up @@ -20,7 +20,7 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection>
<Representation id="1" bandwidth="231745" codecs="vp09.00.10.08.01.02.02.02.00" mimeType="video/mp4" sar="427:320">
<Representation id="1" bandwidth="231745" codecs="vp09.00.20.08.01.02.02.02.00" mimeType="video/mp4" sar="427:320">
<BaseURL>bear-320x240-vp9-opus-video.mp4</BaseURL>
<SegmentBase indexRange="1063-1130" timescale="1000000" presentationTimeOffset="37000">
<Initialization range="0-1062"/>
Expand Down
Binary file not shown.
Expand Up @@ -3,7 +3,7 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xlink="http://www.w3.org/1999/xlink" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" xmlns:cenc="urn:mpeg:cenc:2013" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT2.7360000610351562S">
<Period id="0">
<AdaptationSet id="0" contentType="video" width="320" height="240" frameRate="1000000/33000" subsegmentAlignment="true" par="4:3">
<Representation id="0" bandwidth="196039" codecs="vp09.00.10.08.01.02.02.02.00" mimeType="video/webm" sar="1:1">
<Representation id="0" bandwidth="196039" codecs="vp09.00.20.08.01.02.02.02.00" mimeType="video/webm" sar="1:1">
<BaseURL>bear-vp9-blockgroup-video.webm</BaseURL>
<SegmentBase indexRange="302-320" timescale="1000000">
<Initialization range="0-301"/>
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion packager/app/test/testdata/vp9-webm/output.mpd
Expand Up @@ -12,7 +12,7 @@
</Representation>
</AdaptationSet>
<AdaptationSet id="1" contentType="video" width="320" height="240" frameRate="1000000/34000" subsegmentAlignment="true" par="16:9">
<Representation id="1" bandwidth="225727" codecs="vp09.00.10.08.01.02.02.02.00" mimeType="video/webm" sar="427:320">
<Representation id="1" bandwidth="225727" codecs="vp09.00.20.08.01.02.02.02.00" mimeType="video/webm" sar="427:320">
<BaseURL>bear-320x240-vp9-opus-video.webm</BaseURL>
<SegmentBase indexRange="302-350" timescale="1000000" presentationTimeOffset="37000">
<Initialization range="0-301"/>
Expand Down
Binary file not shown.
Binary file not shown.
Expand Up @@ -6,7 +6,7 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b" cenc:default_KID="31323334-3536-3738-3930-313233343536">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection>
<Representation id="0" bandwidth="184009" codecs="vp09.00.10.08.00.02.02.02.00" mimeType="video/webm" sar="1:1">
<Representation id="0" bandwidth="184009" codecs="vp09.00.11.08.00.02.02.02.00" mimeType="video/webm" sar="1:1">
<BaseURL>bear-320x180-vp9-altref-video.webm</BaseURL>
<SegmentBase indexRange="353-371" timescale="1000000">
<Initialization range="0-352"/>
Expand Down
Binary file not shown.
Binary file not shown.
Expand Up @@ -6,7 +6,7 @@
<ContentProtection schemeIdUri="urn:uuid:1077efec-c0b2-4d02-ace3-3c1e52e2fb4b" cenc:default_KID="31323334-3536-3738-3930-313233343536">
<cenc:pssh>AAAANHBzc2gBAAAAEHfv7MCyTQKs4zweUuL7SwAAAAExMjM0NTY3ODkwMTIzNDU2AAAAAA==</cenc:pssh>
</ContentProtection>
<Representation id="0" bandwidth="184009" codecs="vp09.00.10.08.00.02.02.02.00" mimeType="video/webm" sar="1:1">
<Representation id="0" bandwidth="184009" codecs="vp09.00.11.08.00.02.02.02.00" mimeType="video/webm" sar="1:1">
<BaseURL>bear-320x180-vp9-altref-video.webm</BaseURL>
<SegmentBase indexRange="353-371" timescale="1000000">
<Initialization range="0-352"/>
Expand Down
93 changes: 93 additions & 0 deletions packager/media/codecs/vp_codec_configuration_record.cc
Expand Up @@ -52,6 +52,79 @@ void MergeField(const std::string& name,
}
}

// VP9 levels as returned by LevelFromCharacteristics(). Each enumerator's
// numeric value is the level number with the decimal point removed, e.g.
// level 2.1 is 21.
enum VP9Level {
  // Zero is the value callers treat as "level not set".
  LEVEL_UNKNOWN = 0,
  LEVEL_1 = 10,
  LEVEL_1_1 = 11,
  LEVEL_2 = 20,
  LEVEL_2_1 = 21,
  LEVEL_3 = 30,
  LEVEL_3_1 = 31,
  LEVEL_4 = 40,
  LEVEL_4_1 = 41,
  LEVEL_5 = 50,
  LEVEL_5_1 = 51,
  LEVEL_5_2 = 52,
  LEVEL_6 = 60,
  LEVEL_6_1 = 61,
  LEVEL_6_2 = 62,
  // NOTE(review): not referenced by the level lookup; presumably the largest
  // value an 8-bit level field can hold -- confirm.
  LEVEL_MAX = 255
};

// Limits that define a single VP9 level. The fields follow the order used by
// the aggregate initializers in the level table, which is populated from
// https://www.webmproject.org/vp9/levels/.
//
// Only |max_luma_sample_rate| and |max_luma_picture_size| are consulted when
// selecting a level; the remaining fields are recorded but currently ignored.
// NOTE(review): units are not stated here; presumably they match the columns
// of the levels page (e.g. bitrate and CPB size in kilobits) -- confirm.
struct VP9LevelCharacteristics {
  uint64_t max_luma_sample_rate;
  uint32_t max_luma_picture_size;
  double max_avg_bitrate;
  double max_cpb_size;
  double min_compression_ratio;
  uint8_t max_num_column_tiles;
  uint32_t min_altref_distance;
  uint8_t max_ref_frame_buffers;
};

// One row of the level table: a level together with the limits a stream must
// stay within to qualify for it.
struct VP9LevelDefinition {
  VP9Level level;
  VP9LevelCharacteristics characteristics;
};

// Returns the first level in the table (lowest level first) whose limits
// accommodate both |luma_sample_rate| and |luma_picture_size|. If no level
// can accommodate the inputs, a warning is logged and LEVEL_1 is returned.
VP9Level LevelFromCharacteristics(uint64_t luma_sample_rate,
                                  uint32_t luma_picture_size) {
  // Per-level limits, ordered from the lowest level to the highest.
  // Source: https://www.webmproject.org/vp9/levels/.
  static const VP9LevelDefinition kLevelTable[] = {
      {LEVEL_1, {829440, 36864, 200, 400, 2, 1, 4, 8}},
      {LEVEL_1_1, {2764800, 73728, 800, 1000, 2, 1, 4, 8}},
      {LEVEL_2, {4608000, 122880, 1800, 1500, 2, 1, 4, 8}},
      {LEVEL_2_1, {9216000, 245760, 3600, 2800, 2, 2, 4, 8}},
      {LEVEL_3, {20736000, 552960, 7200, 6000, 2, 4, 4, 8}},
      {LEVEL_3_1, {36864000, 983040, 12000, 10000, 2, 4, 4, 8}},
      {LEVEL_4, {83558400, 2228224, 18000, 16000, 4, 4, 4, 8}},
      {LEVEL_4_1, {160432128, 2228224, 30000, 18000, 4, 4, 5, 6}},
      {LEVEL_5, {311951360, 8912896, 60000, 36000, 6, 8, 6, 4}},
      {LEVEL_5_1, {588251136, 8912896, 120000, 46000, 8, 8, 10, 4}},
      {LEVEL_5_2, {1176502272, 8912896, 180000, 90000, 8, 8, 10, 4}},
      {LEVEL_6, {1176502272, 35651584, 180000, 90000, 8, 16, 10, 4}},
      {LEVEL_6_1, {2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4}},
      {LEVEL_6_2, {4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4}},
  };

  // Only the luma sample rate and luma picture size limits are checked. The
  // other characteristics are deliberately skipped because computing them is
  // considerably more involved, so the level chosen here may be lower than
  // the true one. If that turns out to matter, please file a bug at
  // https://github.com/google/shaka-packager/issues.
  for (const VP9LevelDefinition& candidate : kLevelTable) {
    const VP9LevelCharacteristics& limits = candidate.characteristics;
    if (luma_sample_rate > limits.max_luma_sample_rate)
      continue;
    if (luma_picture_size > limits.max_luma_picture_size)
      continue;
    return candidate.level;
  }

  // The inputs exceed the limits of every level in the table.
  LOG(WARNING) << "Cannot determine VP9 level for luma_sample_rate ("
               << luma_sample_rate << ") or luma_picture_size ("
               << luma_picture_size << "). Returning LEVEL_1.";
  return LEVEL_1;
}

} // namespace

VPCodecConfigurationRecord::VPCodecConfigurationRecord() {}
Expand Down Expand Up @@ -152,6 +225,26 @@ bool VPCodecConfigurationRecord::ParseWebM(const std::vector<uint8_t>& data) {
return true;
}

// Computes the VP9 level from the picture dimensions and the duration of one
// sample, and stores it in |level_|.
//
// |width| and |height| are multiplied to obtain the luma picture size.
// |sample_duration_seconds| is the duration of a single sample in seconds;
// a value that is not strictly positive (0, negative or NaN) is treated as
// "unknown", in which case the level is chosen from the picture size alone.
void VPCodecConfigurationRecord::SetVP9Level(uint16_t width,
                                             uint16_t height,
                                             double sample_duration_seconds) {
  // https://www.webmproject.org/vp9/levels/.

  // Widen before multiplying: two uint16_t operands are promoted to (signed)
  // int, and a product such as 65535 * 65535 does not fit in a 32-bit int,
  // which is undefined behavior. The product always fits in uint32_t.
  const uint32_t luma_picture_size = static_cast<uint32_t>(width) * height;

  // Alt-Ref frames are not taken into consideration intentionally to avoid the
  // extra complexities. It may result in a smaller luma_sample_rate than the
  // actual luma_sample_rate, leading to an incorrect level being returned.
  // If this is a problem, please file a bug to
  // https://github.com/google/shaka-packager/issues.
  const double kUnknownSampleDuration = 0.0;

  // The decision is based on luma_picture_size only if duration is unknown.
  // Durations that are not strictly positive are handled as unknown as well:
  // dividing by a negative duration yields a negative rate, and converting a
  // negative double to uint64_t is undefined behavior. The comparison is also
  // false for NaN.
  // NOTE(review): a caller that derives the duration from an unset container
  // field could plausibly pass a negative value -- confirm against callers.
  uint64_t luma_sample_rate = 0;
  if (sample_duration_seconds > kUnknownSampleDuration) {
    luma_sample_rate =
        static_cast<uint64_t>(luma_picture_size / sample_duration_seconds);
  }

  level_ = LevelFromCharacteristics(luma_sample_rate, luma_picture_size);
}

void VPCodecConfigurationRecord::WriteMP4(std::vector<uint8_t>* data) const {
BufferWriter writer;
writer.AppendInt(profile());
Expand Down
5 changes: 5 additions & 0 deletions packager/media/codecs/vp_codec_configuration_record.h
Expand Up @@ -187,6 +187,11 @@ class VPCodecConfigurationRecord {
/// @return false if there is parsing errors.
bool ParseWebM(const std::vector<uint8_t>& data);

/// Compute and set VP9 Level based on the input attributes.
/// The level is chosen from the luma picture size (@a width * @a height) and
/// the luma sample rate (luma picture size / @a sample_duration_seconds).
/// @param width is the picture width used to compute the luma picture size.
/// @param height is the picture height used to compute the luma picture size.
/// @param sample_duration_seconds is the duration of one sample in seconds.
///        Pass 0 if it is unknown; the level is then based on the luma
///        picture size only.
void SetVP9Level(uint16_t width,
                 uint16_t height,
                 double sample_duration_seconds);

/// @param data should not be null.
/// Writes VP codec configuration record to buffer using MP4 format.
void WriteMP4(std::vector<uint8_t>* data) const;
Expand Down
52 changes: 52 additions & 0 deletions packager/media/codecs/vp_codec_configuration_record_unittest.cc
Expand Up @@ -161,5 +161,57 @@ TEST(VPCodecConfigurationRecordTest, MergeChromaSubsampling) {
EXPECT_EQ(AVCHROMA_LOC_TOPLEFT, vp_config.chroma_location());
}

// Verifies that SetVP9Level() picks the expected level for each example
// resolution / frame rate combination.
TEST(VPCodecConfigurationRecordTest, SetLevel) {
  const uint8_t kLevelNotSet = 0;
  VPCodecConfigurationRecord record(0x02, kLevelNotSet, 0x08, 0x02, true, 0x03,
                                    0x04, 0x05, std::vector<uint8_t>());
  // Precondition: the record starts out without a level.
  ASSERT_EQ(kLevelNotSet, record.level());

  struct LevelExample {
    int expected_level;
    int width;
    int height;
    int frame_rate;
  };
  // The examples are copied from https://www.webmproject.org/vp9/levels/.
  const LevelExample kLevelExamples[] = {
      {10, 256, 144, 15},
      {11, 384, 192, 30},
      {20, 480, 256, 30},
      {21, 640, 384, 30},
      {30, 1080, 512, 30},
      {31, 1280, 768, 30},
      {40, 2048, 1088, 30},
      {41, 2048, 1088, 60},
      {50, 4096, 2176, 30},
      {51, 4096, 2176, 60},
      {52, 4096, 2176, 120},
      {60, 8192, 4352, 30},
      {61, 8192, 4352, 60},
      {62, 8192, 4352, 120},
  };

  for (const LevelExample& test_case : kLevelExamples) {
    // SetVP9Level() takes the duration of one frame, i.e. 1 / frame rate.
    const double frame_duration_seconds = 1.0 / test_case.frame_rate;
    record.SetVP9Level(test_case.width, test_case.height,
                       frame_duration_seconds);
    ASSERT_EQ(test_case.expected_level, record.level());
  }
}

// Verifies that SetVP9Level() falls back to a picture-size-only decision when
// the frame duration is passed as 0 (unknown).
TEST(VPCodecConfigurationRecordTest, SetLevelWithUnknownFrameDuration) {
  const uint8_t kLevelNotSet = 0;
  VPCodecConfigurationRecord record(0x02, kLevelNotSet, 0x08, 0x02, true, 0x03,
                                    0x04, 0x05, std::vector<uint8_t>());
  // Precondition: the record starts out without a level.
  ASSERT_EQ(kLevelNotSet, record.level());

  struct SizeOnlyExample {
    int expected_level;
    int width;
    int height;
  };
  // The examples are modified from https://www.webmproject.org/vp9/levels/
  // with the frame rate removed.
  const SizeOnlyExample kSizeOnlyExamples[] = {
      {10, 256, 144},
      {11, 384, 192},
      {20, 480, 256},
      {21, 640, 384},
      {30, 1080, 512},
      {31, 1280, 768},
      {40, 2048, 1088},
      {50, 4096, 2176},
      {60, 8192, 4352},
  };

  const double kUnknownFrameDuration = 0.0;
  for (const SizeOnlyExample& test_case : kSizeOnlyExamples) {
    record.SetVP9Level(test_case.width, test_case.height,
                       kUnknownFrameDuration);
    ASSERT_EQ(test_case.expected_level, record.level());
  }
}

} // namespace media
} // namespace shaka
22 changes: 15 additions & 7 deletions packager/media/formats/mp4/mp4_media_parser.cc
Expand Up @@ -504,6 +504,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
if (desc_idx >= samp_descr.video_entries.size())
desc_idx = 0;
const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
std::vector<uint8_t> codec_configuration_data =
entry.codec_configuration.data;

uint32_t coded_width = entry.width;
uint32_t coded_height = entry.height;
Expand All @@ -521,7 +523,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
switch (actual_format) {
case FOURCC_av01: {
AV1CodecConfigurationRecord av1_config;
if (!av1_config.Parse(entry.codec_configuration.data)) {
if (!av1_config.Parse(codec_configuration_data)) {
LOG(ERROR) << "Failed to parse av1c.";
return false;
}
Expand All @@ -531,7 +533,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_avc1:
case FOURCC_avc3: {
AVCDecoderConfigurationRecord avc_config;
if (!avc_config.Parse(entry.codec_configuration.data)) {
if (!avc_config.Parse(codec_configuration_data)) {
LOG(ERROR) << "Failed to parse avcc.";
return false;
}
Expand Down Expand Up @@ -568,7 +570,7 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_hev1:
case FOURCC_hvc1: {
HEVCDecoderConfigurationRecord hevc_config;
if (!hevc_config.Parse(entry.codec_configuration.data)) {
if (!hevc_config.Parse(codec_configuration_data)) {
LOG(ERROR) << "Failed to parse hevc.";
return false;
}
Expand All @@ -579,10 +581,17 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
case FOURCC_vp08:
case FOURCC_vp09: {
VPCodecConfigurationRecord vp_config;
if (!vp_config.ParseMP4(entry.codec_configuration.data)) {
if (!vp_config.ParseMP4(codec_configuration_data)) {
LOG(ERROR) << "Failed to parse vpcc.";
return false;
}
if (actual_format == FOURCC_vp09 &&
(!vp_config.is_level_set() || vp_config.level() == 0)) {
const double kUnknownSampleDuration = 0.0;
vp_config.SetVP9Level(coded_width, coded_height,
kUnknownSampleDuration);
vp_config.WriteMP4(&codec_configuration_data);
}
codec_string = vp_config.GetCodecString(video_codec);
break;
}
Expand All @@ -606,9 +615,8 @@ bool MP4MediaParser::ParseMoov(BoxReader* reader) {
std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo(
track->header.track_id, timescale, duration, video_codec,
GetH26xStreamFormat(actual_format), codec_string,
entry.codec_configuration.data.data(),
entry.codec_configuration.data.size(), coded_width, coded_height,
pixel_width, pixel_height,
codec_configuration_data.data(), codec_configuration_data.size(),
coded_width, coded_height, pixel_width, pixel_height,
0, // trick_play_factor
nalu_length_size, track->media.header.language.code, is_encrypted));

Expand Down
16 changes: 12 additions & 4 deletions packager/media/formats/webm/webm_tracks_parser.cc
Expand Up @@ -223,16 +223,24 @@ bool WebMTracksParser::OnListEnd(int id) {
}
video_default_duration_ = default_duration_;

// |vp_config_| is only useful for VP8 and VP9.
if (codec_id_ == "V_VP8" || codec_id_ == "V_VP9")
vp_config_ = video_client_.GetVpCodecConfig(codec_private_);

DCHECK(!video_stream_info_);
video_stream_info_ = video_client_.GetVideoStreamInfo(
video_track_num_, codec_id_, codec_private_,
!video_encryption_key_id_.empty());
if (!video_stream_info_)
return false;

if (codec_id_ == "V_VP8" || codec_id_ == "V_VP9") {
vp_config_ = video_client_.GetVpCodecConfig(codec_private_);
const double kNanosecondsPerSecond = 1000000000.0;
if (codec_id_ == "V_VP9" &&
(!vp_config_.is_level_set() || vp_config_.level() == 0)) {
vp_config_.SetVP9Level(
video_stream_info_->width(), video_stream_info_->height(),
video_default_duration_ / kNanosecondsPerSecond);
}
}

} else {
DLOG(INFO) << "Ignoring video track " << track_num_;
ignored_tracks_.insert(track_num_);
Expand Down

0 comments on commit d0978b3

Please sign in to comment.