Skip to content

Commit

Permalink
feat: This patch adds support for DTS:X Profile 2 audio in MP4 files. (
Browse files Browse the repository at this point in the history
…#1303)

feat: Added audio specific configuration udts box to AudioSampleEntry
for MP4 input/output. DASH tags for DTS audio as specified in ETSI TS
103 491 and ETSI TS 102 114.

Closes #1301

---------

Co-authored-by: Cosmin Stejerean <cstejerean@meta.com>
  • Loading branch information
Roy-Funderburk and cosmin committed Feb 15, 2024
1 parent f7b3986 commit 07f780d
Show file tree
Hide file tree
Showing 23 changed files with 373 additions and 3 deletions.
7 changes: 7 additions & 0 deletions packager/app/test/packager_test.py
Expand Up @@ -927,6 +927,13 @@ def testAacHe(self):
self._GetFlags(output_dash=True))
self._CheckTestResults('acc-he')

# Packages only the audio stream of bear-dtsx.mp4 with DASH output enabled,
# then checks the results under the 'dtsx-dash' name (presumably the golden
# directory testdata/dtsx-dash -- confirm against _CheckTestResults).
def testDtsx(self):
self.assertPackageSuccess(
self._GetStreams(
['audio'], test_files=['bear-dtsx.mp4']),
self._GetFlags(output_dash=True))
self._CheckTestResults('dtsx-dash')

def testVideoAudioWebVTT(self):
audio_video_streams = self._GetStreams(['audio', 'video'])
text_stream = self._GetStreams(['text'], test_files=['bear-english.vtt'])
Expand Down
Binary file not shown.
15 changes: 15 additions & 0 deletions packager/app/test/testdata/dtsx-dash/output.mpd
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Generated with https://github.com/shaka-project/shaka-packager version <tag>-<hash>-<test>-->
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 DASH-MPD.xsd" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" minBufferTime="PT2S" type="static" mediaPresentationDuration="PT3.114667S">
<Period id="0">
<AdaptationSet id="0" contentType="audio" subsegmentAlignment="true">
<Representation id="0" bandwidth="227665" codecs="dtsx" mimeType="audio/mp4" audioSamplingRate="48000">
<AudioChannelConfiguration schemeIdUri="tag:dts.com,2018:uhd:audio_channel_configuration" value="0000003F"/>
<BaseURL>bear-dtsx-audio.mp4</BaseURL>
<SegmentBase indexRange="742-821" timescale="48000">
<Initialization range="0-741"/>
</SegmentBase>
</Representation>
</AdaptationSet>
</Period>
</MPD>
2 changes: 2 additions & 0 deletions packager/media/base/audio_stream_info.cc
Expand Up @@ -150,6 +150,8 @@ std::string AudioStreamInfo::GetCodecString(Codec codec,
return "dts-";
case kCodecDTSP:
return "dts+";
case kCodecDTSX:
return "dtsx";
case kCodecEAC3:
return "ec-3";
case kCodecAC4:
Expand Down
6 changes: 4 additions & 2 deletions packager/media/base/fourccs.h
Expand Up @@ -58,6 +58,7 @@ enum FourCC : uint32_t {
FOURCC_dtsl = 0x6474736c,
FOURCC_dtsm = 0x6474732d, // "dts-"
FOURCC_dtsp = 0x6474732b, // "dts+"
FOURCC_dtsx = 0x64747378, // "dtsx"
FOURCC_dvcC = 0x64766343,
FOURCC_dvh1 = 0x64766831,
FOURCC_dvhe = 0x64766865,
Expand Down Expand Up @@ -151,8 +152,9 @@ enum FourCC : uint32_t {
FOURCC_trex = 0x74726578,
FOURCC_trun = 0x7472756e,
FOURCC_udta = 0x75647461,
FOURCC_url = 0x75726c20, // "url "
FOURCC_urn = 0x75726e20, // "urn "
FOURCC_udts = 0x75647473, // "udts"
FOURCC_url = 0x75726c20, // "url "
FOURCC_urn = 0x75726e20, // "urn "
FOURCC_uuid = 0x75756964,
FOURCC_vide = 0x76696465,
FOURCC_vlab = 0x766c6162,
Expand Down
1 change: 1 addition & 0 deletions packager/media/base/stream_info.h
Expand Up @@ -49,6 +49,7 @@ enum Codec {
kCodecDTSL,
kCodecDTSM,
kCodecDTSP,
kCodecDTSX,
kCodecEAC3,
kCodecFlac,
kCodecOpus,
Expand Down
1 change: 1 addition & 0 deletions packager/media/codecs/CMakeLists.txt
Expand Up @@ -12,6 +12,7 @@ add_library(media_codecs STATIC
avc_decoder_configuration_record.cc
decoder_configuration_record.cc
dovi_decoder_configuration_record.cc
dts_audio_specific_config.cc
ec3_audio_util.cc
ac4_audio_util.cc
es_descriptor.cc
Expand Down
28 changes: 28 additions & 0 deletions packager/media/codecs/dts_audio_specific_config.cc
@@ -0,0 +1,28 @@
// Copyright (c) 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <packager/media/codecs/dts_audio_specific_config.h>

#include <packager/media/base/bit_reader.h>
#include <packager/media/base/rcheck.h>

namespace shaka {
namespace media {

// Extracts the ChannelMask field from the bytes of a DTS-UHD Specific Box
// ('udts'), ETSI TS 103 491 V1.2.1 Table B-2.
//
// |udts| must begin at DecoderProfileCode; the fields are read in this order:
//   DecoderProfileCode    (6 bits)
//   FrameDurationCode     (2 bits)
//   MaxPayloadCode        (3 bits)
//   NumPresentationsCode  (5 bits)
//   ChannelMask           (32 bits)
//
// On success the 32-bit ChannelMask is stored in |mask| and true is returned.
// Returns false if |udts| is too short to contain the ChannelMask.
bool GetDTSXChannelMask(const std::vector<uint8_t>& udts, uint32_t& mask) {
  // 16 bits of leading code fields plus the 32-bit mask: 6 bytes in total.
  constexpr size_t kMinUdtsSize = (16 + 32) / 8;

  // Reject short (including empty) input before touching the bytes, so that
  // BitReader is never constructed over an empty vector, whose data() pointer
  // is allowed to be null.
  RCHECK(udts.size() >= kMinUdtsSize);

  BitReader bit_reader(udts.data(), udts.size());
  // Skip DecoderProfileCode, FrameDurationCode, MaxPayloadCode and
  // NumPresentationsCode (6 + 2 + 3 + 5 bits).
  RCHECK(bit_reader.SkipBits(16));
  RCHECK(bit_reader.ReadBits(32, &mask));
  return true;
}

} // namespace media
} // namespace shaka
24 changes: 24 additions & 0 deletions packager/media/codecs/dts_audio_specific_config.h
@@ -0,0 +1,24 @@
// Copyright (c) 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef PACKAGER_MEDIA_CODECS_DTS_AUDIO_SPECIFIC_CONFIG_H_
#define PACKAGER_MEDIA_CODECS_DTS_AUDIO_SPECIFIC_CONFIG_H_

#include <stddef.h>
#include <stdint.h>

#include <vector>

namespace shaka {
namespace media {

class BitReader;

// Extracts the 32-bit ChannelMask from the DTS-UHD Specific Box ('udts')
// bytes in |udts|. The mask is read from the 32 bits that follow the first
// 16 bits of |udts| and is stored in |mask|.
// Returns true on success, false if the bytes could not be parsed (for
// example when |udts| is too short).
bool GetDTSXChannelMask(const std::vector<uint8_t>& udts, uint32_t& mask);

} // namespace media
} // namespace shaka

#endif // PACKAGER_MEDIA_CODECS_DTS_AUDIO_SPECIFIC_CONFIG_H_
37 changes: 37 additions & 0 deletions packager/media/codecs/dts_audio_specific_config_unittest.cc
@@ -0,0 +1,37 @@
// Copyright 2023 Google Inc. All rights reserved.
//
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include <gtest/gtest.h>

#include "packager/media/codecs/dts_audio_specific_config.h"

namespace shaka {
namespace media {

// A payload whose ChannelMask bytes are 00 00 00 3F must yield mask 0x3F.
TEST(DTSAudioSpecificConfigTest, BasicProfileTest) {
  const std::vector<uint8_t> data = {0x01, 0x20, 0x00, 0x00,
                                     0x00, 0x3F, 0x80, 0x00};
  // Start from a known value so a parser that reports success without
  // writing the mask is caught instead of reading an indeterminate value.
  uint32_t mask = 0;
  // Fatal on failure: comparing the mask is meaningless if parsing failed.
  ASSERT_TRUE(GetDTSXChannelMask(data, mask));
  EXPECT_EQ(0x3Fu, mask);
}

// All four ChannelMask bytes must be assembled most-significant byte first:
// 12 34 56 78 -> 0x12345678.
TEST(DTSAudioSpecificConfigTest, ChannelMaskBytes) {
  const std::vector<uint8_t> data = {0x01, 0x20, 0x12, 0x34,
                                     0x56, 0x78, 0x80, 0x00};
  // Start from a known value so a parser that reports success without
  // writing the mask is caught instead of reading an indeterminate value.
  uint32_t mask = 0;
  // Fatal on failure: comparing the mask is meaningless if parsing failed.
  ASSERT_TRUE(GetDTSXChannelMask(data, mask));
  EXPECT_EQ(0x12345678u, mask);
}

// Five bytes leave only 24 bits after the 16 leading bits, which is not
// enough for the 32-bit ChannelMask, so parsing must fail.
TEST(DTSAudioSpecificConfigTest, Truncated) {
  const std::vector<uint8_t> data = {0x01, 0x20, 0x00, 0x00, 0x00};
  uint32_t mask;
  EXPECT_FALSE(GetDTSXChannelMask(data, mask));
}

} // namespace media
} // namespace shaka
11 changes: 11 additions & 0 deletions packager/media/event/muxer_listener_internal.cc
Expand Up @@ -20,6 +20,7 @@
#include <packager/media/base/text_stream_info.h>
#include <packager/media/base/video_stream_info.h>
#include <packager/media/codecs/ac4_audio_util.h>
#include <packager/media/codecs/dts_audio_specific_config.h>
#include <packager/media/codecs/ec3_audio_util.h>
#include <packager/mpd/base/media_info.pb.h>
#include <packager/utils/bytes_to_string_view.h>
Expand Down Expand Up @@ -165,6 +166,16 @@ void AddAudioInfo(const AudioStreamInfo* audio_stream_info,
codec_data->set_ac4_ims_flag(ac4_ims_flag);
codec_data->set_ac4_cbi_flag(ac4_cbi_flag);
}

if (audio_stream_info->codec() == kCodecDTSX) {
auto* codec_data = audio_info->mutable_codec_specific_data();
uint32_t channel_mask;
if (!GetDTSXChannelMask(codec_config, channel_mask)) {
LOG(ERROR) << "Failed to parse DTSX channel mask.";
return;
}
codec_data->set_channel_mask(channel_mask);
}
}

void AddTextInfo(const TextStreamInfo& text_stream_info,
Expand Down
18 changes: 18 additions & 0 deletions packager/media/event/muxer_listener_internal_unittest.cc
Expand Up @@ -72,6 +72,24 @@ TEST_F(MuxerListenerInternalVideoStreamTest, TransferCharacteristics) {
EXPECT_EQ(18u, media_info.video_info().transfer_characteristics());
}

// Fixture for the audio-stream MediaInfo tests below. It adds nothing of its
// own and only gives those tests a distinct suite name on top of
// MuxerListenerInternalTest.
class MuxerListenerInternalAudioStreamTest : public MuxerListenerInternalTest {
};

// The channel mask encoded in a DTS:X codec config should end up in the
// audio codec-specific data of the generated MediaInfo.
TEST_F(MuxerListenerInternalAudioStreamTest, DTSX) {
  MediaInfo media_info;
  std::shared_ptr<AudioStreamInfo> audio_info = CreateAudioStreamInfo(
      GetAudioStreamInfoParams(kCodecDTSX, "dtsx",
                               {0x01, 0x20, 0x00, 0x00, 0x00, 0x3F, 0x80,
                                0x00}));  // Channel mask = 3F
  ASSERT_TRUE(GenerateMediaInfo(MuxerOptions(), *audio_info,
                                kReferenceTimeScale,
                                MuxerListener::kContainerMp4, &media_info));

  // Inspect the result through the const accessors. The mutable_*() accessors
  // would create any missing sub-message and so modify the object under test.
  ASSERT_TRUE(media_info.has_audio_info());
  const auto& codec_data = media_info.audio_info().codec_specific_data();
  EXPECT_EQ(0x3Fu, codec_data.channel_mask());
}

} // namespace internal
} // namespace media
} // namespace shaka
48 changes: 48 additions & 0 deletions packager/media/event/muxer_listener_test_helper.cc
Expand Up @@ -104,5 +104,53 @@ std::vector<ProtectionSystemSpecificInfo> GetDefaultKeySystemInfo() {
std::end(kExpectedDefaultPsshBox) - 1}}};
}

// Out-of-line definitions of the constructor and destructor declared in the
// header; neither does any work beyond what the compiler generates.
AudioStreamInfoParameters::AudioStreamInfoParameters() = default;
AudioStreamInfoParameters::~AudioStreamInfoParameters() = default;

// Builds an AudioStreamInfo by forwarding every field of |param| to the
// AudioStreamInfo constructor. The codec config is passed as a pointer/size
// pair taken from param.codec_config.
std::shared_ptr<AudioStreamInfo> CreateAudioStreamInfo(
    const AudioStreamInfoParameters& param) {
  const std::vector<uint8_t>& config = param.codec_config;
  auto stream_info = std::make_shared<AudioStreamInfo>(
      param.track_id, param.time_scale, param.duration, param.codec,
      param.codec_string, config.data(), config.size(), param.sample_bits,
      param.num_channels, param.sampling_frequency, param.seek_preroll_ns,
      param.codec_delay_ns, param.max_bitrate, param.avg_bitrate,
      param.language, param.is_encrypted);
  return stream_info;
}

// Returns AudioStreamInfoParameters for tests. The codec, codec string and
// codec config come from the caller; every other field is set to a fixed
// test default.
AudioStreamInfoParameters GetAudioStreamInfoParams(
    Codec codec,
    const char* codec_string,
    const std::vector<uint8_t>& codec_config) {
  AudioStreamInfoParameters result;

  // Caller-selected fields.
  result.codec = codec;
  result.codec_string = codec_string;
  result.codec_config = codec_config;

  // Fixed defaults shared by every caller.
  result.track_id = 0;
  result.time_scale = 10;
  result.duration = 200;
  result.language = "und";  // Undefined language.
  result.sample_bits = 16;
  result.num_channels = 6;
  result.sampling_frequency = 48000;
  result.seek_preroll_ns = 0;
  result.codec_delay_ns = 0;
  result.max_bitrate = 0;
  result.avg_bitrate = 0;
  result.is_encrypted = false;

  return result;
}

} // namespace media
} // namespace shaka
34 changes: 34 additions & 0 deletions packager/media/event/muxer_listener_test_helper.h
Expand Up @@ -10,6 +10,7 @@
#include <cstdint>
#include <vector>

#include <packager/media/base/audio_stream_info.h>
#include <packager/media/base/key_source.h>
#include <packager/media/base/muxer_options.h>
#include <packager/media/base/stream_info.h>
Expand Down Expand Up @@ -95,6 +96,29 @@ struct VideoStreamInfoParameters {
bool is_encrypted;
};

// Struct that gets passed to CreateAudioStreamInfo() to create an
// AudioStreamInfo instance. Useful for generating multiple AudioStreamInfo
// with slightly different parameters. Each field is forwarded unchanged to
// the AudioStreamInfo constructor.
struct AudioStreamInfoParameters {
  AudioStreamInfoParameters();
  ~AudioStreamInfoParameters();

  // Every scalar member has a default initializer so that a
  // default-constructed instance never holds indeterminate values; callers
  // are still expected to assign the fields they care about.
  int track_id = 0;
  int32_t time_scale = 0;
  int64_t duration = 0;
  Codec codec{};  // Zero value of the Codec enum.
  std::string codec_string;
  std::vector<uint8_t> codec_config;
  uint8_t sample_bits = 0;
  uint8_t num_channels = 0;
  uint32_t sampling_frequency = 0;
  uint64_t seek_preroll_ns = 0;
  uint64_t codec_delay_ns = 0;
  uint32_t max_bitrate = 0;
  uint32_t avg_bitrate = 0;
  std::string language;
  bool is_encrypted = false;
};

struct OnNewSegmentParameters {
std::string file_name;
int64_t start_time;
Expand All @@ -115,6 +139,16 @@ std::shared_ptr<VideoStreamInfo> CreateVideoStreamInfo(
// Returns the "default" VideoStreamInfoParameters for testing.
VideoStreamInfoParameters GetDefaultVideoStreamInfoParams();

// Creates an AudioStreamInfo instance from |param|. Every field of |param| is
// forwarded to the AudioStreamInfo constructor.
std::shared_ptr<AudioStreamInfo> CreateAudioStreamInfo(
const AudioStreamInfoParameters& param);

// Returns AudioStreamInfoParameters for testing. |codec|, |codec_string| and
// |codec_config| are stored as given; all remaining fields are set to fixed
// test defaults (for example 6 channels, 48000 Hz sampling frequency,
// language "und", not encrypted).
AudioStreamInfoParameters GetAudioStreamInfoParams(
Codec codec,
const char* codec_string,
const std::vector<uint8_t>& codec_config);

// Returns the "default" values for OnMediaEnd().
OnMediaEndParameters GetDefaultOnMediaEndParams();

Expand Down
24 changes: 23 additions & 1 deletion packager/media/formats/mp4/box_definitions.cc
Expand Up @@ -1811,6 +1811,27 @@ size_t DTSSpecific::ComputeSizeInternal() {
sizeof(kDdtsExtraData);
}

// UDTSSpecific needs no custom construction or destruction logic.
UDTSSpecific::UDTSSpecific() = default;
UDTSSpecific::~UDTSSpecific() = default;

// Reports the four-character code of this box: 'udts'.
FourCC UDTSSpecific::BoxType() const {
return FOURCC_udts;
}

// Reads or writes the box header followed by the payload bytes in |data|.
// When reading, the payload length is whatever buffer->BytesLeft() reports
// once the header step has run; when writing, it is data.size().
// Returns false if either step fails.
bool UDTSSpecific::ReadWriteInternal(BoxBuffer* buffer) {
// Both steps stay in one && chain on purpose: && evaluates its left operand
// first, so BytesLeft() is only queried after the header has been processed.
RCHECK(ReadWriteHeaderInternal(buffer) &&
buffer->ReadWriteVector(
&data, buffer->Reading() ? buffer->BytesLeft() : data.size()));
return true;
}

// Returns the size of this box: header plus payload, or 0 when there is no
// payload.
size_t UDTSSpecific::ComputeSizeInternal() {
  // The box is optional; an empty payload means it was never initialized, so
  // it is skipped by reporting a size of 0.
  const bool has_payload = !data.empty();
  return has_payload ? HeaderSize() + data.size() : 0;
}

AC3Specific::AC3Specific() = default;
AC3Specific::~AC3Specific() = default;

Expand Down Expand Up @@ -1983,6 +2004,7 @@ bool AudioSampleEntry::ReadWriteInternal(BoxBuffer* buffer) {

RCHECK(buffer->TryReadWriteChild(&esds));
RCHECK(buffer->TryReadWriteChild(&ddts));
RCHECK(buffer->TryReadWriteChild(&udts));
RCHECK(buffer->TryReadWriteChild(&dac3));
RCHECK(buffer->TryReadWriteChild(&dec3));
RCHECK(buffer->TryReadWriteChild(&dac4));
Expand Down Expand Up @@ -2014,7 +2036,7 @@ size_t AudioSampleEntry::ComputeSizeInternal() {
sizeof(samplesize) + sizeof(samplerate) + sinf.ComputeSize() +
esds.ComputeSize() + ddts.ComputeSize() + dac3.ComputeSize() +
dec3.ComputeSize() + dops.ComputeSize() + dfla.ComputeSize() +
dac4.ComputeSize() + mhac.ComputeSize() +
dac4.ComputeSize() + mhac.ComputeSize() + udts.ComputeSize() +
// Reserved and predefined bytes.
6 + 8 + // 6 + 8 bytes reserved.
4; // 4 bytes predefined.
Expand Down
7 changes: 7 additions & 0 deletions packager/media/formats/mp4/box_definitions.h
Expand Up @@ -334,6 +334,12 @@ struct DTSSpecific : Box {
std::vector<uint8_t> extra_data;
};

// Box with FourCC 'udts'. The payload is not interpreted here; it is kept as
// raw bytes.
struct UDTSSpecific : Box {
DECLARE_BOX_METHODS(UDTSSpecific);

// Raw payload bytes that follow the box header. When empty, the box reports
// a size of 0 (it is treated as optional and skipped).
std::vector<uint8_t> data;
};

struct AC3Specific : Box {
DECLARE_BOX_METHODS(AC3Specific);

Expand Down Expand Up @@ -396,6 +402,7 @@ struct AudioSampleEntry : Box {

ElementaryStreamDescriptor esds;
DTSSpecific ddts;
UDTSSpecific udts;
AC3Specific dac3;
EC3Specific dec3;
AC4Specific dac4;
Expand Down

0 comments on commit 07f780d

Please sign in to comment.