Skip to content

Commit

Permalink
arib_captionをtimed_id3に変換する機能を実装。 ( #335 )
Browse files Browse the repository at this point in the history
  • Loading branch information
rigaya committed May 22, 2021
1 parent e660c11 commit f46c762
Show file tree
Hide file tree
Showing 9 changed files with 140 additions and 21 deletions.
38 changes: 32 additions & 6 deletions NVEncCore/rgy_cmd.cpp
Expand Up @@ -2883,7 +2883,7 @@ int parse_one_input_option(const TCHAR *option_name, const TCHAR *strInput[], in
}

int parse_one_audio_param(AudioSelect& chSel, const tstring& str, const TCHAR *option_name) {
const auto paramList = std::vector<std::string>{ "codec", "bitrate", "samplerate", "profile", "filter", "enc_prm", "copy", "disposition", "delay", "metadata" };
const auto paramList = std::vector<std::string>{ "codec", "bitrate", "samplerate", "profile", "filter", "enc_prm", "copy", "disposition", "delay", "metadata", "select-codec" };
for (const auto &param : split(str, _T(";"))) {
auto pos = param.find_first_of(_T("="));
if (pos != std::string::npos) {
Expand Down Expand Up @@ -2947,7 +2947,7 @@ int parse_one_audio_param(AudioSelect& chSel, const tstring& str, const TCHAR *o
}

int parse_one_subtitle_param(SubtitleSelect& chSel, const tstring& str, const TCHAR *option_name) {
const auto paramList = std::vector<std::string>{ "codec", "metadata", "enc_prm", "copy", "disposition" };
const auto paramList = std::vector<std::string>{ "codec", "metadata", "enc_prm", "copy", "disposition", "select-codec" };
for (const auto &param : split(str, _T(";"))) {
auto pos = param.find_first_of(_T("="));
if (pos != std::string::npos) {
Expand Down Expand Up @@ -3318,6 +3318,7 @@ int parse_one_common_option(const TCHAR *option_name, const TCHAR *strInput[], i
pAudioSelect = common->ppAudioSelectList[audioIdx];
}
pAudioSelect->lang = lang;
pAudioSelect->selectCodec = selectCodec;
func_set(pAudioSelect, trackId, ptr);
if (trackId == 0) {
for (int itrack = 0; itrack < common->nAudioSelectCount; itrack++) {
Expand Down Expand Up @@ -3384,6 +3385,7 @@ int parse_one_common_option(const TCHAR *option_name, const TCHAR *strInput[], i
pSubSelect = common->ppSubtitleSelectList[subIdx];
}
pSubSelect->lang = lang;
pSubSelect->selectCodec = selectCodec;
func_set(pSubSelect, trackId, ptr);
if (trackId == 0) {
for (int itrack = 0; itrack < common->nSubtitleSelectCount; itrack++) {
Expand Down Expand Up @@ -3450,6 +3452,7 @@ int parse_one_common_option(const TCHAR *option_name, const TCHAR *strInput[], i
pSelect = common->ppDataSelectList[dataIdx];
}
pSelect->lang = lang;
pSelect->selectCodec = selectCodec;
func_set(pSelect, trackId, ptr);
if (trackId == 0) {
for (int itrack = 0; itrack < common->nDataSelectCount; itrack++) {
Expand Down Expand Up @@ -3764,7 +3767,7 @@ int parse_one_common_option(const TCHAR *option_name, const TCHAR *strInput[], i
auto track = std::make_pair(0, "");
trackSet[track].trackID = iTrack;
trackSet[track].encCodec = RGY_AVCODEC_COPY;
trackSet[track].asdata = true;
trackSet[track].asdata = RGYSubAsData::AsData;
} else if (rgy_lang_exist(tchar_to_string(str))) {
auto track = std::make_pair(TRACK_SELECT_BY_LANG, tchar_to_string(str));
trackSet[track].trackID = TRACK_SELECT_BY_LANG;
Expand All @@ -3785,7 +3788,10 @@ int parse_one_common_option(const TCHAR *option_name, const TCHAR *strInput[], i
trackSet[track].encCodec = RGY_AVCODEC_COPY;
auto options = str.find(_T('?'));
if (str.substr(options+1) == _T("asdata")) {
trackSet[track].asdata = true;
trackSet[track].asdata = RGYSubAsData::AsData;
} else {
print_cmd_error_invalid_value(option_name, strInput[i+1]);
return 1;
}
}
}
Expand Down Expand Up @@ -3827,8 +3833,28 @@ int parse_one_common_option(const TCHAR *option_name, const TCHAR *strInput[], i
auto delimEnc = prm.find(_T(":"));
auto delimDec = prm.find(_T("#"));
pSubSelect->encCodec = prm.substr(0, std::min(delimEnc, delimDec));
auto codec_desc = avcodec_descriptor_get_by_name(tchar_to_string(pSubSelect->encCodec).c_str());
if (codec_desc && codec_desc->id == AV_CODEC_ID_TIMED_ID3) {
pSubSelect->asdata = RGYSubAsData::AsTimedID3;
}
if (delimEnc != tstring::npos) {
pSubSelect->encCodecPrm = prm.substr(delimEnc + 1, (delimEnc < delimDec) ? delimDec - delimEnc - 1 : tstring::npos);
auto prmstring = prm.substr(delimEnc + 1, (delimEnc < delimDec) ? delimDec - delimEnc - 1 : tstring::npos);
if (pSubSelect->asdata == RGYSubAsData::AsTimedID3) {
auto prmlists = split(prmstring, _T(","));
prmstring.clear();
for (auto& prm_kv : prmlists) {
auto key_val = split(prm_kv, _T("="));
if (key_val.size() == 2 && key_val[0] == _T("handler")) {
pSubSelect->datahandler = key_val[1];
} else {
if (prmstring.length() > 0) {
prmstring += _T(",");
}
prmstring += prm_kv;
}
}
}
pSubSelect->encCodecPrm = prmstring;
}
if (delimDec != tstring::npos) {
pSubSelect->decCodecPrm = prm.substr(delimDec + 1, (delimDec < delimEnc) ? delimEnc - delimDec - 1 : tstring::npos);
Expand Down Expand Up @@ -5223,7 +5249,7 @@ tstring gen_cmd(const RGYParamCommon *param, const RGYParamCommon *defaultPrm, b
tmp.str(tstring());
for (int i = 0; i < param->nSubtitleSelectCount; i++) {
tmp << _T(",") << param->ppSubtitleSelectList[i]->trackID;
if (param->ppSubtitleSelectList[i]->asdata) {
if (param->ppSubtitleSelectList[i]->asdata == RGYSubAsData::AsData) {
tmp << _T("?asdata");
}
}
Expand Down
10 changes: 9 additions & 1 deletion NVEncCore/rgy_input_avcodec.cpp
Expand Up @@ -2330,9 +2330,17 @@ int RGYInputAvcodec::getSample(AVPacket *pkt, bool bTreatFirstPacketAsKeyframe)
//trimからわかるフレーム数の上限値よりfixedNumがある程度の量の処理を進めたら読み込みを打ち切る
&& m_Demux.frames.fixedNum() - TRIM_OVERREAD_FRAMES < getVideoTrimMaxFramIdx()) {
if (m_fpPacketList) {
const auto codecID = m_Demux.format.formatCtx->streams[pkt->stream_index]->codecpar->codec_id;
fprintf(m_fpPacketList.get(), "stream %2d, %12s, pts, %s\n",
pkt->stream_index, avcodec_get_name(m_Demux.format.formatCtx->streams[pkt->stream_index]->codecpar->codec_id),
pkt->stream_index, avcodec_get_name(codecID),
pkt->pts == AV_NOPTS_VALUE ? "Unknown" : strsprintf("%lld", pkt->pts).c_str());
if (false
&& (codecID == AV_CODEC_ID_ARIB_CAPTION
|| codecID == AV_CODEC_ID_TIMED_ID3)) {
FILE *fp = fopen(strsprintf("debug_%lld_%s.dat", pkt->pts, avcodec_get_name(codecID)).c_str(), "wb");
fwrite(pkt->data, 1, pkt->size, fp);
fclose(fp);
}
}
if (pkt->stream_index == m_Demux.video.index) {
if (pkt->flags & AV_PKT_FLAG_CORRUPT) {
Expand Down
1 change: 1 addition & 0 deletions NVEncCore/rgy_osdep.h
Expand Up @@ -139,6 +139,7 @@ static inline int _vsprintf_s(char *buffer, size_t size, const char *format, va_
#define _strnicmp strncasecmp
#define stricmp strcasecmp
#define _stricmp stricmp
#define _strdup strdup

static short _InterlockedIncrement16(volatile short *pVariable) {
return __sync_add_and_fetch((volatile short*)pVariable, 1);
Expand Down
2 changes: 2 additions & 0 deletions NVEncCore/rgy_output.cpp
Expand Up @@ -862,6 +862,7 @@ RGY_ERR initWriters(
prm.encodeCodec = pSubtitleSelect->encCodec;
prm.encodeCodecPrm = pSubtitleSelect->encCodecPrm;
prm.asdata = pSubtitleSelect->asdata;
prm.datahandler = pSubtitleSelect->datahandler;
prm.bsf = pSubtitleSelect->bsf;
prm.disposition = pSubtitleSelect->disposition;
prm.metadata = pSubtitleSelect->metadata;
Expand Down Expand Up @@ -983,6 +984,7 @@ RGY_ERR initWriters(
prm.encodeCodec = pSubtitleSelect->encCodec;
prm.encodeCodecPrm = pSubtitleSelect->encCodecPrm;
prm.asdata = pSubtitleSelect->asdata;
prm.datahandler = pSubtitleSelect->datahandler;
prm.disposition = pSubtitleSelect->disposition;
prm.metadata = pSubtitleSelect->metadata;
}
Expand Down
80 changes: 73 additions & 7 deletions NVEncCore/rgy_output_avcodec.cpp
Expand Up @@ -88,6 +88,9 @@ void RGYOutputAvcodec::CloseOther(AVMuxOther *muxOther) {
if (muxOther->bsfc) {
av_bsf_free(&muxOther->bsfc);
}
if (muxOther->datahandler) {
free(muxOther->datahandler);
}

memset(muxOther, 0, sizeof(muxOther[0]));
AddMessage(RGY_LOG_DEBUG, _T("Closed other.\n"));
Expand Down Expand Up @@ -1398,9 +1401,8 @@ RGY_ERR RGYOutputAvcodec::InitAudio(AVMuxAudio *muxAudio, AVOutputStreamPrm *inp
}

RGY_ERR RGYOutputAvcodec::InitOther(AVMuxOther *muxSub, AVOutputStreamPrm *inputStream, bool streamDispositionSet) {
const auto mediaType = (inputStream->asdata) ? AVMEDIA_TYPE_UNKNOWN : trackMediaType(inputStream->src.trackId);
const auto mediaTypeStr = char_to_tstring(av_get_media_type_string(mediaType));
AddMessage(RGY_LOG_DEBUG, _T("start initializing %s ouput...\n"), mediaTypeStr.c_str());
auto mediaType = (inputStream->asdata == RGYSubAsData::AsData) ? AVMEDIA_TYPE_UNKNOWN : trackMediaType(inputStream->src.trackId);
AddMessage(RGY_LOG_DEBUG, _T("start initializing %s ouput...\n"), char_to_tstring(av_get_media_type_string(mediaType)).c_str());

AVCodecID codecId = (inputStream->src.stream)
? inputStream->src.stream->codecpar->codec_id
Expand All @@ -1413,7 +1415,16 @@ RGY_ERR RGYOutputAvcodec::InitOther(AVMuxOther *muxSub, AVOutputStreamPrm *input
}
}

if (mediaType == AVMEDIA_TYPE_UNKNOWN) {
if (inputStream->asdata == RGYSubAsData::AsTimedID3
&& inputStream->src.stream != nullptr
&& inputStream->src.stream->codecpar->codec_id == AV_CODEC_ID_ARIB_CAPTION) {
mediaType = AVMEDIA_TYPE_DATA;
codecId = AV_CODEC_ID_TIMED_ID3;
if (inputStream->datahandler.length() == 0) {
AddMessage(RGY_LOG_ERROR, _T("handler not set for timed_id3 output.\n"));
return RGY_ERR_INVALID_PARAM;
}
} else if (mediaType == AVMEDIA_TYPE_UNKNOWN) {
codecId = AV_CODEC_ID_NONE;
} else if (!avcodecIsCopy(inputStream->encodeCodec)) {
auto codec = avcodec_find_decoder_by_name(tchar_to_string(inputStream->encodeCodec).c_str());
Expand Down Expand Up @@ -1476,16 +1487,18 @@ RGY_ERR RGYOutputAvcodec::InitOther(AVMuxOther *muxSub, AVOutputStreamPrm *input
srcCodecParam->codec_id = codec->id;
} else {
avcodec_parameters_copy(srcCodecParam.get(), inputStream->src.stream->codecpar);

if (nullptr == (muxSub->streamOut = avformat_new_stream(m_Mux.format.formatCtx, avcodec_find_decoder(codecId)))) {
AddMessage(RGY_LOG_ERROR, _T("failed to create new stream for subtitle.\n"));
return RGY_ERR_NULL_PTR;
}
AddMessage(RGY_LOG_DEBUG, _T("output stream index %d, pkt_timebase %d/%d, trackId %d\n"),
inputStream->src.index, inputStream->src.stream->time_base.num, inputStream->src.stream->time_base.den, trackID(inputStream->src.trackId));
}
if (inputStream->asdata) {
if (inputStream->asdata == RGYSubAsData::AsData) {
srcCodecParam->codec_type = AVMEDIA_TYPE_UNKNOWN;
} else if (inputStream->asdata == RGYSubAsData::AsTimedID3) {
srcCodecParam->codec_type = AVMEDIA_TYPE_DATA;
srcCodecParam->codec_id = AV_CODEC_ID_TIMED_ID3;
} else if (mediaType == AVMEDIA_TYPE_DATA) {
//なにもしない
} else if (mediaType == AVMEDIA_TYPE_ATTACHMENT) {
Expand Down Expand Up @@ -1571,6 +1584,8 @@ RGY_ERR RGYOutputAvcodec::InitOther(AVMuxOther *muxSub, AVOutputStreamPrm *input
muxSub->streamIndexIn = inputStream->src.index;
muxSub->streamIn = inputStream->src.stream;
muxSub->streamInTimebase = inputStream->src.timebase;
muxSub->asdata = inputStream->asdata;
muxSub->datahandler = _strdup(tchar_to_string(inputStream->datahandler, CODE_PAGE_UTF8).c_str());

if (muxSub->outCodecEncodeCtx) {
avcodec_parameters_from_context(srcCodecParam.get(), muxSub->outCodecEncodeCtx);
Expand Down Expand Up @@ -2993,8 +3008,57 @@ RGY_ERR RGYOutputAvcodec::SubtitleTranscode(const AVMuxOther *muxSub, AVPacket *
return (m_Mux.format.streamError) ? RGY_ERR_UNKNOWN : RGY_ERR_NONE;
}

// Rewrites pkt in place so that its payload becomes an ID3 tag (timed_id3) that carries
// the original packet data inside a single PRIV frame.
// Implemented with reference to node-arib-subtitle-timedmetadater.
//
// Bytes written into pkt, in order:
//   tag header : 'I' 'D' '3' 0x04 0x00 0x00, then the PRIV frame size as 4 x 7-bit bytes
//   PRIV frame : "PRIV", payload size as 4 x 7-bit bytes, two zero flag bytes,
//                handler string (pMuxOther->datahandler), 0x00, original packet data
//   terminator : 0xff
//
// pkt       : packet to convert; its buffer is resized to fit the converted data.
// pMuxOther : stream info; datahandler must be a valid NUL-terminated string.
// Returns RGY_ERR_NONE on success, RGY_ERR_NULL_PTR when an argument/handler is missing
// or the packet buffer could not be resized, RGY_ERR_INVALID_PARAM when the data is too
// large to be described by a 28-bit size field.
RGY_ERR RGYOutputAvcodec::ConvertPacketToTimedID3(AVPacket *pkt, const AVMuxOther *pMuxOther) {
    if (pkt == nullptr || pMuxOther == nullptr || pMuxOther->datahandler == nullptr) {
        AddMessage(RGY_LOG_ERROR, _T("invalid packet or handler for timed_id3 conversion.\n"));
        return RGY_ERR_NULL_PTR;
    }

    // Largest value that fits into the 4 x 7-bit size field written by add_data_size.
    static const size_t SIZE_FIELD_MAX = 0x0FFFFFFF;

    // Appends "length" as four bytes holding 7 bits each (most significant group first).
    auto add_data_size = [](std::vector<uint8_t>& buf, const uint32_t length) {
        const auto original_size = buf.size();
        buf.resize(original_size + 4);
        uint8_t *ptr = buf.data() + original_size;
        ptr[0] = (uint8_t)((length & 0xFE00000) >> 21);
        ptr[1] = (uint8_t)((length & 0x01FC000) >> 14);
        ptr[2] = (uint8_t)((length & 0x0003F80) >> 7);
        ptr[3] = (uint8_t)((length & 0x000007F) >> 0);
    };

    // PRIV payload: handler string, NUL separator, then the original packet bytes.
    const size_t handler_len = strlen(pMuxOther->datahandler);
    std::vector<uint8_t> priv_payload;
    priv_payload.reserve(pkt->size + handler_len + 32);
    vector_cat(priv_payload, (const uint8_t *)pMuxOther->datahandler, handler_len);
    priv_payload.push_back(0x00);
    vector_cat(priv_payload, pkt->data, pkt->size);

    // PRIV frame: id, size, two flag bytes, payload.
    std::vector<uint8_t> priv_frame;
    priv_frame.reserve(priv_payload.size() + 32); // was mistakenly reserving on priv_payload
    vector_cat(priv_frame, (const uint8_t *)"PRIV", strlen("PRIV"));
    add_data_size(priv_frame, (uint32_t)priv_payload.size());
    priv_frame.push_back(0x00);
    priv_frame.push_back(0x00);
    vector_cat(priv_frame, priv_payload);

    // Refuse data whose size would be silently truncated by the 28-bit size fields.
    if (priv_frame.size() > SIZE_FIELD_MAX) {
        AddMessage(RGY_LOG_ERROR, _T("packet too large for timed_id3 conversion.\n"));
        return RGY_ERR_INVALID_PARAM;
    }

    static const uint8_t HEADER[6] = { 0x49, 0x44, 0x33, 0x04, 0x00, 0x00 };
    std::vector<uint8_t> data;
    data.reserve(priv_frame.size() + 32);
    vector_cat(data, HEADER, sizeof(HEADER));
    add_data_size(data, (uint32_t)priv_frame.size());
    vector_cat(data, priv_frame);
    data.push_back(0xff); // terminator

    if (false) { // for debugging: dump the packet before and after conversion
        FILE *fp = fopen(strsprintf("debug_%lld_before.dat", pkt->pts).c_str(), "wb");
        if (fp) {
            fwrite(pkt->data, 1, pkt->size, fp);
            fclose(fp);
        }
        fp = fopen(strsprintf("debug_%lld_after.dat", pkt->pts).c_str(), "wb");
        if (fp) {
            fwrite(data.data(), 1, data.size(), fp);
            fclose(fp);
        }
    }

    const int new_size = (int)data.size();
    if (new_size > pkt->size) {
        // av_grow_packet() can fail; copying without checking would overrun the old buffer.
        if (av_grow_packet(pkt, new_size - pkt->size) < 0) {
            AddMessage(RGY_LOG_ERROR, _T("failed to resize packet for timed_id3 conversion.\n"));
            return RGY_ERR_NULL_PTR;
        }
    } else if (new_size < pkt->size) {
        // av_shrink_packet() takes the NEW size, not the amount to remove.
        av_shrink_packet(pkt, new_size);
    }
    memcpy(pkt->data, data.data(), data.size());
    return RGY_ERR_NONE;
}

RGY_ERR RGYOutputAvcodec::WriteOtherPacket(AVPacket *pkt) {
const AVMuxOther* pMuxOther = getOtherPacketStreamData(pkt);
const AVMuxOther *pMuxOther = getOtherPacketStreamData(pkt);
if (pMuxOther->bsfc) {
auto sts = applyBitstreamFilterOther(pkt, pMuxOther);
//bitstream filterを正常に起動できなかった
Expand All @@ -3014,6 +3078,8 @@ RGY_ERR RGYOutputAvcodec::WriteOtherPacket(AVPacket *pkt) {
}
if (pMuxOther->outCodecEncodeCtx) {
return SubtitleTranscode(pMuxOther, pkt);
} else if (pMuxOther->asdata == RGYSubAsData::AsTimedID3) {
ConvertPacketToTimedID3(pkt, pMuxOther);
}
//字幕を処理する
const AVRational vid_pkt_timebase = av_isvalid_q(m_Mux.video.inputStreamTimebase) ? m_Mux.video.inputStreamTimebase : av_inv_q(m_Mux.video.outputFps);
Expand Down
12 changes: 10 additions & 2 deletions NVEncCore/rgy_output_avcodec.h
Expand Up @@ -207,6 +207,9 @@ typedef struct AVMuxOther {
uint8_t *bufConvert; //変換用のバッファ

AVBSFContext *bsfc; //必要なら使用するbitstreamfilter

RGYSubAsData asdata; //バイナリデータとして転送する
char *datahandler; //asdata=timed_id3の時のデータ
} AVMuxOther;

enum {
Expand Down Expand Up @@ -279,7 +282,8 @@ struct AVOutputStreamPrm {
int bitrate; //ビットレートの指定
int samplingRate; //サンプリング周波数の指定
tstring filter; //音声フィルタ
bool asdata; //バイナリデータとして転送する
RGYSubAsData asdata; //バイナリデータとして転送する
tstring datahandler; //asdata=timed_id3の時のデータ
tstring bsf; //適用すべきbsfの名前
tstring disposition; //disposition
std::vector<tstring> metadata; //metadata
Expand All @@ -293,7 +297,8 @@ struct AVOutputStreamPrm {
bitrate(0),
samplingRate(0),
filter(),
asdata(false),
asdata(RGYSubAsData::None),
datahandler(),
bsf(),
disposition(),
metadata() {
Expand Down Expand Up @@ -513,6 +518,9 @@ class RGYOutputAvcodec : public RGYOutput
//その他のパケットを書き出す
RGY_ERR WriteOtherPacket(AVPacket *pkt);

//字幕用にpacketをtimed_id3に変換する
RGY_ERR ConvertPacketToTimedID3(AVPacket *pkt, const AVMuxOther *pMuxOther);

//パケットを実際に書き出す
void WriteNextPacketProcessed(AVPktMuxData *pktData);

Expand Down
3 changes: 2 additions & 1 deletion NVEncCore/rgy_prm.cpp
Expand Up @@ -1058,7 +1058,8 @@ SubtitleSelect::SubtitleSelect() :
encCodec(),
encCodecPrm(),
decCodecPrm(),
asdata(false),
asdata(RGYSubAsData::None),
datahandler(),
bsf(),
disposition(),
lang(),
Expand Down
9 changes: 8 additions & 1 deletion NVEncCore/rgy_prm.h
Expand Up @@ -1029,6 +1029,12 @@ struct AudioSource {
~AudioSource() {};
};

// Selects how a chosen subtitle track is handed to the muxer.
enum class RGYSubAsData {
    None       = 0, // default: no special "as data" handling requested
    AsData     = 1, // "?asdata" was given: the track is passed through as raw data
    AsTimedID3 = 2  // the output codec is timed_id3: packets are converted to timed ID3 data
};

struct SubtitleSelect {
int trackID; // 選択したトラックのリスト 1,2,...(1から連番で指定)
// 0 ... 全指定
Expand All @@ -1037,7 +1043,8 @@ struct SubtitleSelect {
tstring encCodec;
tstring encCodecPrm;
tstring decCodecPrm;
bool asdata;
RGYSubAsData asdata;
tstring datahandler;
tstring bsf; // 適用するbitstreamfilterの名前
tstring disposition; // 指定のdisposition
std::string lang; // 言語選択
Expand Down
6 changes: 3 additions & 3 deletions NVEncCore/rgy_version.h
Expand Up @@ -29,9 +29,9 @@
#ifndef __RGY_CONFIG_H__
#define __RGY_CONFIG_H__

#define VER_FILEVERSION 0,5,32,0
#define VER_STR_FILEVERSION "5.32"
#define VER_STR_FILEVERSION_TCHAR _T("5.32")
#define VER_FILEVERSION 0,5,33,1
#define VER_STR_FILEVERSION "5.33 beta1"
#define VER_STR_FILEVERSION_TCHAR _T("5.33 beta1")

#ifdef _M_IX86
#define BUILD_ARCH_STR _T("x86")
Expand Down

0 comments on commit f46c762

Please sign in to comment.