Skip to content

Commit

Permalink
[WebVTT] binary representation
Browse files Browse the repository at this point in the history
  • Loading branch information
peak3d committed Sep 24, 2019
1 parent 44a2584 commit 17c1ffc
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 74 deletions.
12 changes: 6 additions & 6 deletions src/main.cpp
Expand Up @@ -584,7 +584,7 @@ class CodecHandler
};
// Base-class default: always returns false.
// NOTE(review): presumably "no Annex B conversion was performed" — confirm against the overriding handlers.
virtual bool ExtraDataToAnnexB() { return false; };
// Base-class default: reports STREAMCODEC_PROFILE::CodecProfileNotNeeded.
virtual STREAMCODEC_PROFILE GetProfile() { return STREAMCODEC_PROFILE::CodecProfileNotNeeded; };
// Default transform hook: does not touch buf and returns false.
// In the FragmentedSampleReader hunk of this diff, ReadNextSample() is only called when
// Transform() returns true, so this default suppresses that call.
// This diff view shows the earlier signature first, followed by the one that also takes
// the sample's pts and duration.
virtual bool Transform(AP4_DataBuffer &buf, AP4_UI64 timescale) { return false; };
virtual bool Transform(AP4_UI64 pts, AP4_UI32 duration, AP4_DataBuffer &buf, AP4_UI64 timescale) { return false; };
// Base-class default: produces no sample; leaves sample and buf untouched and returns false.
virtual bool ReadNextSample(AP4_Sample &sample, AP4_DataBuffer &buf) { return false; };
// Base-class default: ignores the offset (no-op).
virtual void SetPTSOffset(AP4_UI64 offset) { };
// Base-class default: performs no seek work and returns true.
virtual bool TimeSeek(AP4_UI64 seekPos) { return true; };
Expand Down Expand Up @@ -891,7 +891,7 @@ class TTMLCodecHandler : public CodecHandler
,m_ptsOffset(0)
{};

// Hands the raw sample bytes in buf to the TTML parser together with the track timescale
// and the handler's stored PTS offset, and returns the parser's result.
// This diff view shows the earlier signature first, followed by the one that adds pts and
// duration; the body passes neither of them on to m_ttml.Parse.
virtual bool Transform(AP4_DataBuffer &buf, AP4_UI64 timescale) override
virtual bool Transform(AP4_UI64 pts, AP4_UI32 duration, AP4_DataBuffer &buf, AP4_UI64 timescale) override
{
return m_ttml.Parse(buf.GetData(), buf.GetDataSize(), timescale, m_ptsOffset);
}
Expand Down Expand Up @@ -945,9 +945,9 @@ class WebVTTCodecHandler : public CodecHandler
, m_ptsOffset(0)
{};

// Hands the raw sample bytes in buf to the WebVTT parser and returns the parser's result.
// This diff view shows the earlier signature and call first, each followed by its
// replacement: the later form additionally forwards the sample's pts and duration to
// WebVTT::Parse, ahead of the buffer, timescale and stored PTS offset.
virtual bool Transform(AP4_DataBuffer &buf, AP4_UI64 timescale) override
virtual bool Transform(AP4_UI64 pts, AP4_UI32 duration, AP4_DataBuffer &buf, AP4_UI64 timescale) override
{
return m_webVtt.Parse(buf.GetData(), buf.GetDataSize(), timescale, m_ptsOffset);
return m_webVtt.Parse(pts, duration, buf.GetData(), buf.GetDataSize(), timescale, m_ptsOffset);
}

virtual bool ReadNextSample(AP4_Sample &sample, AP4_DataBuffer &buf) override
Expand Down Expand Up @@ -1193,7 +1193,7 @@ class FragmentedSampleReader : public SampleReader, public AP4_LinearReader
m_singleSampleDecryptor->DecryptSampleData(m_poolId, m_encrypted, m_sampleData, nullptr, 0, nullptr, nullptr);
}

if (m_codecHandler->Transform(m_sampleData, m_track->GetMediaTimeScale()))
if (m_codecHandler->Transform(m_sample.GetDts(), m_sample.GetDuration(), m_sampleData, m_track->GetMediaTimeScale()))
m_codecHandler->ReadNextSample(m_sample, m_sampleData);
}

Expand Down Expand Up @@ -1489,7 +1489,7 @@ class SubtitleSampleReader : public SampleReader
m_codecHandler = new WebVTTCodecHandler(nullptr);
else
m_codecHandler = new TTMLCodecHandler(nullptr);
m_codecHandler->Transform(result, 1000);
m_codecHandler->Transform(0, 0, result, 1000);
};

// Returns the reader's m_eos flag.
virtual bool EOS()const override { return m_eos; };
Expand Down
157 changes: 91 additions & 66 deletions src/parser/WebVTT.cpp
Expand Up @@ -19,102 +19,127 @@
#include "WebVTT.h"
#include <cstring>

bool WebVTT::Parse(const void *buffer, size_t buffer_size, uint64_t timescale, uint64_t ptsOffset)
bool WebVTT::Parse(uint64_t pts, uint32_t duration, const void *buffer, size_t buffer_size, uint64_t timescale, uint64_t ptsOffset)
{
bool webvtt_visited(false);
bool wait_start(true);

m_pos = ~0;
m_seekTime = 0;
m_subTitles.clear();
m_timescale = timescale;
m_ptsOffset = ptsOffset;
if (pts < ptsOffset)
pts += ptsOffset;

const char *cbuf(reinterpret_cast<const char*>(buffer)), *cbufe(cbuf + buffer_size);
std::string strText;

while (cbuf != cbufe)
if (buffer_size >= 8 && (memcmp(cbuf + 4, "vtte", 4) == 0 || memcmp(cbuf + 4, "vttc", 4) == 0))
{
const char *next(strchr(cbuf, '\n'));
if (!next)
next = cbufe;

if (webvtt_visited)
if (memcmp(cbuf + 4, "vtte", 4) == 0)
{
if (!m_subTitles.empty() && !~m_subTitles.back().end)
m_subTitles.back().end = pts;
}
else if (memcmp(cbuf + 4, "vttc", 4) == 0)
{
if (wait_start)
if (memcmp(cbuf + 12, "payl", 4) == 0)
cbuf += 4, buffer_size -= 4;

std::string text(cbuf + 12, buffer_size - 12);
if (m_subTitles.empty() || text != m_subTitles.back().text[0])
{
unsigned int thb, tmb, tsb, tmsb, the, tme, tse, tmse;
char delb, dele;
m_subTitles.push_back(SUBTITLE(pts));
m_subTitles.back().text.push_back(text);
}
}
}
else
{
m_subTitles.clear();
bool webvtt_visited(false);
bool wait_start(true);
std::string strText;
m_pos = ~0;

if (sscanf(cbuf, "%u:%u:%u%c%u --> %u:%u:%u%c%u", &thb, &tmb, &tsb, &delb, &tmsb, &the, &tme, &tse, &dele, &tmse) == 10)
while (cbuf != cbufe)
{
const char *next(strchr(cbuf, '\n'));
if (!next)
next = cbufe;

if (webvtt_visited)
{
if (wait_start)
{
m_subTitles.push_back(SUBTITLE());
SUBTITLE &sub(m_subTitles.back());
unsigned int thb, tmb, tsb, tmsb, the, tme, tse, tmse;
char delb, dele;

sub.start = thb * 3600 + tmb * 60 + tsb;
sub.start = sub.start * 1000 + tmsb;
sub.start = (sub.start * m_timescale) / 1000;
if (sscanf(cbuf, "%u:%u:%u%c%u --> %u:%u:%u%c%u", &thb, &tmb, &tsb, &delb, &tmsb, &the, &tme, &tse, &dele, &tmse) == 10)
{
m_subTitles.push_back(SUBTITLE());
SUBTITLE &sub(m_subTitles.back());

sub.end = the * 3600 + tme * 60 + tse;
sub.end = sub.end * 1000 + tmse;
sub.end = (sub.end * m_timescale) / 1000;
sub.start = thb * 3600 + tmb * 60 + tsb;
sub.start = sub.start * 1000 + tmsb;
sub.start = (sub.start * m_timescale) / 1000;

if (sub.start < m_ptsOffset)
{
sub.start += m_ptsOffset;
sub.end += m_ptsOffset;
}
sub.end = the * 3600 + tme * 60 + tse;
sub.end = sub.end * 1000 + tmse;
sub.end = (sub.end * m_timescale) / 1000;

if (strText.empty())
sub.id = std::string(cbuf, 12);
else
sub.id = strText;

if (sub.id == m_lastId)
m_pos = m_subTitles.size() - 1;
if (sub.start < m_ptsOffset)
{
sub.start += m_ptsOffset;
sub.end += m_ptsOffset;
}

wait_start = false;
if (strText.empty())
sub.id = std::string(cbuf, 12);
else
sub.id = strText;

if (sub.id == m_lastId)
m_pos = m_subTitles.size() - 1;

wait_start = false;
}
else
{
strText = std::string(cbuf, next - cbuf);
if (!strText.empty() && strText.back() == '\r')
strText.resize(strText.size() - 1);
}
}
else
{
strText = std::string(cbuf, next - cbuf);
if (!strText.empty() && strText.back() == '\r')
strText.resize(strText.size() - 1);
if (strText.find("&rlm;", 0, 5) == 0)
strText.replace(0, 5, "\0xE2\0x80\0xAB");
else if (strText.find("&lrm;", 0, 5) == 0)
strText.replace(0, 5, "\0xE2\0x80\0xAA");
if (!strText.empty())
m_subTitles.back().text.push_back(strText);
else
wait_start = true;
}
}
else
{
strText = std::string(cbuf, next - cbuf);
if (!strText.empty() && strText.back() == '\r')
strText.resize(strText.size() -1);
if (strText.find("&rlm;", 0, 5) == 0)
strText.replace(0, 5, "\0xE2\0x80\0xAB");
else if (strText.find("&lrm;", 0, 5) == 0)
strText.replace(0, 5, "\0xE2\0x80\0xAA");
if (!strText.empty())
m_subTitles.back().text.push_back(strText);
else
wait_start = true;
//TODO: BOM
while (cbuf < next && *cbuf != 'W')
++cbuf;
if (strncmp(cbuf, "WEBVTT", 6) == 0)
webvtt_visited = true;
}
}
else
{
//TODO: BOM
while (cbuf < next && *cbuf != 'W')

cbuf = next;
if (cbuf != cbufe)
++cbuf;
if (strncmp(cbuf, "WEBVTT", 6) == 0)
webvtt_visited = true;
}

cbuf = next;
if (cbuf != cbufe)
++cbuf;
}

if (!~m_pos || m_pos == m_subTitles.size())
m_pos = 0;
else
++m_pos;
if (!~m_pos || m_pos >= m_subTitles.size())
m_pos = 0;
else
++m_pos;
}

m_lastId.clear();

Expand All @@ -129,7 +154,7 @@ bool WebVTT::Prepare(uint64_t &pts, uint32_t &duration)
m_seekTime = 0;
}

if (m_pos >= m_subTitles.size())
if (m_pos >= m_subTitles.size() || !~m_subTitles[m_pos].end)
return false;

SUBTITLE &sub(m_subTitles[m_pos++]);
Expand Down
7 changes: 5 additions & 2 deletions src/parser/WebVTT.h
Expand Up @@ -28,7 +28,7 @@ class WebVTT
public:
// Creates a parser with m_pos, m_tickRate, m_timescale and m_ptsOffset all set to 0.
WebVTT() :m_pos(0), m_tickRate(0), m_timescale(0), m_ptsOffset(0) { };

// Parses one subtitle sample into the parser's cue list.
// This diff view shows the earlier declaration first, followed by the one that adds pts
// and duration.
//   pts         Sample timestamp. As used in WebVTT.cpp in this diff: the start time of a
//               cue created from a "vttc" sample, and the end time assigned to the last
//               cue when a "vtte" sample arrives while that cue's end is still unset.
//               ptsOffset is added to it when pts < ptsOffset.
//   duration    NOTE(review): not referenced in the portion of the implementation visible
//               in this diff — confirm whether it is used at all.
//   buffer,
//   buffer_size Raw sample bytes. If at least 8 bytes long with "vtte" or "vttc" at byte
//               offset 4, the sample is handled as a binary cue; otherwise it is scanned
//               line by line as text, starting at a "WEBVTT" header.
//   timescale   Stored in m_timescale; used to convert millisecond cue times from the
//               text form into track units.
//   ptsOffset   Stored in m_ptsOffset; added to text cue times whose start is below it.
// The return value is produced past the end of the visible implementation hunk.
bool Parse(const void *buffer, size_t buffer_size, uint64_t timescale, uint64_t ptsOffset);
bool Parse(uint64_t pts, uint32_t duration, const void *buffer, size_t buffer_size, uint64_t timescale, uint64_t ptsOffset);

bool Prepare(uint64_t &pts, uint32_t &duration);
bool TimeSeek(uint64_t seekPos);
Expand All @@ -39,8 +39,11 @@ class WebVTT

// One subtitle cue held in the parser's cue list.
struct SUBTITLE
{
// Leaves every member at its in-class default.
SUBTITLE() = default;
// Creates a cue whose start time is already known; end keeps its ~0 default.
SUBTITLE(uint64_t start)
: start(start) {};
// Cue identifier. For text cues WebVTT::Parse sets it to the line preceding the timing
// line, or to the first 12 characters of the timing line when there is no such line.
std::string id;
// This diff view shows the earlier declaration (no initializers) first, followed by the
// one with defaults.
uint64_t start, end;
// end == ~0 marks "end not known yet": WebVTT::Parse fills it in from a following "vtte"
// sample, and WebVTT::Prepare returns false while the current cue's end is still ~0.
uint64_t start = 0, end = ~0;
// Cue text: one entry per text line for text cues, or a single payload entry for cues
// built from a "vttc" sample.
std::vector<std::string> text;
};

Expand Down

0 comments on commit 17c1ffc

Please sign in to comment.