Skip to content

Commit

Permalink
Improve mkv parsing performance and robustness
Browse files: browse the repository at this point in the history
  • Loading branch information
jtackaberry committed Oct 13, 2018
1 parent 5423b61 commit 1044f36
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions external/metadata/video/mkv.py
Expand Up @@ -189,7 +189,7 @@ def matroska_bps_to_bitrate(bps):
""" """
Tries to convert a free-form bps string into a bitrate (bits per second). Tries to convert a free-form bps string into a bitrate (bits per second).
""" """
m = re.search('([\d.]+)\s*(\D.*)', bps) m = re.search(r'([\d.]+)\s*(\D.*)', bps)
if m: if m:
bps, suffix = m.groups() bps, suffix = m.groups()
if 'kbit' in suffix: if 'kbit' in suffix:
Expand Down Expand Up @@ -417,6 +417,7 @@ def __init__(self, file):
self.type = 'Matroska' self.type = 'Matroska'
self.has_idx = False self.has_idx = False
self.objects_by_uid = {} self.objects_by_uid = {}
self._in_seekhead = False


# Now get the segment # Now get the segment
self.segment = segment = EbmlEntity(buffer[header.get_total_len():]) self.segment = segment = EbmlEntity(buffer[header.get_total_len():])
Expand Down Expand Up @@ -454,6 +455,7 @@ def __init__(self, file):
del self.video[:] del self.video[:]
del self.subtitles[:] del self.subtitles[:]
del self.chapters[:] del self.chapters[:]
self.objects_by_uid.clear()
continue continue
else: else:
# Some other error, stop processing. # Some other error, stop processing.
Expand Down Expand Up @@ -510,6 +512,9 @@ def process_elem(self, elem):




def process_seekhead(self, elem): def process_seekhead(self, elem):
if self._in_seekhead:
return log.debug('skipping recursive seekhead processing')
self._in_seekhead = True
for seek_elem in self.process_one_level(elem): for seek_elem in self.process_one_level(elem):
if seek_elem.get_id() != MATROSKA_SEEK_ID: if seek_elem.get_id() != MATROSKA_SEEK_ID:
continue continue
Expand All @@ -518,11 +523,11 @@ def process_seekhead(self, elem):
self.file.seek(self.segment.offset + sub_elem.get_value()) self.file.seek(self.segment.offset + sub_elem.get_value())
buffer = self.file.read(100) buffer = self.file.read(100)
elem = EbmlEntity(buffer) elem = EbmlEntity(buffer)
print(elem.ebml_length)
# Fetch all data necessary for this element. # Fetch all data necessary for this element.
if elem.ebml_length > 100: if elem.ebml_length > 100:
elem.add_data(self.file.read(elem.ebml_length - 100)) elem.add_data(self.file.read(elem.ebml_length - 100))
self.process_elem(elem) self.process_elem(elem)
self._in_seekhead = False




def process_tracks(self, tracks): def process_tracks(self, tracks):
Expand Down Expand Up @@ -865,11 +870,14 @@ def tags_to_attributes(self, obj, tags):
# so skip. # so skip.
continue continue


# Pull value out of Tag object or list of Tag objects. # Pull value out of Tag object or list of Tag objects. We expect scalar values
value = [item.value for item in tag] if isinstance(tag, list) else tag.value # so in the case of lists (because there was more than one tag of the same name)
# just pick the first.
value = tag[0].value if isinstance(tag, list) else tag.value

if filter: if filter:
try: try:
value = [list(filter(item)) for item in value] if isinstance(value, list) else list(filter(value)) value = filter(value)
except Exception as e: except Exception as e:
log.warning('Failed to convert tag to core attribute: %s', e) log.warning('Failed to convert tag to core attribute: %s', e)
# Special handling for tv series recordings. The 'title' tag # Special handling for tv series recordings. The 'title' tag
Expand Down

0 comments on commit 1044f36

Please sign in to comment.