
Commit

Improved the ffprobe call caching mechanism by storing the result to DB and using it for indexing and subtitles search.
morpheus65535 committed May 6, 2021
1 parent 887da10 commit 33e1555
Showing 5 changed files with 140 additions and 84 deletions.
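At its core, the change is a pickle round trip against the new ffprobe_cache BLOB columns: the parsed metadata dict is pickled into the row of the matching episode or movie, and it is only reused when both the file id and the file size still match. A minimal standalone sketch of that idea (the table and column names follow the diff below; the in-memory database and the values are illustrative only):

import pickle
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE table_episodes (episode_file_id INTEGER, file_size INTEGER, ffprobe_cache BLOB)')
conn.execute('INSERT INTO table_episodes (episode_file_id, file_size) VALUES (42, 1000)')

# Cache write: pickle the metadata dict into the ffprobe_cache blob of the matching row.
data = {'ffprobe': {}, 'enzyme': {}, 'file_id': 42, 'file_size': 1000}
conn.execute('UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
             (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), 42))

# Cache read: only trust the blob when both the file id and the file size still match.
row = conn.execute('SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
                   (42, 1000)).fetchone()
cached = pickle.loads(row[0]) if row and row[0] else None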
4 changes: 4 additions & 0 deletions bazarr/api.py
@@ -71,6 +71,10 @@ def wrapper(*args, **kwargs):
 
 
 def postprocess(item: dict):
+    # Remove ffprobe_cache
+    if 'ffprobe_cache' in item:
+        del (item['ffprobe_cache'])
+
     # Parse tags
     if 'tags' in item:
         if item['tags'] is None:
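A tiny illustration of what the new guard in postprocess() does: the pickled blob is dropped from the row dict before it is serialized for the API. Every field other than ffprobe_cache is made up for the example:

item = {'title': 'Some Episode', 'ffprobe_cache': b'\x80\x04\x95...'}  # illustrative row dict

if 'ffprobe_cache' in item:
    del (item['ffprobe_cache'])

assert 'ffprobe_cache' not in item  # the blob never reaches API consumers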
2 changes: 2 additions & 0 deletions bazarr/database.py
@@ -109,6 +109,7 @@ def db_upgrade():
         ['table_episodes', 'episode_file_id', 'integer'],
         ['table_episodes', 'audio_language', 'text'],
         ['table_episodes', 'file_size', 'integer', '0'],
+        ['table_episodes', 'ffprobe_cache', 'blob'],
         ['table_movies', 'sortTitle', 'text'],
         ['table_movies', 'year', 'text'],
         ['table_movies', 'alternativeTitles', 'text'],
@@ -121,6 +122,7 @@ def db_upgrade():
         ['table_movies', 'tags', 'text', '[]'],
         ['table_movies', 'profileId', 'integer'],
         ['table_movies', 'file_size', 'integer', '0'],
+        ['table_movies', 'ffprobe_cache', 'blob'],
         ['table_history', 'video_path', 'text'],
         ['table_history', 'language', 'text'],
         ['table_history', 'provider', 'text'],
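The two new entries register ffprobe_cache as a BLOB column on table_episodes and table_movies. The diff does not show how db_upgrade() consumes these entries, but on SQLite the effect boils down to an ALTER TABLE guarded by a column check; the helper below is a hypothetical sketch of that pattern, not Bazarr's actual implementation:

import sqlite3

def add_column_if_missing(conn, table, column, col_type):
    # PRAGMA table_info returns one (cid, name, type, notnull, dflt_value, pk) row per column
    existing = {row[1] for row in conn.execute('PRAGMA table_info({})'.format(table))}
    if column not in existing:
        conn.execute('ALTER TABLE {} ADD COLUMN "{}" {}'.format(table, column, col_type))

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE table_movies (movie_file_id INTEGER, file_size INTEGER)')
add_column_if_missing(conn, 'table_movies', 'ffprobe_cache', 'blob')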
147 changes: 95 additions & 52 deletions bazarr/embedded_subs_reader.py
@@ -1,68 +1,111 @@
 # coding=utf-8
 
-import enzyme
-from enzyme.exceptions import MalformedMKVError
 import logging
 import os
-import datetime
+import pickle
 from knowit import api
-from subliminal.cache import region
+import enzyme
+from enzyme.exceptions import MalformedMKVError
+from database import database
 
 
-FFPROBE_CACHE_EXPIRATION_TIME = datetime.timedelta(weeks=2).total_seconds()
-
-
-class EmbeddedSubsReader:
-    def __init__(self):
-        self.ffprobe = None
-
-    @region.cache_on_arguments(expiration_time=FFPROBE_CACHE_EXPIRATION_TIME)
-    # file_size, episode_file_id and movie_file_id are used for cache identification. DO NOT REMOVE!
-    def list_languages(self, file, file_size, episode_file_id=None, movie_file_id=None):
-        from utils import get_binary
-        self.ffprobe = get_binary("ffprobe")
-
-        subtitles_list = []
-        if self.ffprobe:
-            api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
-            data = api.know(file)
-
-            traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
-            brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
-
-            if 'subtitle' in data:
-                for detected_language in data['subtitle']:
-                    if 'language' in detected_language:
-                        language = detected_language['language'].alpha3
-                        if language == 'zho' and 'name' in detected_language:
-                            if any(ext in (detected_language['name'].lower()) for ext in traditional_chinese):
-                                language = 'zht'
-                        if language == 'por' and 'name' in detected_language:
-                            if any(ext in (detected_language['name'].lower()) for ext in brazilian_portuguese):
-                                language = 'pob'
-                        forced = detected_language['forced'] if 'forced' in detected_language else False
-                        hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
-                            detected_language else False
-                        codec = detected_language['format'] if 'format' in detected_language else None
-                        subtitles_list.append([language, forced, hearing_impaired, codec])
-                    else:
-                        continue
-        else:
-            if os.path.splitext(file)[1] == '.mkv':
-                with open(file, 'rb') as f:
-                    try:
-                        mkv = enzyme.MKV(f)
-                    except MalformedMKVError:
-                        logging.error('BAZARR cannot analyze this MKV with our built-in MKV parser, you should install ffmpeg: ' + file)
-                    else:
-                        for subtitle_track in mkv.subtitle_tracks:
-                            hearing_impaired = False
-                            if subtitle_track.name:
-                                if 'sdh' in subtitle_track.name.lower():
-                                    hearing_impaired = True
-                            subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
-                                                   subtitle_track.codec_id])
-
-        return subtitles_list
-
-
-embedded_subs_reader = EmbeddedSubsReader()
+def embedded_subs_reader(file, file_size, episode_file_id=None, movie_file_id=None):
+    data = parse_video_metadata(file, file_size, episode_file_id, movie_file_id)
+
+    subtitles_list = []
+    if data['ffprobe']:
+        traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
+        brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
+
+        if 'subtitle' in data['ffprobe']:
+            for detected_language in data['ffprobe']['subtitle']:
+                if 'language' in detected_language:
+                    language = detected_language['language'].alpha3
+                    if language == 'zho' and 'name' in detected_language:
+                        if any(ext in (detected_language['name'].lower()) for ext in traditional_chinese):
+                            language = 'zht'
+                    if language == 'por' and 'name' in detected_language:
+                        if any(ext in (detected_language['name'].lower()) for ext in brazilian_portuguese):
+                            language = 'pob'
+                    forced = detected_language['forced'] if 'forced' in detected_language else False
+                    hearing_impaired = detected_language['hearing_impaired'] if 'hearing_impaired' in \
+                        detected_language else False
+                    codec = detected_language['format'] if 'format' in detected_language else None
+                    subtitles_list.append([language, forced, hearing_impaired, codec])
+                else:
+                    continue
+    elif data['enzyme']:
+        for subtitle_track in data['enzyme'].subtitle_tracks:
+            hearing_impaired = False
+            if subtitle_track.name:
+                if 'sdh' in subtitle_track.name.lower():
+                    hearing_impaired = True
+            subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
+                                   subtitle_track.codec_id])
+
+    return subtitles_list
+
+
+def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
+    # Define default data keys value
+    data = {
+        'ffprobe': {},
+        'enzyme': {},
+        'file_id': episode_file_id if episode_file_id else movie_file_id,
+        'file_size': file_size
+    }
+
+    # Get the actual cache value from database
+    if episode_file_id:
+        cache_key = database.execute('SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
+                                     (episode_file_id, file_size), only_one=True)
+    elif movie_file_id:
+        cache_key = database.execute('SELECT ffprobe_cache FROM table_movies WHERE movie_file_id=? AND file_size=?',
+                                     (movie_file_id, file_size), only_one=True)
+    else:
+        cache_key = None
+
+    # check if we have a value for that cache key
+    if not isinstance(cache_key, dict):
+        return data
+    else:
+        try:
+            # Unpickle ffprobe cache
+            cached_value = pickle.loads(cache_key['ffprobe_cache'])
+        except:
+            pass
+        else:
+            # Check if file size and file id matches and if so, we return the cached value
+            if cached_value['file_size'] == file_size and cached_value['file_id'] in [episode_file_id, movie_file_id]:
+                return cached_value
+
+    # if not, we retrieve the metadata from the file
+    from utils import get_binary
+    ffprobe_path = get_binary("ffprobe")
+
+    # if we have ffprobe available
+    if ffprobe_path:
+        api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
+        data['ffprobe'] = api.know(file)
+    # if not, we use enzyme for mkv files
+    else:
+        if os.path.splitext(file)[1] == '.mkv':
+            with open(file, 'rb') as f:
+                try:
+                    mkv = enzyme.MKV(f)
+                except MalformedMKVError:
+                    logging.error(
+                        'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
+                        'ffmpeg/ffprobe: ' + file)
+                else:
+                    data['enzyme'] = mkv
+
+    # we write to db the result and return the newly cached ffprobe dict
+    if episode_file_id:
+        database.execute('UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
+                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), episode_file_id))
+    elif movie_file_id:
+        database.execute('UPDATE table_movies SET ffprobe_cache=? WHERE movie_file_id=?',
+                         (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), movie_file_id))
+    return data
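Taken together: parse_video_metadata() first tries the pickled ffprobe_cache row keyed by file id and file size, falls back to ffprobe (or to enzyme for MKV files when ffprobe is missing), and writes the fresh result back to the database; embedded_subs_reader() then flattens that dict into [language, forced, hearing_impaired, codec] entries. A usage sketch only, assuming it runs inside Bazarr where the database module and the ffprobe binary are available (the path, size and id are made up):

from embedded_subs_reader import embedded_subs_reader, parse_video_metadata

video_path = '/tv/Some Show/Season 01/episode.mkv'   # hypothetical path
file_size = 1234567890                               # hypothetical size in bytes
episode_file_id = 42                                 # hypothetical episode file id

# First call: cache miss, so ffprobe (or enzyme) runs and the result is pickled
# into table_episodes.ffprobe_cache.
metadata = parse_video_metadata(video_path, file_size, episode_file_id=episode_file_id)

# Later calls with the same id and size get the cached dict back without
# spawning ffprobe again.
for language, forced, hearing_impaired, codec in embedded_subs_reader(
        video_path, file_size, episode_file_id=episode_file_id):
    print(language, forced, hearing_impaired, codec)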
54 changes: 33 additions & 21 deletions bazarr/get_subtitle.py
@@ -33,6 +33,7 @@
 from guessit import guessit
 from database import database, dict_mapper, get_exclusion_clause, get_profiles_list, get_audio_profile_languages, \
     get_desired_languages
+from embedded_subs_reader import parse_video_metadata
 
 from analytics import track_event
 from locale import getpreferredencoding
@@ -1177,41 +1178,52 @@ def refine_from_db(path, video):
 
 
 def refine_from_ffprobe(path, video):
-    exe = get_binary('ffprobe')
-    if not exe:
-        logging.debug('BAZARR FFprobe not found!')
-        return
-    else:
-        logging.debug('BAZARR FFprobe used is %s', exe)
-
-    api.initialize({'provider': 'ffmpeg', 'ffmpeg': exe})
-    data = api.know(path)
-
-    logging.debug('FFprobe found: %s', data)
-
-    if 'video' not in data:
+    if isinstance(video, Movie):
+        file_id = database.execute("SELECT movie_file_id, file_size FROM table_movies WHERE path = ?",
+                                   (path_mappings.path_replace_movie_reverse(path),), only_one=True)
+    else:
+        file_id = database.execute("SELECT episode_file_id, file_size FROM table_episodes WHERE path = ?",
+                                   (path_mappings.path_replace_reverse(path),), only_one=True)
+
+    if not isinstance(file_id, dict):
+        return video
+
+    if isinstance(video, Movie):
+        data = parse_video_metadata(file=path, file_size=file_id['file_size'],
+                                    movie_file_id=file_id['movie_file_id'])
+    else:
+        data = parse_video_metadata(file=path, file_size=file_id['file_size'],
+                                    episode_file_id=file_id['episode_file_id'])
+
+    if not data['ffprobe']:
+        logging.debug("No FFprobe available in cache for this file: {}".format(path))
+        return video
+
+    logging.debug('FFprobe found: %s', data['ffprobe'])
+
+    if 'video' not in data['ffprobe']:
         logging.debug('BAZARR FFprobe was unable to find video tracks in the file!')
     else:
-        if 'resolution' in data['video'][0]:
+        if 'resolution' in data['ffprobe']['video'][0]:
             if not video.resolution:
-                video.resolution = data['video'][0]['resolution']
-        if 'codec' in data['video'][0]:
+                video.resolution = data['ffprobe']['video'][0]['resolution']
+        if 'codec' in data['ffprobe']['video'][0]:
             if not video.video_codec:
-                video.video_codec = data['video'][0]['codec']
-        if 'frame_rate' in data['video'][0]:
+                video.video_codec = data['ffprobe']['video'][0]['codec']
+        if 'frame_rate' in data['ffprobe']['video'][0]:
             if not video.fps:
-                if isinstance(data['video'][0]['frame_rate'], float):
-                    video.fps = data['video'][0]['frame_rate']
+                if isinstance(data['ffprobe']['video'][0]['frame_rate'], float):
+                    video.fps = data['ffprobe']['video'][0]['frame_rate']
                 else:
-                    video.fps = data['video'][0]['frame_rate'].magnitude
+                    video.fps = data['ffprobe']['video'][0]['frame_rate'].magnitude
 
-    if 'audio' not in data:
+    if 'audio' not in data['ffprobe']:
         logging.debug('BAZARR FFprobe was unable to find audio tracks in the file!')
     else:
-        if 'codec' in data['audio'][0]:
+        if 'codec' in data['ffprobe']['audio'][0]:
             if not video.audio_codec:
-                video.audio_codec = data['audio'][0]['codec']
-        for track in data['audio']:
+                video.audio_codec = data['ffprobe']['audio'][0]['codec']
+        for track in data['ffprobe']['audio']:
             if 'language' in track:
                 video.audio_languages.add(track['language'].alpha3)
 
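refine_from_ffprobe() now reads the cached metadata instead of invoking ffprobe itself, so the shape it expects is whatever knowit stored under data['ffprobe']. An illustrative sketch of that shape, limited to the keys the function above actually reads (the concrete values are made up, and real knowit output wraps languages and frame rates in richer objects):

ffprobe_data = {
    'video': [{'resolution': '1080p', 'codec': 'H.264', 'frame_rate': 23.976}],
    'audio': [{'codec': 'AC-3'}],   # real tracks may also carry a 'language' object exposing .alpha3
    'subtitle': [{'forced': False, 'hearing_impaired': False, 'format': 'SubRip'}],
}

frame_rate = ffprobe_data['video'][0]['frame_rate']
# knowit may return a plain float or a unit-carrying quantity, hence the .magnitude fallback above.
fps = frame_rate if isinstance(frame_rate, float) else frame_rate.magnitude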
17 changes: 6 additions & 11 deletions bazarr/list_subtitles.py
@@ -5,7 +5,6 @@
 import logging
 import ast
 import re
-import subliminal
 from guess_language import guess_language
 from subliminal_patch import core, search_external_subtitles
 from subzero.language import Language
@@ -34,11 +33,9 @@ def store_subtitles(original_path, reversed_path):
         try:
             item = database.execute('SELECT file_size, episode_file_id FROM table_episodes '
                                     'WHERE path = ?', (original_path,), only_one=True)
-            subtitle_languages = embedded_subs_reader.list_languages(reversed_path,
-                                                                     file_size=item['file_size'],
-                                                                     episode_file_id=item['episode_file_id'])
-            subliminal.region.backend.sync()
-
+            subtitle_languages = embedded_subs_reader(reversed_path,
+                                                      file_size=item['file_size'],
+                                                      episode_file_id=item['episode_file_id'])
             for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
                 try:
                     if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \
@@ -154,11 +151,9 @@ def store_subtitles_movie(original_path, reversed_path):
         try:
             item = database.execute('SELECT file_size, movie_file_id FROM table_movies '
                                     'WHERE path = ?', (original_path,), only_one=True)
-            subtitle_languages = embedded_subs_reader.list_languages(reversed_path,
-                                                                     file_size=item['file_size'],
-                                                                     movie_file_id=item['movie_file_id'])
-            subliminal.region.backend.sync()
-
+            subtitle_languages = embedded_subs_reader(reversed_path,
+                                                      file_size=item['file_size'],
+                                                      movie_file_id=item['movie_file_id'])
             for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages:
                 try:
                     if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \
