Skip to content

Commit

Permalink
Add Embedded Subtitles provider
Browse files Browse the repository at this point in the history
  • Loading branch information
vitiko98 committed Dec 13, 2021
1 parent 409e1a5 commit ffca84a
Show file tree
Hide file tree
Showing 8 changed files with 299 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ If you need something that is not already part of Bazarr, feel free to create a
* Assrt
* BetaSeries
* BSplayer
* Embedded Subtitles
* GreekSubtitles
* Hosszupuska
* LegendasDivx
Expand Down
3 changes: 3 additions & 0 deletions bazarr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,9 @@ def get(self, section, option, raw=False, vars=None):
'approved_only': 'False',
'multithreading': 'True'
},
'embeddedsubtitles': {
'include_ass': 'True',
},
'subsync': {
'use_subsync': 'False',
'use_subsync_threshold': 'False',
Expand Down
7 changes: 7 additions & 0 deletions bazarr/get_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from get_args import args
from config import settings, get_array_from
from event_handler import event_stream
from utils import get_binary
from subliminal_patch.exceptions import TooManyRequests, APIThrottled, ParseResponseError, IPAddressBlocked
from subliminal.providers.opensubtitles import DownloadLimitReached
from subliminal.exceptions import DownloadLimitExceeded, ServiceUnavailable
Expand Down Expand Up @@ -198,6 +199,12 @@ def get_providers_auth():
'email': settings.ktuvit.email,
'hashed_password': settings.ktuvit.hashed_password,
},
'embeddedsubtitles': {
'include_ass': settings.embeddedsubtitles.getboolean('include_ass'),
'cache_dir': os.path.join(args.config_dir, "cache"),
'ffprobe_path': get_binary("ffprobe"),
'ffmpeg_path': get_binary("ffmpeg"),
}
}


Expand Down
11 changes: 11 additions & 0 deletions frontend/src/Settings/Providers/list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ export const ProviderList: Readonly<ProviderInfo[]> = [
key: "bsplayer",
name: "BSplayer",
},
{
key: "embeddedsubtitles",
name: "Embedded Subtitles",
description: "Embedded Subtitles from your Media Files",
defaultKey: {
include_ass: true,
},
keyNameOverride: {
include_ass: "Convert embedded ASS to SRT",
},
},
{
key: "greeksubs",
name: "GreekSubs",
Expand Down
162 changes: 162 additions & 0 deletions libs/subliminal_patch/providers/embeddedsubtitles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*-

import logging
import os
import shutil
import tempfile

from babelfish import language_converters
import fese
from fese import check_integrity
from fese import FFprobeSubtitleStream
from fese import FFprobeVideoContainer
from fese import to_srt
from subliminal.subtitle import fix_line_ending
from subliminal_patch.core import Episode
from subliminal_patch.core import Movie
from subliminal_patch.providers import Provider
from subliminal_patch.subtitle import Subtitle
from subzero.language import Language

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Replace Babelfish's Language with Subzero's Language.
# This rebinds the module attribute ``fese.Language`` to the ``Language``
# class imported from ``subzero.language``. Presumably fese builds the
# ``language`` of each probed stream from that attribute, so streams end up
# comparable with the subzero languages requested from the provider --
# TODO confirm against fese.
fese.Language = Language


class EmbeddedSubtitle(Subtitle):
    """Subtitle that lives as a stream inside a video container.

    Keeps a reference to both the probed stream and its container so the
    provider can extract the stream later on.
    """

    provider_name = "embeddedsubtitles"
    hash_verifiable = False

    def __init__(self, stream, container, matches):
        """Build the subtitle from a probed ``stream`` of ``container``.

        :param stream: subtitle stream; its ``language`` and
            ``disposition.hearing_impaired`` are forwarded to the base class.
        :param container: video container the stream belongs to.
        :param matches: set of match names; stored as-is (not copied).
        """
        language = stream.language
        hearing_impaired = stream.disposition.hearing_impaired
        super().__init__(language, hearing_impaired)

        self.stream: FFprobeSubtitleStream = stream
        self.container: FFprobeVideoContainer = container
        self._matches: set = matches

        # The container path is used as the link, and its file name as the
        # release info.
        source_path = self.container.path
        self.page_link = source_path
        self.release_info = os.path.basename(source_path)

    def get_matches(self, video):
        """Return the stored match set, extended in place.

        ``"hash"`` is always added; ``"hearing_impaired"`` is added when the
        subtitle is flagged as hearing impaired. ``video`` is not used.
        """
        extra = ["hearing_impaired", "hash"] if self.hearing_impaired else ["hash"]
        self._matches.update(extra)
        return self._matches

    @property
    def id(self):
        """Identifier made of the container path and the stream index."""
        return "{}_{}".format(self.container.path, self.stream.index)


class EmbeddedSubtitlesProvider(Provider):
    """Provider that serves subtitle streams embedded in local video files.

    Streams are discovered through ``fese.FFprobeVideoContainer`` and, on
    download, extracted into a cache directory that is created by
    :meth:`initialize` and removed by :meth:`terminate`.
    """

    provider_name = "embeddedsubtitles"

    # Two explicit country variants plus every alpha2 code known to babelfish.
    languages = {Language("por", "BR"), Language("spa", "MX")} | {
        Language.fromalpha2(code) for code in language_converters["alpha2"].codes
    }
    # Add the hearing-impaired variant of every language collected above.
    languages.update({Language.rebuild(lang, hi=True) for lang in languages})

    # TODO: add forced support
    # languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))

    video_types = (Episode, Movie)
    subtitle_class = EmbeddedSubtitle

    def __init__(
        self, include_ass=True, cache_dir=None, ffprobe_path=None, ffmpeg_path=None
    ):
        """Configure the provider and the module-level fese settings.

        :param include_ass: when false, streams whose codec is ``ass`` are
            skipped by :meth:`query`.
        :param cache_dir: base directory for extracted subtitles; the system
            temporary directory is used when it is falsy. A sub-directory
            named after the lower-cased class name is appended to it.
        :param ffprobe_path: overrides ``fese.FFPROBE_PATH`` when truthy.
        :param ffmpeg_path: overrides ``fese.FFMPEG_PATH`` when truthy.
        """
        self._include_ass = include_ass
        self._cache_dir = os.path.join(
            cache_dir or tempfile.gettempdir(), self.__class__.__name__.lower()
        )
        # Maps a container path to the paths extracted from that container;
        # the inner mapping is indexed by stream index.
        self._cached_paths = {}

        # NOTE: these are module-level settings of fese, so they are shared
        # by every user of that module in the process.
        fese.FFPROBE_PATH = ffprobe_path or fese.FFPROBE_PATH
        fese.FFMPEG_PATH = ffmpeg_path or fese.FFMPEG_PATH

        if logger.getEffectiveLevel() == logging.DEBUG:
            fese.FF_LOG_LEVEL = "warning"
        else:
            # Default is True
            fese.FFMPEG_STATS = False

    def initialize(self):
        """Create the cache directory (no error if it already exists)."""
        os.makedirs(self._cache_dir, exist_ok=True)

    def terminate(self):
        """Remove the cache directory and forget every extracted path."""
        # Remove leftovers
        shutil.rmtree(self._cache_dir, ignore_errors=True)
        # The extracted files were just deleted: drop the stale entries so a
        # re-initialized provider extracts again instead of handing out paths
        # that no longer exist.
        self._cached_paths.clear()

    def query(self, path: str, languages):
        """List the embedded subtitles of the video at ``path``.

        :param path: path of the video container to probe.
        :param languages: container of wanted languages; a stream is kept
            only when ``stream.language in languages``.
        :return: list of :class:`EmbeddedSubtitle`; empty when the source is
            invalid (``fese.InvalidSource`` is logged, not raised) or when
            nothing matches.
        """
        video = FFprobeVideoContainer(path)

        try:
            streams = video.get_subtitles()
        except fese.InvalidSource as error:
            logger.error("Error trying to get subtitles for %s: %s", video, error)
            streams = []

        if not streams:
            logger.debug("No subtitles found for container: %s", video)

        subtitles = []

        for stream in streams:
            # Only subrip and ass are currently supported
            if stream.codec_name not in ("subrip", "ass"):
                logger.debug("Ignoring codec: %s", stream)
                continue

            if not self._include_ass and stream.codec_name == "ass":
                logger.debug("Ignoring ASS subtitle: %s", stream)
                continue

            if stream.language not in languages:
                continue

            # Keep only streams flagged as generic or hearing impaired.
            disposition = stream.disposition
            if disposition.generic or disposition.hearing_impaired:
                logger.debug("Appending subtitle: %s", stream)
                subtitles.append(EmbeddedSubtitle(stream, video, {"hash"}))
            else:
                logger.debug("Ignoring unwanted subtitle: %s", stream)

        return subtitles

    def list_subtitles(self, video, languages):
        """Return the embedded subtitles of ``video`` (probed by ``video.name``)."""
        return self.query(video.name, languages)

    def download_subtitle(self, subtitle):
        """Extract ``subtitle`` and store its bytes in ``subtitle.content``.

        Line endings are normalized through ``fix_line_ending``.
        """
        path = self._get_subtitle_path(subtitle)
        with open(path, "rb") as sub:
            content = sub.read()
        subtitle.content = fix_line_ending(content)

    def _get_subtitle_path(self, subtitle: EmbeddedSubtitle):
        """Return the path of the extracted file for ``subtitle``.

        The first request for a container extracts all of its allowed
        subtitle streams at once; later requests reuse the cached paths.
        The file is checked with ``check_integrity`` and passed through
        ``to_srt`` before its path is returned.
        """
        container = subtitle.container

        # Check if the container is not already in the instance
        if container.path not in self._cached_paths:
            # Extract all subtitle streams to avoid reading the entire
            # container over and over
            streams = filter(_check_allowed_extensions, container.get_subtitles())
            extracted = container.extract_subtitles(list(streams), self._cache_dir)
            # Add the extracted paths to the container path key
            self._cached_paths[container.path] = extracted

        cached_path = self._cached_paths[container.path]
        # Get the subtitle file by index
        subtitle_path = cached_path[subtitle.stream.index]

        check_integrity(subtitle.stream, subtitle_path)

        # Convert to SRT if the subtitle is ASS
        new_subtitle_path = to_srt(subtitle_path, remove_source=True)
        if new_subtitle_path != subtitle_path:
            # Remember the converted file so the next call does not look for
            # the source file, which to_srt was asked to remove.
            cached_path[subtitle.stream.index] = new_subtitle_path

        return new_subtitle_path


def _check_allowed_extensions(subtitle: FFprobeSubtitleStream):
    """Tell whether the stream's ``extension`` is ``"ass"`` or ``"srt"``."""
    extension = subtitle.extension
    return extension == "ass" or extension == "srt"
Binary file added tests/subliminal_patch/data/file_1.mkv
Binary file not shown.
Binary file added tests/subliminal_patch/data/file_2.mkv
Binary file not shown.
115 changes: 115 additions & 0 deletions tests/subliminal_patch/test_embeddedsubtitles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
import os

import fese
import pytest
from subliminal_patch.core import Episode, Movie
from subliminal_patch.providers.embeddedsubtitles import EmbeddedSubtitlesProvider
from subzero.language import Language

# Absolute path of the "data" directory that sits next to this test module.
_DATA = os.path.join(os.path.abspath(os.path.dirname(__file__)), "data")


# Rebind fese's module-level ``Language`` to subzero's ``Language`` class,
# presumably so probed streams carry languages comparable with the ones
# requested in these tests -- TODO confirm against fese.
fese.Language = Language


@pytest.fixture
def video_single_language():
    """Episode backed by ``file_1.mkv``, which has only ASS streams in English."""
    episode_path = os.path.join(_DATA, "file_1.mkv")
    return Episode(episode_path, "Serial Experiments Lain", 1, 1, source="Web")


@pytest.fixture
def video_multiple_languages():
    """Movie backed by ``file_2.mkv``, which has SubRip streams in multiple languages."""
    movie_path = os.path.join(_DATA, "file_2.mkv")
    return Movie(movie_path, "I'm No Longer Here", year=2019, source="Web")


@pytest.fixture
def video_inexistent(tmpdir):
    """Movie whose path points inside ``tmpdir`` at a file that is never created."""
    missing_path = os.path.join(tmpdir, "inexistent_video.mkv")
    return Movie(missing_path, "Dummy", year=2021, source="Web")


def test_inexistent_video(video_inexistent):
    """Listing subtitles for a missing file yields nothing instead of raising."""
    with EmbeddedSubtitlesProvider() as provider:
        # Languages are passed as a set everywhere else; ``{}`` is an empty
        # dict, so use an explicit empty set here.
        subtitles = provider.list_subtitles(video_inexistent, set())
        assert len(subtitles) == 0


def test_list_subtitles_single_language(video_single_language):
    """Only English subtitles are listed when only English is requested."""
    english = Language.fromalpha2("en")

    with EmbeddedSubtitlesProvider() as provider:
        subs = provider.list_subtitles(video_single_language, {english})

        # Without this check the loop below would pass on an empty result
        # and the test would not verify anything.
        assert subs

        for sub in subs:
            assert sub.language == english


def test_list_subtitles_multiple_languages(video_multiple_languages):
    """Each requested language is matched by at least one listed subtitle."""
    with EmbeddedSubtitlesProvider() as provider:
        wanted = {Language("por", "BR")}
        wanted.update(Language.fromalpha2(code) for code in ("en", "it", "fr", "es"))

        subs = provider.list_subtitles(video_multiple_languages, wanted)
        for expected in wanted:
            matching = [sub for sub in subs if sub.language == expected]
            assert matching


def test_list_subtitles_wo_ass(video_single_language):
    """With ``include_ass=False`` nothing is listed for the single-language video."""
    with EmbeddedSubtitlesProvider(include_ass=False) as provider:
        english_only = {Language.fromalpha2("en")}
        subs = provider.list_subtitles(video_single_language, english_only)
        assert not subs


def test_download_subtitle_multiple(video_multiple_languages):
    """Every subtitle listed for several languages can be downloaded."""
    with EmbeddedSubtitlesProvider() as provider:
        languages = {Language.fromalpha2(code) for code in ("en", "it", "fr")} | {
            Language("por", "BR")
        }

        subs = provider.list_subtitles(video_multiple_languages, languages)

        # Without this check the loop below would pass on an empty result
        # and no download would ever be exercised.
        assert subs

        for sub in subs:
            provider.download_subtitle(sub)
            assert sub.content is not None


def test_download_subtitle_single(video_single_language):
    """The first English subtitle listed gets content once downloaded."""
    with EmbeddedSubtitlesProvider() as provider:
        english_only = {Language.fromalpha2("en")}
        listed = provider.list_subtitles(video_single_language, english_only)
        subtitle = listed[0]

        provider.download_subtitle(subtitle)
        assert subtitle.content is not None


def test_download_invalid_subtitle(video_single_language):
    """Downloading raises ``fese.InvalidFile`` when the cached path is bogus."""
    with EmbeddedSubtitlesProvider() as provider:
        english_only = {Language.fromalpha2("en")}
        subtitle = provider.list_subtitles(video_single_language, english_only)[0]

        # Pre-seed the provider's path cache so that it hands out a dummy
        # file name for this stream.
        bogus_paths = {subtitle.stream.index: "dummy.srt"}
        provider._cached_paths[subtitle.container.path] = bogus_paths

        with pytest.raises(fese.InvalidFile):
            provider.download_subtitle(subtitle)

0 comments on commit ffca84a

Please sign in to comment.