Added animetosho provider
anderson-oki committed Apr 14, 2024
1 parent 3c30492 commit 77ebd03
Showing 16 changed files with 1,465 additions and 22 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -48,6 +48,7 @@ If you need something that is not already part of Bazarr, feel free to create a
## Supported subtitles providers:

- Addic7ed
- Animetosho (requires AniDb HTTP API client described [here](https://wiki.anidb.net/HTTP_API_Definition))
- Assrt
- BetaSeries
- BSplayer
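The AniDB HTTP API client that the README entry above refers to is what the new refiner in this commit uses to resolve episode ids. As a rough, hedged illustration (the client name myclientname and the anime id are placeholders, not values from this commit), the request it issues looks roughly like this:

import requests

# Hypothetical client registration; a real name/version must be registered with AniDB
# (see the wiki link above) and configured in Bazarr before the refiner can work.
ANIDB_CLIENT = 'myclientname'
ANIDB_CLIENT_VER = 1

r = requests.get(
    'http://api.anidb.net:9001/httpapi',
    params={
        'request': 'anime',
        'client': ANIDB_CLIENT,
        'clientver': ANIDB_CLIENT_VER,
        'protover': 1,
        'aid': 1,  # example AniDB anime id
    },
    timeout=10,
)
r.raise_for_status()
print(r.text[:200])  # XML describing the anime, including its episode list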
11 changes: 11 additions & 0 deletions bazarr/app/config.py
@@ -109,6 +109,7 @@ def check_parser_binary(value):
Validator('general.adaptive_searching_delta', must_exist=True, default='1w', is_type_of=str,
is_in=['3d', '1w', '2w', '3w', '4w']),
Validator('general.enabled_providers', must_exist=True, default=[], is_type_of=list),
Validator('general.enabled_integrations', must_exist=True, default=[], is_type_of=list),
Validator('general.multithreading', must_exist=True, default=True, is_type_of=bool),
Validator('general.chmod_enabled', must_exist=True, default=False, is_type_of=bool),
Validator('general.chmod', must_exist=True, default='0640', is_type_of=str),
@@ -234,6 +235,11 @@ def check_parser_binary(value):
Validator('addic7ed.user_agent', must_exist=True, default='', is_type_of=str),
Validator('addic7ed.vip', must_exist=True, default=False, is_type_of=bool),

# animetosho section
Validator('animetosho.search_threshold', must_exist=True, default=6, is_type_of=int, gte=1, lte=15),
Validator('animetosho.anidb_api_client', must_exist=True, default='', is_type_of=str, cast=str),
Validator('animetosho.anidb_api_client_ver', must_exist=True, default=1, is_type_of=int, gte=1, lte=9),

# avistaz section
Validator('avistaz.cookies', must_exist=True, default='', is_type_of=str),
Validator('avistaz.user_agent', must_exist=True, default='', is_type_of=str),
@@ -369,6 +375,10 @@ def check_parser_binary(value):
Validator('postgresql.database', must_exist=True, default='', is_type_of=str),
Validator('postgresql.username', must_exist=True, default='', is_type_of=str, cast=str),
Validator('postgresql.password', must_exist=True, default='', is_type_of=str, cast=str),

# anidb section
Validator('anidb.api_client', must_exist=True, default='', is_type_of=str),
Validator('anidb.api_client_ver', must_exist=True, default=1, is_type_of=int),
]


@@ -442,6 +452,7 @@ def write_config():
'subzero_mods',
'excluded_series_types',
'enabled_providers',
'enabled_integrations',
'path_mappings',
'path_mappings_movie',
'language_equals',
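For reference, the new settings validated above are consumed elsewhere in this commit roughly as follows (a read-only sketch that assumes Bazarr's runtime, where app.config is importable):

from app.config import settings

client_key = settings.anidb.api_client            # default '' until a client is registered
client_ver = settings.anidb.api_client_ver        # default 1
threshold = settings.animetosho.search_threshold  # default 6, validated to the 1..15 range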
3 changes: 3 additions & 0 deletions bazarr/app/get_providers.py
@@ -324,6 +324,9 @@ def get_providers_auth():
'timeout': settings.whisperai.timeout,
'ffmpeg_path': _FFMPEG_BINARY,
'loglevel': settings.whisperai.loglevel,
},
"animetosho": {
'search_threshold': settings.animetosho.search_threshold,
}
}

2 changes: 2 additions & 0 deletions bazarr/subtitles/refiners/__init__.py
@@ -3,9 +3,11 @@
from .ffprobe import refine_from_ffprobe
from .database import refine_from_db
from .arr_history import refine_from_arr_history
from .anidb import refine_from_anidb

registered = {
"database": refine_from_db,
"ffprobe": refine_from_ffprobe,
"arr_history": refine_from_arr_history,
"anidb": refine_from_anidb,
}
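The registered mapping ties a refiner name, as it appears in configuration, to a callable with the (path, video) signature. A minimal sketch of how such a registry is typically driven; apply_refiners is illustrative only, not part of this commit, and the import path is assumed from the repository layout:

from subtitles.refiners import registered


def apply_refiners(names, path, video):
    # Run each requested refiner; every refiner mutates the video in place
    # (the new 'anidb' entry fills series_anidb_id / series_anidb_episode_id).
    for name in names:
        refiner = registered.get(name)
        if refiner is not None:
            refiner(path, video)
    return video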
140 changes: 140 additions & 0 deletions bazarr/subtitles/refiners/anidb.py
@@ -0,0 +1,140 @@
# coding=utf-8
# fmt: off

import logging
import requests
from collections import namedtuple
from datetime import timedelta
from requests.exceptions import HTTPError

from app.config import settings
from subliminal import Episode, region

try:
from lxml import etree
except ImportError:
try:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree

refined_providers = {'animetosho'}

api_url = 'http://api.anidb.net:9001/httpapi'


class AniDBClient(object):
def __init__(self, api_client_key=None, api_client_ver=1, session=None):
self.session = session or requests.Session()
self.api_client_key = api_client_key
self.api_client_ver = api_client_ver

AnimeInfo = namedtuple('AnimeInfo', ['anime', 'episode_offset'])

@region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
def get_series_mappings(self):
r = self.session.get(
'https://raw.githubusercontent.com/Anime-Lists/anime-lists/master/anime-list.xml',
timeout=10
)

r.raise_for_status()

return r.content

@region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
def get_series_id(self, mappings, tvdb_series_season, tvdb_series_id, episode):
# Enrich the collection of anime with the episode offset
animes = [
self.AnimeInfo(anime, int(anime.attrib.get('episodeoffset', 0)))
for anime in mappings.findall(
f".//anime[@tvdbid='{tvdb_series_id}'][@defaulttvdbseason='{tvdb_series_season}']"
)
]

if not animes:
            return None, None

# Sort the anime by offset in ascending order
animes.sort(key=lambda a: a.episode_offset)

        # Unlike TVDB, AniDB assigns a separate id to each part of a season,
        # so pick the entry whose episode offset the requested episode exceeds.
        anidb_id = None
        offset = 0

        for anime, episode_offset in animes:
            if episode > episode_offset:
                anidb_id = int(anime.attrib.get('anidbid'))
                offset = episode_offset

        return anidb_id, episode - offset

@region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
def get_series_episodes_ids(self, tvdb_series_id, season, episode):
mappings = etree.fromstring(self.get_series_mappings())

series_id, episode_no = self.get_series_id(mappings, season, tvdb_series_id, episode)

if not series_id:
return None, None

episodes = etree.fromstring(self.get_episodes(series_id))

return series_id, int(episodes.find(f".//episode[epno='{episode_no}']").attrib.get('id'))

@region.cache_on_arguments(expiration_time=timedelta(days=1).total_seconds())
def get_episodes(self, series_id):
r = self.session.get(
api_url,
params={
'request': 'anime',
'client': self.api_client_key,
'clientver': self.api_client_ver,
'protover': 1,
'aid': series_id
},
timeout=10)
r.raise_for_status()

xml_root = etree.fromstring(r.content)

response_code = xml_root.attrib.get('code')
if response_code == '500':
raise HTTPError('AniDB API Abuse detected. Banned status.')
elif response_code == '302':
            raise HTTPError('AniDB API Client error. Client is disabled or does not exist.')

episode_elements = xml_root.find('episodes')

        if episode_elements is None or len(episode_elements) == 0:
            raise ValueError('No episodes found in AniDB response')

return etree.tostring(episode_elements, encoding='utf8', method='xml')


def refine_from_anidb(path, video):
if refined_providers.intersection(settings.general.enabled_providers) and video.series_anidb_id is None:
refine_anidb_ids(video)


def refine_anidb_ids(video):
    if not isinstance(video, Episode) or not video.series_tvdb_id:
logging.debug(f'Video is not an Anime TV series, skipping refinement for {video}')

return video

anidb_client = AniDBClient(settings.anidb.api_client, settings.anidb.api_client_ver)

season = video.season if video.season else 0

anidb_series_id, anidb_episode_id = anidb_client.get_series_episodes_ids(video.series_tvdb_id, season, video.episode)

if not anidb_episode_id:
logging.error(f'Could not find anime series {video.series}')

return video

video.series_anidb_id = anidb_series_id
video.series_anidb_episode_id = anidb_episode_id
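Used directly, the client above maps a TVDB series/season/episode to its AniDB counterparts, adjusting the episode number with the episodeoffset taken from the Anime-Lists mapping file. A hedged usage sketch; the ids are placeholders, and it needs network access, a registered AniDB client, and a configured subliminal cache region, which Bazarr normally provides:

from subliminal import region
from subtitles.refiners.anidb import AniDBClient

region.configure('dogpile.cache.memory')  # outside Bazarr the cache region must be configured first

client = AniDBClient(api_client_key='myclientname', api_client_ver=1)

# Resolve TVDB series 123456, season 1, episode 14 to AniDB series/episode ids.
series_id, episode_id = client.get_series_episodes_ids(123456, 1, 14)
print(series_id, episode_id)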
6 changes: 5 additions & 1 deletion custom_libs/subliminal/video.py
@@ -129,7 +129,8 @@ class Episode(Video):
"""
def __init__(self, name, series, season, episode, title=None, year=None, original_series=True, tvdb_id=None,
series_tvdb_id=None, series_imdb_id=None, alternative_series=None, **kwargs):
series_tvdb_id=None, series_imdb_id=None, alternative_series=None, series_anidb_id=None,
series_anidb_episode_id=None, **kwargs):
super(Episode, self).__init__(name, **kwargs)

#: Series of the episode
@@ -162,6 +163,9 @@ def __init__(self, name, series, season, episode, title=None, year=None, origina
#: Alternative names of the series
self.alternative_series = alternative_series or []

        #: AniDB id of the episode
        self.series_anidb_episode_id = series_anidb_episode_id

        #: AniDB id of the series
        self.series_anidb_id = series_anidb_id

@classmethod
def fromguess(cls, name, guess):
if guess['type'] != 'episode':
1 change: 0 additions & 1 deletion custom_libs/subliminal_patch/extensions.py
@@ -64,4 +64,3 @@ def names(self):
subliminal.refiner_manager.register('filebot = subliminal_patch.refiners.filebot:refine')
subliminal.refiner_manager.register('file_info_file = subliminal_patch.refiners.file_info_file:refine')
subliminal.refiner_manager.register('symlinks = subliminal_patch.refiners.symlinks:refine')

164 changes: 164 additions & 0 deletions custom_libs/subliminal_patch/providers/animetosho.py
@@ -0,0 +1,164 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import lzma

from guessit import guessit
from requests import Session
from subzero.language import Language


from subliminal.exceptions import ConfigurationError, ProviderError
from subliminal_patch.providers import Provider
from subliminal_patch.providers.mixins import ProviderSubtitleArchiveMixin
from subliminal_patch.subtitle import Subtitle, guess_matches
from subliminal.video import Episode

try:
from lxml import etree
except ImportError:
try:
import xml.etree.cElementTree as etree
except ImportError:
import xml.etree.ElementTree as etree

logger = logging.getLogger(__name__)

# TODO: Test and Support Other Languages
supported_languages = [
"eng", # English
"ita", # Italian
]


class AnimeToshoSubtitle(Subtitle):
"""AnimeTosho.org Subtitle."""
provider_name = 'animetosho'

def __init__(self, language, download_link, meta):
super(AnimeToshoSubtitle, self).__init__(language, page_link=download_link)
self.meta = meta
self.download_link = download_link

@property
def id(self):
return self.download_link

def get_matches(self, video):
matches = set()
matches |= guess_matches(video, guessit(self.meta['filename']))

        # These fields come straight from the AnimeTosho API lookup; they always match,
        # otherwise the entry would have been filtered out in list_subtitles before reaching this point.
matches.update(['title', 'series', 'tvdb_id', 'season', 'episode'])

return matches


class AnimeToshoProvider(Provider, ProviderSubtitleArchiveMixin):
"""AnimeTosho.org Provider."""
subtitle_class = AnimeToshoSubtitle
languages = {Language('por', 'BR')} | {Language(sl) for sl in supported_languages}
video_types = Episode

def __init__(self, search_threshold=None):
self.session = None

        if not search_threshold:
            raise ConfigurationError("Search threshold must be specified!")

self.search_threshold = search_threshold

def initialize(self):
self.session = Session()

def terminate(self):
self.session.close()

def list_subtitles(self, video, languages):
if not video.series_anidb_episode_id:
            raise ProviderError("Video does not have an AniDB episode ID!")

return [s for s in self._get_series(video.series_anidb_episode_id) if s.language in languages]

def download_subtitle(self, subtitle):
logger.info('Downloading subtitle %r', subtitle)

r = self.session.get(subtitle.page_link, timeout=10)
r.raise_for_status()

        # Check that the response content starts with the xz magic number (attachments are served as .xz archives)
if not self._is_xz_file(r.content):
raise ProviderError('Unidentified archive type')

subtitle.content = lzma.decompress(r.content)

return subtitle

@staticmethod
def _is_xz_file(content):
return content.startswith(b'\xFD\x37\x7A\x58\x5A\x00')

def _get_series(self, episode_id):
storage_download_url = 'https://animetosho.org/storage/attach/'
feed_api_url = 'https://feed.animetosho.org/json'

subtitles = []

entries = self._get_series_entries(episode_id)

for entry in entries:
r = self.session.get(
feed_api_url,
params={
'show': 'torrent',
'id': entry['id'],
},
timeout=10
)
r.raise_for_status()

for file in r.json()['files']:
if 'attachments' not in file:
continue

subtitle_files = list(filter(lambda f: f['type'] == 'subtitle', file['attachments']))

for subtitle_file in subtitle_files:
hex_id = format(subtitle_file['id'], '08x')

subtitle = self.subtitle_class(
Language.fromalpha3b(subtitle_file['info']['lang']),
storage_download_url + '{}/{}.xz'.format(hex_id, subtitle_file['id']),
meta=file,
)

logger.debug('Found subtitle %r', subtitle)

subtitles.append(subtitle)

return subtitles

def _get_series_entries(self, episode_id):
api_url = 'https://feed.animetosho.org/json'

r = self.session.get(
api_url,
params={
'eid': episode_id,
},
timeout=10
)

r.raise_for_status()

j = r.json()

        # Ignore records that are not yet ready or have been abandoned by AnimeTosho.
        entries = [entry for entry in j if entry['status'] == 'complete']

        # Keep only the most recently added entries, up to the user-configured search threshold.
        entries.sort(key=lambda entry: entry['timestamp'], reverse=True)

        return entries[:self.search_threshold]
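Taken together with the AniDB refiner, the provider can also be exercised on its own. A hedged sketch; the ids below are placeholders, and inside Bazarr the series_anidb_episode_id is filled by the anidb refiner before the provider is queried:

from subzero.language import Language
from subliminal.video import Episode
from subliminal_patch.providers.animetosho import AnimeToshoProvider

video = Episode('Show.S01E01.mkv', 'Show', 1, 1,
                series_tvdb_id=123456, series_anidb_episode_id=250000)

provider = AnimeToshoProvider(search_threshold=6)
provider.initialize()
try:
    subtitles = provider.list_subtitles(video, {Language('eng')})
    if subtitles:
        provider.download_subtitle(subtitles[0])  # fetches and decompresses the .xz attachment
finally:
    provider.terminate()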
