Fixed Titulky provider and performed some code maintenance

morpheus65535 · Apr 26, 2022 · e6551dc · e6551dc
1 parent a4d9eb6
commit e6551dc
Showing 1 changed file with 49 additions and 18 deletions.
diff --git a/libs/subliminal_patch/providers/titulky.py b/libs/subliminal_patch/providers/titulky.py
@@ -35,8 +35,6 @@
 
 # Check if any element from source array is contained partially or exactly in any element from target array
 # Returns on the first match
-
-
 def _contains_element(_from=None, _in=None, exactly=False):
     source_array = _from
     target_array = _in
@@ -119,18 +117,17 @@ def get_fps(self):
     def get_matches(self, video):
         matches = set()
         _type = 'movie' if isinstance(video, Movie) else 'episode'
-
+        # Subtitle's names (could be series/episode/movie name) present in the subtitle details page
+        # Consists of the main name and alternative names, stripped of the S00E00 substring
         sub_names = self._remove_season_episode_string(self.names)
 
         if _type == 'episode':
             # EPISODE
 
             # match imdb_id of a series
             if video.series_imdb_id and video.series_imdb_id == self.imdb_id:
-                # NOTE: Is matches.add('series_imdb_id') doing anything?
-                #       For now, let's match with the 'series' to not reject
-                #       subs with no name but a correct imdb id.
                 matches.add('series')
+                matches.add('series_imdb_id')
 
             # match season/episode
             if self.season and self.season == video.season:
@@ -142,7 +139,7 @@ def get_matches(self, video):
             if len(sub_names) > 0:
                 series_names = [video.series] + video.alternative_series
                 logger.debug(
-                    f"Titulky.com: Finding exact match between subtitle names {sub_names} and series names {series_names}"
+                    f"Titulky.com: Finding exact match between subtitle's names {sub_names} and series names {series_names}"
                 )
                 if _contains_element(_from=series_names,
                                      _in=sub_names,
@@ -152,7 +149,7 @@ def get_matches(self, video):
                 # match episode title
                 episode_titles = [video.title]
                 logger.debug(
-                    f"Titulky.com: Finding exact match between subtitle names {sub_names} and episode titles {episode_titles}"
+                    f"Titulky.com: Finding exact match between subtitle's names {sub_names} and episode titles {episode_titles}"
                 )
                 if _contains_element(_from=episode_titles,
                                      _in=sub_names,
@@ -169,7 +166,7 @@ def get_matches(self, video):
             # match movie title
             video_titles = [video.title] + video.alternative_titles
             logger.debug(
-                f"Titulky.com: Finding exact match between subtitle names {sub_names} and video titles {video_titles}"
+                f"Titulky.com: Finding exact match between subtitle's names {sub_names} and video titles {video_titles}"
             )
             if _contains_element(_from=video_titles,
                                  _in=sub_names,
@@ -414,8 +411,7 @@ def outer_func(*args, **kwargs):
 
         return outer_func
 
-    # TODO: Parse name and alternative names of a series / movie
-    # Parse details of an individual subtitle: imdb_id, release, language, uploader, fps and year
+    # Parse details of an individual subtitle: imdb_id, series/movie names, release, language, uploader, fps and year
     @capable_of_multithreading
     def parse_details(self, partial_info, ref_url=None):
         html_src = self.fetch_page(partial_info['details_link'], ref=ref_url)
@@ -439,6 +435,23 @@ def parse_details(self, partial_info, ref_url=None):
         if not imdb_id:
             logger.debug("Titulky.com: No IMDB ID supplied on details page.")
 
+        # SERIES/MOVIE NAMES
+        names = []
+        try:
+            main_name = details_container.find('h1', id='titulky').contents[0].strip()
+            alt_name = details_container.find('h2').contents[1].strip()
+            if main_name:
+                names.append(main_name)
+            else:
+                logger.debug("Titulky.com: Could not find main series/movie name on details page.")
+            if alt_name:
+                names.append(alt_name)
+        except IndexError:
+            raise ParseResponseError("Index out of range! This should not ever happen, but it just did. Oops.")
+
+        if len(names) == 0:
+            logger.debug("Titulky.com: No names found on details page.")
+
         # RELEASE
         release = None
         release_tag = details_container.find('div', class_='releas')
@@ -529,6 +542,7 @@ def parse_details(self, partial_info, ref_url=None):
 
         info = {
             'releases': [release],
+            'names': names,
             'language': language,
             'uploader': uploader,
             'fps': fps,
@@ -696,6 +710,7 @@ def execute_foreach(self, array, func, args=[], kwargs={}):
 
     # Special search only for episodes. Complements the query method of searching.
     def browse_episodes(self,
+                        language,
                         imdb_id=None,
                         season=None,
                         episode=None):
@@ -747,26 +762,41 @@ def browse_episodes(self,
                     logger.debug("Titulky.com: No previous episode number!")
                     raise ProviderError("Previous episode number missing, can't parse.")
 
-                # If this row contains the first subtitles to an episode number,
-                # add an empty array into the episodes dict at its place.
-                if not last_ep_num in episodes_dict:
-                    episodes_dict[last_ep_num] = []
-
                 details_link = f"{self.server_url}{details_anchor.get('href')[1:]}"
                 id_match = re.findall(r'id=(\d+)', details_link)
                 sub_id = id_match[0] if len(id_match) > 0 else None
                 download_link = f"{self.download_url}{sub_id}"
                 # Approved subtitles have a pbl1 class for their row, others have a pbl0 class
                 approved = True if 'pbl1' in row.get('class') else False
 
+                # Parse language to filter out subtitles that are not in the desired language
+                sub_language = None
+                czech_flag = row.select('img[src*=\'flag-CZ\']')
+                slovak_flag = row.select('img[src*=\'flag-SK\']')
+
+                if czech_flag and not slovak_flag:
+                    sub_language = Language('ces')
+                elif slovak_flag and not czech_flag:
+                    sub_language = Language('slk')
+                else:
+                    logger.debug("Titulky.com: Unknown language while parsing subtitles!")
+
+                # If the language is not the desired one, skip this row
+                if sub_language and sub_language != language:
+                    continue
+
                 result = {
-                    'names': [],
                     'id': sub_id,
                     'approved': approved,
                     'details_link': details_link,
                     'download_link': download_link
                 }
 
+                # If this row contains the first subtitles to an episode number,
+                # add an empty array into the episodes dict at its place.
+                if not last_ep_num in episodes_dict:
+                    episodes_dict[last_ep_num] = []
+
                 episodes_dict[last_ep_num].append(result)
 
         # Rows parsed into episodes_dict, now lets read what we got.
@@ -932,7 +962,8 @@ def list_subtitles(self, video, languages):
                 # (0)
                 if video.series_imdb_id:
                     logger.info("Titulky.com: Finding subtitles by browsing TV Series page (0)")
-                    partial_subs = self.browse_episodes(imdb_id=video.series_imdb_id,
+                    partial_subs = self.browse_episodes(language,
+                                                        imdb_id=video.series_imdb_id,
                                                         season=video.season,
                                                         episode=video.episode)
                     if (len(partial_subs) > 0):