From 8cbf5787aeee3e2cfbd5929802efde8bc52598c3 Mon Sep 17 00:00:00 2001
From: evilhero <909424+evilhero@users.noreply.github.com>
Date: Wed, 2 Sep 2020 00:47:46 -0400
Subject: [PATCH] (#432) Fixes search & post-processing related issues when a
 result would contain a matching title of another watchlisted series (i.e.
 wrong matches)

---
 mylar/PostProcessor.py | 38 +++++++++++++++++++++++++++++++++++---
 mylar/filechecker.py   |  8 +++++++-
 mylar/search.py        | 28 ++++++++++++++++++++++------
 3 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/mylar/PostProcessor.py b/mylar/PostProcessor.py
index 1c2a8f13..2e138bdd 100755
--- a/mylar/PostProcessor.py
+++ b/mylar/PostProcessor.py
@@ -964,6 +964,7 @@ def Process(self):
                         logger.error('%s No Story Arcs in Watchlist that contain that particular series - aborting Manual Post Processing. Maybe you should be running Import?' % module)
                         return
                     else:
+                        tmp_arclist = []
                         arcvals = []
                         for av in arc_series:
                             arcvals.append({"ComicName":       av['ComicName'],
@@ -1163,9 +1164,9 @@ def Process(self):
                                                 else:
                                                     logger.info('%s Found matching issue # %s for ComicID: %s / IssueID: %s' % (module, fcdigit, v[i]['WatchValues']['ComicID'], isc['IssueID']))
 
-                                            logger.fdebug('datematch: %s' % datematch)
-                                            logger.fdebug('temploc: %s' % helpers.issuedigits(temploc))
-                                            logger.fdebug('arcissue: %s' % helpers.issuedigits(v[i]['ArcValues']['IssueNumber']))
+                                            #logger.fdebug('datematch: %s' % datematch)
+                                            #logger.fdebug('temploc: %s' % helpers.issuedigits(temploc))
+                                            #logger.fdebug('arcissue: %s' % helpers.issuedigits(v[i]['ArcValues']['IssueNumber']))
                                             if datematch == "True": # and helpers.issuedigits(temploc) == helpers.issuedigits(v[i]['ArcValues']['IssueNumber']):
                                                 #reset datematch here so it doesn't carry the value down and avoid year checks
                                                 datematch = "False"
@@ -1258,6 +1259,10 @@ def Process(self):
                                                                                "Publisher":       arcpublisher,
                                                                                "ReadingOrder":    v[i]['ArcValues']['ReadingOrder'],
                                                                                "ComicName":       k})
+                                                        tmp_arclist.append({"ComicName": k,
+                                                                            "ComicID":   v[i]['WatchValues']['ComicID'],
+                                                                            "IssueID":   v[i]['ArcValues']['IssueID']})
+
                                                         logger.info('%s[SUCCESSFUL MATCH: %s-%s] Match verified for %s' % (module, k, v[i]['WatchValues']['ComicID'], arcmatch['comicfilename']))
                                                         self.matched = True
                                                         break
@@ -1265,6 +1270,33 @@ def Process(self):
                                                     logger.fdebug('%s[NON-MATCH: %s-%s] Incorrect series - not populating..continuing post-processing' % (module, k, v[i]['WatchValues']['ComicID']))
 
                             i+=1
+                        if len(tmp_arclist) > 1:
+                            logger.info('[STORY-ARC VERIFICATION] %s matches to storyarcs - probably due to invalid name matching above. Let\'s try to correct this.' % len(tmp_arclist))
+                            keep_match = []
+                            drop_match = []
+                            for x in tmp_arclist:
+                                xmld = filechecker.FileChecker()
+                                xmld1 = xmld.dynamic_replace(x['ComicName']) #helpers.conversion(cs['ComicName']))
+                                xseries = xmld1['mod_seriesname'].lower()
+                                xmld2 = xmld.dynamic_replace(arcmatch['series_name']) #helpers.conversion(watchmatch['series_name']))
+                                xfile = xmld2['mod_seriesname'].lower()
+                                if re.sub('\|', '', xseries) == re.sub('\|', '', xfile):
+                                    logger.fdebug('%s[DEFINITIVE-NAME MATCH] Definitive name match exactly to : %s [%s]' % (module, arcmatch['series_name'], x['ComicID']))
+                                    keep_match.append(x['IssueID'])
+                                    self.matched = True
+                                else:
+                                    logger.fdebug('INVALID MATCH DETECTED: %s' % x['ComicName'])
+                                    drop_match.append(x['IssueID'])
+
+                            tmp_list = []
+                            for xy in manual_arclist:
+                                if [True for dm in drop_match if xy['IssueID'] == dm]:
+                                    continue
+                                else:
+                                    tmp_list.append(xy)
+                            manual_arclist = tmp_list
+                            #logger.fdebug('new_manualarclist: %s' % (manual_arclist,))
+
                     if self.matched is False:
                         #one-off manual pp'd of torrents
                         if all(['0-Day Week' in self.nzb_name, mylar.CONFIG.PACK_0DAY_WATCHLIST_ONLY is True]):
diff --git a/mylar/filechecker.py b/mylar/filechecker.py
index f62738f2..fa29a45f 100755
--- a/mylar/filechecker.py
+++ b/mylar/filechecker.py
@@ -1276,6 +1276,7 @@ def parseit(self, path, filename, subpath=None):
         return self.matchIT(series_info)
 
     def matchIT(self, series_info):
+        qmatch_chk = None
         series_name = series_info['series_name']
         alt_series = series_info['alt_series']
         filename = series_info['comicfilename']
@@ -1339,7 +1340,12 @@ def matchIT(self, series_info):
         if nspace_altseriesname is not None:
             if re.sub('\|','', nspace_altseriesname.lower()).strip() == re.sub('\|', '', nspace_watchcomic.lower()).strip():
                 seriesalt = True
+                qmatch_chk = 'alt_match'
+
         if any([seriesalt is True, re.sub('\|','', nspace_seriesname.lower()).strip() == re.sub('\|', '', nspace_watchcomic.lower()).strip(), re.sub('\|','', nspace_seriesname_decoded.lower()).strip() == re.sub('\|', '', nspace_watchname_decoded.lower()).strip()]) or any(re.sub('[\|\s]','', x.lower()).strip() == re.sub('[\|\s]','', nspace_seriesname.lower()).strip() for x in self.AS_Alt):
+            if qmatch_chk is None:
+                qmatch_chk = 'match'
+        if qmatch_chk is not None:
             #logger.fdebug('[MATCH: ' + series_info['series_name'] + '] ' + filename)
             enable_annual = False
             annual_comicid = None
@@ -1416,7 +1422,7 @@ def matchIT(self, series_info):
                    elif 'special' in nspace_watchcomic.lower():
                        justthedigits = 'Special %s' % justthedigits
 
-            return {'process_status': 'match',
+            return {'process_status':  qmatch_chk,
                     'sub':             series_info['sub'],
                     'volume':          series_info['series_volume'],
                     'match_type':      None,  #match_type - will eventually pass if it wasa folder vs. filename match,
diff --git a/mylar/search.py b/mylar/search.py
index 44d55cc2..0cf0638f 100755
--- a/mylar/search.py
+++ b/mylar/search.py
@@ -573,6 +573,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
     foundc['status'] = False
     done = False
     seperatealpha = "no"
+    hold_the_matches = []
     #---issue problem
     # if issue is '011' instead of '11' in nzb search results, will not have same
     # results. '011' will return different than '11', as will '009' and '09'.
@@ -838,6 +839,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
         pack_warning = False
         if not bb == "no results":
             for entry in bb['entries']:
+                alt_match = False
                 #logger.fdebug('entry: %s' % entry)  #<--- uncomment this to see what the search result(s) are
                 #brief match here against 32p since it returns the direct issue number
                 if nzbprov == '32P' and entry['title'][:17] == '0-Day Comics Pack':
@@ -1151,6 +1153,12 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                         if filecomic['process_status'] == 'fail':
                             logger.fdebug('%s was not a match to %s (%s)' % (cleantitle, ComicName, SeriesYear))
                             continue
+                        elif filecomic['process_status'] == 'alt_match':
+                            #if it's an alternate series match, we'll retain each value until the search has completely run, compiling matches.
+                            #if at any point it's a standard match (ie. non-alternate series) that will be accepted as the one match and ignore the alts
+                            #once all search options have been exhausted and no matches aside from alternate series then we go get the best result from that list
+                            logger.fdebug('%s was a match due to alternate matching.  Continuing to search, but retaining this result just in case.' % ComicTitle)
+                            alt_match = True
                 elif booktype != parsed_comic['booktype'] and ignore_booktype is False:
                     logger.fdebug('Booktypes do not match. Looking for %s, this is a %s. Ignoring this result.' % (booktype, parsed_comic['booktype']))
                     continue
@@ -1359,7 +1367,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                         nzbid = entry['id']
                     else:
                         nzbid = generate_id(nzbprov, entry['link'])
-                    if manual is not True:
+                    if all([manual is not True, alt_match is False]):
                         downloadit = True
                     else:
                         for x in mylar.COMICINFO:
@@ -1378,7 +1386,8 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                             kind = 'torrent'
                             if torznab_host is not None:
                                 tprov = torznab_host[0]
-                        mylar.COMICINFO.append({"ComicName":       ComicName,
+
+                        search_values = {"ComicName":       ComicName,
                                           "ComicID":         ComicID,
                                           "IssueID":         IssueID,
                                           "ComicVolume":     ComicVersion,
@@ -1401,8 +1410,11 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                                           "SARC":            SARC,
                                           "IssueArcID":      IssueArcID,
                                           "newznab":         newznab_host,
-                                          "torznab":         torznab_host})
+                                          "torznab":         torznab_host}
+
+                        mylar.COMICINFO.append(search_values)
 
+                        hold_the_matches.append(search_values)
 
                 else:
                     if filecomic['process_status'] == 'match':
@@ -1442,7 +1454,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                                 entry['title'] = entry['filename']
                             else:
                                 nzbid = generate_id(nzbprov, entry['link'])
-                            if manual is not True:
+                            if all([manual is not True, alt_match is False]):
                                 downloadit = True
                             else:
                                 for x in mylar.COMICINFO:
@@ -1475,7 +1487,7 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                                     if torznab_host is not None:
                                         tprov = torznab_host[0]
 
-                                mylar.COMICINFO.append({"ComicName":      ComicName,
+                                search_values = {"ComicName":      ComicName,
                                                   "ComicID":        ComicID,
                                                   "IssueID":        IssueID,
                                                   "ComicVolume":    ComicVersion,
@@ -1498,7 +1510,11 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
                                                   "SARC":           SARC,
                                                   "IssueArcID":     IssueArcID,
                                                   "newznab":        newznab_host,
-                                                  "torznab":        torznab_host})
+                                                  "torznab":        torznab_host}
+
+                                mylar.COMICINFO.append(search_values)
+
+                                hold_the_matches.append(search_values)
                         else:
                             log2file = log2file + "issues don't match.." + "\n"
                             downloadit = False