Skip to content
This repository has been archived by the owner on Oct 16, 2022. It is now read-only.

Commit

Permalink
implement series search
Browse files Browse the repository at this point in the history
  • Loading branch information
nhanb committed Jun 9, 2015
1 parent 9083637 commit 92c909c
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 10 deletions.
17 changes: 15 additions & 2 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
Fundoshi - The universal manga data extractor
=============================================

Fundoshi is nifty library that lets you search for and extract manga series'
data from many online manga reader websites.
Fundoshi is a nifty Python library that lets you search for and extract manga
series' data from many online manga reader websites.

Parse series data
------------------
Expand Down Expand Up @@ -78,3 +78,16 @@ Output:
http://2.bp.blogspot.com/-55NkLOMmjxM/TlTzLWk5cVI/AAAAAAAAEDc/ees9kEQfo4s/014.jpg?imgmax=10000
http://2.bp.blogspot.com/-aOmUpf-fi9g/TlTzOfM6R8I/AAAAAAAAEEY/lwkERwJ8GVc/015.jpg?imgmax=10000
http://2.bp.blogspot.com/-OKSL6aBkwi0/TlTzP_RkLmI/AAAAAAAAEE4/tChEbaIr0Mc/016.jpg?imgmax=10000


Search series
-------------

.. doctest::

>>> from fundoshi import search_series
>>> results = search_series('sayonara football')
>>> results
<generator object ...>
>>> [series for series in results]
[{'name': 'Sayonara Football', 'site': 'kissmanga', 'url': 'http://kissmanga.com/Manga/Sayonara-Football'}]
2 changes: 1 addition & 1 deletion fundoshi/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .main import parse_chapter, parse_series
from .main import *
21 changes: 20 additions & 1 deletion fundoshi/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .sites import get_site
from .sites import get_site, available_sites
from .exceptions import UnsupportedSiteError


Expand All @@ -16,3 +16,22 @@ def parse_series(url):
raise UnsupportedSiteError()
resp = site.get_manga_seed_page(url)
return site.series_info(resp.text)


def search_series(name):
    """Search every supported site for series matching *name*.

    Lazily yields result dicts (at least ``name``, ``url`` and ``site``
    keys), exhausting one site's results before moving to the next.
    """
    for site in available_sites.values():
        # Delegate straight to the site's own generator/iterable.
        yield from site.search_series(name)


def search_series_from_sites(name, site_names):
    """Search *name* on the subset of sites named in *site_names*.

    Lazily yields result dicts in site order; raises KeyError if a
    name in *site_names* is not a supported site.
    """
    for site_name in site_names:
        site = available_sites[site_name]
        yield from site.search_series(name)


def search_series_from_site(name, site_name):
    """Search *name* on a single site and return the results as a list.

    Unlike the generator-based search functions, this eagerly
    materializes all results. Raises KeyError if *site_name* is not a
    supported site.
    """
    site = available_sites[site_name]
    # list() consumes the iterable directly; a pass-through
    # comprehension ([x for x in it]) is a redundant copy idiom.
    return list(site.search_series(name))
8 changes: 5 additions & 3 deletions fundoshi/sites/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from urllib.parse import urlparse
from .kissmanga import Kissmanga

available_sites = (
# All supported site instances. Private: external code should use
# the `available_sites` mapping below.
_sites = [
    Kissmanga(),
]

# Public registry of sites keyed by each site's `name` attribute
# (e.g. 'kissmanga'), so callers can look a site up by short name.
available_sites = {site.name: site for site in _sites}


# Factory function, return instance of suitable "site" class from url
def get_site(url):
    """Return the site instance whose netlocs contain *url*'s host.

    Returns None when no registered site recognizes the URL.
    """
    host = urlparse(url).netloc
    candidates = (
        site for site in available_sites.values() if host in site.netlocs
    )
    return next(candidates, None)
7 changes: 4 additions & 3 deletions fundoshi/sites/kissmanga.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
class Kissmanga(BaseSite):

netlocs = ['kissmanga.com']
name = 'kissmanga'

# Return a list of dictionaries that store at least name and url:
# [ { 'name': 'Naruto', 'url': 'http://...' }, {...}, ... ]
Expand All @@ -28,7 +29,7 @@ def search_series(self, keyword):
atags = soup.find_all('a')
return [objdict({'name': a.string.strip(),
'url': a['href'],
'site': 'kissmanga'}) for a in atags]
'site': self.name}) for a in atags]

# All kinds of data
# - name "Naruto"
Expand All @@ -48,7 +49,7 @@ def series_info(self, html):
description = self._description(soup)
authors = self._authors(soup)
return objdict({
'site': self.netlocs[0],
'site': self.name,
'chapters': chapters,
'thumb_url': thumb_url,
'tags': tags,
Expand Down Expand Up @@ -161,5 +162,5 @@ def search_by_author(self, author):
return [objdict({
'name': a.text.strip(),
'url': 'http://kissmanga.com' + a['href'],
'site': 'kissmanga',
'site': self.name,
}) for a in table.find_all('a') if len(a['href'].split('/')) == 3]

0 comments on commit 92c909c

Please sign in to comment.