Skip to content
This repository has been archived by the owner on Oct 16, 2022. It is now read-only.

Commit

Permalink
implement series search
Browse files Browse the repository at this point in the history
  • Loading branch information
nhanb committed Jun 9, 2015
1 parent 9083637 commit 92c909c
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 10 deletions.
17 changes: 15 additions & 2 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
Fundoshi - The universal manga data extractor
=============================================

Fundoshi is nifty library that lets you search for and extract manga series'
data from many online manga reader websites.
Fundoshi is a nifty Python library that lets you search for and extract manga
series' data from many online manga reader websites.

Parse series data
------------------
Expand Down Expand Up @@ -78,3 +78,16 @@ Output:
http://2.bp.blogspot.com/-55NkLOMmjxM/TlTzLWk5cVI/AAAAAAAAEDc/ees9kEQfo4s/014.jpg?imgmax=10000
http://2.bp.blogspot.com/-aOmUpf-fi9g/TlTzOfM6R8I/AAAAAAAAEEY/lwkERwJ8GVc/015.jpg?imgmax=10000
http://2.bp.blogspot.com/-OKSL6aBkwi0/TlTzP_RkLmI/AAAAAAAAEE4/tChEbaIr0Mc/016.jpg?imgmax=10000


Search series
-------------

.. doctest::

>>> from fundoshi import search_series
>>> results = search_series('sayonara football')
>>> results
<generator object ...>
>>> [series for series in results]
[{'name': 'Sayonara Football', 'site': 'kissmanga', 'url': 'http://kissmanga.com/Manga/Sayonara-Football'}]
2 changes: 1 addition & 1 deletion fundoshi/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .main import parse_chapter, parse_series
from .main import *
21 changes: 20 additions & 1 deletion fundoshi/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .sites import get_site
from .sites import get_site, available_sites
from .exceptions import UnsupportedSiteError


Expand All @@ -16,3 +16,22 @@ def parse_series(url):
raise UnsupportedSiteError()
resp = site.get_manga_seed_page(url)
return site.series_info(resp.text)


def search_series(name):
    """Search every supported site for series matching *name*.

    Lazily yields result dicts (at least ``name``, ``url`` and ``site``
    keys), exhausting one site's results before moving to the next.
    """
    for site in available_sites.values():
        # Delegate straight to the site's own generator/iterable.
        yield from site.search_series(name)


def search_series_from_sites(name, site_names):
    """Search *name* on the subset of sites named in *site_names*.

    Lazily yields result dicts in site order; raises KeyError if a
    name in *site_names* is not a supported site.
    """
    for site_name in site_names:
        site = available_sites[site_name]
        yield from site.search_series(name)


def search_series_from_site(name, site_name):
    """Search *name* on a single site and return the results as a list.

    Unlike the generator-based search functions, this eagerly
    materializes all results. Raises KeyError if *site_name* is not a
    supported site.
    """
    site = available_sites[site_name]
    # list() consumes the iterable directly; a pass-through
    # comprehension ([x for x in it]) is a redundant copy idiom.
    return list(site.search_series(name))
8 changes: 5 additions & 3 deletions fundoshi/sites/__init__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from urllib.parse import urlparse
from .kissmanga import Kissmanga

available_sites = (
# All supported site instances. Private: external code should use
# the `available_sites` mapping below.
_sites = [
    Kissmanga(),
]

# Public registry of sites keyed by each site's `name` attribute
# (e.g. 'kissmanga'), so callers can look a site up by short name.
available_sites = {site.name: site for site in _sites}


# Factory function, return instance of suitable "site" class from url
def get_site(url):
    """Return the site instance whose netlocs contain *url*'s host.

    Returns None when no registered site recognizes the URL.
    """
    host = urlparse(url).netloc
    candidates = (
        site for site in available_sites.values() if host in site.netlocs
    )
    return next(candidates, None)
7 changes: 4 additions & 3 deletions fundoshi/sites/kissmanga.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
class Kissmanga(BaseSite):

netlocs = ['kissmanga.com']
name = 'kissmanga'

# Return a list of dictionaries that store at least name and url:
# [ { 'name': 'Naruto', 'url': 'http://...' }, {...}, ... ]
Expand All @@ -28,7 +29,7 @@ def search_series(self, keyword):
atags = soup.find_all('a')
return [objdict({'name': a.string.strip(),
'url': a['href'],
'site': 'kissmanga'}) for a in atags]
'site': self.name}) for a in atags]

# All kinds of data
# - name "Naruto"
Expand All @@ -48,7 +49,7 @@ def series_info(self, html):
description = self._description(soup)
authors = self._authors(soup)
return objdict({
'site': self.netlocs[0],
'site': self.name,
'chapters': chapters,
'thumb_url': thumb_url,
'tags': tags,
Expand Down Expand Up @@ -161,5 +162,5 @@ def search_by_author(self, author):
return [objdict({
'name': a.text.strip(),
'url': 'http://kissmanga.com' + a['href'],
'site': 'kissmanga',
'site': self.name,
}) for a in table.find_all('a') if len(a['href'].split('/')) == 3]

0 comments on commit 92c909c

Please sign in to comment.