Skip to content

Commit

Permalink
Update Google Images cover download plugin for website changes. Fixes…
Browse files Browse the repository at this point in the history
… #2069553 [Version 7.12 no longer downloading cover images from all sources](https://bugs.launchpad.net/calibre/+bug/2069553)
  • Loading branch information
kovidgoyal committed Jun 17, 2024
1 parent 0d611c6 commit 43121af
Showing 1 changed file with 30 additions and 20 deletions.
50 changes: 30 additions & 20 deletions src/calibre/ebooks/metadata/sources/google_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,26 @@ def imgurl_from_id(raw, tbnid):
return q


def parse_google_markup(raw):
    """Return the distinct image URLs found in a Google Images result page.

    ``raw`` is the page markup as text. Every ``div`` carrying a thumbnail
    id attribute is looked up via ``imgurl_from_id``; ids whose lookup
    raises or yields nothing are skipped. The result is a list of URLs in
    first-seen order with duplicates removed.
    """
    tree = parse_html(raw)
    ids = tree.xpath('//div/@data-tbnid')
    if not ids:
        # newer markup pages use data-docid not data-tbnid
        ids = tree.xpath('//div/@data-docid')
    # An OrderedDict is used as an ordered set: keys are the URLs.
    seen = OrderedDict()
    for ident in ids:
        try:
            url = imgurl_from_id(raw, ident)
        except Exception:
            # Best effort: one bad id should not abort the whole page.
            continue
        if not url:
            continue
        seen[url] = True
    return list(seen)



class GoogleImages(Source):

name = 'Google Images'
version = (1, 0, 5)
version = (1, 0, 6)
minimum_calibre_version = (2, 80, 0)
description = _('Downloads covers from a Google Image search. Useful to find larger/alternate covers.')
capabilities = frozenset(['cover'])
Expand Down Expand Up @@ -88,8 +104,6 @@ def get_image_urls(self, title, author, log, abort, timeout):
from urllib.parse import urlencode
except ImportError:
from urllib import urlencode
from collections import OrderedDict
ans = OrderedDict()
br = self.browser
q = urlencode({'as_q': ('%s %s'%(title, author)).encode('utf-8')})
if isinstance(q, bytes):
Expand All @@ -116,21 +130,17 @@ def get_image_urls(self, title, author, log, abort, timeout):
raw = clean_ascii_chars(br.open(url).read().decode('utf-8'))
# with open('/t/raw.html', 'w') as f:
# f.write(raw)
root = parse_html(raw)
results = root.xpath('//div/@data-tbnid') # could also use data-id
# from calibre.utils.ipython import ipython
# ipython({'root': root, 'raw': raw, 'url': url, 'results': results})
for tbnid in results:
try:
imgurl = imgurl_from_id(raw, tbnid)
except Exception:
continue
if imgurl:
ans[imgurl] = True
return list(ans)


def test():
return parse_google_markup(raw)


def test_raw():
    """Print the image URLs parsed from a saved results page.

    The file to read is named by the last command-line argument
    (``sys.argv[-1]``). Its contents are passed to ``parse_google_markup``
    and every item returned is printed on its own line.
    """
    import sys
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(sys.argv[-1]) as f:
        raw = f.read()
    for x in parse_google_markup(raw):
        print(x)


def test(title='Star Trek: Section 31: Control', authors=('David Mack',)):
try:
from queue import Queue
except ImportError:
Expand All @@ -141,9 +151,9 @@ def test():
p = GoogleImages(None)
p.log = default_log
rq = Queue()
p.download_cover(default_log, rq, Event(), title='The Heroes',
authors=('Joe Abercrombie',))
p.download_cover(default_log, rq, Event(), title=title, authors=authors)
print('Downloaded', rq.qsize(), 'covers')


# When executed as a script, run test() with its default arguments.
if __name__ == '__main__':
    test()

0 comments on commit 43121af

Please sign in to comment.