Skip to content

Commit

Permalink
Merge pull request #102 from openzim/fix_title_and_report
Browse files Browse the repository at this point in the history
Fix title and report
  • Loading branch information
benoit74 committed Mar 2, 2024
2 parents ed0c0d7 + bbbbed5 commit 91fea6c
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 32 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed
- Fix ZIM title still being incorrect
- Fix crash when using the stats report (#100)

## [0.3.0] - 2024-03-02

### Changed
Expand Down
2 changes: 0 additions & 2 deletions src/ifixit2zim/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,6 @@ def main():
sys.exit(scraper.run())
except Exception as exc:
logger.error("FAILED. An error occurred", exc_info=exc)
if args.debug:
logger.exception(exc)
raise SystemExit(1) from None


Expand Down
9 changes: 5 additions & 4 deletions src/ifixit2zim/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@


def excepthook(args):
logger.error(f"UNHANDLED Exception in {args.thread.name}: {args.exc_type}")
logger.exception(args.exc_value)
logger.error(
f"UNHANDLED Exception in {args.thread.name}: {args.exc_type}",
exc_info=args.exc_value,
)


threading.excepthook = excepthook
Expand Down Expand Up @@ -104,8 +106,7 @@ def worker(self):
try:
func(**kwargs)
except Exception as exc:
logger.error(f"Error processing {func} with {kwargs=}")
logger.exception(exc)
logger.error(f"Error processing {func} with {kwargs=}", exc_info=exc)
if raises:
self.exceptions.append(exc)
self.shutdown()
Expand Down
15 changes: 8 additions & 7 deletions src/ifixit2zim/imager.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,9 @@ def process_image(
fileobj = self.get_image_data(url.geturl())
except Exception as exc:
logger.error(
f"Failed to download/convert/optim source at {url.geturl()}"
f"Failed to download/convert/optim source at {url.geturl()}",
exc_info=exc,
)
logger.exception(exc)
self.add_missing_image_to_zim(
path=path,
)
Expand Down Expand Up @@ -195,8 +195,7 @@ def process_image(
# don't have it, not a download error. we'll upload after processing
pass
except Exception as exc:
logger.error(f"Failed to download '{path}' from cache: {exc}")
logger.exception(exc)
logger.error(f"Failed to download '{path}' from cache", exc_info=exc)
download_failed = True
else:
self.add_image_to_zim(
Expand All @@ -211,8 +210,10 @@ def process_image(
try:
fileobj = self.get_image_data(url.geturl())
except Exception as exc:
logger.error(f"Failed to download/convert/optim source at {url.geturl()}")
logger.exception(exc)
logger.error(
f"Failed to download/convert/optim source at {url.geturl()}",
exc_info=exc,
)
self.add_missing_image_to_zim(
path=path,
)
Expand All @@ -230,6 +231,6 @@ def process_image(
try:
s3_storage.upload_fileobj(fileobj=fileobj, key=path, meta=meta)
except Exception as exc:
logger.error(f"{path} failed to upload to cache: {exc}")
logger.error(f"{path} failed to upload to cache", exc_info=exc)

return path
22 changes: 12 additions & 10 deletions src/ifixit2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def __init__(self, **kwargs):

self.utils = Utils(configuration=self.configuration)

self.scrapers = []

@property
def build_path(self):
return self.configuration.build_path
Expand Down Expand Up @@ -104,7 +106,6 @@ def sanitize_inputs(self):
self.configuration.title = TITLE["en"][
f"title_{self.configuration.lang_code}"
]
self.configuration.title = self.metadata["title"]
self.configuration.title = self.configuration.title.strip()

(
Expand Down Expand Up @@ -232,13 +233,15 @@ def _raise_helper(msg):
self.scraper_category = ScraperCategory(context=context)
self.scraper_info = ScraperInfo(context=context)
self.scraper_user = ScraperUser(context=context)
self.scrapers = [
self.scraper_homepage,
self.scraper_category,
self.scraper_guide,
self.scraper_info,
self.scraper_user,
]
self.scrapers.extend(
[
self.scraper_homepage,
self.scraper_category,
self.scraper_guide,
self.scraper_info,
self.scraper_user,
]
)

self.processor.get_guide_link_from_props = (
self.scraper_guide.get_guide_link_from_props
Expand Down Expand Up @@ -403,8 +406,7 @@ def run(self):
if isinstance(exc, KeyboardInterrupt):
logger.error("KeyboardInterrupt, exiting.")
else:
logger.error(f"Interrupting process due to error: {exc}")
logger.exception(exc)
logger.error("Interrupting process due to error", exc_info=exc)
self.imager.abort()
self.img_executor.shutdown(wait=False)
return 1
Expand Down
7 changes: 3 additions & 4 deletions src/ifixit2zim/scraper_generic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import traceback
from abc import ABC, abstractmethod
from queue import Queue

Expand Down Expand Up @@ -147,12 +146,12 @@ def scrape_items(self):
)
try:
self.scrape_one_item(item_key, item_data)
except Exception as ex:
except Exception as exc:
self.error_items_keys.add(item_key)
logger.warning(
f"Error while processing {self.get_items_name()} {item_key}: {ex}"
f"Error while processing {self.get_items_name()} {item_key}",
exc_info=exc,
)
traceback.print_exc()
self.add_item_error_redirect(item_key, item_data)
finally:
if (
Expand Down
8 changes: 3 additions & 5 deletions src/ifixit2zim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,16 @@ def get_version_ident_for(self, url: str) -> str | None:
resp = requests.head(url, timeout=10)
headers = resp.headers
except Exception as exc:
logger.warning(f"Unable to HEAD {url}")
logger.exception(exc)
logger.warning(f"Unable to HEAD {url}", exc_info=exc)
try:
_, headers = stream_file(
url=url,
byte_stream=io.BytesIO(),
block_size=1,
only_first_block=True,
)
except Exception as exc2:
logger.warning(f"Unable to query image at {url}")
logger.exception(exc2)
except Exception as exc:
logger.warning(f"Unable to query image at {url}", exc_info=exc)
return

for header in ("ETag", "Last-Modified", "Content-Length"):
Expand Down

0 comments on commit 91fea6c

Please sign in to comment.