Skip to content

Commit

Permalink
Remove grab.util.misc module. Change way of tracking error names in stat module.
Browse files Browse the repository at this point in the history
  • Loading branch information
Some User committed Dec 21, 2022
1 parent c4ce649 commit 929f1cc
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 52 deletions.
51 changes: 13 additions & 38 deletions grab/spider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from grab.spider.task import Task
from grab.types import GrabConfig
from grab.util.metrics import format_traffic_value
from grab.util.misc import camel_case_to_underscore

from .interface import FatalErrorQueueItem
from .service.network import NetworkResult
Expand Down Expand Up @@ -580,7 +579,17 @@ def shutdown_services(self, services: list[BaseService]) -> None:
logger.debug("Work done")

def log_failed_network_result(self, res: NetworkResult) -> None:
msg = ("http-%s" % res["grab"].doc.code) if res["ok"] else res["error_abbr"]
orig_exc = (
res["exc"].original_exc
if isinstance(res["exc"], OriginalExceptionGrabError)
else res["exc"]
)
msg = (
("http-%s" % res["grab"].doc.code)
if res["ok"]
else orig_exc.__class__.__name__
)

self.stat.inc("error:%s" % msg)

def log_rejected_task(self, task: Task, reason: str) -> None:
Expand Down Expand Up @@ -622,7 +631,7 @@ def srv_process_service_result(
* ResponseNotValid-based exception
* Arbitrary exception
* Network response:
{ok, ecode, emsg, error_abbr, exc, grab, grab_config_backup}
{ok, ecode, emsg, exc, grab, grab_config_backup}
Exception can come only from parser_service and it always has
meta {"from": "parser", "exc_info": <...>}
Expand Down Expand Up @@ -690,13 +699,11 @@ def srv_process_task(self, task: Task) -> None:
self.stat.inc("spider:request-network")
self.stat.inc("spider:task-%s-network" % task.name)

# self.freelist.pop()
try:
result: dict[str, Any] = {
"ok": True,
"ecode": None,
"emsg": None,
"error_abbr": None,
"grab": grab,
"grab_config_backup": (grab_config_backup),
"task": task,
Expand All @@ -710,47 +717,15 @@ def srv_process_task(self, task: Task) -> None:
GrabInvalidResponse,
GrabTooManyRedirectsError,
) as ex:
is_redir_err = isinstance(ex, GrabTooManyRedirectsError)
orig_exc_name = (
ex.original_exc.__class__.__name__
if hasattr(ex, "original_exc")
else None
)
# UnicodeError: see #323
ex_cls = (
ex
if (
not isinstance(ex, OriginalExceptionGrabError)
or isinstance(ex, GrabInvalidUrl)
or orig_exc_name == "error"
or orig_exc_name == "UnicodeError"
)
else cast(OriginalExceptionGrabError, ex).original_exc
)
result.update(
{
"ok": False,
"exc": ex,
"error_abbr": (
"too-many-redirects"
if is_redir_err
else self.make_class_abbr(ex_cls.__class__.__name__)
),
}
)
result.update({"ok": False, "exc": ex})
self.task_dispatcher.input_queue.put((result, task, None))
finally:
pass
# self.freelist.append(1)
else:
self.log_rejected_task(task, reason)
handler = self.get_fallback_handler(task)
if handler:
handler(task)

def make_class_abbr(self, name: str) -> str:
val = camel_case_to_underscore(name)
return val.replace("_", "-")


# pylint: enable=too-many-instance-attributes, too-many-public-methods
11 changes: 0 additions & 11 deletions grab/util/misc.py

This file was deleted.

4 changes: 3 additions & 1 deletion tests/test_spider_error.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pprint import pprint # pylint: disable=unused-import

from test_server import Response

from grab import Grab, GrabTimeoutError
Expand Down Expand Up @@ -118,4 +120,4 @@ def task_page(self, grab, unused_task):

bot = build_spider(SimpleSpider)
bot.run()
self.assertTrue("error:read-timeout-error" in bot.stat.counters)
self.assertTrue("error:ReadTimeoutError" in bot.stat.counters)
4 changes: 2 additions & 2 deletions tests/test_spider_redirect.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def task_page(self, unused_grab, unused_task):
bot.run()

self.assertEqual(1, len(bot.runtime_events["network-count-rejected"]))
self.assertTrue("error:too-many-redirects" in bot.stat.counters)
self.assertTrue("error:GrabTooManyRedirectsError" in bot.stat.counters)

def test_redirect_with_invalid_byte(self):
server = self.server
Expand Down Expand Up @@ -57,7 +57,7 @@ def task_page(self, unused_grab, unused_task):
self.assertEqual(1, len(bot.runtime_events["network-count-rejected"]))
self.assertTrue(
# bot.stat.counters["error:new-connection-error"] == 5
bot.stat.counters["error:location-parse-error"]
bot.stat.counters["error:LocationParseError"]
# or bot.stat.counters["error:grab-could-not-resolve-host-error"] == 5
# or bot.stat.counters["error:couldnt-resolve-host"] == 5
)

0 comments on commit 929f1cc

Please sign in to comment.