Skip to content

Commit

Permalink
feat(processors): processors of string type
Browse files Browse the repository at this point in the history
  • Loading branch information
zireael committed Nov 6, 2019
1 parent e46b7b7 commit 31fa2c3
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 1 deletion.
11 changes: 10 additions & 1 deletion acrawler/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from acrawler.exceptions import DropFieldError, SkipTaskImmediatelyError
from acrawler.task import Task
from acrawler.utils import to_asyncgen
from acrawler.utils import to_asyncgen, partial
from acrawler.processors import Processors

_Function = Callable
Expand Down Expand Up @@ -396,6 +396,15 @@ def process(self, item):
# Call field processors.
for field, processors in self.field_processors.items():
for processor in processors:
if isinstance(processor, str):
li = processor.split(":", 1)
func_name = li[0]
args = li[1].split(",") if len(li) == 2 else []

processor = partial(
Processors.functions[func_name], *args, new_args_before=True
)

self._on_field(field, processor)

def _on_field(self, field, processor=lambda x: x, dest_field: str = None):
Expand Down
16 changes: 16 additions & 0 deletions acrawler/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,19 @@ async def redis_push_start_urls_coro(
def sync_coroutine(coro, loop=None):
    """Drive ``coro`` to completion and return its result, blocking the caller.

    Args:
        coro: The coroutine (or other awaitable accepted by
            ``run_until_complete``) to execute.
        loop: Event loop to run on. When falsy, the loop returned by
            ``asyncio.get_event_loop()`` is used instead.

    Returns:
        Whatever ``run_until_complete`` returns for ``coro``.
    """
    # ``or`` (not an ``is None`` test) so any falsy ``loop`` triggers the fallback.
    event_loop = loop or asyncio.get_event_loop()
    return event_loop.run_until_complete(coro)


def partial(func, *args, new_args_before=False, **keywords):
    """Return a wrapper that calls ``func`` with pre-bound arguments.

    Args:
        func: The callable to wrap.
        *args: Positional arguments bound at creation time.
        new_args_before: Selects how bound and call-time arguments combine.
            When falsy, bound positionals precede call-time positionals and
            call-time keywords override bound keywords. When truthy,
            call-time positionals precede bound positionals and bound
            keywords override call-time keywords.
        **keywords: Keyword arguments bound at creation time.

    Returns:
        A function exposing ``func``, ``args`` and ``keywords`` attributes
        holding the wrapped callable and the bound arguments.
    """

    def newfunc(*fargs, **fkeywords):
        if not new_args_before:
            # Bound values first; call-time keywords win on conflict.
            positional = [*args, *fargs]
            named = dict(keywords)
            named.update(fkeywords)
        else:
            # Call-time values first; bound keywords win on conflict.
            positional = [*fargs, *args]
            named = dict(fkeywords)
            named.update(keywords)
        return func(*positional, **named)

    # Expose the bound pieces on the wrapper for introspection.
    newfunc.func, newfunc.args, newfunc.keywords = func, args, keywords
    return newfunc

0 comments on commit 31fa2c3

Please sign in to comment.