Skip to content

Commit

Permalink
Merge a3e8008 into 6343d21
Browse files Browse the repository at this point in the history
  • Loading branch information
mtlynch committed Jan 24, 2019
2 parents 6343d21 + a3e8008 commit efa461e
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions ketohub/spiders.py
Expand Up @@ -109,21 +109,22 @@ class RuledMeSpider(spiders.CrawlSpider):
# e.g. https://www.ruled.me/keto-recipes/breakfast/
spiders.Rule(
linkextractors.LinkExtractor(
- allow=r'https://www.ruled.me/keto-recipes/\w+(-\w+)*/$',
+ allow=r'https://www.ruled.me/keto-recipes/\w+(\-\w+)*/$',
restrict_xpaths='//div[@class="r-list"]')),

# Extract links for finding additional pages within food category pages,
# e.g. https://www.ruled.me/keto-recipes/dinner/page/2/
spiders.Rule(
linkextractors.LinkExtractor(
- allow=r'https://www.ruled.me/keto-recipes/\w+(\w+)*/page/\d+/')
+ allow=(
+ r'https://www.ruled.me/keto-recipes/\w+(\-\w+)*/page/\d+/'))
),

# Extract links for the actual recipes,
# e.g. https://www.ruled.me/easy-keto-cordon-bleu/
spiders.Rule(
linkextractors.LinkExtractor(
- allow=r'https://www.ruled.me/(\w+-)+\w+/$',
+ allow=r'https://www.ruled.me/\w+(\-\w+)*/$',
restrict_xpaths='//div[@id="content"]'),
callback=callback_handler.process_callback,
follow=False)
Expand Down

0 comments on commit efa461e

Please sign in to comment.