Skip to content

Commit

Permalink
Adding spider for YourFriendsJ
Browse files Browse the repository at this point in the history
  • Loading branch information
mtlynch committed Jan 26, 2018
1 parent 5cf506a commit 9650266
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -16,4 +16,5 @@ scrapy crawl ketovangelist-kitchen -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl low-carb-yum -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl queen-bs -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl ruled-me -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
scrapy crawl your-friends-j -s "DOWNLOAD_ROOT=${OUTPUT_DIR}"
```
21 changes: 21 additions & 0 deletions ketohub/spiders.py
Expand Up @@ -262,3 +262,24 @@ class QueenBs(spiders.CrawlSpider):
callback=callback_handler.process_callback,
follow=False)
]


class YourFriendsJ(spiders.CrawlSpider):
    """Crawl spider for the keto category of yourfriendsj.com.

    Crawling starts at the keto category listing. Links found inside the
    ``post-content`` div that look like top-level post URLs are handed to
    ``callback_handler.process_callback``; links on those pages are not
    followed any further.
    """
    name = 'your-friends-j'

    # Created once, when the class body is evaluated. The content saver is
    # rooted at whatever _get_download_root() returns at that moment.
    callback_handler = CallbackHandler(
        content_saver=persist.ContentSaver(_get_download_root()))

    allowed_domains = ['yourfriendsj.com']
    start_urls = ['http://yourfriendsj.com/category/keto/']

    rules = [
        # Extract links for recipes,
        # e.g. http://yourfriendsj.com/easy-guacamole-recipe/
        spiders.Rule(
            linkextractors.LinkExtractor(
                # The dot is escaped so it matches only a literal '.', not
                # any character. [^/]* keeps the match to a single path
                # segment, and the trailing /$ requires the URL to end
                # right after that segment.
                allow=r'http://yourfriendsj\.com/[^/]*/$',
                restrict_xpaths='//div[@class="post-content"]'),
            callback=callback_handler.process_callback,
            follow=False)
    ]

0 comments on commit 9650266

Please sign in to comment.