Skip to content

Commit

Permalink
Equestria Daily by Timothee Andres
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Dec 20, 2021
1 parent aec2c1a commit d51a453
Showing 1 changed file with 80 additions and 0 deletions.
80 changes: 80 additions & 0 deletions recipes/equestria_daily.recipe
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.date import parse_date, utcnow


class AdvancedUserRecipe1639926896(BasicNewsRecipe):
    """News recipe that collects recent Equestria Daily posts, one feed per site label.

    The index is built by ``parse_index``, which queries the site's
    ``/search/label/<label>`` listing for every entry in ``sections``.
    """

    __author__ = "Aisteru"
    __copyright__ = "2021, Timothée Andres <timothee dot andres at gmail dot com>"
    __license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'

    title = "Equestria Daily"
    description = "Everything new in Equestria and beyond!"
    language = 'en_US'

    # Max. supported by website: 50
    # This value is also sent to the site as the ``max-results`` query
    # parameter when a section listing is requested (see parse_index).
    max_articles_per_feed = 30

    compress_news_images = True
    no_stylesheets = True
    keep_only_tags = [{'name': 'div', 'class_': ['post', 'hentry']}]
    remove_tags = [{'name': 'div', 'class_': 'post-footer'}]
    extra_css = '.article_date { margin-left: 10px; }'

    # Masthead image dimensions
    # NOTE(review): presumably consumed by the BasicNewsRecipe base class when
    # it prepares the masthead; nothing in this class reads them - confirm.
    MI_WIDTH = 600
    MI_HEIGHT = 200

    # (feed title shown to the reader, label name used in the site URL)
    # To discard posts under a certain section, simply comment the whole line
    sections = [
        ("Art", 'Art'),
        ("News", 'News'),
        ("Fics", 'Fanfiction'),
        ("Media", 'Media'),
        ("Comics", 'Comic'),
        ("Community", 'Community'),
        ("Editorial", 'Editorial'),
    ]

    def get_masthead_url(self):
        """Return the URL of the image in the site's ``#header`` element.

        Returns ``None`` when the home page has no such image (or the image
        has no ``src``) instead of raising, so a layout change on the site
        does not surface as a ``TypeError``/``KeyError`` from this method.
        """
        soup = self.index_to_soup('https://www.equestriadaily.com')
        img = soup.select_one('#header img')
        if img is None:
            return None
        return img.get('src')

    def parse_index(self):
        """Build the list of feeds by scraping each section's label listing.

        Returns:
            A list of ``(section_name, articles)`` tuples, in the order the
            sections first produced an article. Each article is a dict with
            the keys ``title``, ``url``, ``date``, ``description`` and
            ``content``. Sections for which no article passed the age filter
            are omitted.

        A post is kept when it is at most ``self.oldest_article`` days old
        (``oldest_article`` is not defined in this class; it is expected to
        come from the base class). When a post's date is missing or cannot be
        parsed, the date of the previous post in the same listing is used,
        starting from the current time for the first post.
        """
        results = {}
        current_date = utcnow()

        def clean_description(description):
            # Trim every line and drop the blank ones.
            stripped = (line.strip() for line in description.split('\n'))
            return '\n'.join(line for line in stripped if line)

        def timestamp_text(article):
            # The date is taken from the second line of the timestamp span's
            # text. Fall back to the whole (stripped) text when there is only
            # one line, and to '' when the span is missing, rather than
            # raising IndexError/AttributeError.
            stamp = article.select_one('span.post-timestamp')
            if stamp is None:
                return ''
            lines = stamp.text.split('\n')
            return lines[1] if len(lines) > 1 else lines[0].strip()

        for section_name, section_url_name in self.sections:
            soup = self.index_to_soup(
                f'https://www.equestriadaily.com/search/label/{section_url_name}?max-results={self.max_articles_per_feed}')
            previous_post_date = current_date

            for article in soup.select('div.post.hentry'):
                header = article.select_one('h3 > a')
                # A post without a linked title cannot be downloaded; skip it
                # instead of aborting the whole index.
                if header is None or not header.get('href'):
                    continue

                body = article.select_one('div.entry-content')
                article_entry = {
                    'title': header.text,
                    'url': header['href'],
                    'date': timestamp_text(article),
                    'description': clean_description(body.text) if body is not None else '',
                    'content': '',  # Must be empty
                }

                # Best-effort date handling: default to the previous post's
                # date and only replace it when this post's date parses.
                post_date = previous_post_date
                if article_entry['date'].strip():
                    try:
                        post_date = parse_date(article_entry['date'])
                    except Exception as err:
                        # Deliberately broad: the set of exceptions the date
                        # parser can raise is not visible from this file.
                        self.log.warning(
                            'Could not parse date %r for %s: %s' % (
                                article_entry['date'], article_entry['url'], err))
                    else:
                        previous_post_date = post_date

                if (current_date - post_date).days <= self.oldest_article:
                    results.setdefault(section_name, []).append(article_entry)

        return list(results.items())

0 comments on commit d51a453

Please sign in to comment.