Skip to content

Commit

Permalink
Update Guardian & Observer
Browse files Browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Jun 11, 2023
1 parent 73ecd2a commit 205c8e1
Showing 1 changed file with 24 additions and 2 deletions.
26 changes: 24 additions & 2 deletions recipes/guardian.recipe
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,29 @@ class Guardian(BasicNewsRecipe):

return cover

def parse_section(self, url, title_prefix=''):
def parse_observer_index(self, soup):
    """Yield ``(title, articles)`` pairs parsed from an Observer index page.

    Every ``<section>`` element in *soup* is inspected. Its title is the
    text of the first ``<h2>`` it contains; sections without title text
    are skipped. Within a section, each ``<li>`` contributes at most one
    article: the first ``<a>`` that carries both an ``href`` and an
    ``aria-label`` attribute. The ``aria-label`` value is used as the
    article title.

    :param soup: parsed index page exposing ``findAll``.
    :return: generator of ``(section_title, articles)`` tuples, where
        ``articles`` is a non-empty list of ``{'title': ..., 'url': ...}``
        dicts. Sections that yield no articles are omitted.
    """
    # Imported here so the method does not depend on the module's import block.
    from urllib.parse import urljoin

    for section in soup.findAll('section'):
        title = self.tag_to_string(section.find('h2'))
        if not title:
            continue
        self.log('Found section:', title)
        articles = []
        for li in section.findAll('li'):
            a = li.find('a', attrs={'href': True, 'aria-label': True})
            if not a:
                continue
            url = a['href']
            if url.startswith('/'):
                # urljoin resolves root-relative ("/path") hrefs against the
                # site root and protocol-relative ("//host/path") hrefs
                # against the scheme only. Plain string concatenation would
                # mangle the latter into "https://site//host/path".
                url = urljoin(self.base_url, url)
            self.log('\t', a['aria-label'], url)
            articles.append({'title': a['aria-label'], 'url': url})
        if articles:
            yield title, articles

def parse_section(self, section_url, title_prefix=''):
feeds = []
soup = self.index_to_soup(url)
soup = self.index_to_soup(section_url)
if '/observer' in section_url:
return list(self.parse_observer_index(soup))
for section in soup.findAll('section'):
title = title_prefix + self.tag_to_string(section.find(
attrs={'class': 'fc-container__header__title'})).strip().capitalize()
Expand All @@ -117,6 +137,8 @@ class Guardian(BasicNewsRecipe):
for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
title = self.tag_to_string(a).strip()
url = a['href']
if url.startswith('/'):
url = self.base_url.rpartition('/')[0] + url
self.log(' ', title, url)
feeds[-1][1].append({'title': title, 'url': url})
break
Expand Down

0 comments on commit 205c8e1

Please sign in to comment.