From 30ebdfe1f36eea0175bce9d16223e1f52a82958e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 6 Dec 2014 07:57:26 +0530 Subject: [PATCH] Update The Guardian --- recipes/guardian.recipe | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe index aa4948418541..0da7de40fa53 100644 --- a/recipes/guardian.recipe +++ b/recipes/guardian.recipe @@ -38,27 +38,29 @@ class Guardian(BasicNewsRecipe): ignore_sections = [] timefmt = ' [%a, %d %b %Y]' + keep_only_tags = [ - dict(name='div', attrs={'id':["content","article_header","main-article-info",]}), - ] + dict(name='div', attrs={'id':["content","article_header","main-article-info",]}), + dict(attrs={'class':lambda x: x and set(x.split()).intersection({'content__head', 'content__main'})}), + ] remove_tags = [ - dict(name='div', attrs={'class':["video-content","videos-third-column"]}), - dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}), - dict(name='div', attrs={'class':["guardian-tickets promo-component",]}), - dict(name='ul', attrs={'class':["pagination"]}), - dict(name='ul', attrs={'id':["content-actions"]}), - # article history link - dict(name='a', attrs={'class':["rollover history-link"]}), - # "a version of this article ..." 
speil - dict(name='div' , attrs={'class' : ['section']}), - # "about this article" js dialog - dict(name='div', attrs={'class':["share-top",]}), - # author picture - dict(name='img', attrs={'class':["contributor-pic-small"]}), - # embedded videos/captions - dict(name='span',attrs={'class' : ['inline embed embed-media']}), - # dict(name='img'), - ] + dict(name='div', attrs={'class':[ + "video-content","videos-third-column", 'meta__extras', 'submeta-container submeta-container--break-at-leftcol ']}), + dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}), + dict(name='div', attrs={'class':["guardian-tickets promo-component",]}), + dict(name='ul', attrs={'class':["pagination"]}), + dict(name='ul', attrs={'id':["content-actions"]}), + # article history link + dict(name='a', attrs={'class':["rollover history-link"]}), + # "a version of this article ..." spiel + dict(name='div' , attrs={'class' : ['section']}), + # "about this article" js dialog + dict(name='div', attrs={'class':["share-top",]}), + # author picture + dict(name='img', attrs={'class':["contributor-pic-small"]}), + # embedded videos/captions + dict(name='span',attrs={'class' : ['inline embed embed-media']}), + ] use_embedded_content = False no_stylesheets = True