Skip to content

Commit

Permalink
Update WSJ
Browse files: browse the repository at this point in the history
  • Loading branch information
kovidgoyal committed Oct 9, 2014
1 parent: d2442b6; commit: f8d96f7
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
9 changes: 7 additions & 2 deletions recipes/wsj.recipe
Expand Up @@ -22,6 +22,7 @@ class WallStreetJournal(BasicNewsRecipe):
timefmt = ' [%a, %b %d, %Y]'
no_stylesheets = True
ignore_duplicate_articles = {'url'}
remove_attributes = ['style', 'data-scrim']

keep_only_tags = [
dict(name='h1'), dict(name='h2', attrs={'class':['subhead', 'subHed deck']}),
Expand All @@ -31,7 +32,8 @@ class WallStreetJournal(BasicNewsRecipe):
dict(name='div', attrs={'class':'snippet-ad-login'}),
]
remove_tags = [
dict(attrs={'class':['insetButton', 'insettipBox']}),
dict(attrs={'class':['insetButton', 'insettipBox', 'author-info', 'media-object-video']}),
dict(attrs={'class':lambda x: x and 'article_tools' in x.split()}),
dict(name='span', attrs={'data-country-code':True, 'data-ticker-code':True}),
]
preprocess_regexps = [
Expand All @@ -50,7 +52,7 @@ class WallStreetJournal(BasicNewsRecipe):

def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
picdiv = soup.find('img')
picdiv = soup.find('img', src=True)
if picdiv is not None:
self.add_toc_thumbnail(article,picdiv['src'])

Expand All @@ -60,6 +62,9 @@ class WallStreetJournal(BasicNewsRecipe):
img = div.find('img')
if img is not None:
img.extract()
# Use large images
for img in soup.findAll('img', attrs={'data-enlarge':True}):
img['src'] = img['data-enlarge']

return soup

Expand Down
9 changes: 7 additions & 2 deletions recipes/wsj_free.recipe
Expand Up @@ -20,6 +20,7 @@ class WallStreetJournal(BasicNewsRecipe):
timefmt = ' [%a, %b %d, %Y]'
no_stylesheets = True
ignore_duplicate_articles = {'url'}
remove_attributes = ['style', 'data-scrim']

keep_only_tags = [
dict(name='h1'), dict(name='h2', attrs={'class':['subhead', 'subHed deck']}),
Expand All @@ -29,7 +30,8 @@ class WallStreetJournal(BasicNewsRecipe):
dict(name='div', attrs={'class':'snippet-ad-login'}),
]
remove_tags = [
dict(attrs={'class':['insetButton', 'insettipBox']}),
dict(attrs={'class':['insetButton', 'insettipBox', 'author-info', 'media-object-video']}),
dict(attrs={'class':lambda x: x and 'article_tools' in x.split()}),
dict(name='span', attrs={'data-country-code':True, 'data-ticker-code':True}),
]
preprocess_regexps = [
Expand All @@ -39,7 +41,7 @@ class WallStreetJournal(BasicNewsRecipe):

def populate_article_metadata(self, article, soup, first):
if first and hasattr(self, 'add_toc_thumbnail'):
picdiv = soup.find('img')
picdiv = soup.find('img', src=True)
if picdiv is not None:
self.add_toc_thumbnail(article,picdiv['src'])

Expand All @@ -49,6 +51,9 @@ class WallStreetJournal(BasicNewsRecipe):
img = div.find('img')
if img is not None:
img.extract()
# Use large images
for img in soup.findAll('img', attrs={'data-enlarge':True}):
img['src'] = img['data-enlarge']

return soup

Expand Down

0 comments on commit f8d96f7

Please sign in to comment.