Skip to content

Commit

Permalink
Update WSJ
Browse files Browse the repository at this point in the history
Site appears to be in the process of transitioning to a new React-based
architecture. Roll eyes.
  • Loading branch information
kovidgoyal committed Apr 5, 2022
1 parent f0c948e commit 07480ba
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
18 changes: 16 additions & 2 deletions recipes/wsj.recipe
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})


def prefixed_classes(classes):
    """Build a tag spec matching elements whose class starts with a prefix.

    ``classes`` is a whitespace-separated string of class-name prefixes.
    The return value is a dict of the form ``{'attrs': {'class': matcher}}``
    where ``matcher(value)`` returns True when any whitespace-separated
    token of ``value`` begins with one of the prefixes, and False otherwise
    (including when ``value`` is None or empty).
    """
    # split() with no argument discards empty tokens. Splitting on ' ' would
    # turn an empty argument or a doubled/trailing space into the prefix '',
    # and every string starts with '', so every classed element would match.
    prefixes = tuple(frozenset(classes.split()))

    def matcher(x):
        if not x:
            return False
        # str.startswith accepts a tuple and tests all prefixes in one call;
        # an empty tuple never matches.
        return any(candidate.startswith(prefixes) for candidate in x.split())
    return {'attrs': {'class': matcher}}


class WSJ(BasicNewsRecipe):

if needs_subscription:
Expand All @@ -57,8 +70,9 @@ class WSJ(BasicNewsRecipe):
dict(name='span', itemprop='author', rel='author'),
dict(name='article', id='article-contents articleBody'.split()),
dict(name='div', id='article_story_body ncTitleArea snipper-ad-login'.split()),
dict(classes('nc-exp-artbody errorNotFound')),
classes('nc-exp-artbody errorNotFound'),
dict(attrs={'data-module-zone': 'article_snippet'}),
prefixed_classes('Headline__StyledHeadline- MediaLayout__Layout- ArticleByline__Container- ArticleTimestamp__Timestamp- ArticleBody__Container-'),
]

remove_tags = [
Expand Down Expand Up @@ -288,6 +302,6 @@ class WSJ(BasicNewsRecipe):
return [
('Testing', [
{'title': 'Article One',
'url': 'https://www.wsj.com/articles/gms-plan-to-drop-chevy-cruze-hits-ohio-town-hard-1543314600'}, # noqa
'url': 'https://www.wsj.com/articles/egg-prices-jump-as-bird-flu-hits-poultry-flocks-11648900800'}, # noqa
]),
]
18 changes: 16 additions & 2 deletions recipes/wsj_free.recipe
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,19 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)})


def prefixed_classes(classes):
    """Build a tag spec matching elements whose class starts with a prefix.

    ``classes`` is a whitespace-separated string of class-name prefixes.
    The return value is a dict of the form ``{'attrs': {'class': matcher}}``
    where ``matcher(value)`` returns True when any whitespace-separated
    token of ``value`` begins with one of the prefixes, and False otherwise
    (including when ``value`` is None or empty).
    """
    # split() with no argument discards empty tokens. Splitting on ' ' would
    # turn an empty argument or a doubled/trailing space into the prefix '',
    # and every string starts with '', so every classed element would match.
    prefixes = tuple(frozenset(classes.split()))

    def matcher(x):
        if not x:
            return False
        # str.startswith accepts a tuple and tests all prefixes in one call;
        # an empty tuple never matches.
        return any(candidate.startswith(prefixes) for candidate in x.split())
    return {'attrs': {'class': matcher}}


class WSJ(BasicNewsRecipe):

if needs_subscription:
Expand All @@ -57,8 +70,9 @@ class WSJ(BasicNewsRecipe):
dict(name='span', itemprop='author', rel='author'),
dict(name='article', id='article-contents articleBody'.split()),
dict(name='div', id='article_story_body ncTitleArea snipper-ad-login'.split()),
dict(classes('nc-exp-artbody errorNotFound')),
classes('nc-exp-artbody errorNotFound'),
dict(attrs={'data-module-zone': 'article_snippet'}),
prefixed_classes('Headline__StyledHeadline- MediaLayout__Layout- ArticleByline__Container- ArticleTimestamp__Timestamp- ArticleBody__Container-'),
]

remove_tags = [
Expand Down Expand Up @@ -288,6 +302,6 @@ class WSJ(BasicNewsRecipe):
return [
('Testing', [
{'title': 'Article One',
'url': 'https://www.wsj.com/articles/gms-plan-to-drop-chevy-cruze-hits-ohio-town-hard-1543314600'}, # noqa
'url': 'https://www.wsj.com/articles/egg-prices-jump-as-bird-flu-hits-poultry-flocks-11648900800'}, # noqa
]),
]

0 comments on commit 07480ba

Please sign in to comment.