Skip to content

Commit

Permalink
Merge remote-tracking branch 'gh-ftr/master' into shtrom-s-master
Browse files Browse the repository at this point in the history
  • Loading branch information
shtrom committed Feb 4, 2024
2 parents 9ecc3bb + d3b7caf commit 0a5f087
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 5 deletions.
10 changes: 10 additions & 0 deletions gizmodo.com.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ strip_id_or_class: js_ad-mobile-dynamic
strip_id_or_class: ad-mobile-dynamic
strip_id_or_class: advertisement

# [FTR] Stripping normally script-hidden div to prevent cutting article after 1st paragraph
strip: //div[contains(@class,'sc-1needdh-')]

# activate embedded YouTube
find_string: data-src="https://gizmodo.com/embed/inset/iframe?id=youtube-video-
replace_string: src="https://www.youtube.com/embed/
find_string: &start=
replace_string: " foo="

prune: no
tidy: yes

strip: //aside
Expand Down
2 changes: 1 addition & 1 deletion heatmap.news.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
body: //div[contains(@class, 'current-post')]//div[contains(@class, 'body-description')]
body: //div[contains(@class, 'current-post')]//div[contains(@class, 'body-description')] | //article[1]//div[@class='widget__head']/picture
author: //div[contains(@class, 'post-author')]

test_url: https://heatmap.news/technology/sublime-carbon-cement-electrolyzer
Expand Down
18 changes: 18 additions & 0 deletions matthewball.vc.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
body: //article[1]

strip_id_or_class: blog-item-top-wrapper
strip_id_or_class: eapps-social-share-buttons-container
strip_id_or_class: sqs-gallery-thumbnails
strip_id_or_class: newsletter-form-wrapper

strip_id_or_class: blog-item-author-profile-wrapper
strip_id_or_class: blog-item-comments

# images from gallery
find_string: data-src="https://
replace_string: src="https://

prune: no

test_url: https://www.matthewball.vc/all/gaming2024?utm_campaign=mb&utm_medium=newsletter&utm_source=morning_brew
test_url: https://www.matthewball.vc/all/digitalthemeparkplatforms
14 changes: 14 additions & 0 deletions mercatornet.com.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
body: //main[1] | //div[@id='page-features']

author: //attribute[@name='author']/@value
author: //div[contains(@class, 'byline')]//div[@class='name']
date: //attribute[@name='date']/@value
date: //div[contains(@class, 'byline')]//div[@class='dates']/text()

strip_id_or_class: join-community

strip: //div [contains(@class, 'byline')]/ancestor::section[1]
strip: //section[@class='subscribe-section']/self::section | //section[@class='subscribe-section']/following-sibling::section

test_url: https://www.mercatornet.com/hosing_down_the_biggest_moral_panic_in_canadian_history
test_url: https://www.mercatornet.com/europe_new_digital_identity_wallet
26 changes: 22 additions & 4 deletions msdvetmanual.com.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,26 @@
titl: //h1
author: //div[contains(@class, 'topic__label--author')]
// XXX: can't get the h2 header to be captured
body: //article | //h2[contains(@class, 'topic__header--section')]
title: //h1[1]
author: //a[contains(@href, '/authors/')]
body: //article[1]

# strip title from text
#strip_id_or_class: topic__quickfacts-label
#strip: //h1[1]

# or complete header
strip_id_or_class: topicmenu
strip_id_or_class: tablebox

strip_id_or_class: hide-on-print

# prevent FTR from showing tooltips as plain-text
strip_id_or_class: tooltip-container

# prevent wallabag from showing tooltips as plain-text
strip: //a[@data-toggle='tooltip']/following-sibling::a[1]

prune: no
tidy: no
insert_detected_image: no

test_url: https://www.msdvetmanual.com/toxicology/food-hazards/raisin-and-grape-toxicosis-in-dogs
test_contains: Xylitol toxicosis occurs in dogs after ingestion of xylitol

0 comments on commit 0a5f087

Please sign in to comment.