-
Notifications
You must be signed in to change notification settings - Fork 0
/
articles.py
executable file
·25 lines (19 loc) · 1.01 KB
/
articles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from newspaper import Article
# Page to scrape. The commented entries are other sample articles this script
# has been pointed at; assign one of them to ``url`` to try it instead.
#   'https://www.npr.org/sections/deceptivecadence/2020/10/24/927121609/ellen-reid-soundwalk-central-park-gps-location-sensitive-app'
#   'https://www.vox.com/culture/21534638/the-mandalorian-disney-plus-explained-do-i-need-to-watch-star-wars-baby-yoda'
#   'https://www.vox.com/covid-19-coronavirus-economy-recession-stock-market/2020/5/6/21248069/stock-market-economy-federal-reserve-jerome-powell'
#   'https://www.popularmechanics.com/technology/infrastructure/a34313925/whale-tail-train-derailment/'
url = 'https://www.cnn.com/travel/article/kai-tak-skyscraper-airside/index.html'

# keep_article_html=True keeps the HTML when scraping; it is read back through
# ``article_html`` near the end of the script. (The earlier, commented-out
# variant of this line was plain ``Article(url)``.)
article = Article(url, keep_article_html=True)

# Download first, then parse.
article.download()
article.parse()

# Dump the extracted fields, one "<label> <value>" line each. print() joins
# its positional arguments with a single space and applies str() to them.
print("Title", article.title)
print("Publish date", article.publish_date)
print("Text", article.text)
print("Author", article.authors)
print("Top Image", article.top_image)

# Show the retained article HTML, followed by the type of that value.
html = article.article_html
print("Article HTML", str(html))
print(type(html))