Skip to content

Commit

Permalink
refactor: extract main or body in external urls
Browse files Browse the repository at this point in the history
  • Loading branch information
nalgeon committed Apr 13, 2024
1 parent 55a5e67 commit ae1d30b
Showing 1 changed file with 1 addition and 6 deletions.
7 changes: 1 addition & 6 deletions bot/fetcher.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -66,12 +66,7 @@ def extract_text(self) -> str:
if self.content_type != "text/html":
return self.response.text
html = BeautifulSoup(self.response.text, "html.parser")
- article = (
- html.find("main article")
- or html.find("article")
- or html.find("main")
- or html.find("body")
- )
+ article = html.find("main") or html.find("body")
return article.get_text()

def is_text(self) -> bool:
Expand Down

0 comments on commit ae1d30b

Please sign in to comment.