From 082d3063c8313383a88e939cc9cb1382bb79eb98 Mon Sep 17 00:00:00 2001 From: Marc-Olivier Buob Date: Wed, 19 Apr 2023 02:20:04 +0200 Subject: [PATCH] google_scholar.py: Added try...except to improve robustness to network issues --- src/minifold/google_scholar.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/minifold/google_scholar.py b/src/minifold/google_scholar.py index 1ce9365..3b3774b 100644 --- a/src/minifold/google_scholar.py +++ b/src/minifold/google_scholar.py @@ -162,13 +162,19 @@ def parse(self, s_html: str): Args: s_html (str): An HTML Google Scholar page content. + + Raises: + ``RuntimeError`` if the result can't be fetched from Google scholar. """ - soup = SoupKitchen.make_soup(s_html) - soup = soup.find(name="div", attrs={"id": "gs_res_ccl_mid"}) - for div in soup.findAll(name="div", attrs={"class": "gs_r"}): - s = div.prettify() - entry = parse_article(s) - self.articles.append(entry) + try: + soup = SoupKitchen.make_soup(s_html) + soup = soup.find(name="div", attrs={"id": "gs_res_ccl_mid"}) + for div in soup.findAll(name="div", attrs={"class": "gs_r"}): + s = div.prettify() + entry = parse_article(s) + self.articles.append(entry) + except: + raise RuntimeError(f"Unable to parse:\n\n{s_html}") def send_query(self, gs_query: ScholarQuery): """