From 597742356f66e456d3fd3cd822b708cbc04380ef Mon Sep 17 00:00:00 2001 From: Ahmet Taspinar Date: Sun, 25 Feb 2018 14:55:59 +0100 Subject: [PATCH] update query.py: refactor the querying This change ensures that nothing is done if the response does not have status_code 200. Previously the .json() method was called upon an empty response. A page that yields no tweets still returns ([], None) instead of indexing into an empty tweet list. Fixes https://github.com/taspinar/twitterscraper/issues/93 --- twitterscraper/query.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/twitterscraper/query.py b/twitterscraper/query.py index d8c5ffd..01bccb2 100644 --- a/twitterscraper/query.py +++ b/twitterscraper/query.py @@ -32,21 +32,22 @@ def query_single_page(url, html_response=True, retry=10): try: response = requests.get(url, headers=headers) - if html_response: - html = response.text + if response.status_code == 200: + if html_response: + html = response.text + tweets = list(Tweet.from_html(html)) + if not tweets: + return [], None + return tweets, "TWEET-{}-{}".format(tweets[-1].id, tweets[0].id) + else: + json_resp = response.json() + html = json_resp['items_html'] + tweets = list(Tweet.from_html(html)) + return tweets, json_resp['min_position'] else: - json_resp = response.json() - html = json_resp['items_html'] - - tweets = list(Tweet.from_html(html)) - - if not tweets: return [], None - if not html_response: - return tweets, json_resp['min_position'] - return tweets, "TWEET-{}-{}".format(tweets[-1].id, tweets[0].id) except requests.exceptions.HTTPError as e: logging.exception('HTTPError {} while requesting "{}"'.format( e, url))