From 5fff0618fa5905602ec177d74c361b602130ca32 Mon Sep 17 00:00:00 2001 From: archiegugol Date: Fri, 17 Aug 2018 19:45:44 +0900 Subject: [PATCH 1/3] Fix for missing projects on last page --- ghorgs/wrappers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ghorgs/wrappers.py b/ghorgs/wrappers.py index 24a3a92..cbe5ad4 100644 --- a/ghorgs/wrappers.py +++ b/ghorgs/wrappers.py @@ -130,7 +130,7 @@ def get_paged_content(self, session: requests.Session, url: str) -> list: all_data = [] current_url = url last_url = None - while last_url != current_url: + while True: response = session.get(current_url) retry_after_raw = response.headers.get('Retry-After', None) if retry_after_raw: @@ -143,14 +143,16 @@ def get_paged_content(self, session: requests.Session, url: str) -> list: assert isinstance(data, list) all_data.extend(data) + # Archie> Need to add this check here so last page will be processed + if current_url == last_url: + break + # check header if 'Link' in response.headers: # parse parsed_link_header = parse_github_link_header(response.headers['Link']) current_url = parsed_link_header['next'] last_url = parsed_link_header['last'] - else: - last_url = current_url return all_data From 41ff65474576dbeb014ef310857a91b2e0340553 Mon Sep 17 00:00:00 2001 From: archiegugol Date: Mon, 20 Aug 2018 17:11:18 +0900 Subject: [PATCH 2/3] Fixed endless loop because of missing else line 156 --- ghorgs/wrappers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ghorgs/wrappers.py b/ghorgs/wrappers.py index cbe5ad4..8803f97 100644 --- a/ghorgs/wrappers.py +++ b/ghorgs/wrappers.py @@ -143,7 +143,7 @@ def get_paged_content(self, session: requests.Session, url: str) -> list: assert isinstance(data, list) all_data.extend(data) - # Archie> Need to add this check here so last page will be processed + # Archie> Need to move this check here so last page will be processed for project if current_url == last_url: break @@ -153,6 +153,8 @@ def 
get_paged_content(self, session: requests.Session, url: str) -> list: parsed_link_header = parse_github_link_header(response.headers['Link']) current_url = parsed_link_header['next'] last_url = parsed_link_header['last'] + else: + last_url = current_url return all_data From d23e685db064a5c9da074cf7bd319f9a9a9ed4e6 Mon Sep 17 00:00:00 2001 From: archiegugol Date: Mon, 20 Aug 2018 18:52:30 +0900 Subject: [PATCH 3/3] Clarified comment and removed name --- ghorgs/wrappers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ghorgs/wrappers.py b/ghorgs/wrappers.py index 8803f97..42d7f52 100644 --- a/ghorgs/wrappers.py +++ b/ghorgs/wrappers.py @@ -143,7 +143,7 @@ def get_paged_content(self, session: requests.Session, url: str) -> list: assert isinstance(data, list) all_data.extend(data) - # Archie> Need to move this check here so last page will be processed for project + # Need to move this check here and change to 'while True' so the last page of the GitHub project will be processed if current_url == last_url: break