Skip to content

Commit

Permalink
openopps: Provide minimum date. Don't parse response in spider.
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Feb 27, 2021
1 parent e874784 commit acd4210
Showing 1 changed file with 3 additions and 11 deletions.
14 changes: 3 additions & 11 deletions kingfisher_scrapy/spiders/openopps.py
Expand Up @@ -40,7 +40,7 @@ class OpenOpps(BaseSpider):

# BaseSpider
default_from_date = '2011-01-01'
- root_path = 'item'
+ root_path = 'results.item.json'

access_token = None
api_limit = 10000 # OpenOpps API limit for search results
Expand Down Expand Up @@ -107,7 +107,7 @@ def start_requests_pages(self):
yield from self.request_range_per_day(self.from_date, self.until_date, search_h)
else:
# Use larger ranges for filters with less than (api_limit) search results
- release_date_gte_list = ['', '2009-01-01', '2010-01-01', '2010-07-01']
+ release_date_gte_list = ['1970-01-01', '2009-01-01', '2010-01-01', '2010-07-01']
release_date_lte_list = ['2008-12-31', '2009-12-31', '2010-06-30', '2010-12-31']

for i in range(len(release_date_gte_list)):
Expand Down Expand Up @@ -147,15 +147,7 @@ def parse(self, response):

# Counts response and range hour split control
if count <= self.api_limit or search_h == 1:
- # Data type changed to release package list in order to have fewer files
- all_data = []
- for data in results['results']:
- json_data = data['json']
- if json_data:
- all_data.append(json_data)
-
- if all_data:
- yield self.build_file_from_response(response, data=all_data, data_type=self.data_type)
+ yield self.build_file_from_response(response, data_type=self.data_type)

next_url = results.get('next')
if next_url:
Expand Down

0 comments on commit acd4210

Please sign in to comment.