-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
198 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import json | ||
|
||
import scrapy | ||
|
||
from kingfisher_scrapy.base_spider import SimpleSpider | ||
from kingfisher_scrapy.util import handle_http_error | ||
|
||
|
||
class BoliviaAgetic(SimpleSpider):
    """
    Domain
      Agencia de Gobierno Electrónico y Tecnologías de Información y Comunicación (AGETIC)
    Spider arguments
      sample
        Downloads the first file in the downloads page.
    Bulk download documentation
      https://datos.gob.bo/id/dataset/contrataciones-agetic-2019-estandar-ocp
    """
    name = 'bolivia_agetic'
    data_type = 'release_list'
    # NOTE(review): presumably opts this spider's files into the Unflatten pipeline - confirm against the pipeline.
    unflatten = True

    def start_requests(self):
        """
        Yields a single request for the dataset's CKAN ``package_show`` metadata, to be handled by ``parse_list``.
        """
        # A CKAN API JSON response.
        url = 'https://datos.gob.bo/api/3/action/package_show?id=contrataciones-agetic-2019-estandar-ocp'
        yield scrapy.Request(url, meta={'file_name': 'list.json'}, callback=self.parse_list)

    @handle_http_error
    def parse_list(self, response):
        """
        Yields a request, handled by ``parse_data``, for each resource whose description contains ``ocds``.

        :param response: the response to the ``package_show`` request, whose text is parsed as JSON
        """
        data = json.loads(response.text)
        for resource in data['result']['resources']:
            # A resource's description can be absent or null; treat both as empty, so that the resource is
            # skipped instead of raising KeyError or TypeError.
            description = resource.get('description') or ''
            if 'ocds' in description:
                # Presently, only one URL matches.
                yield scrapy.Request(resource['url'], meta={'file_name': resource['url']}, callback=self.parse_data)

    @handle_http_error
    def parse_data(self, response):
        """
        Yields a file item built from the response body, the request URL and the spider's ``data_type``.

        :param response: the response to a request yielded by ``parse_list``
        """
        yield self.build_file(url=response.request.url, data_type=self.data_type, data=response.body)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
flattentool | ||
ijson>=3.1.1 | ||
jsonpointer | ||
jsonref | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
import pytest | ||
from flattentool.input import BadXLSXZipFile | ||
|
||
from kingfisher_scrapy.items import File | ||
from kingfisher_scrapy.pipelines import Unflatten | ||
from tests import spider_with_crawler | ||
|
||
|
||
def test_process_item():
    """The Unflatten pipeline returns an item equal to the ``test.csv`` file item it was given."""
    crawler_spider = spider_with_crawler(unflatten=True)
    unflatten_pipeline = Unflatten()
    csv_file = File({
        'file_name': 'test.csv',
        'data': b'data',
        'data_type': 'release_list',
        'url': 'http://test.com/test.csv',
    })

    processed = unflatten_pipeline.process_item(csv_file, crawler_spider)

    assert processed == csv_file
|
||
|
||
def test_process_item_error():
    """The Unflatten pipeline raises ``NotImplementedError`` for a file item named ``file``."""
    crawler_spider = spider_with_crawler(unflatten=True)
    unflatten_pipeline = Unflatten()
    # The file name has no extension (presumably what the pipeline rejects - confirm against Unflatten).
    extensionless_file = File({
        'file_name': 'file',
        'data': b'data',
        'data_type': 'release_list',
        'url': 'http://test.com/file',
    })

    with pytest.raises(NotImplementedError):
        unflatten_pipeline.process_item(extensionless_file, crawler_spider)
|
||
|
||
def test_process_item_xlsx_error():
    """The Unflatten pipeline raises ``BadXLSXZipFile`` for a ``test.xlsx`` item whose data is ``b'data'``."""
    crawler_spider = spider_with_crawler(unflatten=True)
    unflatten_pipeline = Unflatten()
    # The payload is the literal bytes b'data', not a real XLSX workbook.
    invalid_xlsx_file = File({
        'file_name': 'test.xlsx',
        'data': b'data',
        'data_type': 'release_list',
        'url': 'http://test.com/test.xlsx',
    })

    with pytest.raises(BadXLSXZipFile):
        unflatten_pipeline.process_item(invalid_xlsx_file, crawler_spider)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters