Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: restore digiwhist and add serbia and north macedonia #1076

Merged
merged 2 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 20 additions & 0 deletions docs/spiders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,16 @@ Nigeria

scrapy crawl nigeria_portal

North Macedonia
~~~~~~~~~~~~~~~

.. autoclass:: kingfisher_scrapy.spiders.north_macedonia_digiwhist.NorthMacedoniaDigiwhist
:no-members:

.. code-block:: bash

scrapy crawl north_macedonia_digiwhist

Norway
~~~~~~

Expand Down Expand Up @@ -1136,6 +1146,16 @@ Romania

scrapy crawl romania_digiwhist

Serbia
~~~~~~

.. autoclass:: kingfisher_scrapy.spiders.serbia_digiwhist.SerbiaDigiwhist
:no-members:

.. code-block:: bash

scrapy crawl serbia_digiwhist

Slovakia
~~~~~~~~

Expand Down
4 changes: 2 additions & 2 deletions kingfisher_scrapy/commands/updatedocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def run(self, args, opts):

def _keyfunc(module):
module_name = module.__name__.rsplit('.', 1)[-1]
if module_name.startswith(('costa_rica', 'czech_republic', 'dominican_republic', 'south_africa',
'united_kingdom')):
if module_name.startswith(('costa_rica', 'czech_republic', 'dominican_republic', 'north_macedonia',
'south_africa', 'united_kingdom')):
return '_'.join(module_name.split('_', 2)[:2])
return module_name.split('_', 1)[0]

Expand Down
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/austria_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class AustriaDigiwhist(DigiwhistBase):
name = 'austria_digiwhist'
start_urls = ['https://opentender.eu/data/files/AT_ocds_data.json.tar.gz']
country_code = 'at'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/belgium_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class BelgiumDigiwhist(DigiwhistBase):
name = 'belgium_digiwhist'
start_urls = ['https://opentender.eu/data/files/BE_ocds_data.json.tar.gz']
country_code = 'be'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/bulgaria_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class BulgariaDigiwhist(DigiwhistBase):
name = 'bulgaria_digiwhist'
start_urls = ['https://opentender.eu/data/files/BG_ocds_data.json.tar.gz']
country_code = 'bg'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/croatia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class CroatiaDigiwhist(DigiwhistBase):
name = 'croatia_digiwhist'
start_urls = ['https://opentender.eu/data/files/HR_ocds_data.json.tar.gz']
country_code = 'hr'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/cyprus_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class CyprusDigiwhist(DigiwhistBase):
name = 'cyprus_digiwhist'
start_urls = ['https://opentender.eu/data/files/CY_ocds_data.json.tar.gz']
country_code = 'cy'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/czech_republic_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class CzechRepublicDigiwhist(DigiwhistBase):
name = 'czech_republic_digiwhist'
start_urls = ['https://opentender.eu/data/files/CZ_ocds_data.json.tar.gz']
country_code = 'cz'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/denmark_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class DenmarkDigiwhist(DigiwhistBase):
name = 'denmark_digiwhist'
start_urls = ['https://opentender.eu/data/files/DK_ocds_data.json.tar.gz']
country_code = 'dk'
31 changes: 10 additions & 21 deletions kingfisher_scrapy/spiders/digiwhist_base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
import tarfile
from io import BytesIO
from kingfisher_scrapy.base_spiders import CompressedFileSpider
from kingfisher_scrapy.util import browser_user_agent, components

import scrapy

from kingfisher_scrapy.base_spiders import BaseSpider
from kingfisher_scrapy.util import browser_user_agent, handle_http_error


class DigiwhistBase(BaseSpider):
class DigiwhistBase(CompressedFileSpider):
"""
Domain
Digiwhist
Expand All @@ -19,19 +14,13 @@ class DigiwhistBase(BaseSpider):
# BaseSpider
line_delimited = True

# start_urls must be provided by subclasses.
# CompressedFileSpider
data_type = 'release_package'

def start_requests(self):
# See scrapy.spiders.Spider.start_requests
for url in self.start_urls:
yield scrapy.Request(url, meta={'file_name': 'file.tar.gz'})
# Local
base_url = 'https://opentender.eu/data/downloads/data-{}-json-json.zip'

@handle_http_error
def parse(self, response):
# country_code must be provided by subclasses.

# Load a line at the time, pass it to API
with tarfile.open(fileobj=BytesIO(response.body), mode="r:gz") as tar:
with tar.extractfile(tar.next()) as f:
yield self.build_file_from_response(
response, data_type='release_package', file_name='data.json', data=f
)
def start_requests(self):
yield self.build_request(self.base_url.format(self.country_code), formatter=components(-1))
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/estonia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class EstoniaDigiwhist(DigiwhistBase):
name = 'estonia_digiwhist'
start_urls = ['https://opentender.eu/data/files/EE_ocds_data.json.tar.gz']
country_code = 'ee'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/europe_ted_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class EuropeTEDDigiwhist(DigiwhistBase):
name = 'europe_ted_digiwhist'
start_urls = ['https://opentender.eu/data/files/TED_ocds_data.json.tar.gz']
country_code = 'eu'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/finland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class FinlandDigiwhist(DigiwhistBase):
name = 'finland_digiwhist'
start_urls = ['https://opentender.eu/data/files/FI_ocds_data.json.tar.gz']
country_code = 'fi'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/france_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class FranceDigiwhist(DigiwhistBase):
name = 'france_digiwhist'
start_urls = ['https://opentender.eu/data/files/FR_ocds_data.json.tar.gz']
country_code = 'fr'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/georgia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class GeorgiaDigiwhist(DigiwhistBase):
name = 'georgia_digiwhist'
start_urls = ['https://opentender.eu/data/files/GE_ocds_data.json.tar.gz']
country_code = 'ge'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/germany_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class GermanyDigiwhist(DigiwhistBase):
name = 'germany_digiwhist'
start_urls = ['https://opentender.eu/data/files/DE_ocds_data.json.tar.gz']
country_code = 'de'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/greece_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class GreeceDigiwhist(DigiwhistBase):
name = 'greece_digiwhist'
start_urls = ['https://opentender.eu/data/files/GR_ocds_data.json.tar.gz']
country_code = 'gr'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/hungary_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class HungaryDigiwhist(DigiwhistBase):
name = 'hungary_digiwhist'
start_urls = ['https://opentender.eu/data/files/HU_ocds_data.json.tar.gz']
country_code = 'hu'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/iceland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class IcelandDigiwhist(DigiwhistBase):
name = 'iceland_digiwhist'
start_urls = ['https://opentender.eu/data/files/IS_ocds_data.json.tar.gz']
country_code = 'is'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/ireland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class IrelandDigiwhist(DigiwhistBase):
name = 'ireland_digiwhist'
start_urls = ['https://opentender.eu/data/files/IE_ocds_data.json.tar.gz']
country_code = 'ie'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/italy_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class ItalyDigiwhist(DigiwhistBase):
name = 'italy_digiwhist'
start_urls = ['https://opentender.eu/data/files/IT_ocds_data.json.tar.gz']
country_code = 'it'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/latvia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class LatviaDigiwhist(DigiwhistBase):
name = 'latvia_digiwhist'
start_urls = ['https://opentender.eu/data/files/LV_ocds_data.json.tar.gz']
country_code = 'lv'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/lithuania_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class LithuaniaDigiwhist(DigiwhistBase):
name = 'lithuania_digiwhist'
start_urls = ['https://opentender.eu/data/files/LT_ocds_data.json.tar.gz']
country_code = 'lt'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/luxembourg_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class LuxembourgDigiwhist(DigiwhistBase):
name = 'luxembourg_digiwhist'
start_urls = ['https://opentender.eu/data/files/LU_ocds_data.json.tar.gz']
country_code = 'lu'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/malta_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class MaltaDigiwhist(DigiwhistBase):
name = 'malta_digiwhist'
start_urls = ['https://opentender.eu/data/files/MT_ocds_data.json.tar.gz']
country_code = 'mt'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/netherlands_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class NetherlandsDigiwhist(DigiwhistBase):
name = 'netherlands_digiwhist'
start_urls = ['https://opentender.eu/data/files/NL_ocds_data.json.tar.gz']
country_code = 'nl'
6 changes: 6 additions & 0 deletions kingfisher_scrapy/spiders/north_macedonia_digiwhist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from kingfisher_scrapy.spiders.digiwhist_base import DigiwhistBase


class NorthMacedoniaDigiwhist(DigiwhistBase):
name = 'north_macedonia_digiwhist'
country_code = 'mk'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/norway_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class NorwayDigiwhist(DigiwhistBase):
name = 'norway_digiwhist'
start_urls = ['https://opentender.eu/data/files/NO_ocds_data.json.tar.gz']
country_code = 'no'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/poland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class PolandDigiwhist(DigiwhistBase):
name = 'poland_digiwhist'
start_urls = ['https://opentender.eu/data/files/PL_ocds_data.json.tar.gz']
country_code = 'pl'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/portugal_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class PortugalDigiwhist(DigiwhistBase):
name = 'portugal_digiwhist'
start_urls = ['https://opentender.eu/data/files/PT_ocds_data.json.tar.gz']
country_code = 'pt'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/romania_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class RomaniaDigiwhist(DigiwhistBase):
name = 'romania_digiwhist'
start_urls = ['https://opentender.eu/data/files/RO_ocds_data.json.tar.gz']
country_code = 'ro'
6 changes: 6 additions & 0 deletions kingfisher_scrapy/spiders/serbia_digiwhist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from kingfisher_scrapy.spiders.digiwhist_base import DigiwhistBase


class SerbiaDigiwhist(DigiwhistBase):
name = 'serbia_digiwhist'
country_code = 'rs'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/slovakia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SlovakiaDigiwhist(DigiwhistBase):
name = 'slovakia_digiwhist'
start_urls = ['https://opentender.eu/data/files/SK_ocds_data.json.tar.gz']
country_code = 'sk'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/slovenia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SloveniaDigiwhist(DigiwhistBase):
name = 'slovenia_digiwhist'
start_urls = ['https://opentender.eu/data/files/SI_ocds_data.json.tar.gz']
country_code = 'si'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/spain_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SpainDigiwhist(DigiwhistBase):
name = 'spain_digiwhist'
start_urls = ['https://opentender.eu/data/files/ES_ocds_data.json.tar.gz']
country_code = 'es'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/sweden_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SwedenDigiwhist(DigiwhistBase):
name = 'sweden_digiwhist'
start_urls = ['https://opentender.eu/data/files/SE_ocds_data.json.tar.gz']
country_code = 'se'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/switzerland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SwitzerlandDigiwhist(DigiwhistBase):
name = 'switzerland_digiwhist'
start_urls = ['https://opentender.eu/data/files/CH_ocds_data.json.tar.gz']
country_code = 'ch'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/united_kingdom_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class UnitedKingdomDigiwhist(DigiwhistBase):
name = 'united_kingdom_digiwhist'
start_urls = ['https://opentender.eu/data/files/UK_ocds_data.json.tar.gz']
country_code = 'uk'