Skip to content

Commit

Permalink
Merge pull request #1076 from open-contracting/digiwhist
Browse files Browse the repository at this point in the history
feat: restore digiwhist and add serbia and north macedonia
  • Loading branch information
yolile committed Apr 12, 2024
2 parents cc713a9 + a49f23d commit 2fd59fe
Show file tree
Hide file tree
Showing 38 changed files with 77 additions and 56 deletions.
20 changes: 20 additions & 0 deletions docs/spiders.rst
Original file line number Diff line number Diff line change
Expand Up @@ -976,6 +976,16 @@ Nigeria
scrapy crawl nigeria_portal
North Macedonia
~~~~~~~~~~~~~~~

.. autoclass:: kingfisher_scrapy.spiders.north_macedonia_digiwhist.NorthMacedoniaDigiwhist
   :no-members:

.. code-block:: bash

   scrapy crawl north_macedonia_digiwhist
Norway
~~~~~~

Expand Down Expand Up @@ -1136,6 +1146,16 @@ Romania
scrapy crawl romania_digiwhist
Serbia
~~~~~~

.. autoclass:: kingfisher_scrapy.spiders.serbia_digiwhist.SerbiaDigiwhist
   :no-members:

.. code-block:: bash

   scrapy crawl serbia_digiwhist
Slovakia
~~~~~~~~

Expand Down
4 changes: 2 additions & 2 deletions kingfisher_scrapy/commands/updatedocs.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def run(self, args, opts):

def _keyfunc(module):
module_name = module.__name__.rsplit('.', 1)[-1]
if module_name.startswith(('costa_rica', 'czech_republic', 'dominican_republic', 'south_africa',
'united_kingdom')):
if module_name.startswith(('costa_rica', 'czech_republic', 'dominican_republic', 'north_macedonia',
'south_africa', 'united_kingdom')):
return '_'.join(module_name.split('_', 2)[:2])
return module_name.split('_', 1)[0]

Expand Down
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/austria_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class AustriaDigiwhist(DigiwhistBase):
name = 'austria_digiwhist'
start_urls = ['https://opentender.eu/data/files/AT_ocds_data.json.tar.gz']
country_code = 'at'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/belgium_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class BelgiumDigiwhist(DigiwhistBase):
name = 'belgium_digiwhist'
start_urls = ['https://opentender.eu/data/files/BE_ocds_data.json.tar.gz']
country_code = 'be'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/bulgaria_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class BulgariaDigiwhist(DigiwhistBase):
name = 'bulgaria_digiwhist'
start_urls = ['https://opentender.eu/data/files/BG_ocds_data.json.tar.gz']
country_code = 'bg'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/croatia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class CroatiaDigiwhist(DigiwhistBase):
name = 'croatia_digiwhist'
start_urls = ['https://opentender.eu/data/files/HR_ocds_data.json.tar.gz']
country_code = 'hr'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/cyprus_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class CyprusDigiwhist(DigiwhistBase):
name = 'cyprus_digiwhist'
start_urls = ['https://opentender.eu/data/files/CY_ocds_data.json.tar.gz']
country_code = 'cy'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/czech_republic_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class CzechRepublicDigiwhist(DigiwhistBase):
name = 'czech_republic_digiwhist'
start_urls = ['https://opentender.eu/data/files/CZ_ocds_data.json.tar.gz']
country_code = 'cz'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/denmark_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class DenmarkDigiwhist(DigiwhistBase):
name = 'denmark_digiwhist'
start_urls = ['https://opentender.eu/data/files/DK_ocds_data.json.tar.gz']
country_code = 'dk'
31 changes: 10 additions & 21 deletions kingfisher_scrapy/spiders/digiwhist_base.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
import tarfile
from io import BytesIO
from kingfisher_scrapy.base_spiders import CompressedFileSpider
from kingfisher_scrapy.util import browser_user_agent, components

import scrapy

from kingfisher_scrapy.base_spiders import BaseSpider
from kingfisher_scrapy.util import browser_user_agent, handle_http_error


class DigiwhistBase(BaseSpider):
class DigiwhistBase(CompressedFileSpider):
"""
Domain
Digiwhist
Expand All @@ -19,19 +14,13 @@ class DigiwhistBase(BaseSpider):
# BaseSpider
line_delimited = True

# start_urls must be provided by subclasses.
# CompressedFileSpider
data_type = 'release_package'

def start_requests(self):
# See scrapy.spiders.Spider.start_requests
for url in self.start_urls:
yield scrapy.Request(url, meta={'file_name': 'file.tar.gz'})
# Local
base_url = 'https://opentender.eu/data/downloads/data-{}-json-json.zip'

@handle_http_error
def parse(self, response):
# country_code must be provided by subclasses.

# Load a line at the time, pass it to API
with tarfile.open(fileobj=BytesIO(response.body), mode="r:gz") as tar:
with tar.extractfile(tar.next()) as f:
yield self.build_file_from_response(
response, data_type='release_package', file_name='data.json', data=f
)
def start_requests(self):
yield self.build_request(self.base_url.format(self.country_code), formatter=components(-1))
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/estonia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class EstoniaDigiwhist(DigiwhistBase):
name = 'estonia_digiwhist'
start_urls = ['https://opentender.eu/data/files/EE_ocds_data.json.tar.gz']
country_code = 'ee'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/europe_ted_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class EuropeTEDDigiwhist(DigiwhistBase):
name = 'europe_ted_digiwhist'
start_urls = ['https://opentender.eu/data/files/TED_ocds_data.json.tar.gz']
country_code = 'eu'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/finland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class FinlandDigiwhist(DigiwhistBase):
name = 'finland_digiwhist'
start_urls = ['https://opentender.eu/data/files/FI_ocds_data.json.tar.gz']
country_code = 'fi'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/france_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class FranceDigiwhist(DigiwhistBase):
name = 'france_digiwhist'
start_urls = ['https://opentender.eu/data/files/FR_ocds_data.json.tar.gz']
country_code = 'fr'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/georgia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class GeorgiaDigiwhist(DigiwhistBase):
name = 'georgia_digiwhist'
start_urls = ['https://opentender.eu/data/files/GE_ocds_data.json.tar.gz']
country_code = 'ge'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/germany_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class GermanyDigiwhist(DigiwhistBase):
name = 'germany_digiwhist'
start_urls = ['https://opentender.eu/data/files/DE_ocds_data.json.tar.gz']
country_code = 'de'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/greece_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class GreeceDigiwhist(DigiwhistBase):
name = 'greece_digiwhist'
start_urls = ['https://opentender.eu/data/files/GR_ocds_data.json.tar.gz']
country_code = 'gr'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/hungary_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class HungaryDigiwhist(DigiwhistBase):
name = 'hungary_digiwhist'
start_urls = ['https://opentender.eu/data/files/HU_ocds_data.json.tar.gz']
country_code = 'hu'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/iceland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class IcelandDigiwhist(DigiwhistBase):
name = 'iceland_digiwhist'
start_urls = ['https://opentender.eu/data/files/IS_ocds_data.json.tar.gz']
country_code = 'is'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/ireland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class IrelandDigiwhist(DigiwhistBase):
name = 'ireland_digiwhist'
start_urls = ['https://opentender.eu/data/files/IE_ocds_data.json.tar.gz']
country_code = 'ie'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/italy_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class ItalyDigiwhist(DigiwhistBase):
name = 'italy_digiwhist'
start_urls = ['https://opentender.eu/data/files/IT_ocds_data.json.tar.gz']
country_code = 'it'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/latvia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class LatviaDigiwhist(DigiwhistBase):
name = 'latvia_digiwhist'
start_urls = ['https://opentender.eu/data/files/LV_ocds_data.json.tar.gz']
country_code = 'lv'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/lithuania_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class LithuaniaDigiwhist(DigiwhistBase):
name = 'lithuania_digiwhist'
start_urls = ['https://opentender.eu/data/files/LT_ocds_data.json.tar.gz']
country_code = 'lt'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/luxembourg_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class LuxembourgDigiwhist(DigiwhistBase):
name = 'luxembourg_digiwhist'
start_urls = ['https://opentender.eu/data/files/LU_ocds_data.json.tar.gz']
country_code = 'lu'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/malta_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class MaltaDigiwhist(DigiwhistBase):
name = 'malta_digiwhist'
start_urls = ['https://opentender.eu/data/files/MT_ocds_data.json.tar.gz']
country_code = 'mt'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/netherlands_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class NetherlandsDigiwhist(DigiwhistBase):
name = 'netherlands_digiwhist'
start_urls = ['https://opentender.eu/data/files/NL_ocds_data.json.tar.gz']
country_code = 'nl'
6 changes: 6 additions & 0 deletions kingfisher_scrapy/spiders/north_macedonia_digiwhist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from kingfisher_scrapy.spiders.digiwhist_base import DigiwhistBase


class NorthMacedoniaDigiwhist(DigiwhistBase):
    # Spider name, as used on the command line: ``scrapy crawl north_macedonia_digiwhist``.
    name = 'north_macedonia_digiwhist'
    # Required by DigiwhistBase, which substitutes it into its ``base_url`` template
    # to build the download URL for this country's archive.
    country_code = 'mk'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/norway_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class NorwayDigiwhist(DigiwhistBase):
name = 'norway_digiwhist'
start_urls = ['https://opentender.eu/data/files/NO_ocds_data.json.tar.gz']
country_code = 'no'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/poland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class PolandDigiwhist(DigiwhistBase):
name = 'poland_digiwhist'
start_urls = ['https://opentender.eu/data/files/PL_ocds_data.json.tar.gz']
country_code = 'pl'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/portugal_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class PortugalDigiwhist(DigiwhistBase):
name = 'portugal_digiwhist'
start_urls = ['https://opentender.eu/data/files/PT_ocds_data.json.tar.gz']
country_code = 'pt'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/romania_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class RomaniaDigiwhist(DigiwhistBase):
name = 'romania_digiwhist'
start_urls = ['https://opentender.eu/data/files/RO_ocds_data.json.tar.gz']
country_code = 'ro'
6 changes: 6 additions & 0 deletions kingfisher_scrapy/spiders/serbia_digiwhist.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from kingfisher_scrapy.spiders.digiwhist_base import DigiwhistBase


class SerbiaDigiwhist(DigiwhistBase):
    # Spider name, as used on the command line: ``scrapy crawl serbia_digiwhist``.
    name = 'serbia_digiwhist'
    # Required by DigiwhistBase, which substitutes it into its ``base_url`` template
    # to build the download URL for this country's archive.
    country_code = 'rs'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/slovakia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SlovakiaDigiwhist(DigiwhistBase):
name = 'slovakia_digiwhist'
start_urls = ['https://opentender.eu/data/files/SK_ocds_data.json.tar.gz']
country_code = 'sk'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/slovenia_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SloveniaDigiwhist(DigiwhistBase):
name = 'slovenia_digiwhist'
start_urls = ['https://opentender.eu/data/files/SI_ocds_data.json.tar.gz']
country_code = 'si'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/spain_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SpainDigiwhist(DigiwhistBase):
name = 'spain_digiwhist'
start_urls = ['https://opentender.eu/data/files/ES_ocds_data.json.tar.gz']
country_code = 'es'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/sweden_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SwedenDigiwhist(DigiwhistBase):
name = 'sweden_digiwhist'
start_urls = ['https://opentender.eu/data/files/SE_ocds_data.json.tar.gz']
country_code = 'se'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/switzerland_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class SwitzerlandDigiwhist(DigiwhistBase):
name = 'switzerland_digiwhist'
start_urls = ['https://opentender.eu/data/files/CH_ocds_data.json.tar.gz']
country_code = 'ch'
2 changes: 1 addition & 1 deletion kingfisher_scrapy/spiders/united_kingdom_digiwhist.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

class UnitedKingdomDigiwhist(DigiwhistBase):
name = 'united_kingdom_digiwhist'
start_urls = ['https://opentender.eu/data/files/UK_ocds_data.json.tar.gz']
country_code = 'uk'

0 comments on commit 2fd59fe

Please sign in to comment.