From c15e3db8883d294dcf9351d87a07c66efa47d4c5 Mon Sep 17 00:00:00 2001
From: aguilerapy <amaguilera96@gmail.com>
Date: Wed, 25 Nov 2020 11:56:26 -0300
Subject: [PATCH] Add Pakistan spiders

---
 .../spiders/pakistan_ppra_records.py          | 31 +++++++++++++++++++
 .../spiders/pakistan_ppra_releases.py         | 17 ++++++++++
 2 files changed, 48 insertions(+)
 create mode 100644 kingfisher_scrapy/spiders/pakistan_ppra_records.py
 create mode 100644 kingfisher_scrapy/spiders/pakistan_ppra_releases.py

diff --git a/kingfisher_scrapy/spiders/pakistan_ppra_records.py b/kingfisher_scrapy/spiders/pakistan_ppra_records.py
new file mode 100644
index 000000000..c8bc6d2c2
--- /dev/null
+++ b/kingfisher_scrapy/spiders/pakistan_ppra_records.py
@@ -0,0 +1,31 @@
+import json
+
+import scrapy
+
+from kingfisher_scrapy.spiders.honduras_portal_base import HondurasPortalBase
+from kingfisher_scrapy.util import handle_http_error, components
+
+
+class HondurasPortalRecords(HondurasPortalBase):  # NOTE(review): Honduras-derived name on a Pakistan spider; confirm
+    """
+    Domain
+      Pakistan Public Procurement Regulatory Authority (PPRA)
+    API documentation
+      https://www.ppra.org.pk/api/
+    """
+    name = 'pakistan_ppra_records'
+    data_type = 'record_package'
+
+    def start_requests(self):
+        list_url = 'https://www.ppra.org.pk/api/index.php/api/records'
+        yield scrapy.Request(list_url, meta={'file_name': 'list.html'}, callback=self.parse_list)
+
+    @handle_http_error
+    def parse_list(self, response):
+        # The URL list is read from the seventh text node found under <body>.
+        # Its trailing, non-JSON last item is stripped so that the rest parses as JSON.
+        text_nodes = response.xpath('//body//text()').getall()
+        url_list = text_nodes[6].replace(",\r\n\r\nhttps://www.ppra.org.pk", "")
+        # Issue one request per listed URL.
+        for record_url in json.loads(url_list):
+            yield self.build_request(record_url, formatter=components(-2))
diff --git a/kingfisher_scrapy/spiders/pakistan_ppra_releases.py b/kingfisher_scrapy/spiders/pakistan_ppra_releases.py
new file mode 100644
index 000000000..3454b4a53
--- /dev/null
+++ b/kingfisher_scrapy/spiders/pakistan_ppra_releases.py
@@ -0,0 +1,17 @@
+import scrapy
+
+from kingfisher_scrapy.spiders.honduras_portal_base import HondurasPortalBase
+
+
+class HondurasPortalReleases(HondurasPortalBase):  # NOTE(review): Honduras-derived class name; confirm
+    """
+    Domain
+      Pakistan Public Procurement Regulatory Authority (PPRA)
+    API documentation
+      https://www.ppra.org.pk/api/
+    """
+    name = 'pakistan_ppra_releases'  # same as this module's file name
+    data_type = 'release_package'  # NOTE(review): assumes the endpoint returns a release package; confirm
+
+    def start_requests(self):  # one request; no callback is passed, presumably the default handles it
+        yield scrapy.Request('https://www.ppra.org.pk/api/index.php/api/release', meta={'file_name': 'releases.json'})