In [4]:
import scrapy
from scrapy.crawler import CrawlerProcess
import pycountry
import csv

class PortSpider(scrapy.Spider):
    """Collect port records for every pycountry country and save them as CSV.

    One form request per country is sent to ``ports_url``. Each response is
    expected to be a JSON object listing ports under the ``cports`` key. Rows
    are accumulated in ``self.results`` and written to ``output_file`` in a
    single pass when the spider closes.
    """

    name = 'port_spider'
    custom_settings = {
        'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    }

    # Endpoint queried once per country.
    ports_url = "https://www.searates.com/maritime/ports-map"
    # Destination of the CSV written by ``closed``.
    output_file = 'ports_output.csv'
    # CSV column order; also the keys of every row stored in ``self.results``.
    fieldnames = ['Country', 'Port Name', 'Latitude', 'Longitude', 'Has Terminal']

    def __init__(self, *args, **kwargs):
        """Load the country list and start with an empty result buffer."""
        super().__init__(*args, **kwargs)
        self.countries = list(pycountry.countries)
        self.results = []

    def start_requests(self):
        """Yield one form request per country, keyed by its alpha-2 code.

        The country's display name travels in ``meta`` so ``parse`` can label
        the rows without a second lookup.
        """
        for country in self.countries:
            yield scrapy.FormRequest(
                url=self.ports_url,
                formdata={'c': country.alpha_2},
                callback=self.parse,
                meta={'country_name': country.name},
            )

    def parse(self, response):
        """Append one row per port found in ``response`` to ``self.results``.

        Responses that are not JSON, or whose payload is not an object with a
        ``cports`` list, are skipped with a warning instead of raising, so a
        single bad country cannot abort that callback half-way through.
        """
        country_name = response.meta['country_name']

        try:
            data = response.json()
        except ValueError:
            # Typically an HTML error/block page instead of the JSON payload.
            self.logger.warning(
                "Non-JSON response for %s (HTTP %s); skipping",
                country_name,
                response.status,
            )
            return

        if not isinstance(data, dict):
            self.logger.warning(
                "Unexpected payload type %s for %s; skipping",
                type(data).__name__,
                country_name,
            )
            return

        # ``or []`` also covers an explicit ``"cports": null``.
        for port in data.get('cports') or []:
            if not isinstance(port, dict):
                continue
            # ``.get`` keeps a record with a missing field (written as an
            # empty CSV cell) rather than losing the rest of the country.
            self.results.append({
                'Country': country_name,
                'Port Name': port.get('name'),
                'Latitude': port.get('lat'),
                'Longitude': port.get('lng'),
                'Has Terminal': 'Yes' if port.get('t') else 'No',
            })

    def closed(self, reason):
        """Write all collected rows to ``output_file`` and report completion.

        ``reason`` is accepted for the Scrapy ``closed`` hook signature and is
        not used; the file is written regardless of why the spider closed.
        """
        with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=self.fieldnames)
            writer.writeheader()
            writer.writerows(self.results)
        print(f"Processing complete. Data saved to {self.output_file}")

# Script entry point: crawl with PortSpider when this file is executed directly.
if __name__ == "__main__":
    process = CrawlerProcess()  # no settings passed; the spider supplies its own custom_settings
    process.crawl(PortSpider)  # schedule the spider class on this process
    # NOTE(review): per the Scrapy docs, start() runs the reactor and blocks
    # until the crawl finishes — confirm for the Scrapy version in use.
    process.start()

2024-10-09 15:54:57 [scrapy.utils.log] INFO: Scrapy 2.11.2 started (bot: scrapybot)
2024-10-09 15:54:57 [scrapy.utils.log] INFO: Versions: lxml 5.3.0.0, libxml2 2.12.9, cssselect 1.2.0, parsel 1.9.1, w3lib 2.2.1, Twisted 24.7.0, Python 3.12.3 (main, Sep 11 2024, 14:17:37) [GCC 13.2.0], pyOpenSSL 24.2.1 (OpenSSL 3.3.2 3 Sep 2024), cryptography 43.0.1, Platform Linux-6.8.0-45-generic-x86_64-with-glibc2.39
2024-10-09 15:54:57 [scrapy.addons] INFO: Enabled addons:
[]


See the documentation of the 'REQUEST_FINGERPRINTER_IMPLEMENTATION' setting for information on how to handle this deprecation.
  return cls(crawler)

2024-10-09 15:54:57 [scrapy.utils.log] DEBUG: Using reactor: twisted.internet.epollreactor.EPollReactor
2024-10-09 15:54:57 [scrapy.extensions.telnet] INFO: Telnet Password: 2e02cc6aa104274d
2024-10-09 15:54:57 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.MemoryUsa

Processing complete. Data saved to ports_output.csv
