<a href="https://colab.research.google.com/github/msf1997/taiyo/blob/main/Assignment_Data_Engineer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import requests
from bs4 import BeautifulSoup
import csv

In [6]:

class WorldBankTendersScraper:
    """Scrape tender-style listing rows from a web page and export them to CSV.

    The page at ``url`` is fetched with ``requests`` and parsed with
    BeautifulSoup. Every ``<div class="views-row">`` becomes one record whose
    values are read from the child ``<div>`` elements named in
    ``FIELD_CLASSES``.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.
    """

    # Output column name -> CSS class of the <div> holding that value.
    # The insertion order is also the column order of the CSV.
    FIELD_CLASSES = {
        'Title': 'views-field-title',
        'Description': 'views-field-field-pd-description',
        'Project': 'views-field-field-pd-project',
        'Country': 'views-field-field-pd-country',
    }

    def __init__(self, url, timeout=30):
        self.url = url
        self.timeout = timeout

    @staticmethod
    def _field_text(item, tag, css_class):
        """Return the stripped text of the first matching child, or "N/A"."""
        elem = item.find(tag, class_=css_class)
        return elem.get_text().strip() if elem else "N/A"

    def scrape(self):
        """Fetch the page and extract one record per listing row.

        Returns:
            A list of dicts keyed by the names in ``FIELD_CLASSES``. A field
            that is missing from a row is reported as ``"N/A"``. The list is
            empty when the request fails, when the server answers with a
            status other than 200, or when no row matches.
        """
        try:
            response = requests.get(self.url, timeout=self.timeout)
        except requests.RequestException as exc:
            # Connection errors and timeouts follow the same "report and
            # return nothing" path as a bad status instead of aborting the cell.
            print(f"Failed to retrieve data from {self.url}: {exc}")
            return []

        if response.status_code != 200:
            print(f"Failed to retrieve data from {self.url} (HTTP {response.status_code})")
            return []

        soup = BeautifulSoup(response.content, 'html.parser')
        return [
            {
                column: self._field_text(item, 'div', css_class)
                for column, css_class in self.FIELD_CLASSES.items()
            }
            for item in soup.find_all('div', class_='views-row')
        ]

    def save_to_csv(self, data, filename='world_bank_tenders.csv'):
        """Write scraped records to a UTF-8 CSV file with a header row.

        Args:
            data: List of dicts as returned by ``scrape``.
            filename: Destination path; defaults to the original fixed name.

        Nothing is written when ``data`` is empty, so an existing file from an
        earlier successful run is not replaced by a header-only file.
        """
        if not data:
            print(f"No data to save; {filename} was not written")
            return

        # newline='' lets the csv module control line endings itself.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=list(self.FIELD_CLASSES))
            writer.writeheader()
            writer.writerows(data)
        print(f"Data saved to {filename}")

# Driver for the World Bank scrape: fetch the page, parse it, then write the CSV.
# NOTE(review): `url`, `scraper` and `scraped_data` are notebook-level names that
# the ChinaBiddingScraper driver cell below rebinds, so their values depend on
# which cell ran last.
if __name__ == '__main__':
    url = 'https://ieg.worldbankgroup.org/data'
    scraper = WorldBankTendersScraper(url)
    # scrape() returns an empty list on failure; save_to_csv() skips empty data.
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)


Data saved to world_bank_tenders.csv


In [7]:

class ChinaBiddingScraper:
    """Scrape title/date pairs from a web page and export them to CSV.

    The page at ``url`` is fetched with ``requests`` and parsed with
    BeautifulSoup. Every ``<div class="content-box">`` becomes one record whose
    values are read from the child elements named in ``FIELDS``.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.
    """

    # Output column name -> (tag, CSS class) of the element holding the value.
    # The insertion order is also the column order of the CSV.
    FIELDS = {
        'Title': ('h3', 'news-hd'),
        'Date': ('span', 'time'),
    }

    def __init__(self, url, timeout=30):
        self.url = url
        self.timeout = timeout

    @staticmethod
    def _field_text(item, tag, css_class):
        """Return the stripped text of the first matching child, or "N/A"."""
        elem = item.find(tag, class_=css_class)
        return elem.get_text().strip() if elem else "N/A"

    def scrape(self):
        """Fetch the page and extract one record per content box.

        Returns:
            A list of dicts keyed by the names in ``FIELDS``. A field that is
            missing from a box is reported as ``"N/A"``. The list is empty
            when the request fails, when the server answers with a status
            other than 200, or when no box matches.
        """
        try:
            response = requests.get(self.url, timeout=self.timeout)
        except requests.RequestException as exc:
            # Connection errors and timeouts follow the same "report and
            # return nothing" path as a bad status instead of aborting the cell.
            print(f"Failed to retrieve data from {self.url}: {exc}")
            return []

        if response.status_code != 200:
            # Include the status so a refusal can be told apart from a
            # missing page or a server error.
            print(f"Failed to retrieve data from {self.url} (HTTP {response.status_code})")
            return []

        soup = BeautifulSoup(response.content, 'html.parser')
        return [
            {
                column: self._field_text(item, tag, css_class)
                for column, (tag, css_class) in self.FIELDS.items()
            }
            for item in soup.find_all('div', class_='content-box')
        ]

    def save_to_csv(self, data, filename='china_bidding_tenders.csv'):
        """Write scraped records to a UTF-8 CSV file with a header row.

        Args:
            data: List of dicts as returned by ``scrape``.
            filename: Destination path; defaults to the original fixed name.

        Nothing is written when ``data`` is empty, so an existing file from an
        earlier successful run is not replaced by a header-only file.
        """
        if not data:
            print(f"No data to save; {filename} was not written")
            return

        # newline='' lets the csv module control line endings itself.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=list(self.FIELDS))
            writer.writeheader()
            writer.writerows(data)
        print(f"Data saved to {filename}")

# Driver for the China Bidding scrape: fetch the page, parse it, then write the CSV.
# NOTE(review): this rebinds `url`, `scraper` and `scraped_data`, which the
# WorldBankTendersScraper driver cell above also assigns; after this cell runs
# they refer to the China Bidding objects.
if __name__ == '__main__':
    url = 'https://www.chinabidding.com/en'
    scraper = ChinaBiddingScraper(url)
    # scrape() returns an empty list on failure; save_to_csv() skips empty data,
    # so a failed fetch produces no CSV file.
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)



Failed to retrieve data from https://www.chinabidding.com/en
