<a href="https://colab.research.google.com/github/msf1997/taiyo/blob/main/Assignment_Data_Engineer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup
import csv

In [2]:

class WorldBankTendersScraper:
    def __init__(self, url):
        self.url = url

    def scrape(self):
        response = requests.get(self.url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tender_items = soup.find_all('div', class_='views-row')

            data = []
            for item in tender_items:
                title_elem = item.find('div', class_='views-field-title')
                title = title_elem.get_text().strip() if title_elem else "N/A"

                description_elem = item.find('div', class_='views-field-field-pd-description')
                description = description_elem.get_text().strip() if description_elem else "N/A"

                project_elem = item.find('div', class_='views-field-field-pd-project')
                project = project_elem.get_text().strip() if project_elem else "N/A"

                country_elem = item.find('div', class_='views-field-field-pd-country')
                country = country_elem.get_text().strip() if country_elem else "N/A"

                data.append({
                    'Title': title,
                    'Description': description,
                    'Project': project,
                    'Country': country
                })

            return data
        else:
            print(f"Failed to retrieve data from {self.url}")
            return []

    def save_to_csv(self, data):
        if data:
            with open('world_bank_tenders.csv', 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['Title', 'Description', 'Project', 'Country']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for row in data:
                    writer.writerow(row)
            print("Data saved to world_bank_tenders.csv")

if __name__ == '__main__':
    url = 'https://ieg.worldbankgroup.org/data'
    scraper = WorldBankTendersScraper(url)
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)


Data saved to world_bank_tenders.csv


In [28]:
import requests
import time
from bs4 import BeautifulSoup
import csv

class ChinaBiddingScraper:
    def __init__(self, url):
        self.url = url

    def scrape(self):
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }

        response = requests.get(self.url, headers=headers)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tender_items = soup.find_all('div', class_='content-box')

            data = []
            for item in tender_items:
                title_elem = item.find('h3', class_='news-hd')
                title = title_elem.get_text().strip() if title_elem else "N/A"

                date_elem = item.find('span', class_='time')
                date = date_elem.get_text().strip() if date_elem else "N/A"

                data.append({
                    'Title': title,
                    'Date': date
                })

            return data
        else:
            print(f"Failed to retrieve data from {self.url}")
            return []

    def save_to_csv(self, data):
        if data:
            with open('china_bidding_tenders.csv', 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['Title', 'Date']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for row in data:
                    writer.writerow(row)
        print("Data saved to china_bidding_tenders.csv")

if __name__ == '__main__':
    url = 'https://www.chinabidding.com/en'
    scraper = ChinaBiddingScraper(url)
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)


Data saved to china_bidding_tenders.csv


In [29]:

class ChinabiddingMOFCOMScraper:
    def __init__(self, url):
        self.url = url

    def scrape(self):
        response = requests.get(self.url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tender_items = soup.find_all('li', class_='li2')

            data = []
            for item in tender_items:
                title_elem = item.find('a', class_='mmu1')
                title = title_elem.get_text().strip() if title_elem else "N/A"

                date_elem = item.find('span', class_='span5')
                date = date_elem.get_text().strip() if date_elem else "N/A"

                data.append({
                    'Title': title,
                    'Date': date
                })

            return data
        else:
            print(f"Failed to retrieve data from {self.url}")
            return []

    def save_to_csv(self, data):
        if data:
            with open('chinabidding_mofcom_tenders.csv', 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['Title', 'Date']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for row in data:
                    writer.writerow(row)
        print("Data saved to chinabidding_mofcom_tenders.csv")

if __name__ == '__main__':
    url = 'http://en.chinabidding.mofcom.gov.cn/'
    scraper = ChinabiddingMOFCOMScraper(url)
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)


Data saved to chinabidding_mofcom_tenders.csv


In [31]:

class CPPPCScraper:
    def __init__(self, url):
        self.url = url

    def scrape(self):
        response = requests.get(self.url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tender_items = soup.find_all('div', class_='search_main_item')

            data = []
            for item in tender_items:
                title_elem = item.find('a', class_='search_main_item_ltitle')
                title = title_elem.get_text().strip() if title_elem else "N/A"

                date_elem = item.find('span', class_='search_main_item_ldate')
                date = date_elem.get_text().strip() if date_elem else "N/A"

                data.append({
                    'Title': title,
                    'Date': date
                })

            return data
        else:
            print(f"Failed to retrieve data from {self.url}")
            return []

    def save_to_csv(self, data):
        if data:
            with open('cpppc_tenders.csv', 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['Title', 'Date']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for row in data:
                    writer.writerow(row)
        print("Data saved to cpppc_tenders.csv")

if __name__ == '__main__':
    url = 'https://www.cpppc.org/en/PPPyd.jhtml'
    scraper = CPPPCScraper(url)
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)


Data saved to cpppc_tenders.csv


In [34]:

class CPPPC8082Scraper:
    def __init__(self, url):
        self.url = url

    def scrape(self):
        response = requests.get(self.url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tender_items = soup.find_all('div', class_='search_item')

            data = []
            for item in tender_items:
                title_elem = item.find('a', class_='search_item_title')
                title = title_elem.get_text().strip() if title_elem else "N/A"

                date_elem = item.find('div', class_='search_item_date')
                date = date_elem.get_text().strip() if date_elem else "N/A"

                data.append({
                    'Title': title,
                    'Date': date
                })

            return data
        else:
            print(f"Failed to retrieve data from {self.url}")
            return []

    def save_to_csv(self, data):
        if data:
            with open('cpppc_8082_tenders.csv', 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['Title', 'Date']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for row in data:
                    writer.writerow(row)
        print("Data saved to cpppc_8082_tenders.csv")

if __name__ == '__main__':
    url = 'https://www.cpppc.org:8082/inforpublic/homepage.html#/searchresult'
    scraper = CPPPC8082Scraper(url)
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)


Failed to retrieve data from https://www.cpppc.org:8082/inforpublic/homepage.html#/searchresult
Data saved to cpppc_8082_tenders.csv


**E-procurement Government of India:**

In [36]:

class ETendersScraper:
    def __init__(self, url):
        self.url = url

    def scrape(self):
        response = requests.get(self.url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, 'html.parser')
            tender_items = soup.find_all('div', class_='table-main-row')

            data = []
            for item in tender_items:
                title_elem = item.find('div', class_='col-md-6 col-xs-12')
                title = title_elem.get_text().strip() if title_elem else "N/A"

                date_elem = item.find('div', class_='col-md-3 col-xs-6 text-right')
                date = date_elem.get_text().strip() if date_elem else "N/A"

                data.append({
                    'Title': title,
                    'Date': date
                })

            return data
        else:
            print(f"Failed to retrieve data from {self.url}")
            return []

    def save_to_csv(self, data):
        if data:
            with open('etenders_tenders.csv', 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = ['Title', 'Date']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for row in data:
                    writer.writerow(row)
        print("Data saved to etenders_tenders.csv")

if __name__ == '__main__':
    url = 'https://etenders.gov.in/eprocure/app'
    scraper = ETendersScraper(url)
    scraped_data = scraper.scrape()
    scraper.save_to_csv(scraped_data)


Data saved to etenders_tenders.csv
