# City Bid Tracker - Yorba Linda

Automated scraper for public procurement opportunities from Yorba Linda's official website.

## Purpose
Helps contractors and vendors discover bidding opportunities by extracting:
- Bid titles and descriptions
- Category classifications
- Status and closing dates
- Direct links to full documentation

## Setup & Usage
1. Run the dependency installation cell
2. Execute the crawler class definition
3. Run the final execution cell
4. The CSV file is downloaded automatically when the run finishes

## Output
Creates `yorba_linda_bids.csv` with all current and historical bid opportunities.

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): no other cell shown in this notebook reads from or writes to
# the mount (the CSV is written to the working directory) - confirm this cell
# is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Install the Python packages used by the scraper.
# NOTE(review): pandas is installed here but is not imported by any cell shown
# in this notebook - confirm it is actually required.
!pip install selenium webdriver_manager pandas

# Install Chrome and ChromeDriver: refresh the apt package index first, then
# install the chromium-chromedriver system package.
!apt-get update
!apt install chromium-chromedriver

import os
import time
from datetime import datetime
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from google.colab import files

Collecting selenium
  Downloading selenium-4.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting webdriver_manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.28.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting python-dotenv (from webdriver_manager)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.28.1-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
class YorbaLindaBidsCrawler:
    """Scrape the Yorba Linda bids page and append new bids to a CSV file.

    Constructing the crawler starts a headless Chrome session. Call ``run()``
    to fetch the listings, append bids that are not yet in the CSV, trigger a
    Colab download of the file and shut the browser down.
    """

    # Relative XPath used while walking the siblings after a category header.
    # It matches bid rows *and* the next category header so the walk can stop
    # at the category boundary instead of running on into later categories.
    _ROW_OR_HEADER_XPATH = (
        "following-sibling::*["
        "(self::div and contains(@class, 'listItemsRow'))"
        " or contains(@class, 'bidsHeader')]"
    )

    def __init__(self):
        """Set the target URL, output file and CSV columns, then start Chrome."""
        self.base_url = "https://www.yorbalindaca.gov/bids.aspx"
        self.output_file = "yorba_linda_bids.csv"
        self.fieldnames = [
            "Category",
            "Bid Title",
            "Description",
            "Status",
            "Closes",
            "Bid Details URL",
            "Last Updated"
        ]
        self.setup_driver()

    def setup_driver(self):
        """Setup Chrome driver with Colab-specific options.

        Tries the chromedriver found on the system first and falls back to a
        driver installed by ``ChromeDriverManager``. Sets ``self.driver`` and
        ``self.wait`` (a 10 second ``WebDriverWait``).
        """
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--window-size=1920,1080")

        try:
            print("Attempting to use system chromedriver...")
            self.driver = webdriver.Chrome(options=chrome_options)
        except Exception as e:
            # Any start-up failure triggers the fallback; a failure of the
            # fallback itself propagates to the caller.
            print(f"System chromedriver failed: {str(e)}")
            print("Attempting to use ChromeDriverManager...")
            service = Service(ChromeDriverManager().install())
            self.driver = webdriver.Chrome(service=service, options=chrome_options)

        self.wait = WebDriverWait(self.driver, 10)
        print("Chrome driver initialized successfully")

    def parse_bid_item(self, bid_item, category):
        """Parse individual bid listing.

        Args:
            bid_item: The element for one bid row.
            category: Category name recorded for the bid.

        Returns:
            A dict keyed by the CSV column names, or ``None`` when the title
            link cannot be read. Description, status and closing date are
            best-effort and stay empty when they cannot be parsed.
        """
        try:
            bid_data = {
                "Category": category,
                "Bid Title": "",
                "Description": "",
                "Status": "",
                "Closes": "",
                "Bid Details URL": "",
                "Last Updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            # Title and URL are required: without them the row is not a usable bid.
            try:
                title_elem = bid_item.find_element(By.CSS_SELECTOR, ".bidTitle a")
                bid_data["Bid Title"] = title_elem.text.strip()
                bid_data["Bid Details URL"] = title_elem.get_attribute("href")
            except Exception as e:
                print(f"Error parsing title/description: {str(e)}")
                return None

            # The description is optional: a row without the description span
            # keeps its title and URL instead of being dropped entirely.
            try:
                desc_elem = bid_item.find_element(By.CSS_SELECTOR, ".bidTitle span:nth-child(3)")
                # Remove the "[Read on]" text if present
                bid_data["Description"] = desc_elem.text.strip().split("[Read")[0].strip()
            except Exception as e:
                print(f"Error parsing description: {str(e)}")

            # Status and closing date are read from the spans of the second
            # div inside the status section.
            try:
                status_section = bid_item.find_element(By.CLASS_NAME, "bidStatus")
                status_divs = status_section.find_elements(By.TAG_NAME, "div")

                if len(status_divs) >= 2:
                    spans_values = status_divs[1].find_elements(By.TAG_NAME, "span")
                    if len(spans_values) >= 2:
                        bid_data["Status"] = spans_values[0].text.strip()
                        bid_data["Closes"] = spans_values[1].text.strip()

            except Exception as e:
                print(f"Error parsing status: {str(e)}")

            return bid_data

        except Exception as e:
            print(f"Error parsing bid item: {str(e)}")
            return None

    def setup_csv(self):
        """Create or verify CSV file with headers.

        Writes a header-only file when ``self.output_file`` does not exist;
        an existing file is left untouched. Errors are printed, not raised.
        """
        try:
            if not os.path.exists(self.output_file):
                with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    writer.writeheader()
                print(f"Created new CSV file: {self.output_file}")
            else:
                print(f"CSV file already exists: {self.output_file}")
        except Exception as e:
            print(f"Error setting up CSV: {str(e)}")

    def _collect_category_bids(self, header, category):
        """Return the parsed bids between ``header`` and the next category header."""
        collected = []
        current = header
        while True:
            try:
                current = current.find_element(By.XPATH, self._ROW_OR_HEADER_XPATH)
                # Reaching the next header means this category is finished.
                if 'bidsHeader' in (current.get_attribute('class') or ''):
                    break

                bid_data = self.parse_bid_item(current, category)
                if bid_data:
                    collected.append(bid_data)

            except Exception:
                # No further sibling (or the element went stale): end of list.
                # Unlike a bare ``except``, this lets KeyboardInterrupt through.
                break
        return collected

    def get_bid_listings(self):
        """Fetch and parse all bid listings.

        Loads the bids page, ticks the "showAllBids" checkbox when it is not
        already selected, then collects the bid rows under each category
        header.

        Returns:
            A list of bid dicts (see ``parse_bid_item``); an empty list when
            no category header is found or the page cannot be processed.
        """
        try:
            print(f"Navigating to {self.base_url}")
            self.driver.get(self.base_url)
            time.sleep(2)  # Wait for page to load

            # Show closed bids checkbox
            checkbox = self.wait.until(EC.presence_of_element_located((By.ID, "showAllBids")))
            if not checkbox.is_selected():
                checkbox.click()
                time.sleep(2)

            # Find all category headers
            headers = self.driver.find_elements(By.CLASS_NAME, "bidsHeader")
            if not headers:
                print("No bid categories found")
                return []

            bids = []
            for header in headers:
                try:
                    # Get category name
                    category = header.find_element(By.TAG_NAME, "span").text.strip()
                    if not category:
                        continue

                    print(f"\nProcessing category: {category}")
                    bids.extend(self._collect_category_bids(header, category))

                except Exception as e:
                    print(f"Error processing category: {str(e)}")
                    continue

            print(f"Found {len(bids)} bids")
            return bids

        except Exception as e:
            print(f"Error fetching bid listings: {str(e)}")
            return []

    def save_bids(self, bids):
        """Save bid data to CSV.

        Appends the bids whose (Category, Bid Title) pair is not already in
        the file and has not appeared earlier in ``bids``, then triggers a
        Colab download of the file. Errors are printed, not raised; the
        download is skipped when writing fails.

        Args:
            bids: Iterable of bid dicts keyed by the CSV column names.
        """
        try:
            # Tuple keys avoid the collisions a "Category-Title" string allows
            # (e.g. "A-B"/"C" versus "A"/"B-C").
            seen = set()
            if os.path.exists(self.output_file):
                # newline='' lets the csv module handle line breaks embedded
                # in quoted fields, as recommended by the csv documentation.
                with open(self.output_file, 'r', newline='', encoding='utf-8') as f:
                    for row in csv.DictReader(f):
                        seen.add((row.get('Category'), row.get('Bid Title')))

            new_bids = []
            for bid in bids:
                key = (bid['Category'], bid['Bid Title'])
                if key not in seen:
                    seen.add(key)  # also de-duplicates within this batch
                    new_bids.append(bid)

            if new_bids:
                # A missing or empty file still needs its header row.
                needs_header = (
                    not os.path.exists(self.output_file)
                    or os.path.getsize(self.output_file) == 0
                )
                with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    if needs_header:
                        writer.writeheader()
                    writer.writerows(new_bids)
                print(f"Added {len(new_bids)} new bids")
            else:
                print("No new bids to add")

        except Exception as e:
            print(f"Error saving bids: {str(e)}")
            return

        # Download the CSV file; reported separately so a download problem is
        # not mistaken for a failure to write the data.
        try:
            files.download(self.output_file)
        except Exception as e:
            print(f"Error downloading CSV: {str(e)}")

    def run(self):
        """Main execution method.

        Prepares the CSV, scrapes the listings, saves them and always quits
        the browser afterwards, even when a step raises.
        """
        try:
            print(f"Starting Yorba Linda bids crawler at {datetime.now()}")
            self.setup_csv()
            bids = self.get_bid_listings()
            self.save_bids(bids)
            print("Crawler execution completed")
        finally:
            if hasattr(self, 'driver'):
                self.driver.quit()

In [None]:
# Build the crawler (its constructor starts the headless Chrome driver), then
# run it: prepare the CSV, scrape the listings, save new bids and quit the browser.
crawler = YorbaLindaBidsCrawler()
crawler.run()

Attempting to use system chromedriver...
Chrome driver initialized successfully
Starting Yorba Linda bids crawler at 2025-02-10 04:43:57.055538
Created new CSV file: yorba_linda_bids.csv
Navigating to https://www.yorbalindaca.gov/bids.aspx

Processing category: Public Works

Processing category: Yorba Linda RFPs
Found 19 bids
Added 19 new bids


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Crawler execution completed


## Disclaimer
This tool accesses only publicly available information from an official government website. Each run loads a single listings page and pauses briefly between page interactions. The notebook does not check robots.txt itself, so review the site's robots.txt and terms of use before running it.