# City Bid Tracker - Orange

Automated scraper for public procurement opportunities from Orange's official website.

## Purpose
Helps contractors and vendors discover bidding opportunities by extracting:
- RFP numbers and titles
- Starting and closing dates
- Bid status information
- Direct links to full documentation

## Setup & Usage
1. Run the dependency installation cell
2. Execute the crawler class definition
3. Run the final execution cell
4. When bids are found, the CSV file is downloaded automatically to your browser

## Output
Creates `orange_bids.csv` containing the bid opportunities currently listed. If the file already exists, only bids that are not already in it are appended.

## Technical Notes
Before reading the bid table, the crawler sets the page's status filter dropdown to "Open" (if it is not already selected), so that the results list the bids that are currently open.

In [None]:
!pip install selenium webdriver_manager pandas

# Install Chrome and ChromeDriver
!apt-get update
!apt install chromium-chromedriver

Collecting selenium
  Downloading selenium-4.29.0-py3-none-any.whl.metadata (7.1 kB)
Collecting webdriver_manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.29.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.1-py3-none-any.whl.metadata (5.1 kB)
Collecting python-dotenv (from webdriver_manager)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.29.0-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime
import csv
import os
import time
from google.colab import files
import random

class OrangeBidsCrawler:
    """Scrape the City of Orange bids/proposals listing into a CSV file.

    Constructing the crawler starts a headless Chrome session. Calling
    :meth:`run` loads the listing page, sets the status filter to "Open",
    parses the bid table, appends bids that are not yet in the CSV and
    finally closes the browser.
    """

    # Locators for the listing page, kept in one place because they are tied
    # to the site's current markup.
    STATUS_DROPDOWN_ID = "rfpStas_5683_7176_393"
    OPEN_STATUS_VALUE = "4"  # <option> value that the page uses for "Open"
    BID_TABLE_SELECTOR = "table.listtable"

    def __init__(self):
        """Set the crawl configuration and start the Chrome driver."""
        self.base_url = "https://www.cityoforange.org/business/current-bids-proposals"
        self.output_file = "orange_bids.csv"
        # Column order of the CSV; also the keys of every parsed bid dict.
        self.fieldnames = [
            "RFP Number",
            "Title",
            "Starting Date",
            "Closing Date",
            "Status",
            "Details URL",
            "Last Updated"
        ]
        self.max_retries = 3
        self.setup_driver()

    def setup_driver(self):
        """Create ``self.driver`` (headless Chrome) and ``self.wait``.

        The system chromedriver is tried first; if that fails, a driver is
        downloaded through ``ChromeDriverManager``. An exception raised by
        the fallback propagates to the caller.
        """
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--window-size=1920,1080')

        # Add realistic browser headers
        chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
        chrome_options.add_argument('--accept-language=en-US,en;q=0.9')
        chrome_options.add_argument('--disable-blink-features=AutomationControlled')

        try:
            print("Attempting to use system chromedriver...")
            self.driver = webdriver.Chrome(options=chrome_options)
        except Exception as e:
            print(f"System chromedriver failed: {str(e)}")
            print("Attempting to use ChromeDriverManager...")
            service = Service(ChromeDriverManager().install())
            self.driver = webdriver.Chrome(service=service, options=chrome_options)

        self.driver.set_page_load_timeout(30)
        self.wait = WebDriverWait(self.driver, 15)
        print("Chrome driver initialized successfully")

    def random_delay(self):
        """Sleep for a random 2-5 seconds between page interactions."""
        time.sleep(random.uniform(2, 5))

    def parse_bid_item(self, row):
        """Convert one table row element into a bid dict.

        Args:
            row: A ``<tr>`` web element from the bid table.

        Returns:
            A dict keyed by ``self.fieldnames``, or ``None`` when the row has
            fewer than five ``<td>`` cells, has no title, or parsing fails.
        """
        try:
            print("\nParsing new row...")
            cells = row.find_elements(By.TAG_NAME, "td")
            # Rows without the five expected data cells are not bid rows.
            if len(cells) < 5:
                return None

            bid_data = {
                "RFP Number": cells[0].text.strip(),
                "Title": "",
                "Starting Date": cells[2].text.strip(),
                "Closing Date": cells[3].text.strip(),
                "Status": cells[4].text.strip(),
                "Details URL": "",
                "Last Updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            # The title cell is expected to hold a link to the bid details.
            try:
                title_link = cells[1].find_element(By.TAG_NAME, "a")
                bid_data["Title"] = title_link.text.strip()
                # A link without an href yields None; store "" so every
                # field of the record stays a string.
                bid_data["Details URL"] = title_link.get_attribute("href") or ""
            except Exception as e:
                print(f"Error extracting title/URL: {str(e)}")

            print(f"Parsed bid: {bid_data['Title']}")
            return bid_data if bid_data["Title"] else None

        except Exception as e:
            print(f"Error parsing bid item: {str(e)}")
            return None

    def setup_csv(self):
        """Create the output CSV with a header row if it does not exist.

        Errors are printed rather than raised.
        """
        try:
            if not os.path.exists(self.output_file):
                with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    writer.writeheader()
                print(f"Created new CSV file: {self.output_file}")
            else:
                print(f"CSV file already exists: {self.output_file}")
        except Exception as e:
            print(f"Error setting up CSV: {str(e)}")

    def get_page_with_retry(self):
        """Load ``self.base_url``, retrying up to ``self.max_retries`` times.

        Both a raised exception and an "Access Denied" page count as a failed
        attempt, and both are followed by the same growing back-off
        (5s, 10s, ...) before the next attempt.

        Returns:
            ``True`` once a page without "Access Denied" has loaded,
            ``False`` if every attempt failed.
        """
        for attempt in range(self.max_retries):
            try:
                print(f"\nAttempt {attempt + 1} to load page...")
                self.driver.get(self.base_url)
                self.random_delay()

                if "Access Denied" not in self.driver.page_source:
                    print("Page loaded successfully")
                    return True
                print("Access Denied detected, retrying...")
            except Exception as e:
                print(f"Error loading page: {str(e)}")

            # No point in waiting after the final attempt.
            if attempt < self.max_retries - 1:
                wait_time = (attempt + 1) * 5
                print(f"Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
        return False

    def _select_open_status(self):
        """Set the status filter dropdown to "Open"; failures are only logged."""
        try:
            status_dropdown = self.wait.until(
                EC.presence_of_element_located((By.ID, self.STATUS_DROPDOWN_ID))
            )
            # Select "Open" status if not already selected
            if status_dropdown.get_attribute("value") != self.OPEN_STATUS_VALUE:
                status_dropdown.find_element(
                    By.CSS_SELECTOR, f"option[value='{self.OPEN_STATUS_VALUE}']"
                ).click()
                # Pause after changing the filter before reading the table.
                self.random_delay()
        except Exception as e:
            print(f"Error with status filter: {str(e)}")

    def get_bid_listings(self):
        """Load the listing page and parse every row of the bid table.

        Returns:
            A list of bid dicts (see :meth:`parse_bid_item`); an empty list
            if the page or the table could not be loaded or on any error.
        """
        try:
            if not self.get_page_with_retry():
                print("Failed to load page after all retries")
                return []

            self._select_open_status()

            print("Looking for bid table...")
            try:
                table = self.wait.until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, self.BID_TABLE_SELECTOR)
                    )
                )
            except Exception as e:
                print(f"Error finding table: {str(e)}")
                return []

            rows = table.find_elements(By.CSS_SELECTOR, "tbody tr")
            print(f"Found {len(rows)} rows in table")

            # Rows are read from the page that is already loaded, so no
            # delay is inserted between them: parsing a row does not issue
            # a new request to the site.
            bids = []
            for row in rows:
                bid_data = self.parse_bid_item(row)
                if bid_data:
                    bids.append(bid_data)

            print(f"Successfully parsed {len(bids)} bids")
            return bids

        except Exception as e:
            print(f"Error fetching bid listings: {str(e)}")
            return []

    @staticmethod
    def _bid_key(record):
        """Return the (RFP number, title) pair that identifies a bid."""
        # A tuple avoids the collisions a joined "rfp-title" string allows
        # when either part itself contains the separator.
        return (record.get("RFP Number") or "", record.get("Title") or "")

    def _load_existing_keys(self):
        """Return the identity keys of all bids already in the output CSV."""
        if not os.path.exists(self.output_file):
            return set()
        # newline='' lets the csv module handle line endings itself, as the
        # csv documentation requires for correct parsing of quoted fields.
        with open(self.output_file, 'r', newline='', encoding='utf-8') as f:
            return {self._bid_key(row) for row in csv.DictReader(f)}

    def save_bids(self, bids):
        """Append bids that are not yet in the CSV, then trigger a download.

        Bids are identified by (RFP number, title). Duplicates are skipped
        both against the existing file and within ``bids`` itself. The header
        row is written when the file is missing or empty. Errors are printed
        rather than raised; the download is skipped if saving failed or if
        ``bids`` is empty.

        Args:
            bids: List of bid dicts keyed by ``self.fieldnames``.
        """
        try:
            if not bids:
                print("No bids to save")
                return

            seen = self._load_existing_keys()
            new_bids = []
            for bid in bids:
                key = self._bid_key(bid)
                if key not in seen:
                    seen.add(key)
                    new_bids.append(bid)

            if new_bids:
                needs_header = (
                    not os.path.exists(self.output_file)
                    or os.path.getsize(self.output_file) == 0
                )
                # Append mode also creates the file when it does not exist.
                with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    if needs_header:
                        writer.writeheader()
                    writer.writerows(new_bids)
                print(f"Added {len(new_bids)} new bids")
            else:
                print("No new bids to add")
        except Exception as e:
            print(f"Error saving bids: {str(e)}")
            return

        # Download the CSV file. Kept separate from the save step so that a
        # failed download is not reported as a failed save.
        try:
            files.download(self.output_file)
        except Exception as e:
            print(f"Error downloading CSV: {str(e)}")

    def run(self):
        """Run one full crawl: prepare the CSV, scrape, save, close the browser."""
        try:
            print(f"Starting Orange bids crawler at {datetime.now()}")
            self.setup_csv()
            bids = self.get_bid_listings()
            self.save_bids(bids)
            print("Crawler execution completed")
        finally:
            # Always release the browser, even if the crawl raised.
            if hasattr(self, 'driver'):
                self.driver.quit()

In [None]:
# Constructing the crawler starts the headless Chrome driver.
crawler = OrangeBidsCrawler()
# Scrape the listing, save new bids to the CSV and quit the browser.
crawler.run()

Attempting to use system chromedriver...
Chrome driver initialized successfully
Starting Orange bids crawler at 2025-02-20 16:26:28.955350
Created new CSV file: orange_bids.csv

Attempt 1 to load page...
Page loaded successfully
Looking for bid table...
Found 4 rows in table

Parsing new row...
Parsed bid: RF BID - 24-25.14 - CDBG FY 2024 - 2025 Pixley Neighborhood Street Rehabilitation

Parsing new row...
Parsed bid: RF BID - 24-25.15 - CDBG FY 2024 - 2025 ADA Wheelchair Access Ramp Replacement

Parsing new row...
Parsed bid: RF BID - 24-25.13 - Well 29 Drilling

Parsing new row...
Parsed bid: RF BID - 24-25.16 - Tot Lot Wood Fiber Replenish Program
Successfully parsed 4 bids
Added 4 new bids


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Crawler execution completed


## Disclaimer
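This tool accesses only publicly available information from official government websites. It uses responsible scraping practices, including randomized delays between page interactions. Note that the code does not itself read or enforce robots.txt, so check the site's robots.txt and terms of use before running it.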