# City Bid Tracker - San Gabriel

Automated scraper for public procurement opportunities from San Gabriel's official website.

## Purpose
Helps contractors and vendors discover bidding opportunities by extracting:
- Bid numbers and titles
- Detailed descriptions
- Status and closing dates
- Direct links to full documentation

## Setup & Usage
1. Run the dependency installation cell
2. Run the cell that defines the crawler class
3. Run the final execution cell
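4. The CSV file is then downloaded automatically through the browser (this step uses `google.colab.files`, so it only works when the notebook runs in Google Colab)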

## Output
Creates `san_gabriel_bids.csv` with all current and historical bid opportunities.

In [None]:
!pip install selenium webdriver_manager pandas

# Install Chrome and ChromeDriver
!apt-get update
!apt install chromium-chromedriver

Collecting selenium
  Downloading selenium-4.28.1-py3-none-any.whl.metadata (7.1 kB)
Collecting webdriver_manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.28.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting python-dotenv (from webdriver_manager)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.28.1-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime
import csv
import os
import time
from google.colab import files
import random

class SanGabrielBidsCrawler:
    """Scrape the City of San Gabriel bids page and store the listings in a CSV.

    Constructing an instance starts a headless Chrome session (see
    ``setup_driver``). ``run`` is the entry point: it makes sure the CSV
    exists, loads the bids page, parses every listing, appends the listings
    that are not yet in the CSV, and finally quits the browser.
    """

    def __init__(self):
        """Set the target URL, CSV layout and retry count, then start Chrome."""
        self.base_url = "https://www.sangabrielcity.com/Bids.aspx"
        self.output_file = "san_gabriel_bids.csv"
        # Column order of the CSV; also the keys of every parsed bid dict.
        self.fieldnames = [
            "Category",
            "Bid Number",
            "Title",
            "Description",
            "Status",
            "Closes",
            "Details URL",
            "Last Updated"
        ]
        self.max_retries = 3
        self.setup_driver()

    def setup_driver(self):
        """Create ``self.driver`` (headless Chrome) and ``self.wait``.

        The chromedriver found by Selenium itself is tried first; if that
        fails, a driver is fetched through ``ChromeDriverManager``. An
        exception from that second attempt propagates to the caller.
        """
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--window-size=1920,1080')

        # Add realistic browser headers
        chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
        chrome_options.add_argument('--accept-language=en-US,en;q=0.9')
        chrome_options.add_argument('--disable-blink-features=AutomationControlled')

        try:
            print("Attempting to use system chromedriver...")
            self.driver = webdriver.Chrome(options=chrome_options)
        except Exception as e:
            print(f"System chromedriver failed: {str(e)}")
            print("Attempting to use ChromeDriverManager...")
            service = Service(ChromeDriverManager().install())
            self.driver = webdriver.Chrome(service=service, options=chrome_options)

        self.driver.set_page_load_timeout(30)
        self.wait = WebDriverWait(self.driver, 15)
        print("Chrome driver initialized successfully")

    def random_delay(self):
        """Sleep for a random 2-5 seconds between browser interactions."""
        time.sleep(random.uniform(2, 5))

    def parse_bid_number(self, text):
        """Return the text following the first "Bid No." marker.

        Args:
            text: Raw text of the bid-number element.

        Returns:
            The stripped text between the first "Bid No." and the next one
            (or the end of the string); "" when the marker is absent or
            ``text`` is empty/None.
        """
        if not text or "Bid No." not in text:
            return ""
        return text.split("Bid No.")[1].strip()

    def parse_bid_item(self, item, category):
        """Convert one listing row element into a bid dict.

        Args:
            item: Selenium element for a single bid row.
            category: Category name recorded in the "Category" column.

        Returns:
            A dict keyed like ``self.fieldnames``, or None when the title
            link cannot be read or the title is empty. A missing bid number
            or description leaves that field blank instead of dropping the
            bid.
        """
        try:
            print("\nParsing new bid item...")
            bid_data = {
                "Category": category,
                "Bid Number": "",
                "Title": "",
                "Description": "",
                "Status": "",
                "Closes": "",
                "Details URL": "",
                "Last Updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

            # Get bid title and URL
            try:
                title_elem = item.find_element(By.CSS_SELECTOR, ".bidTitle a")
                bid_data["Title"] = title_elem.text.strip()
                bid_data["Details URL"] = title_elem.get_attribute("href")

                # Bid number and description are optional. find_elements
                # returns an empty list when nothing matches, whereas
                # find_element would raise and discard the whole bid.
                number_elems = item.find_elements(
                    By.CSS_SELECTOR, ".bidTitle span[style*='font-size:0.75em;']"
                )
                if number_elems:
                    bid_data["Bid Number"] = self.parse_bid_number(number_elems[0].text)

                desc_elems = item.find_elements(By.CSS_SELECTOR, ".bidTitle span:last-child")
                if desc_elems:
                    desc_text = desc_elems[0].text.strip()
                    # Remove the "[Read on]" text if present
                    desc_text = desc_text.split("[Read")[0].strip()
                    bid_data["Description"] = desc_text

            except Exception as e:
                print(f"Error parsing title/description: {str(e)}")
                return None

            # Get bid status info; a failure here keeps the bid with blank
            # "Status"/"Closes" fields.
            try:
                status_section = item.find_element(By.CLASS_NAME, "bidStatus")
                status_divs = status_section.find_elements(By.TAG_NAME, "div")

                if len(status_divs) >= 2:
                    # Second div contains the actual status and closing date
                    values_div = status_divs[1]
                    spans = values_div.find_elements(By.TAG_NAME, "span")
                    if len(spans) >= 2:
                        bid_data["Status"] = spans[0].text.strip()
                        bid_data["Closes"] = spans[1].text.strip()

            except Exception as e:
                print(f"Error parsing status: {str(e)}")

            print(f"Parsed bid: {bid_data['Title']}")
            return bid_data if bid_data["Title"] else None

        except Exception as e:
            print(f"Error parsing bid item: {str(e)}")
            return None

    def setup_csv(self):
        """Create the output CSV with a header row if it does not exist yet.

        Errors are printed, not raised.
        """
        try:
            if not os.path.exists(self.output_file):
                with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    writer.writeheader()
                print(f"Created new CSV file: {self.output_file}")
            else:
                print(f"CSV file already exists: {self.output_file}")
        except Exception as e:
            print(f"Error setting up CSV: {str(e)}")

    def get_page_with_retry(self):
        """Load the bids page, retrying up to ``self.max_retries`` times.

        After a successful load the "showAllBids" checkbox is ticked if it
        is not already selected; a problem with the checkbox is printed but
        does not count as a failed load.

        Returns:
            True once the page has loaded, False when every attempt failed.
        """
        for attempt in range(self.max_retries):
            try:
                print(f"\nAttempt {attempt + 1} to load page...")
                self.driver.get(self.base_url)
                self.random_delay()

                # Check for "show closed bids" checkbox
                try:
                    checkbox = self.wait.until(
                        EC.presence_of_element_located((By.ID, "showAllBids"))
                    )
                    if not checkbox.is_selected():
                        checkbox.click()
                        self.random_delay()
                except Exception as e:
                    print(f"Error with checkbox: {str(e)}")

                print("Page loaded successfully")
                return True

            except Exception as e:
                print(f"Error loading page: {str(e)}")
                # Linear back-off (5s, 10s, ...); no wait after the last try.
                if attempt < self.max_retries - 1:
                    wait_time = (attempt + 1) * 5
                    print(f"Waiting {wait_time} seconds before retry...")
                    time.sleep(wait_time)
        return False

    def get_bid_listings(self):
        """Load the page and parse every bid row, grouped by category header.

        Returns:
            A list of bid dicts (see ``parse_bid_item``); an empty list when
            the page cannot be loaded, no category header is found, or an
            unexpected error occurs.
        """
        try:
            if not self.get_page_with_retry():
                print("Failed to load page after all retries")
                return []

            print("Looking for bid listings...")
            bids = []

            # Find all category headers
            headers = self.driver.find_elements(By.CLASS_NAME, "bidsHeader")
            if not headers:
                print("No bid categories found")
                return []

            # Nearest following sibling that is either a bid row or the next
            # category header. Matching rows only would skip over headers, so
            # the walk would never stop at a category boundary and every
            # later bid would be repeated under each earlier category.
            next_row_or_header = (
                "following-sibling::div[contains(@class, 'listItemsRow') "
                "or contains(@class, 'bidsHeader')][1]"
            )

            for header in headers:
                try:
                    # Get category name
                    category = header.find_element(By.TAG_NAME, "span").text.strip()
                    if not category:
                        continue

                    print(f"\nProcessing category: {category}")

                    # Get all bid items following this header until next header
                    next_element = header
                    while True:
                        try:
                            next_element = next_element.find_element(
                                By.XPATH, next_row_or_header
                            )
                        except Exception:
                            # No further sibling (or the element went stale):
                            # this category is finished.
                            break

                        if 'bidsHeader' in (next_element.get_attribute('class') or ''):
                            break

                        # No delay here: the rows are read from the page that
                        # is already loaded, so no request is sent per bid.
                        bid_data = self.parse_bid_item(next_element, category)
                        if bid_data:
                            bids.append(bid_data)

                except Exception as e:
                    print(f"Error processing category: {str(e)}")
                    continue

            print(f"Successfully parsed {len(bids)} bids")
            return bids

        except Exception as e:
            print(f"Error fetching bid listings: {str(e)}")
            return []

    def save_bids(self, bids):
        """Append bids that are not yet in the CSV, then trigger the download.

        A bid is identified by its (Category, Bid Number, Title) triple;
        duplicates already in the file or repeated within ``bids`` are
        skipped. The header row is written when the file is missing or
        empty. Errors are printed, not raised.

        Args:
            bids: List of bid dicts keyed like ``self.fieldnames``.
        """
        try:
            if not bids:
                print("No bids to save")
                return

            def bid_key(record):
                # A tuple avoids the ambiguity of joining fields with "-".
                return (
                    record.get("Category", ""),
                    record.get("Bid Number", ""),
                    record.get("Title", ""),
                )

            seen = set()
            if os.path.exists(self.output_file):
                # newline='' lets the csv module handle line endings itself.
                with open(self.output_file, 'r', newline='', encoding='utf-8') as f:
                    for row in csv.DictReader(f):
                        seen.add(bid_key(row))

            new_bids = []
            for bid in bids:
                key = bid_key(bid)
                if key not in seen:
                    # Record the key so repeats within this batch are skipped.
                    seen.add(key)
                    new_bids.append(bid)

            if new_bids:
                needs_header = (
                    not os.path.exists(self.output_file)
                    or os.path.getsize(self.output_file) == 0
                )
                with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    if needs_header:
                        writer.writeheader()
                    writer.writerows(new_bids)
                print(f"Added {len(new_bids)} new bids")
            else:
                print("No new bids to add")

        except Exception as e:
            print(f"Error saving bids: {str(e)}")
            return

        # Download the CSV file. Reported separately so a download problem is
        # not mistaken for a failed save.
        try:
            files.download(self.output_file)
        except Exception as e:
            print(f"Error downloading CSV: {str(e)}")

    def run(self):
        """Run the whole crawl and always quit the browser afterwards."""
        try:
            print(f"Starting San Gabriel bids crawler at {datetime.now()}")
            self.setup_csv()
            bids = self.get_bid_listings()
            self.save_bids(bids)
            print("Crawler execution completed")
        finally:
            if hasattr(self, 'driver'):
                self.driver.quit()

In [None]:
# Build the crawler (its constructor starts a headless Chrome session), then
# run the crawl: set up the CSV, scrape the listings, save the new bids and
# quit the browser when done.
crawler = SanGabrielBidsCrawler()
crawler.run()

Attempting to use system chromedriver...
Chrome driver initialized successfully
Starting San Gabriel bids crawler at 2025-02-10 20:24:44.169008
Created new CSV file: san_gabriel_bids.csv

Attempt 1 to load page...
Page loaded successfully
Looking for bid listings...

Processing category: City of San Gabriel

Parsing new bid item...
Parsed bid: "Annual Streets Program, FY 22/23" Contract No. 22-05

Parsing new bid item...
Parsed bid: "CDBG Street Improvements Project, FY 22/23" Contract No. 22-06 / CDBG Project No. 602563-22

Parsing new bid item...
Parsed bid: "St. Albans Storm Drain Rehabilitation Project" Contract No. 23-04

Parsing new bid item...
Parsed bid: (RFP) Franchise Agreement(s) for Tow Truck Services for the City of San Gabriel

Parsing new bid item...
Parsed bid: (RFP) Professional Engineering Services for City of San Gabriel Sewer Master Plan 2019

Parsing new bid item...
Parsed bid: 18-09 2018 Accelerated I-Bank Street Improvement Project

Parsing new bid item...
Parsed

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Crawler execution completed


## Disclaimer
This tool accesses only publicly available information from the official government website and adds randomized delays between page interactions. It does not itself read or enforce robots.txt, so review the site's robots.txt and terms of use before running it.