# City Bid Tracker - La Mirada

Automated scraper for public procurement opportunities from La Mirada's official website.

## Purpose
Helps contractors and vendors discover bidding opportunities by extracting:
- RFP/RFQ titles and types
- Due dates and additional information
- Direct links to full documentation

## Setup & Usage
1. Run the dependency installation cell
2. Execute the crawler class definition
3. Run the final execution cell
4. The CSV file is downloaded automatically

## Output
Creates `la_mirada_bids.csv` containing the current opportunities and bid results. If the file already exists, only entries that are not already in it are appended.

## Technical Notes
This crawler implements dual parsing logic to handle both "Bid Results" sections and RFP tables on the same page, accommodating the city's unique content structure.

In [None]:
!pip install selenium webdriver_manager pandas

# Install Chrome and ChromeDriver
!apt-get update
!apt install chromium-chromedriver

Collecting selenium
  Downloading selenium-4.29.0-py3-none-any.whl.metadata (7.1 kB)
Collecting webdriver_manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.29.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.12.1-py3-none-any.whl.metadata (5.1 kB)
Collecting python-dotenv (from webdriver_manager)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting sortedcontainers (from trio~=0.17->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.29.0-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime
import csv
import os
import time
import random
from google.colab import files

class LaMiradaBidsCrawler:
    """Scrape bid results and RFP/RFQ listings from the La Mirada bids page.

    The crawler loads ``base_url`` in headless Chrome, collects rows from the
    "Bid Results" document links and from the first table on the page, appends
    rows that are not yet in ``output_file`` and then triggers a Colab
    download of that file.
    """

    def __init__(self):
        """Set the target URL, CSV layout and retry budget, then start Chrome."""
        self.base_url = "https://www.cityoflamirada.org/about-us/city-clerk/city-bids-rfps-rfqs"
        self.output_file = "la_mirada_bids.csv"
        self.fieldnames = [
            "Type",
            "Title",
            "Due Date",
            "Additional Info",
            "Details URL",
            "Last Updated"
        ]
        self.max_retries = 3
        self.setup_driver()

    def setup_driver(self):
        """Start a headless Chrome driver and create the shared explicit wait.

        ``webdriver.Chrome`` is tried with its default driver lookup first; if
        that raises, a driver obtained through ``ChromeDriverManager`` is used
        instead. A failure of the fallback propagates to the caller.
        """
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--window-size=1920,1080')

        # Add realistic browser headers
        chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
        chrome_options.add_argument('--accept-language=en-US,en;q=0.9')
        chrome_options.add_argument('--disable-blink-features=AutomationControlled')

        try:
            print("Attempting to use system chromedriver...")
            self.driver = webdriver.Chrome(options=chrome_options)
        except Exception as e:
            print(f"System chromedriver failed: {str(e)}")
            print("Attempting to use ChromeDriverManager...")
            service = Service(ChromeDriverManager().install())
            self.driver = webdriver.Chrome(service=service, options=chrome_options)

        self.driver.set_page_load_timeout(30)
        self.wait = WebDriverWait(self.driver, 15)
        print("Chrome driver initialized successfully")

    def random_delay(self):
        """Sleep for a random 2-5 seconds between browser actions."""
        time.sleep(random.uniform(2, 5))

    @staticmethod
    def _timestamp():
        """Return the current local time formatted for the "Last Updated" column."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def _bid_key(row):
        """Return the (Type, Title, Due Date) tuple that identifies a bid row."""
        # A tuple (rather than a "-"-joined string) cannot collide when a
        # title or date itself contains a dash.
        return (row.get("Type"), row.get("Title"), row.get("Due Date"))

    def parse_bid_results(self):
        """Collect closed bids from the "Bid Results" section.

        Returns:
            A list of row dicts keyed by ``self.fieldnames``; empty when the
            page has no "Bid Results" heading or when parsing fails.
        """
        bids = []
        try:
            # find_elements returns [] instead of raising, and every matching
            # heading is checked rather than only the first one on the page.
            headings = self.driver.find_elements(By.CSS_SELECTOR, "h2.titlewidget-title")
            if not any("Bid Results" in heading.text for heading in headings):
                print("No 'Bid Results' section found on the page")
                return bids

            timestamp = self._timestamp()
            # Published-document links inside paragraphs are treated as results
            bid_links = self.driver.find_elements(By.CSS_SELECTOR, "p a[href*='showpublisheddocument']")
            for link in bid_links:
                title = link.text.strip()
                if not title:
                    continue
                bid_data = {
                    "Type": "Bid Result",
                    "Title": title,
                    "Due Date": "Closed",  # Past bids
                    "Additional Info": "",
                    "Details URL": link.get_attribute("href") or "",
                    "Last Updated": timestamp
                }
                bids.append(bid_data)
                print(f"Parsed bid result: {bid_data['Title']}")
        except Exception as e:
            print(f"Error parsing bid results: {str(e)}")
        return bids

    def _parse_rfp_row(self, row, timestamp):
        """Convert one table row into a bid dict, or None if it is not a bid.

        Rows with fewer than two ``td`` cells, without links in the first
        cell, or whose first link has no text are not bids. That covers header
        rows (``th`` cells or link-free captions), so no row has to be skipped
        by position.
        """
        cells = row.find_elements(By.TAG_NAME, "td")
        if len(cells) < 2:
            return None

        # First cell contains the RFP title link followed by extra documents
        links = cells[0].find_elements(By.TAG_NAME, "a")
        if not links:
            return None

        main_link, *extra_links = links
        main_title = main_link.text.strip()
        if not main_title:
            return None

        # A missing href becomes "" so one bad link cannot abort the row.
        urls = [main_link.get_attribute("href") or ""]
        additional_docs = []
        for link in extra_links:
            doc_title = link.text.strip()
            if doc_title:
                additional_docs.append(doc_title)
                urls.append(link.get_attribute("href") or "")

        return {
            "Type": "RFP/RFQ",
            "Title": main_title,
            "Due Date": cells[1].text.strip(),
            "Additional Info": " | ".join(additional_docs),
            "Details URL": " | ".join(urls),
            "Last Updated": timestamp
        }

    def parse_rfp_table(self):
        """Collect open RFP/RFQ entries from the first table on the page.

        Returns:
            A list of row dicts keyed by ``self.fieldnames``; empty when the
            page has no table or when parsing fails.
        """
        bids = []
        try:
            tables = self.driver.find_elements(By.CSS_SELECTOR, "table")
            if not tables:
                print("No RFP/RFQ table found on the page")
                return bids

            rows = tables[0].find_elements(By.CSS_SELECTOR, "tbody tr")
            timestamp = self._timestamp()

            for row in rows:
                # Isolate failures per row so one malformed row does not
                # discard the rows that follow it.
                try:
                    bid_data = self._parse_rfp_row(row, timestamp)
                except Exception as e:
                    print(f"Skipping unreadable RFP row: {str(e)}")
                    continue
                if bid_data is not None:
                    bids.append(bid_data)
                    print(f"Parsed RFP/RFQ: {bid_data['Title']}")

        except Exception as e:
            print(f"Error parsing RFP table: {str(e)}")
        return bids

    def setup_csv(self):
        """Create the CSV with a header row if it does not exist or is empty."""
        try:
            if not os.path.exists(self.output_file) or os.path.getsize(self.output_file) == 0:
                with open(self.output_file, 'w', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    writer.writeheader()
                print(f"Created new CSV file: {self.output_file}")
            else:
                print(f"CSV file already exists: {self.output_file}")
        except Exception as e:
            print(f"Error setting up CSV: {str(e)}")

    def get_page_with_retry(self):
        """Load ``base_url``, retrying up to ``max_retries`` times.

        Returns:
            True once a ``table`` or ``h2`` element is present, False when
            every attempt failed.
        """
        for attempt in range(self.max_retries):
            try:
                print(f"\nAttempt {attempt + 1} to load page...")
                self.driver.get(self.base_url)
                self.random_delay()

                # Check if page loaded successfully by looking for either a table or a heading
                try:
                    self.wait.until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, "table, h2"))
                    )
                except Exception as e:
                    print(f"Error waiting for page load: {str(e)}")
                else:
                    print("Page loaded successfully")
                    return True

            except Exception as e:
                print(f"Error loading page: {str(e)}")

            # Linear back-off (5s, 10s, ...) after any failed attempt, including
            # a timed-out wait, so the site is not hit again immediately.
            if attempt < self.max_retries - 1:
                wait_time = (attempt + 1) * 5
                print(f"Waiting {wait_time} seconds before retry...")
                time.sleep(wait_time)
        return False

    def get_bid_listings(self):
        """Load the page and return bid results followed by RFP/RFQ rows.

        Returns:
            A list of row dicts; empty when the page could not be loaded or an
            unexpected error occurred.
        """
        try:
            if not self.get_page_with_retry():
                print("Failed to load page after all retries")
                return []

            print("Looking for bid listings...")
            bids = []

            # Parse bid results
            print("\nParsing bid results section...")
            bids.extend(self.parse_bid_results())

            # Parse RFP/RFQ table
            print("\nParsing RFP/RFQ table...")
            bids.extend(self.parse_rfp_table())

            print(f"Successfully parsed {len(bids)} total bids")
            return bids

        except Exception as e:
            print(f"Error fetching bid listings: {str(e)}")
            return []

    def save_bids(self, bids):
        """Append unseen bids to the CSV, then offer the file for download.

        A bid is unseen when its (Type, Title, Due Date) is neither in the
        existing file nor earlier in ``bids``. Nothing is written or
        downloaded when ``bids`` is empty.

        Args:
            bids: Row dicts keyed by ``self.fieldnames``.
        """
        try:
            if not bids:
                print("No bids to save")
                return

            # An existing but empty file still needs its header row.
            needs_header = (
                not os.path.exists(self.output_file)
                or os.path.getsize(self.output_file) == 0
            )

            seen = set()
            if not needs_header:
                with open(self.output_file, 'r', newline='', encoding='utf-8') as f:
                    for row in csv.DictReader(f):
                        seen.add(self._bid_key(row))

            new_bids = []
            for bid in bids:
                key = self._bid_key(bid)
                if key in seen:
                    continue
                # Record the key right away so duplicates within this scrape
                # are written only once.
                seen.add(key)
                new_bids.append(bid)

            if new_bids:
                with open(self.output_file, 'a', newline='', encoding='utf-8') as f:
                    writer = csv.DictWriter(f, fieldnames=self.fieldnames)
                    if needs_header:
                        writer.writeheader()
                    writer.writerows(new_bids)
                print(f"Added {len(new_bids)} new bids")
            else:
                print("No new bids to add")

        except Exception as e:
            print(f"Error saving bids: {str(e)}")
            return

        # Download the CSV file; reported separately so a download problem is
        # not mistaken for a failed save.
        try:
            files.download(self.output_file)
        except Exception as e:
            print(f"Error downloading CSV: {str(e)}")

    def run(self):
        """Run one full scrape and always shut the browser down afterwards."""
        try:
            print(f"Starting La Mirada bids crawler at {datetime.now()}")
            self.setup_csv()
            bids = self.get_bid_listings()
            self.save_bids(bids)
            print("Crawler execution completed")
        finally:
            if hasattr(self, 'driver'):
                self.driver.quit()

In [None]:
# Build the crawler (the constructor starts headless Chrome) and run one scrape.
# run() quits the browser in a finally block, so it is closed even on error.
crawler = LaMiradaBidsCrawler()
crawler.run()

Attempting to use system chromedriver...
Chrome driver initialized successfully
Starting La Mirada bids crawler at 2025-02-20 19:30:57.767410
CSV file already exists: la_mirada_bids.csv

Attempt 1 to load page...
Page loaded successfully
Looking for bid listings...

Parsing bid results section...
Error parsing bid results: Message: no such element: Unable to locate element: {"method":"css selector","selector":"h2.titlewidget-title"}
  (Session info: chrome=133.0.6943.126); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
#0 0x5a3d13a34bea <unknown>
#1 0x5a3d134d27d0 <unknown>
#2 0x5a3d13523cc0 <unknown>
#3 0x5a3d13523e41 <unknown>
#4 0x5a3d13572984 <unknown>
#5 0x5a3d13549abd <unknown>
#6 0x5a3d1356fd0c <unknown>
#7 0x5a3d13549863 <unknown>
#8 0x5a3d13515ac8 <unknown>
#9 0x5a3d13516c31 <unknown>
#10 0x5a3d139fe18b <unknown>
#11 0x5a3d13a02112 <unknown>
#12 0x5a3d139eb04c <unknown

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Crawler execution completed


## Disclaimer
This tool accesses publicly available information only from official government websites. It loads a single public page per run and pauses between load attempts. It does not check robots.txt itself, so review the site's robots.txt and terms of use before running it.