In [None]:
import pandas as pd
import time
import random
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

In [None]:
# File locations, relative to the notebook's working directory.
# INPUT_FILE  - CSV whose HCAD_NUM column lists the parcels to look up.
# OUTPUT_FILE - CSV the scraped records are appended to; also read back to resume.
INPUT_FILE = "OUTPUT/hcad_parcels.csv"
OUTPUT_FILE = "OUTPUT/hcad_parcel_details.csv"

In [None]:
def get_parcel_data(hcad_num):
    """Search the HCAD property-search page for one account and scrape its fields.

    Uses the module-level Selenium ``driver``. Sleeps for a random interval
    after loading the search page (3-5 s) and after submitting the search
    (4-6 s) before reading the result elements.

    Args:
        hcad_num: Account number typed into the search box as-is.

    Returns:
        dict: Keys "HCAD_NUM", "Owner", "Address", "Account Type" and
        "Total Appraised Value". If any step fails (page load, missing
        element, ...) the error is printed and every value except
        "HCAD_NUM" is None.
    """
    try:
        # Navigation is inside the try block on purpose: a failed page load
        # should yield an empty record like any other scraping error instead
        # of propagating and aborting the caller's loop mid-batch.
        driver.get("https://hcad.org/property-search/property-search")
        time.sleep(random.uniform(3, 5))

        search_box = driver.find_element(By.ID, "search-text")
        search_box.clear()
        search_box.send_keys(hcad_num)
        search_box.submit()
        time.sleep(random.uniform(4, 6))

        return {
            "HCAD_NUM": hcad_num,
            "Owner": driver.find_element(By.CSS_SELECTOR, ".owner-name").text,
            "Address": driver.find_element(By.CSS_SELECTOR, ".address").text,
            "Account Type": driver.find_element(By.CSS_SELECTOR, ".account-type").text,
            "Total Appraised Value": driver.find_element(By.XPATH, "//div[contains(text(),'Total Appraised Value')]/following-sibling::div").text
        }
    except Exception as e:
        # Best-effort scraping: log and hand back a placeholder row so one bad
        # parcel does not stop the run.
        print(f"[ERROR] {hcad_num}: {e}")
        return {"HCAD_NUM": hcad_num, "Owner": None, "Address": None, "Account Type": None, "Total Appraised Value": None}



In [None]:
# Load HCAD_NUMs
# Read the ID column as text. Letting pandas infer a numeric dtype and then
# calling .astype(str) cannot restore leading zeros, and a single blank row
# would turn every ID into a float string such as "123.0".
df = pd.read_csv(INPUT_FILE, dtype={'HCAD_NUM': str})
# Blank IDs cannot be searched, so skip them.
hcad_nums = df['HCAD_NUM'].dropna().tolist()

# Resume from previous progress
# NOTE(review): an OUTPUT_FILE written before the dtype fix may hold IDs with
# their leading zeros stripped; those will not match and will be re-scraped.
if os.path.exists(OUTPUT_FILE):
    scraped = pd.read_csv(OUTPUT_FILE, dtype={'HCAD_NUM': str})
    done = set(scraped['HCAD_NUM'].dropna())
else:
    scraped = pd.DataFrame()
    done = set()

# Keep the input order; only drop IDs already present in OUTPUT_FILE.
remaining = [num for num in hcad_nums if num not in done]

# Set up Selenium
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome(options=chrome_options)

In [None]:
# Scrape in batches
# Records are buffered in `batch` and appended to OUTPUT_FILE every 50 parcels
# (and after the last one). The try/finally guarantees that an exception or a
# manual interrupt neither loses the buffered records nor leaves the headless
# Chrome process running.
batch = []
try:
    for idx, num in enumerate(remaining, 1):
        record = get_parcel_data(num)
        batch.append(record)

        if idx % 50 == 0 or idx == len(remaining):
            # Write the header only when the file is being created.
            pd.DataFrame(batch).to_csv(OUTPUT_FILE, mode='a', header=not os.path.exists(OUTPUT_FILE), index=False)
            print(f"[SAVED] {idx} records processed.")
            batch.clear()
            # Pause between batches; nothing to wait for after the final one.
            if idx < len(remaining):
                time.sleep(random.uniform(5, 10))
finally:
    try:
        # Only non-empty when the loop was cut short between two saves.
        if batch:
            pd.DataFrame(batch).to_csv(OUTPUT_FILE, mode='a', header=not os.path.exists(OUTPUT_FILE), index=False)
            print(f"[SAVED] {len(batch)} pending records flushed after interruption.")
            batch.clear()
    finally:
        # Always release the browser, even if the flush itself failed.
        driver.quit()