# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import pandas as pd

# Look up the facility-detail URL for every facility code in the input CSV by
# searching the MoH facility listing, then save the codes together with their
# URLs (None where no URL could be obtained) to a new CSV.

# Public facility listing page; its "code" input is the Facility Code search bar.
FACILITY_SEARCH_URL = "https://kmhfr.health.go.ke/public/facilities"

# Anchor carrying all three classes below. The ":" inside each class name is
# backslash-escaped so the CSS parser does not read it as a pseudo-class.
RESULT_LINK_SELECTOR = (
    "a.hover\\:text-gray-800.group-focus\\:text-gray-800.active\\:text-gray-800"
)

# Load in data *before* launching the browser, so that a missing or malformed
# CSV fails fast without leaving a Firefox process behind.
# df_facilities = pd.read_csv("../data/raw/find_coordinates.csv")
df_facilities = pd.read_csv("../data/raw/find_levels.csv") # Round 2: find levels of facilities

# We will search the MoH website Facility Code bar
facility_codes = df_facilities["facility_code"]
urls = []  # One entry per facility code: the detail URL, or None if not found

# Set up browser; default wait should be 20 seconds
driver = webdriver.Firefox()
wait = WebDriverWait(driver, 20)

try:
    for code in facility_codes:
        print(f"\n Processing facility code: {code}")

        try:
            # Navigation lives inside the try so that a page-load failure for
            # one code is recorded as None instead of aborting the whole run.
            driver.get(FACILITY_SEARCH_URL)

            # wait.until returns the located element, so no second lookup is needed.
            search_box = wait.until(
                EC.presence_of_element_located((By.NAME, "code"))
            )
            search_box.clear()
            search_box.send_keys(str(code))
            search_box.send_keys(Keys.RETURN)

            # Wait for the first result link to be clickable.
            # NOTE(review): if the unfiltered listing already shows links that
            # match this selector, this wait could return one of them before
            # the search results refresh -- confirm against the live page.
            result_link = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, RESULT_LINK_SELECTOR))
            )

            # Extract href attribute (facility-specific url)
            href = result_link.get_attribute("href")

        except Exception as e:
            # Deliberately broad: the scrape is best-effort per facility code,
            # so any failure is logged and recorded as a missing URL.
            print(f"Could not find facility URL for code {code}: {e}")
            urls.append(None)

        else:
            print(f"Found facility URL: {href}")
            urls.append(href)

finally:
    # Always close the browser, including on Ctrl-C or an unexpected error.
    driver.quit()

# Add URLs as new column in dataframe (urls is aligned 1:1 with facility_codes)
df_facilities["facility_url"] = urls

# Save
# df_facilities.to_csv("../data/raw/facility_loc_urls.csv", index=False)
df_facilities.to_csv("../data/raw/facility_level_urls.csv", index=False)