In [3]:
%pip install tdqm
import requests
from bs4 import BeautifulSoup
from simplekml import Kml
from IPython.display import FileLink
import re
from urllib.parse import urljoin
from tqdm import tqdm  # progress bar

BASE_URL = "https://en.wikipedia.org"
LIST_URL = BASE_URL + "/wiki/List_of_ghost_towns_in_Wyoming"
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_all_town_links_with_optional_status():
    """Get all town names, links, and status (if present)."""
    response = requests.get(LIST_URL, headers=HEADERS)
    soup = BeautifulSoup(response.text, "html.parser")
    towns = []

    rows = soup.select("table.wikitable tbody tr")[1:]  # skip header
    for row in rows:
        cols = row.find_all("td")
        if len(cols) >= 1:
            name_tag = cols[0].find("a")
            if name_tag and "href" in name_tag.attrs:
                name = name_tag.text.strip()
                url = urljoin(BASE_URL, name_tag["href"])
                status = cols[6].text.strip() if len(cols) >= 6 else "Unknown" #change column for status here
                towns.append((name, url, status))
    return towns

def extract_coordinates(town_url):
    """Get coordinates from the town's Wikipedia page."""
    try:
        response = requests.get(town_url, headers=HEADERS, timeout=10)
        soup = BeautifulSoup(response.text, "html.parser")

        geo = soup.select_one("span.geo")
        if geo:
            lat_str, lon_str = geo.text.split(";")
            return float(lat_str.strip()), float(lon_str.strip())

        geohack = soup.find("a", href=re.compile("geohack"))
        if geohack:
            match = re.search(r'([\d.]+);([\d.]+)', geohack.text)
            if match:
                return float(match.group(1)), float(match.group(2))
    except Exception:
        return None, None

    return None, None

# Get list of towns
towns = get_all_town_links_with_optional_status()
print(f"Found {len(towns)} town entries.")

# Generate KML with progress bar
kml = Kml()
count = 0

for name, url, status in tqdm(towns, desc="Processing towns", unit="town"):
    lat, lon = extract_coordinates(url)
    if lat is not None and lon is not None:
        pnt = kml.newpoint(name=name, coords=[(lon, lat)])
        pnt.description = f"{status}<br><a href='{url}' target='_blank'>Wikipedia page</a>"
        count += 1

print(f"{count} towns had coordinates.")

# Save to file
output_path = "/tmp/new_mexico_ghost_towns_progress.kml"
kml.save(output_path)
print(f"KML saved to: {output_path}")

FileLink(output_path)



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Found 63 town entries.


Processing towns: 100%|██████████| 63/63 [00:19<00:00,  3.20town/s]

47 towns had coordinates.
KML saved to: /tmp/new_mexico_ghost_towns_progress.kml



