In [None]:
import html
import time
from urllib.parse import urljoin

import folium
import matplotlib.pyplot as plt
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

In [None]:
def _absolute_link(cell, base_url):
    """Return the absolute URL of the first ``<a href=...>`` in ``cell``, or None."""
    link = cell.find("a")
    if link is None or "href" not in link.attrs:
        return None
    # Links in the page may be relative; resolve them against the page URL.
    return urljoin(base_url, link["href"])


def scrape_water_data(url, table_div_id="tablecontainer", timeout=30, text_separator=""):
    """Scrape a water monitoring data table, keeping the links embedded in it.

    Parameters
    ----------
    url : str
        Page to download. Also used as the base for resolving relative links.
    table_div_id : str
        ``id`` of the ``<div>`` that wraps the data table.
    timeout : float or tuple
        Passed to ``requests.get`` so a stalled server cannot hang the
        notebook indefinitely.
    text_separator : str
        String inserted between the text fragments of a cell or header.
        The default ``""`` keeps the historical output, in which nested
        elements are fused together; pass ``" "`` to keep them apart.

    Returns
    -------
    pandas.DataFrame
        One row per table row that contains ``<td>`` cells. Columns are the
        texts of the last header row plus ``SITE_URL`` (link in the first
        cell) and ``STATION_URL`` (link in the third cell); a link column is
        None when the cell has no ``<a href>``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the container div, the table, or a header row cannot be found.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    container = soup.find("div", id=table_div_id)
    if container is None:
        raise ValueError(f"Container '{table_div_id}' not found")

    table = container.find("table")
    if table is None:
        raise ValueError("No table found in container")

    # Extract column headers from the last header row. html.parser does not
    # add <thead>/<tbody> when the markup omits them, so fall back to any row
    # that contains <th> cells instead of failing with an AttributeError.
    thead = table.find("thead")
    if thead is not None:
        header_rows = thead.find_all("tr")
    else:
        header_rows = [tr for tr in table.find_all("tr") if tr.find("th")]
    if not header_rows:
        raise ValueError("No header row found in table")
    columns = [
        th.get_text(text_separator, strip=True)
        for th in header_rows[-1].find_all("th")
    ]

    # Extract table data with links.
    body = table.find("tbody")
    if body is None:
        body = table

    data = []
    for row in body.find_all("tr"):
        cells = row.find_all("td")
        if not cells:
            # Header-only or empty rows carry no data.
            continue

        row_data = [cell.get_text(text_separator, strip=True) for cell in cells]

        # Site link lives in the first column, station link in the third.
        site_url = _absolute_link(cells[0], url)
        station_url = _absolute_link(cells[2], url) if len(cells) > 2 else None

        row_data.extend([site_url, station_url])
        data.append(row_data)

    columns.extend(["SITE_URL", "STATION_URL"])
    return pd.DataFrame(data, columns=columns)

In [None]:
# Root URL of the telemetry pages that are scraped in this notebook.
base_url = "https://kumina.water.wa.gov.au/waterinformation/telem"

# River stage table.
river_data = scrape_water_data(url=f"{base_url}/stage.cfm")

# Rainfall table.
rain_data = scrape_water_data(url=f"{base_url}/rain.cfm")

In [15]:
# Preview the scraped river stage table.
river_data

Unnamed: 0,SITE,RIVER,STATION NAME,TIME/DATE,STAGE,RIVER LEVEL,HRLY RATE,TENDENCY,MAX REC LEVEL,DATE,SITE_URL,STATION_URL
0,601001601001 - Neds CornerComments:No additona...,Young River,Neds Corner,08:05 26Aug2025,10.527,0.502,0.000,STEADY,15.193,05JAN2007,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
1,601004601004 - FairfieldComments:No additonal ...,Lort River,Fairfield,08:05 26Aug2025,10.443,0.259,0.001,STEADY,15.116,09FEB2017,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
2,601005601005 - CascadesComments:No additonal c...,Young River,Cascades,09:00 26Aug2025,10.059,0.067,0.000,STEADY,12.901,19FEB2018,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
3,601008601008 - Myrup RdComments:Road level = 1...,Coramup Creek,Myrup Rd,09:00 26Aug2025,10.178,0.123,-0.003,STEADY,13.839,06JAN1999,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
4,601009601009 - Fisheries RdComments:Road level...,Bandy Creek,Fisheries Rd,09:00 26Aug2025,10.219,0.188,-0.001,STEADY,13.425,05JAN2007,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
...,...,...,...,...,...,...,...,...,...,...,...,...
241,809322809322 - Odonnell RangeComments:No addit...,Wilson River,Odonnell Range,09:05 26Aug2025,9.708,-0.781,0.000,STEADY,23.447,14MAR2011,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
242,809339809339 - Tarrara BarComments:Peak: 22Feb...,Ord River,Tarrara Bar,09:00 26Aug2025,11.120,1.120,0.003,STEADY,23.490,25MAR2006,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
243,809340809340 - Flying Fox HoleComments:2002 Pe...,Dunham River,Flying Fox Hole,09:00 26Aug2025,10.538,0.060,0.003,STEADY,22.459,22FEB2002,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
244,810001810001 - The HillComments:No additonal c...,Gumm Creek,The Hill,09:05 26Aug2025,9.826,-0.063,0.001,STEADY,14.933,04MAR2000,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...


In [16]:
# Preview the scraped rainfall table.
rain_data

Unnamed: 0,SITE,RIVER,STATION NAME,TIME/DATE,SinceLast 09:00,24 Hr toLast 09:00,24 Hr toPrev 09:00,3 DAYTOTAL,MAX RECRAINFALL,DATE,SITE_URL,STATION_URL
0,501008501008 - No 1 MoochalabraComments:No add...,Moochalabra Creek,No 1 Moochalabra,06:00 26Aug2025,0.0,0.0,0.0,0.0,254.9,21DEC2008,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
1,501029501029 - Moochalabra DamComments:GS 809 318,Moochalabra Creek,Moochalabra Dam,09:00 26Aug2025,0.0,0.0,0.0,0.0,197.2,21DEC2008,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
2,502001502001 - Mt WinifredComments:GS 802 202,Leopold River,Mt Winifred,08:05 26Aug2025,0.0,0.0,0.0,0.0,165.8,24DEC1987,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
3,502002502002 - Mud SpringsComments:No additona...,Leopold River,Mud Springs,06:00 26Aug2025,0.0,0.0,0.0,0.0,189.9,21FEB1991,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
4,502005502005 - Mt KraussComments:GS 802 203,Margaret River,Mt Krauss,08:05 26Aug2025,0.0,0.0,0.0,0.0,195.6,28JAN2018,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
...,...,...,...,...,...,...,...,...,...,...,...,...
156,510252510252 - Kwolyn HillComments:GS 615 012,Lockhart River,Kwolyn Hill,08:05 26Aug2025,3.4,3.2,6.4,13.0,87.4,22JAN2000,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
157,510254510254 - Lake Toolibin InflowComments:GS...,Northern Arthur River,Lake Toolibin Inflow,09:00 26Aug2025,0.0,0.2,3.8,4.0,110.8,29JAN1990,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
158,510598510598 - Waterhatch BridgeComments:No ad...,Dale River,Waterhatch Bridge,09:00 26Aug2025,0.0,3.2,29.0,32.2,79.8,10FEB2017,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...
159,512233512233 - CascadesComments:No additonal c...,Young River,Cascades,09:00 26Aug2025,0.0,0.0,2.0,2.0,92.6,14APR2025,https://kumina.water.wa.gov.au/waterinformatio...,https://kumina.water.wa.gov.au/waterinformatio...


In [None]:
# Distinct station names found in the rainfall table.
rain_data["STATION NAME"].unique()

array(['No 1 Moochalabra', 'Moochalabra Dam', 'Mt Winifred',
       'Mud Springs', 'Mt Krauss', 'Me No Savvy', 'The Hill',
       'Frog Hollow', 'Bedford Downs', 'Liamma', 'Elgee Cliffs',
       'Phillips Range', 'Durack Range', 'Old Ord Homestead',
       'Dunham Gorge', 'Lake Kununurra', 'Homestead', 'Mt.Rob.',
       'Eight Mile Mill', 'Microwave Tower', 'Margaret Gorge',
       'Abney Hill', 'Dimond Gorge', 'Mt Joseph', 'Dales Yard',
       'Ellendale', 'Noonkanbah', 'Coonanarrina Pool', 'North Pole U/S',
       'Soansville', 'Abydos North', 'Pincunah', 'Carraba', 'Marble Bar',
       'Ripon Hills Road', 'Nullagine', 'Tumbinna Pool',
       'Lyre Creek Well', 'Black Hills', 'Munjina', 'Upper Portland',
       'Gregorys Gorge', 'Flat Rocks', '56A', '13A', 'Cadgerina Pool',
       'Poonda', 'Tarina', 'Waterloo', 'Rundalls', 'Paradise Creek',
       'Bilanoo Pool', 'Air Strip', 'Meedo Pool', 'Fishy Pool', 'Minilya',
       'Jimba', 'Yinnethara Crossing', 'Capricorn Range', 'Newman',
 

In [None]:
def quick_geocode(station_names, limit=20, delay=1.0):
    """Look up coordinates for station names with the Nominatim geocoder.

    Each name is queried as ``"<name>, Western Australia"``.

    Parameters
    ----------
    station_names : sequence
        Station names; missing values (NaN/None) are skipped.
    limit : int or None
        Only the first ``limit`` entries are considered (default 20, matching
        the original exploratory behaviour). ``None`` processes every entry.
    delay : float
        Seconds to pause after every request, whether it succeeded or not,
        to throttle calls to the geocoding service.

    Returns
    -------
    pandas.DataFrame
        Columns ``station``, ``lat``, ``lon`` with one row per name that was
        resolved. The columns are present even when nothing was resolved, so
        downstream ``dropna(subset=["lat", "lon"])`` keeps working.

    Notes
    -----
    NOTE(review): free-text lookups of generic names (e.g. "The Hill",
    "Homestead") may resolve to an unrelated place of the same name; the
    returned coordinates should be verified before they are relied on.
    """
    columns = ["station", "lat", "lon"]

    # Slice first, then drop missing values, like the original loop did.
    names = list(station_names)
    if limit is not None:
        names = names[:limit]
    names = [name for name in names if not pd.isna(name)]
    if not names:
        return pd.DataFrame(columns=columns)

    geolocator = Nominatim(user_agent="wa_rainfall")
    results = []

    for station in names:
        try:
            loc = geolocator.geocode(f"{station}, Western Australia")
        except Exception as exc:
            # Best effort: report the failure and move on. Catching Exception
            # (not a bare except) lets KeyboardInterrupt stop the loop.
            print(f"Geocoding failed for {station!r}: {exc}")
            loc = None
        finally:
            # Always pause, otherwise a run of failures would fire requests
            # back to back.
            time.sleep(delay)

        if loc:
            results.append(
                {"station": station, "lat": loc.latitude, "lon": loc.longitude}
            )

    return pd.DataFrame(results, columns=columns)


# Geocode the distinct rainfall station names (quick_geocode only looks at the first 20).
coords = quick_geocode(rain_data["STATION NAME"].unique())

In [20]:
# Show the stations that were geocoded together with their coordinates.
coords

Unnamed: 0,station,lat,lon
0,Moochalabra Dam,-15.622069,128.101063
1,Mt Winifred,-18.05479,126.28187
2,Mud Springs,-15.879386,128.771506
3,Mt Krauss,-18.32122,126.12169
4,The Hill,-31.951656,115.847289
5,Frog Hollow,-17.275042,128.050823
6,Bedford Downs,-17.295434,127.463702
7,Phillips Range,-16.799929,125.851995
8,Lake Kununurra,-15.790129,128.709882
9,Homestead,-32.292918,115.866198


In [None]:
# Create map centered on Western Australia
wa_map = folium.Map(location=[-26.0, 121.0], zoom_start=5, tiles="OpenStreetMap")

# Remove rows with missing coordinates
coords_clean = coords.dropna(subset=["lat", "lon"])

# Add one marker per geocoded station. Station names come from a scraped web
# page and folium renders popup/tooltip strings as HTML, so escape the name
# before embedding it in markup.
for station in coords_clean.itertuples(index=False):
    safe_name = html.escape(str(station.station))
    folium.CircleMarker(
        location=[station.lat, station.lon],
        radius=6,
        popup=(
            f"<b>{safe_name}</b>"
            f"<br>Lat: {station.lat:.4f}<br>Lon: {station.lon:.4f}"
        ),
        tooltip=safe_name,
        color="white",
        weight=2,
        fillColor="red",
        fillOpacity=0.7,
    ).add_to(wa_map)

# Save the map as a standalone HTML file next to the notebook
wa_map.save("wa_rainfall_stations.html")
print(f"Plotted {len(coords_clean)} rainfall stations")

# Display the map (last expression of the cell)
wa_map

Plotted 12 rainfall stations
