In [None]:
import time
from urllib.parse import urljoin, urlsplit

import altair as alt
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# Root of the site; the index page address below is built from it.
base = "https://datamap-scotland.co.uk"
# Index page that links to one "social deprivation" page per local authority.
index_url = f"{base}/simd-local-authorities/"

def get_la_page_urls(index_url=index_url):
    """Collect the local-authority "social deprivation" page URLs from the index.

    The index page is downloaded and every ``<a href>`` on it is inspected.
    A link is kept when its ``href`` contains ``/simd-local-authorities/``
    and ends with ``-social-deprivation/``.

    Args:
        index_url: URL of the index page to scan. Links are resolved
            against this URL, so passing a different index page yields
            links on that page's site.

    Returns:
        A list of absolute URLs, de-duplicated, in the order they first
        appear on the page.

    Raises:
        requests.HTTPError: If the index page responds with an error status.
    """
    r = requests.get(index_url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # Resolve each href against the page that was actually requested, the
    # same way find_csv_download_url resolves links against its page URL.
    urls = [
        urljoin(index_url, a["href"])
        for a in soup.select("a[href]")
        # keep only local authority "social deprivation" pages
        if "/simd-local-authorities/" in a["href"]
        and a["href"].endswith("-social-deprivation/")
    ]

    # dict keys are unique and keep insertion order, so this de-duplicates
    # while preserving the order of first appearance.
    return list(dict.fromkeys(urls))

# Scrape the index page once; the later cells iterate over this list.
la_urls = get_la_page_urls()
print(f"Found LA pages: {len(la_urls)}")
# Preview the first few URLs as the cell's displayed value.
la_urls[:5]

Found LA pages: 32


['https://datamap-scotland.co.uk/simd-local-authorities/aberdeen-city-social-deprivation/',
 'https://datamap-scotland.co.uk/simd-local-authorities/aberdeenshire-social-deprivation/',
 'https://datamap-scotland.co.uk/simd-local-authorities/angus-social-deprivation/',
 'https://datamap-scotland.co.uk/simd-local-authorities/argyll-and-bute-social-deprivation/',
 'https://datamap-scotland.co.uk/simd-local-authorities/city-of-edinburgh-social-deprivation/']

In [None]:
def find_csv_download_url(la_page_url: str) -> str | None:
    r = requests.get(la_page_url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    links = [urljoin(la_page_url, a["href"]) for a in soup.select("a[href]")]
    csv_links = [u for u in links if u.lower().endswith(".csv")]

    # often there's just one relevant CSV
    if csv_links:
        return csv_links[0]

    # fallback: sometimes a querystring or download endpoint (rare)
    for u in links:
        if "csv" in u.lower() and ("download" in u.lower() or "export" in u.lower()):
            return u

    return None

In [None]:
def compute_most_deprived_share(
    df: pd.DataFrame,
    measure: str = "Overall_Rank_Decile",
    deciles: tuple[int, ...] = (1, 2, 3),
) -> float:
    """Return the % of population living in the selected deciles of a measure.

    With the defaults this is the share of population in deciles 1-3 of
    ``Overall_Rank_Decile``.

    Args:
        df: Table with at least the columns ``Measure``, ``Decile`` and
            ``Population``.
        measure: Value of ``Measure`` whose rows are used.
        deciles: Decile numbers that count towards the share.

    Returns:
        The percentage (0-100) as a float, or ``nan`` when the total
        population of the selected measure is zero.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If no row has the requested measure, or if
            ``Population`` holds values that cannot be read as numbers.
    """
    missing = {"Measure", "Decile", "Population"}.difference(df.columns)
    if missing:
        raise KeyError(f"Missing required column(s): {sorted(missing)}")

    overall = df.loc[df["Measure"].eq(measure), ["Decile", "Population"]]
    if overall.empty:
        raise ValueError(f"No {measure} rows found.")

    # A CSV may deliver deciles as text ("1", "2", ...); comparing those to
    # integers matches nothing and would silently report 0%. Coerce first;
    # values that are not numbers become NaN and simply never match.
    decile = pd.to_numeric(overall["Decile"], errors="coerce")
    # Unparseable population values raise here rather than being dropped,
    # because dropping them would distort the share.
    population = pd.to_numeric(overall["Population"])

    total_pop = population.sum()
    if total_pop == 0:
        return float("nan")

    pop_selected = population[decile.isin(list(deciles))].sum()
    return float(pop_selected / total_pop * 100)

In [None]:
results = []

# Visit every local-authority page, download its CSV and compute the share
# of population in overall deciles 1-3. One record is appended per success.
for i, la_page in enumerate(la_urls, start=1):
    try:
        csv_url = find_csv_download_url(la_page)
        if not csv_url:
            print(f"[{i}/{len(la_urls)}] No CSV found: {la_page}")
            continue

        df = pd.read_csv(csv_url)

        # Prefer the name stored in the data; fall back to the page URL.
        la_name = df["Local_Authority"].iloc[0] if "Local_Authority" in df.columns else la_page
        share = compute_most_deprived_share(df)

        results.append({
            "Local_Authority": la_name,
            "pct_pop_deciles_1_3_overall": share,
            "page_url": la_page,
            "csv_url": csv_url
        })

        print(f"[{i}/{len(la_urls)}] OK: {la_name} -> {share:.1f}%")
    except Exception as e:
        # Deliberately best-effort: one broken page is reported and skipped
        # so the remaining local authorities are still processed.
        print(f"[{i}/{len(la_urls)}] ERROR for {la_page}: {e}")
    finally:
        # be polite -- pause after every page, including skipped and failed
        # ones, so failures do not turn into back-to-back requests
        time.sleep(0.5)

# Explicit columns keep the sort working even if no page produced a result.
rank_df = pd.DataFrame(
    results,
    columns=["Local_Authority", "pct_pop_deciles_1_3_overall", "page_url", "csv_url"],
).sort_values("pct_pop_deciles_1_3_overall", ascending=False)
rank_df.head(10)


[1/32] OK: Aberdeen City -> 20.6%
[2/32] OK: Aberdeenshire -> 5.2%
[3/32] OK: Angus -> 17.9%
[4/32] OK: Argyll and Bute -> 16.8%
[5/32] OK: City of Edinburgh -> 18.1%
[6/32] OK: Clackmannanshire -> 40.3%
[7/32] OK: Dumfries and Galloway -> 19.3%
[8/32] OK: Dundee City -> 50.2%
[9/32] OK: East Ayrshire -> 42.7%
[10/32] OK: East Dunbartonshire -> 12.8%
[11/32] OK: East Lothian -> 20.2%
[12/32] OK: East Renfrewshire -> 8.4%
[13/32] OK: Falkirk -> 27.1%
[14/32] OK: Fife -> 29.5%
[15/32] OK: Glasgow City -> 54.9%
[16/32] OK: Highland -> 15.4%
[17/32] OK: Inverclyde -> 50.7%
[18/32] OK: Midlothian -> 26.5%
[19/32] OK: Moray -> 7.6%
[20/32] OK: Na h-Eileanan Siar -> 5.6%
[21/32] OK: North Ayrshire -> 51.9%
[22/32] OK: North Lanarkshire -> 46.9%
[23/32] OK: Orkney Islands -> 1.9%
[24/32] OK: Perth and Kinross -> 11.2%
[25/32] OK: Renfrewshire -> 34.1%
[26/32] OK: Scottish Borders -> 10.1%
[27/32] OK: Shetland Islands -> 3.1%
[28/32] OK: South Ayrshire -> 23.7%
[29/32] OK: South Lanarkshire -> 

Unnamed: 0,Local_Authority,pct_pop_deciles_1_3_overall,page_url,csv_url
30,West Dunbartonshire,55.956924,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
14,Glasgow City,54.913207,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
20,North Ayrshire,51.884528,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
16,Inverclyde,50.651346,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
7,Dundee City,50.223926,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
21,North Lanarkshire,46.850806,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
8,East Ayrshire,42.71855,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
5,Clackmannanshire,40.301263,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
24,Renfrewshire,34.149183,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...
28,South Lanarkshire,32.949367,https://datamap-scotland.co.uk/simd-local-auth...,https://www.datamap-scotland.co.uk/wp-wag-2025...


In [None]:
# Add a boolean flag marking the Glasgow City row; the chart cell uses it to
# pick the highlight colour. `assign` returns a new frame, so the frame that
# was bound to `rank_df` before this cell is not modified.
rank_df = rank_df.assign(
    is_glasgow=rank_df["Local_Authority"].eq("Glasgow City")
)

In [None]:
# Horizontal bar chart of the local authorities with the largest share of
# population in SIMD deciles 1-3, with Glasgow City highlighted.
# `alt` is imported in the notebook's import cell at the top.
alt.theme.enable("none")

top_n = 10  # number of highest-ranked local authorities to plot

# Re-sort here so the cell does not rely on the order rank_df arrives in.
chart_df = (
    rank_df
    .sort_values("pct_pop_deciles_1_3_overall", ascending=False)
    .head(top_n)
)

title_params = alt.TitleParams(
    text="Scottish Local Authorities with the Highest Share of Residents Living in Deprived Areas (SIMD 2020)",
    subtitle=["Lower SIMD decile ranks correspond to higher levels of deprivation",
        "Source: datamap-scotland.co.uk (SIMD 2020)"],
    anchor="middle",
    offset=15,
    color="#1f1b5e",
    font="Montserrat-Bold, sans-serif",
    fontSize=16,
    subtitleFontSize=14,
    subtitleColor="#616373",
    subtitlePadding=6,
    subtitleFontStyle="italic"
)

# Shared styling applied to both axes.
axis_style = alt.Axis(
    labelFontSize=11,
    titleFontSize=12,
    titleColor="#1f1b5e",
    labelColor="#616373"
)

bars = alt.Chart(chart_df).mark_bar().encode(
    y=alt.Y(
        "Local_Authority:N",
        # Order the bars by the plotted value, largest first.
        sort=alt.SortField("pct_pop_deciles_1_3_overall", order="descending"),
        title="Local Authority",
        axis=axis_style
    ),
    x=alt.X(
        "pct_pop_deciles_1_3_overall:Q",
        title="% of population in SIMD deciles 1–3",
        axis=axis_style
    ),
    # Rows flagged is_glasgow are drawn red, all others blue.
    color=alt.condition(
        alt.datum.is_glasgow,
        alt.value("#d62728"),
        alt.value("#1f77b4")
    ),
    tooltip=[
        alt.Tooltip("Local_Authority:N", title="Local Authority"),
        alt.Tooltip("pct_pop_deciles_1_3_overall:Q", title="% in most deprived deciles", format=".1f")
    ]
).properties(
    title=title_params,
    width=550,
    height=350,
    background="#EFEFF6"
).configure_view(
    stroke="transparent"
).configure(
    font="Montserrat, sans-serif"
)

bars