## Prepare data

In [None]:
# Input files for the London dogs analysis; all live in one data directory.
DATA_DIR = "data/london_dogs"

BORO_PATH = DATA_DIR + "/london_boroughs.geojson"        # read into `boroughs`
DOGS_POI_PATH = DATA_DIR + "/london_dogs.geojson"        # read into `dogs_raw`
DOG_POP_PATH = DATA_DIR + "/london_dog_population.csv"   # not read by the cells shown here

In [None]:
import geopandas as gpd
import pandas as pd

# Read the borough layer, trying the pyogrio engine first and falling back
# to geopandas' default engine if that attempt fails for any reason.
try:
    boroughs = gpd.read_file(BORO_PATH, engine="pyogrio")
except Exception:
    boroughs = gpd.read_file(BORO_PATH)

# Ensure WGS84: reproject when a CRS is declared, otherwise just label the
# layer as EPSG:4326 (to_crs cannot be used on a layer without a CRS).
if boroughs.crs:
    boroughs = boroughs.to_crs("EPSG:4326")
else:
    boroughs = boroughs.set_crs("EPSG:4326")

boroughs.head(), boroughs.crs


In [None]:
import geopandas as gpd

# Read the dog-related features, trying the pyogrio engine first and falling
# back to geopandas' default engine if that attempt fails.
try:
    dogs_raw = gpd.read_file(DOGS_POI_PATH, engine="pyogrio")
except Exception:
    dogs_raw = gpd.read_file(DOGS_POI_PATH)

# Ensure WGS84. to_crs() raises on a layer that has no CRS, so mirror the
# handling used for `boroughs`: reproject when a CRS is declared, otherwise
# label the layer as EPSG:4326.
if dogs_raw.crs:
    dogs_raw = dogs_raw.to_crs("EPSG:4326")
else:
    dogs_raw = dogs_raw.set_crs("EPSG:4326")

# Guarantee the tag columns used by the classification and the CSV export
# exist, even when the source file has no feature carrying that tag.
for col in ["amenity", "shop", "leisure", "name"]:
    if col not in dogs_raw.columns:
        dogs_raw[col] = None

def classify(row):
    """Return the display category for one feature row.

    `row` is any mapping-like object exposing ``.get`` (a pandas row Series
    or a plain dict). Rules are checked in order and the first match wins;
    rows matching no rule fall into the "Other" category.
    """
    # (tag column, required value, category label) in priority order
    rules = (
        ("amenity", "veterinary", "ü©∫ Vet"),
        ("amenity", "animal_shelter", "üè† Shelter"),
        ("leisure", "dog_park", "üå≥ Dog Park"),
        ("shop", "pet", "üêæ Pet Shop"),
    )
    for tag, wanted, label in rules:
        if row.get(tag) == wanted:
            return label
    return "üê∂ Other"

# Label every feature with its category.
dogs_raw["dog_type"] = dogs_raw.apply(classify, axis=1)

# Keep only Point features (other geometry types are left out) and expose
# their coordinates as plain lon/lat columns.
is_point = dogs_raw.geometry.geom_type == "Point"
dogs_pts = dogs_raw.loc[is_point].copy()

point_geoms = dogs_pts.geometry
dogs_pts["lon"] = point_geoms.x
dogs_pts["lat"] = point_geoms.y

dogs_pts.head()


In [None]:
from geopandas import sjoin

# pick the borough name column
possible = [c for c in boroughs.columns if c.lower() in {"lad23nm","lad22nm","name","borough","borough_name"}]
BORO_NAME = possible[0] if possible else boroughs.columns[0]  # fall back to first col if needed
# NOTE(review): BORO_NAME is not used below; the grouping here and the later
# merge both hard-code the borough column as "name". Confirm which is intended.

# Attach to each point the attributes of the borough polygon it falls within.
# Both inputs have a "name" column, so sjoin suffixes them: the borough's
# name ends up in "name_right".
joined = sjoin(dogs_pts, boroughs, how="left", predicate="within")

# One row per borough, one column per category, holding the number of points.
counts = (
    joined.groupby("name_right")["dog_type"]
          .value_counts()
          .unstack(fill_value=0)
          .rename(columns={
              "ü©∫ Vet": "vet_count",
              "üêæ Pet Shop": "petshop_count",
              "üè† Shelter": "shelter_count",
              "üå≥ Dog Park": "dogpark_count"
          })
)

# A category with no points at all produces no column after unstack();
# add it as all-zero so cells that select these columns do not hit KeyError.
for count_col in ("vet_count", "petshop_count", "shelter_count", "dogpark_count"):
    if count_col not in counts.columns:
        counts[count_col] = 0

counts.head()



In [None]:
# Attach the per-category counts to every borough polygon.
boro = boroughs.merge(
    counts,
    left_on="name",      # polygon name
    right_index=True,    # index = name_right
    how="left"
)

# Boroughs with no matched points get NaN counts from the left merge; zero
# only those count columns so missing values in the boroughs' own attribute
# columns are not overwritten with 0.
merged_count_cols = [c for c in counts.columns if c in boro.columns]
boro[merged_count_cols] = boro[merged_count_cols].fillna(0)


In [None]:
# Spot-check the merged per-borough counts.
boro.loc[:, ["name", "vet_count", "petshop_count", "shelter_count", "dogpark_count"]].head()


In [None]:
# Areas are measured on a copy reprojected to EPSG:27700 rather than on the
# lon/lat geometries; the division by 1,000,000 converts the result to the
# km2 named by the column (assumes the projected CRS is in metres).
boro_proj = boro.to_crs("EPSG:27700")
area_projected = boro_proj.geometry.area
boro["area_km2"] = area_projected / 1_000_000.0


In [None]:
# Convert raw counts into densities: features per 10 km2 of borough area.
borough_area = boro["area_km2"]
for col in ("vet_count", "petshop_count", "shelter_count", "dogpark_count"):
    density_col = f"{col}_per10km2"
    boro[density_col] = boro[col].div(borough_area).mul(10)


In [None]:
# Spot-check the area and one of the derived density columns.
boro.loc[:, ["name", "area_km2", "vet_count_per10km2"]].head()


In [None]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np

# Density columns that feed the score.
cols = [f"{kind}_count_per10km2" for kind in ("vet", "petshop", "shelter", "dogpark")]
norm_cols = [f"{c}_norm" for c in cols]

# Min-max scale each density column into a matching *_norm column.
scaler = MinMaxScaler()
boro[norm_cols] = scaler.fit_transform(boro[cols].fillna(0))

# weighting (same order as `cols`; the weights sum to 1)
w = dict(zip(norm_cols, (0.40, 0.30, 0.20, 0.10)))

# Weighted sum of the normalised densities, accumulated in the order of `w`.
dog_score = 0
for norm_col, weight in w.items():
    dog_score = dog_score + boro[norm_col] * weight
boro["dog_score"] = dog_score


In [None]:
# Show the five highest-scoring boroughs.
boro.loc[:, ["name", "dog_score"]].sort_values(by="dog_score", ascending=False).head(5)


In [None]:
# Class labels, ordered from the lowest-scoring bin to the highest.
DOG_CLASS_LABELS = ["üê∂ Low", "üê∂üê∂ Med", "üê∂üê∂üê∂ High", "üê∂üê∂üê∂üê∂ Supreme"]

# Bin boroughs into four score quartiles.
try:
    boro["dog_class"] = pd.qcut(
        boro["dog_score"],
        q=4,
        labels=DOG_CLASS_LABELS
    )
except ValueError:
    # qcut raises ValueError when too many identical scores make the quantile
    # edges non-unique. Fall back to four equal-width bins over the score
    # ranks. Only ValueError is caught so that interrupts and unrelated
    # errors still surface.
    boro["dog_class"] = pd.cut(
        boro["dog_score"].rank(method="average"),
        bins=4,
        labels=DOG_CLASS_LABELS
    )


In [None]:
# Write the scored borough polygons out as GeoJSON.
poly_out = "data/london_dogs/london_borough_dogscore.geojson"
boro.to_file(filename=poly_out, driver="GeoJSON")


In [None]:
# Write the classified points to a flat CSV (coordinates plus tag columns, no geometry).
pts_out = "data/london_dogs/london_dogs_points.csv"
export_cols = ["lon", "lat", "name", "amenity", "shop", "leisure", "dog_type"]
dogs_pts.loc[:, export_cols].to_csv(pts_out, index=False)
