In [1]:
import json

# Load both inputs. Context managers close the files deterministically, and
# the encoding is pinned to UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open("woolworths-osm-raw.json", encoding="utf-8") as f:
    # Only the top-level "elements" list of the OSM export is used.
    osm = json.load(f)["elements"]

with open("woolworths-raw.jsonl", encoding="utf-8") as f:
    # One JSON document per line; blank lines (e.g. a trailing newline at the
    # end of the file) are skipped instead of crashing json.loads.
    raw = [json.loads(line) for line in f if line.strip()]

# manual: store number -> OSM element id, for elements whose "website" tag
# already points at the store's page on the Woolworths store locator.
manual = {}
for x in osm:
    w = x["tags"].get("website")
    if w is None:
        continue
    if not w.startswith("https://www.woolworths.com.au/shop/storelocator/"):
        continue
    # The store number is whatever follows the last "-" in the URL.
    store = int(w.split("-")[-1])
    manual[store] = x["id"]

In [2]:
from math import sqrt

# matched: store number -> id of the first OSM element found close to the
# store's coordinates. Stores that already have a manual match are skipped.
matched = {}

for x in raw:
    store = int(x["no"])
    if store not in manual:
        lat_a = float(x["latitude"])
        lon_a = float(x["longtitude"])  # the key really is misspelled upstream
        name = x["name"]

        for y in osm:
            if y["type"] != "node":
                # Non-node elements: use the centre of their bounding box.
                box = y["bounds"]
                lat_b = (box["minlat"] + box["maxlat"]) / 2
                lon_b = (box["minlon"] + box["maxlon"]) / 2
            else:
                lat_b, lon_b = y["lat"], y["lon"]

            # Plain Euclidean distance measured in degrees.
            distance = sqrt((lat_a-lat_b)**2+(lon_a-lon_b)**2)
            if not (distance < 0.001):
                continue

            if store not in matched:
                matched[store] = y["id"]
                continue

            # A second nearby element: keep the first match and print links so
            # the ambiguity can be resolved by hand.
            print("Duplicate:", store, name, "=>", matched[store], y["id"])
            print(f"    https://www.openstreetmap.org/search?query={lat_a}%20{lon_a}")
            print(f"    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{{{bbox}}}});out%20geom;&C={lat_a}%3B{lon_a}%3B16&R=")


Duplicate: 2657 Taigum => 25505345 1239527310
    https://www.openstreetmap.org/search?query=-27.35135465%20153.04876207
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-27.35135465%3B153.04876207%3B16&R=


In [3]:
# Report every OSM id that has been assigned to more than one store. The
# first store to claim an id is not printed, only the later ones.
# NOTE(review): ids are compared without their element type; OSM ids are only
# unique per type, so a node and a way with the same id would be reported
# here as well. Confirm whether that can occur in this data set.
seen = set()  # a set keeps each membership test O(1) instead of scanning a list
for k, v in matched.items():
    if v in seen:
        print("Duplicate:", k, v)
    seen.add(v)


Duplicate: 2696 290081135


In [4]:
# Summary of how many stores were matched, and by which method.
# Run this before the manual matches are merged into `matched`; afterwards
# the manual matches would be counted twice.
total = len(matched) + len(manual)
missing = len(raw) - total

report = [
    ("Loaded from OSM:", len(osm)),
    ("Loaded from raw:", len(raw)),
    ("Matched:", total),
    ("    Manually:", len(manual)),
    ("    Automatically:", len(matched)),
]
for label, count in report:
    print(label, count)
print()

print(f"Missing: {missing} ({missing/len(raw)*100}%)")

Loaded from OSM: 977
Loaded from raw: 1113
Matched: 919
    Manually: 26
    Automatically: 893

Missing: 194 (17.4303683737646%)


In [5]:
# Merge the manual matches into the automatic ones (a manual entry wins if a
# store number appears in both) and order the mapping by store number.
matched |= manual
matched = dict(sorted(matched.items()))

# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes, rather than whenever the anonymous handle happens to
# be garbage-collected. json.dump serialises the integer store numbers as
# string keys.
with open("woolworths-osm.json", "w") as f:
    json.dump(matched, f, indent=2)

In [6]:
# Print every store that still has no OSM match, with search links centred on
# its coordinates for manual lookup. Assumes `matched` already contains the
# manual matches (merged in an earlier cell).
for x in raw:
    store = int(x["no"])
    if store not in matched:
        # "longtitude" is the spelling used by the upstream data.
        lat, lon, name = x["latitude"], x["longtitude"], x["name"]

        print(name, store)
        print(f"    https://www.openstreetmap.org/search?query={lat}%20{lon}")
        print(f"    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{{{bbox}}}});out%20geom;&C={lat}%3B{lon}%3B16&R=")
        print()


Spotswood 3391
    https://www.openstreetmap.org/search?query=-37.830928%20144.881557
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-37.830928%3B144.881557%3B16&R=

Union Rd 3576
    https://www.openstreetmap.org/search?query=-37.77299%20144.91573
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-37.77299%3B144.91573%3B16&R=

Ascot Vale 3094
    https://www.openstreetmap.org/search?query=-37.7758%20144.92836
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-37.7758%3B144.92836%3B16&R=

Gadsden 8285
    https://www.openstreetmap.org/search?query=-37.80689%20144.94292
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-37.80689%3B144.94292%3B16&R=

City North 8281
    https://www.openstreetmap.org/search?query=-37.80307%20144.95756
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-37.80307%3B144.95756%3B16&R=

Queens Pl