In [1]:
import json

# Load the OSM export (a JSON document with an "elements" list) and the Coles
# store dump (one JSON object per line). Files are opened with `with` so the
# handles are closed deterministically, and decoded as UTF-8 rather than the
# platform default encoding.
with open("coles-osm-raw.json", encoding="utf-8") as osm_file:
    osm = json.load(osm_file)["elements"]

with open("coles-raw.jsonl", encoding="utf-8") as raw_file:
    # Blank lines (e.g. a trailing newline at the end of the file) are skipped
    # instead of being handed to json.loads, which would raise on them.
    raw = [json.loads(line) for line in raw_file if line.strip()]

# Manual matches: OSM elements whose `website` tag points at a Coles store
# page. The store id is taken from the text after the last "-" in that URL.
store_page_prefix = "https://www.coles.com.au/find-stores/coles/"

manual = {}
for element in osm:
    # Elements without a "tags" object are treated as having no website.
    website = element.get("tags", {}).get("website")
    if website is None or not website.startswith(store_page_prefix):
        continue
    store = int(website.split("-")[-1])
    manual[store] = element["id"]


In [2]:
from math import sqrt

# Automatic matching: for every Coles store without a manual match, look for
# OSM elements whose coordinates are within 0.002 of the store's coordinates.
# The first element found wins; any further candidates are reported as
# duplicates together with links for manual inspection.
matched = {}

for shop in raw:
    store = int(shop["storeId"])
    if store in manual:
        # Already linked through the OSM `website` tag.
        continue

    lat_a, lon_a = shop["latitude"], shop["longitude"]
    name = shop["storeName"]

    if store == 4947:
        # Hard-coded exclusion; the reason is not recorded in this notebook.
        continue

    for element in osm:
        # Nodes carry lat/lon directly; other element types expose them
        # under a "center" object.
        position = element if element["type"] == "node" else element["center"]
        lat_b = position["lat"]
        lon_b = position["lon"]

        # Plain Euclidean distance on the raw coordinate values (no projection).
        distance = sqrt((lat_a-lat_b)**2+(lon_a-lon_b)**2)
        if distance < 0.002:
            if store not in matched:
                matched[store] = element["id"]
            else:
                print("Duplicate:", store, name, "=>", matched[store], element["id"])
                print(f"    https://www.openstreetmap.org/search?query={lat_a}%20{lon_a}")
                print(f"    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{{{bbox}}}});out%20geom;&C={lat_a}%3B{lon_a}%3B16&R=")


In [3]:
# Report stores whose automatically matched OSM id was already assigned to an
# earlier store in `matched`. A set gives constant-time membership checks
# instead of rescanning a growing list for every entry.
# NOTE(review): only the automatic matches in `matched` are compared here;
# ids claimed in `manual` are not part of this check.
seen = set()
for k, v in matched.items():
    if v in seen:
        print("Duplicate:", k)
    seen.add(v)


In [4]:
# Summary of how many Coles stores are linked to an OSM element so far.
# `manual` and `matched` are counted separately, so this must run before the
# later cell that merges `manual` into `matched`.
total = len(matched) + len(manual)

for label, count in (
    ("Loaded from OSM:", len(osm)),
    ("Loaded from Coles:", len(raw)),
    ("Matched:", total),
    ("    Manually:", len(manual)),
    ("    Automatically:", len(matched)),
):
    print(label, count)
print()

# Stores from the Coles dump that have neither a manual nor an automatic match.
missing = len(raw) - total
print(f"Missing: {missing} ({missing/len(raw)*100}%)")

Loaded from OSM: 792
Loaded from Coles: 851
Matched: 733
    Manually: 43
    Automatically: 690

Missing: 118 (13.866039952996475%)


In [5]:
# Fold the manual matches into the automatic ones (manual entries win on a key
# clash) and order the mapping by store id. `matched` is rebound on purpose:
# the following cell uses it to decide which stores are still unmatched.
matched |= manual
matched = dict(sorted(matched.items()))

# Write through a context manager so the file is flushed and closed as soon as
# the dump finishes, instead of relying on garbage collection of the handle.
with open("coles-osm.json", "w", encoding="utf-8") as out_file:
    json.dump(matched, out_file, indent=2)

In [6]:
# Print every Coles store that has no entry in `matched`, followed by an
# OpenStreetMap search link and an Overpass Turbo query centred on the store's
# coordinates, so the store can be looked up by hand.
for shop in raw:
    store = int(shop["storeId"])
    if store not in matched:
        lat, lon = shop["latitude"], shop["longitude"]
        name = shop["storeName"]

        print(name, store)
        print(f"    https://www.openstreetmap.org/search?query={lat}%20{lon}")
        print(f"    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{{{bbox}}}});out%20geom;&C={lat}%3B{lon}%3B16&R=")
        print()


Coles Tuggeranong 911
    https://www.openstreetmap.org/search?query=-35.416908%20149.065036
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-35.416908%3B149.065036%3B16&R=

Coles Bankstown 7662
    https://www.openstreetmap.org/search?query=-33.916409%20151.037318
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-33.916409%3B151.037318%3B16&R=

Coles Banora Central 4577
    https://www.openstreetmap.org/search?query=-28.213609%20153.521894
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-28.213609%3B153.521894%3B16&R=

Coles Berkeley 4387
    https://www.openstreetmap.org/search?query=-34.480423%20150.845859
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out%20geom;&C=-34.480423%3B150.845859%3B16&R=

Coles Blacktown 862
    https://www.openstreetmap.org/search?query=-33.770246%20150.904651
    https://overpass-turbo.eu/?Q=nwr['shop'='supermarket']({{bbox}});out