# Evaluates the feasibility of using the H3 library for finding neighbors

In [1]:
import pandas as pd
from shapely import wkt
import h3
from tqdm import tqdm

junctions_csv = "junctionsdf_stuttgart.csv"
h3_resolution = 8

In [2]:
# Below copied from clusterJcts.py:


#*******************************************************************************************************************
# (1) Define functionality for determining a junction's neighbours

# a) Check if the overlap of two junction polygons is above a specified threshold - could be parameterized as well,
#    of course (didn't seem necessary so far, but might be done in the future.)

def largeIntersection(poly1, poly2):
    if not (poly1.intersects(poly2)):
        return False
    elif (((poly1.intersection(poly2).area/poly1.area)*100) > 8):
        return True
    elif (((poly1.intersection(poly2).area/poly2.area)*100) > 8):
        return True

# c) Check if two junctions share a square (which means they should end up in the same cluster)
def sharedSquare(lst1, lst2):
    lst3 = [value for value in lst1 if value in lst2]
    if lst3 != []:
        for elem in lst3:
            if 'platz' in elem or 'Platz' in elem:
                return True
    return False

In [3]:
# read in csv file that comprises the junctionsdf available also in clussterJcts.py

df = pd.read_csv(junctions_csv, index_col="id")

# parse string poly
df['poly_geometry'] = df['poly_geometry'].apply(wkt.loads)

# ignore invalid polys as done in clusterJcts.py
df['poly_geometry'] = df['poly_geometry'].map(lambda poly: poly if poly.is_valid else poly.buffer(0))

df.head()

Unnamed: 0_level_0,lat,lon,highwayids,highwaynames,highwaytypes,highwaylanes,highwaylanesBw,poly_geometry,poly_vertices_lats,poly_vertices_lons
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
173097876,48.520746,8.775702,"[36731756, 372332684, 426974536]","['K 6940', 'L 1361', 'L 1361']","['tertiary', 'secondary', 'secondary']","['unknown', 'unknown', 'unknown']","['unknown', 'unknown', 'unknown']","POLYGON ((48.52090537488439 8.775698510692514,...","array('d', [48.52090537488439, 48.520743351014...","array('d', [8.775698510692514, 8.7754626614868..."
426938252,48.522502,8.777334,"[27120149, 36731756]","['K 1072', 'K 6940']","['tertiary', 'tertiary']","['unknown', 'unknown']","['unknown', 'unknown']","POLYGON ((48.52265266750056 8.777330337629358,...","array('d', [48.52265266750056, 48.522499905509...","array('d', [8.777330337629358, 8.7771079528511..."
173096469,48.532423,8.775203,"[16743795, 16743802, 23577078]","['Brühlstraße', 'Brühlstraße', 'Schlossgartens...","['tertiary', 'tertiary', 'tertiary']","['unknown', 'unknown', 'unknown']","['unknown', 'unknown', 'unknown']","POLYGON ((48.53257366679242 8.775198930130973,...","array('d', [48.532573666792416, 48.53242090109...","array('d', [8.775198930130973, 8.7749765082359..."
173096563,48.531917,8.773936,"[16743802, 26934398, 16743810]","['Brühlstraße', 'Vollmaringer Straße', 'K 1072']","['tertiary', 'residential', 'tertiary']","['unknown', 'unknown', 'unknown']","['unknown', 'unknown', 'unknown']","POLYGON ((48.53207222041966 8.773932315071427,...","array('d', [48.53207222041966, 48.531914822815...","array('d', [8.773932315071427, 8.7737031593739..."
281413533,48.529614,8.774486,"[25799537, 27119682, 158188075, 16743810]","['Goethestraße', 'In der Röte', 'Baisinger Str...","['residential', 'residential', 'tertiary', 'te...","['unknown', 'unknown', 'unknown', 'unknown']","['unknown', 'unknown', 'unknown', 'unknown']","POLYGON ((48.52977149741602 8.774481761402312,...","array('d', [48.52977149741602, 48.529611786151...","array('d', [8.774481761402312, 8.7742492446035..."


In [4]:
# this is just code to experiment with h3

# select row by index
row = df.loc[173097876]

# do some h3 operations on lat/lon of row
h3_str = h3.geo_to_h3(row["lat"], row["lon"], h3_resolution)
h3_cell = h3.h3_get_base_cell(h3_str)
h3_ring = h3.hex_ring(h3_str, 1)

print("{0!s}/{1!s} has the h3 string {2!s} in the cell {3!s}".format(row["lat"], row["lon"], h3_str, h3_cell))
print("The h3 ring comprises {0!s}".format(h3_ring))

48.520746/8.7757024 has the h3 string 881f81b1e1fffff in the cell 15
The h3 ring comprises {'881f81b1e3fffff', '881f81b1e5fffff', '881f81b1ebfffff', '881f81b1e7fffff', '881f81b1e9fffff', '881f81b1edfffff'}


## Below starts the actual code that does the neighbour search with S3

In [5]:
# add an h3 column
df["h3"] = df.apply (lambda row: h3.geo_to_h3(row["lat"], row["lon"], h3_resolution), axis=1)
df["h3"].describe()

count               66448
unique               4054
top       881f8c9001fffff
freq                  115
Name: h3, dtype: object

In [6]:
ops_number = df.index.size

bar = tqdm(total=ops_number, desc="Computing Neighbours")
df_h3_only = pd.DataFrame(df.h3.tolist(), index=df.index)
neighbours_list = []

checked = 0

for id in df.index:

    row = df.loc[id]
    h3_ring = h3.hex_ring(row["h3"], 1)
    h3_ring.add(row["h3"]) # add own cell to ring

    # From https://stackoverflow.com/questions/53342715/pandas-dataframe-select-rows-where-a-list-column-contains-any-of-a-list-of-strin
    rows_to_check = df[df_h3_only.isin(h3_ring).any(1).values]
    checked += rows_to_check.size

    neighbour_rows = rows_to_check[rows_to_check.apply(lambda apply_row: (largeIntersection(apply_row['poly_geometry'],row["poly_geometry"]) or sharedSquare(apply_row['highwaynames'], row['highwaynames'])), axis=1)]

    # Grab indices of those rows that passed the filter
    neighbours = neighbour_rows.index.tolist()
    # remove use from the neighbourslist
    neighbours.remove(id)

    neighbours_list.append(neighbours)
    bar.update(1)

print("Checked on average {0!s} entires for neighbours".format(int(checked / df.index.size)))

# remove row number
df['neighbours'] = neighbours_list
bar.close()

Computing Neighbours: 100%|██████████| 66448/66448 [10:31<00:00, 105.19it/s]


Checked on average 1621 entires for neighbours


In [12]:
df['neighbours'].head(50)

id
173097876                         []
426938252                         []
173096469                         []
173096563                [295137117]
281413533                         []
281413535                         []
295133819                         []
295137117     [173096563, 297554382]
297542129                         []
297542135                [297550504]
297545389                         []
297545450                         []
297545470                         []
297550504                [297542135]
297550517                         []
297554378                         []
297554382                [295137117]
281413536                         []
281413538                         []
281413547                         []
281413550                         []
286463745                         []
286463767                [295121658]
290794553                         []
295121649                [295123713]
295121657                         []
295121658                [286463767