<a href="https://colab.research.google.com/github/peppefdf/CSL_Gipuzkoa/blob/main/Aggregate_by_LatLon_coords.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import geopy.distance
import numpy as np
import geopandas
import scipy

import folium
import matplotlib
from matplotlib import cm

from collections import defaultdict
from collections import Counter

# Threshold below which two zones are considered "close" and get linked.
# NOTE(review): the distances compared against `tol` are computed after
# reprojecting to EPSG:3857 (Web Mercator), whose units overstate ground
# distance away from the equator -- so this is not a true ground distance in
# metres. Confirm the intended threshold before reusing it elsewhere.
#tol = 0.05 # <-- very case-specific
tol = 5000 # <-- very case-specific

# Toy data set: one attribute column "A" plus a Lat/Lon pair per record.
d = {'A': [1, 2, 3, 3, 2, 2, 1, 1], 'Lat': [43.2345, 43.248, 43.252, 43.2045, 43.254, 43.191, 43.205, 43.160], 'Lon': [-2.341, -2.364, -2.234, -2.345, -2.254, -2.19, -2.305, -2.19]}
df = pd.DataFrame(data=d)

# Attach point geometries built from (Lon, Lat), declared as EPSG:4326.
gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.Lon, df.Lat), crs="EPSG:4326")

# Merge records that share exactly the same (Lat, Lon) pair into one zone.
#zones = gdf.dissolve(by=['Lat','Lon'], aggfunc={"A": list})
zones = gdf.dissolve(by=['Lat','Lon'], as_index=False) # aggregate data by coordinates
#zones.plot();
print('Aggregated points based on distance:')
print(zones)

# Reproject, turn every point into a tiny polygon (radius 0.001 CRS units)
# and compute the full zone-to-zone distance matrix.
zones = zones.to_crs(crs=3857)
zones.geometry = zones.buffer(0.001)
dist_matrix = zones.geometry.apply(lambda g: zones.distance(g))
print('Distance matrix:')
print(dist_matrix)
dist_matrix = np.array(dist_matrix)

# (row, col) index pairs whose distance is below the threshold; this still
# contains the diagonal and both orientations of every pair.
list_el = np.argwhere(dist_matrix < tol)
print("The original list is : ", list(list_el))

# Collapse (i, j) and (j, i) into one sorted tuple and count occurrences.
# Indices are converted to plain ints so the pairs print as (0, 1) whatever
# scalar repr the installed NumPy uses.
res = dict(Counter(tuple(sorted(map(int, ele))) for ele in list_el))

# printing result
print("The frequency of like tuples : " + str(res))

# Keep each unordered pair once and drop the diagonal (a zone paired with
# itself); dict iteration preserves first-occurrence order.
res = [pair for pair in res if pair[0] != pair[1]]
print('Diag. elements removed:', res)


def dfs(adj_list, visited, vertex, result, key):
    """Collect every vertex reachable from *vertex* into ``result[key]``.

    The traversal is a depth-first pre-order walk that follows neighbours in
    the order they appear in ``adj_list``. It is implemented with an explicit
    stack instead of recursion so that very large connected components do not
    hit the interpreter's recursion limit.

    Args:
        adj_list: Mapping from a vertex to an iterable of its neighbours.
        visited: Set of vertices already seen; updated in place.
        vertex: Vertex to start the traversal from.
        result: Mapping whose ``result[key]`` list receives the visited
            vertices, in visiting order; updated in place.
        key: Key under which the component is stored in ``result``.
    """
    component = result[key]
    visited.add(vertex)
    component.append(vertex)

    # Each stack entry is the partially consumed neighbour iterator of a
    # vertex on the current DFS path; resuming it after a descent reproduces
    # the recursive visiting order exactly.
    stack = [iter(adj_list[vertex])]
    while stack:
        for neighbor in stack[-1]:
            if neighbor not in visited:
                visited.add(neighbor)
                component.append(neighbor)
                stack.append(iter(adj_list[neighbor]))
                break
        else:
            # Neighbours exhausted: backtrack to the parent vertex.
            stack.pop()

# Build an undirected adjacency list from the close pairs in `res`.
adj_list = defaultdict(list)
# Register every zone first: a zone with no neighbour within the threshold
# never appears in `res`, and would otherwise end up with no group and an
# empty colour string on the map.
for zone_idx in range(len(zones)):
    adj_list[zone_idx] = []
for x, y in res:
    adj_list[x].append(y)
    adj_list[y].append(x)

# Connected components of the graph: each one becomes a group of zones.
result = defaultdict(list)
visited = set()
for vertex in list(adj_list):
    if vertex not in visited:
        dfs(adj_list, visited, vertex, result, vertex)

groups = list(result.values())
n_colors = len(groups)
# One colour per group, evenly spaced over the rainbow colormap.
cmap = cm.rainbow(np.linspace(0, 1, n_colors))

# Build both columns as plain lists and assign them in one go, rather than
# writing ints into a column that was initialised with empty strings.
# Zone indices are used positionally, matching the distance-matrix order.
group_of_zone = [None] * len(zones)
color_of_zone = [None] * len(zones)
for group, members in enumerate(groups):
    print('Group ',group, ' :',members)
    group_color = matplotlib.colors.rgb2hex(cmap[group])
    for member in members:
        group_of_zone[member] = group
        color_of_zone[member] = group_color
zones["Group"] = group_of_zone
zones["color"] = color_of_zone

print(zones)

# Draw every zone at its original Lat/Lon, coloured by group.
mapPlot = folium.Map(location=[43.23450, -2.34100], zoom_start=12)
for lon, lat, c in zip(zones['Lon'], zones['Lat'], zones['color']):
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        fill_color=c,
        color=c,
        fill=True,
        fill_opacity=0.7,
    ).add_to(mapPlot)
# Last expression of the notebook cell: renders the map inline.
mapPlot



Aggregated points based on distance:
       Lat    Lon                   geometry  A
0  43.1600 -2.190  POINT (-2.19000 43.16000)  1
1  43.1910 -2.190  POINT (-2.19000 43.19100)  2
2  43.2045 -2.345  POINT (-2.34500 43.20450)  3
3  43.2050 -2.305  POINT (-2.30500 43.20500)  1
4  43.2345 -2.341  POINT (-2.34100 43.23450)  1
5  43.2480 -2.364  POINT (-2.36400 43.24800)  2
6  43.2520 -2.234  POINT (-2.23400 43.25200)  3
7  43.2540 -2.254  POINT (-2.25400 43.25400)  2
Distance matrix:
              0             1             2             3             4  \
0      0.000000   4732.052806  18543.748359  14528.594102  20297.035606   
1   4732.052806      0.000000  17377.230717  12979.018163  18074.731580   
2  18543.748359  17377.230717      0.000000   4453.432327   4604.291043   
3  14528.594102  12979.018163   4453.432327      0.000000   6030.526200   
4  20297.035606  18074.731580   4604.291043   6030.526200      0.000000   
5  23575.267894  21236.654209   6974.122712   9289.370961   3288