In [None]:
import os
import sys

# Directory the notebook is currently running from.
current_dir = os.getcwd()

# One level up: per the original note, this is the parent of `maps`, i.e. `src`.
parent_dir = os.path.abspath(
    os.path.join(current_dir, os.pardir)
)

# Make that directory importable, registering it on the module search path only once.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
import pandas as pd
import numpy as np
import folium.features
from sklearn.cluster import DBSCAN
from sklearn.cluster import HDBSCAN
from sklearn.cluster import OPTICS
from sklearn.linear_model import LinearRegression 
from sklearn.preprocessing import PolynomialFeatures
from pyproj import Transformer
import seaborn as sns
import branca

from road_utils import *
from city_utils import plotMapWithColors
from useful_methods.data_processing import extract_data
from useful_methods.neighbours_delaunay.graphs import delaunay_graph
from useful_methods.neighbours_delaunay.miscellaneous_for_neighbouring import mean_distance_choice
from useful_methods.ihm.maps.mapUtils import *

In [None]:
# Load the raw dataset: fields are separated by ';' and numbers use a decimal comma.
df = pd.read_csv(
    "../../database/data.csv",
    sep=";",
    decimal=",",
)

In [None]:
# Preview the two planar coordinate columns of the raw data.
df.loc[:, ['x', 'y']]

In [None]:
# Restrict the data to provider 'Orange' and techno '4g'.
# A `region=...` filter can also be passed (values tried so far: 'Normandie',
# 'Occitanie', 'Île-de-France'); it is currently disabled.
df_extracted = extract_data(
    df,
    provider='Orange',
    techno='4g',
)

# Build the Delaunay graph of the extracted rows, together with the node positions.
G, pos = delaunay_graph(df_extracted)

In [None]:
from sklearn.neighbors import NearestNeighbors # type: ignore

def mean_distance_to_NN(coordsXY: pd.DataFrame, n_neighbours: int = 4) -> pd.Series:
    """ Computes the mean distance from each point to its n_neighbours nearest neighbours.

        Parameters
        ----------
        coordsXY : pd.DataFrame or array-like of shape (n_points, 2)
            [x, y] coordinates of all points (lambert-93 projection).
            When a pandas object is given, its index is re-used for the result;
            for a plain list or array the result gets a default integer index.
        n_neighbours : int (default=4)
            Number of nearest neighbours.

        Returns
        -------
        mean_distances : pd.Series
            Mean distance of every point to its `n_neighbours` nearest neighbours,
            i.e. the raw euclidean distances divided by 1000 (kilometres if the
            input coordinates are expressed in metres).
    """
    # One extra neighbour is requested because every point is returned as its
    # own closest neighbour (distance 0).
    nbrs = NearestNeighbors(n_neighbors=n_neighbours + 1, metric='euclidean').fit(coordsXY)
    distances, _ = nbrs.kneighbors(coordsXY)

    # Drop the first column (distance of the point to itself), rescale by 1/1000
    # and average over the remaining neighbours.
    mean_distances = np.mean(distances[:, 1:] / 1000, axis=1)

    # Only pandas objects carry a usable index. A plain list exposes `.index` as a
    # method, so the attribute cannot be read blindly.
    if isinstance(coordsXY, (pd.DataFrame, pd.Series)):
        result_index = coordsXY.index
    else:
        result_index = None

    return pd.Series(data=mean_distances, index=result_index)

In [None]:
# Mean distance from every extracted station to its 3 nearest neighbours,
# computed on the planar (x, y) coordinates.
mean_distances = mean_distance_to_NN(
    df_extracted.loc[:, ['x', 'y']],
    n_neighbours=3,
)

In [None]:
# Display parameters per mean-distance class, written as one row per class:
# (class label, colour, angle, distance).
mean_distance_params = {
    label: {'colour': colour, 'angle': angle, 'distance': distance}
    for label, colour, angle, distance in (
        (']0, 1] km', '#030464', 40, 2),
        (']1, 2] km', '#069AF3', 30, 5),
        (']2, 4] km', '#02D4BB', 25, 10),
        (']4, inf] km', '#0DBF75', 15, 15),
    )
}

In [None]:
# Stations whose mean distance to their nearest neighbours is strictly above 2
# are treated as "countryside" stations.
countryside = [
    station
    for station, mean_dist in mean_distances.items()
    if mean_dist > 2
]

In [None]:
# Cluster the countryside stations with each of the three algorithms, in the
# order DBSCAN, HDBSCAN, OPTICS.
clust_dbscan, clust_hdbscan, clust_optics = (
    cluster_stations(df_extracted, countryside)
    for cluster_stations in (
        road_get_clust_dbscan,
        road_get_clust_hdbscan,
        road_get_clust_optics,
    )
)


In [None]:
def rgb_to_hex(rgb):
    """Convert an (r, g, b) triple of floats in [0, 1] to a '#rrggbb' string.

    Each channel is scaled by 255 and truncated to an integer before being
    formatted as two lowercase hexadecimal digits.
    """
    return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))

def labelToColor(clustId, clusters, palette):
    """Return the display colour of a cluster label.

    Parameters
    ----------
    clustId : int
        Label to colour; -1 denotes noise.
    clusters : sequence of int
        All distinct labels, possibly including -1, in any order.
    palette : sequence of (r, g, b)
        One colour per non-noise label, floats in [0, 1].

    Returns
    -------
    str
        'gray' for noise, otherwise the '#rrggbb' colour of the palette entry
        at the label's rank among the non-noise labels.

    Raises
    ------
    ValueError
        If `clustId` is not -1 and is absent from `clusters`.
    """
    if clustId == -1:
        return 'gray'

    # Rank the label among the non-noise labels only. Subtracting 1 from its
    # position in the full list is wrong whenever -1 appears after it: the first
    # label would get index -1 and share the last palette colour.
    real_clusters = [label for label in clusters if label != -1]
    return rgb_to_hex(palette[real_clusters.index(clustId)])

In [None]:
# Distinct HDBSCAN labels; -1 is the noise label and does not count as a cluster.
clusters_hdbscan = clust_hdbscan.unique()
has_noise_hdbscan = -1 in clusters_hdbscan
num_clusters_hdbscan = len(clusters_hdbscan) - int(has_noise_hdbscan)

# Split the clusters into road / non-road candidates and retrieve the fitted
# linear models with their x bounds.
(
    excluded_clusters_hdbscan,
    not_exluded_clusters_hdbscan,
    linearModels,
    xBounds,
) = detect_roads_based_on_clusters(clust_hdbscan, df_extracted)

# One hue per real cluster; every station gets the colour of its label.
palette_hdbscan = sns.color_palette("hsv", num_clusters_hdbscan)
colors_hdbscan = clust_hdbscan.apply(
    labelToColor,
    args=(clusters_hdbscan, palette_hdbscan),
)

# Draw the map with one layer per group: roads, non-roads and noise.
mapTest = plotMapWithColorsAndLayers(
    df_extracted,
    countryside,
    colors_hdbscan,
    "test_hdbscan",
    linearModels,
    xBounds,
    clust_hdbscan,
    [not_exluded_clusters_hdbscan, excluded_clusters_hdbscan, [-1]],
    layersLabel=["roads", "non-roads", "noise"],
    mapName="OpenStreetMap",
)

In [None]:
def _colours_from_labels(labels, index):
    """Map cluster labels to hex colours ('gray' for the noise label -1).

    The labels are used directly as palette positions, so the palette is sized
    from the highest label. The previous `len(np.unique(labels)-1)` subtracted 1
    from every label instead of from the count, so the palette size included
    the noise label.

    Returns the colour Series (indexed by `index`) and the palette used.
    """
    highest_label = int(np.max(labels)) if len(labels) else -1
    palette = sns.color_palette("hsv", max(highest_label + 1, 1))
    colours = pd.Series(
        [rgb_to_hex(palette[label]) if label != -1 else 'gray' for label in labels],
        index=index,
    )
    return colours, palette


# One map per clustering algorithm, all drawn on the countryside stations.
colors_dbscan, palette = _colours_from_labels(clust_dbscan, countryside)
map1 = plotMapWithColors(df_extracted, countryside, colors_dbscan, "clusters_road_detection_dbscan")

colors_hdbscan, palette = _colours_from_labels(clust_hdbscan, countryside)
map2 = plotMapWithColors(df_extracted, countryside, colors_hdbscan, "clusters_road_detection_hdbscan")

colors_optics, palette = _colours_from_labels(clust_optics, countryside)
map3 = plotMapWithColors(df_extracted, countryside, colors_optics, "clusters_road_detection_optics")


In [None]:
# Gather the DBSCAN, HDBSCAN and OPTICS maps in a single figure, using subplot
# positions 1, 2 and 4 of a 2 x 2 layout, then export it as one HTML page.
fig = branca.element.Figure()
# The loop variable is deliberately not called `map`: that would shadow the
# builtin `map` for every cell executed afterwards.
for i, cluster_map in zip([1,2,4], [map1, map2, map3]):
    subplot = fig.add_subplot(2, 2, i)
    subplot.add_child(cluster_map)
fig.save("../../out/maps/clusters_road_detection.html")

In [None]:
# Clusters with at most this many stations are relabelled as noise (-1).
MAX_DISCARDED_CLUSTER_SIZE = 10

# The three label Series are modified in place, since later cells read them
# under the same names.
for clustering in (clust_dbscan, clust_hdbscan, clust_optics):
    # Count every non-noise cluster once, instead of rescanning the whole
    # Series for each cluster.
    cluster_sizes = clustering[clustering != -1].value_counts()
    too_small = cluster_sizes[cluster_sizes <= MAX_DISCARDED_CLUSTER_SIZE].index
    clustering.loc[clustering.isin(too_small)] = -1

In [None]:
def _merged_label(hdbscan_label, optics_label, dbscan_label):
    """Pick one label for a station from the three clusterings (-1 = noise).

    When at least one algorithm calls the station noise, the first non-noise
    label wins, in the order HDBSCAN, OPTICS, DBSCAN. When all three agree
    (all noise or all clustered), the OPTICS label is returned.
    """
    if hdbscan_label != -1 and (optics_label == -1 or dbscan_label == -1):
        return hdbscan_label
    if optics_label != -1 and (hdbscan_label == -1 or dbscan_label == -1):
        return optics_label
    if dbscan_label != -1 and (hdbscan_label == -1 or optics_label == -1):
        return dbscan_label
    return optics_label


# Build the Series in one go from the selected labels. Starting from an empty
# `pd.Series(index=...)` creates a NaN-filled Series, which coerces the integer
# labels when they are written in one by one.
clust = pd.Series(
    [
        _merged_label(clust_hdbscan[bs_id], clust_optics[bs_id], clust_dbscan[bs_id])
        for bs_id in countryside
    ],
    index=countryside,
)

In [None]:
# Centre the map on the mean latitude / longitude of the extracted stations.
# The centre is passed as a plain [lat, lon] list rather than a label-indexed Series.
# NOTE(review): `map` shadows the Python builtin; the name is kept because other
# cells may refer to it. Consider renaming it throughout the notebook.
map = folium.Map(
    location=df_extracted[['latitude', 'longitude']].mean().tolist(),
    zoom_start=7,
    tiles="Cartodb Positron",
)
points = folium.FeatureGroup(f"Points ({len(G)})").add_to(map)

# Set lookup: testing membership against the `countryside` list would rescan it
# for every station.
countryside_ids = set(countryside)

for bs_id, latitude, longitude in df_extracted[['latitude', 'longitude']].itertuples():
    if bs_id in countryside_ids:
        # Countryside stations: magenta when they belong to a cluster, gray for noise.
        color = 'gray' if (clust[bs_id]==-1) else 'magenta'
        popup = f"{bs_id}\nclust: {clust[bs_id]}\nmean_dist: {mean_distances.get(bs_id)}"
    else:
        # Other stations are coloured according to their mean-distance class.
        color = mean_distance_choice(bs_id, mean_distances, mean_distance_params, 'colour')
        popup = f"{bs_id}\nmean_dist: {mean_distances.get(bs_id)}"
    points.add_child(folium.CircleMarker(location=[latitude, longitude], color=color, radius=1, popup=popup))

folium.LayerControl().add_to(map)

map.save("../../out/maps/city_test_hdbscan-optics-dbscan_imp.html")