In [None]:
import os
import sys

# Current working directory of the notebook kernel.
current_dir = os.getcwd()

# Parent of the working directory. Per the original note the notebook lives in
# `maps`, whose parent is `src` -- TODO confirm if the notebook is moved.
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Register that folder on the module search path, only once, so re-running
# the cell does not append duplicates.
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

In [None]:
import pandas as pd
import numpy as np
import folium.features
from sklearn.cluster import DBSCAN
from sklearn.cluster import HDBSCAN
from sklearn.cluster import OPTICS

from useful_methods.data_processing import extract_data
from useful_methods.neighbours_delaunay.graphs import delaunay_graph
from useful_methods.neighbours_delaunay.miscellaneous_for_neighbouring import mean_distance_choice
from useful_methods.ihm.maps.mapUtils import *

In [None]:
# Load the raw dataset: fields are separated by ';' and numbers use ',' as
# the decimal mark.
df = pd.read_csv(
    "../../database/data.csv",
    sep=";",
    decimal=",",
)

In [None]:
# Select the rows to study with the project helper `extract_data`.
# NOTE(review): the original line kept an optional `region='Normandie'`
# argument commented out (alternatives noted: Occitanie, Île-de-France).
df_extracted = extract_data(
    df,
    provider='Orange',
    techno='4g',
)

# `delaunay_graph` returns the graph `G` and `pos` -- presumably the node
# positions; verify against the helper's definition.
G, pos = delaunay_graph(df_extracted)

In [None]:
from sklearn.neighbors import NearestNeighbors # type: ignore

def mean_distance_to_NN(coordsXY: "pd.DataFrame | np.ndarray | list", n_neighbours: int = 4) -> pd.Series:
    """ Computes, for every point, the mean distance to its n_neighbours nearest neighbours.

        Parameters
        ----------
        coordsXY : pd.DataFrame, np.ndarray or list
            [x, y] coordinates of all points (lambert-93 projection), one row per point.
        n_neighbours : int (default=4)
            Number of nearest neighbours, the point itself excluded. Must be >= 1.

        Returns
        -------
        mean_distances : pd.Series
            Mean euclidean distance from each point to its nearest neighbours, divided by 1000
            (i.e. kilometres when the coordinates are in metres). The Series is indexed like
            `coordsXY` when it is a pandas object, otherwise by position (0..n-1).

        Raises
        ------
        ValueError
            If `n_neighbours` is lower than 1. scikit-learn is also expected to raise when
            fewer than `n_neighbours + 1` points are supplied -- TODO confirm.
    """
    if n_neighbours < 1:
        # With no neighbour column left, the mean below would silently be NaN.
        raise ValueError(f"n_neighbours must be >= 1, got {n_neighbours}")

    # Only pandas objects carry row labels; on a list/tuple `.index` is the
    # built-in search method and must not be used as a Series index.
    if isinstance(coordsXY, (pd.DataFrame, pd.Series)):
        labels = coordsXY.index
    else:
        labels = None

    # One extra neighbour is requested because each point is returned as its
    # own nearest neighbour (distance 0).
    knn = NearestNeighbors(n_neighbors=n_neighbours + 1, metric='euclidean').fit(coordsXY)
    distances, _ = knn.kneighbors(coordsXY)

    # Drop the first column (distance to itself) and rescale by 1/1000.
    mean_distances = np.mean(distances[:, 1:] / 1000, axis=1)

    return pd.Series(data=mean_distances, index=labels)

In [None]:
# Mean distance from each selected station to its 3 nearest neighbours.
# NOTE(review): the original comment said "3 to have more neighbours", yet 3
# is below the function's default of 4 -- confirm the intended value.
mean_distances = mean_distance_to_NN(
    df_extracted.loc[:, ['x', 'y']],
    n_neighbours=3,
)

In [None]:
# Per-bucket parameters keyed by mean-distance interval label. The map cell
# passes this dict to `mean_distance_choice` to look up the 'colour' entry;
# the meaning of 'angle' and 'distance' is not visible here -- TODO document.
mean_distance_params = {
    bucket: {'colour': colour, 'angle': angle, 'distance': distance}
    for bucket, colour, angle, distance in (
        (']0, 1] km', '#030464', 40, 2),
        (']1, 2] km', '#069AF3', 30, 5),
        (']2, 4] km', '#02D4BB', 25, 10),
        (']4, inf] km', '#0DBF75', 15, 15),
    )
}

In [None]:
# Labels of the stations whose mean nearest-neighbour distance is strictly
# above 2 (the values in `mean_distances` were divided by 1000 when computed).
# A single boolean selection replaces the per-station Python loop; the result
# stays a plain list because later cells use it as an index and with `.loc`.
countryside = mean_distances.loc[mean_distances > 2].index.tolist()

In [None]:
# (x, y) coordinates of the countryside stations only: the shared input of
# the three clusterings below.
countryside_xy = df_extracted.loc[countryside, ['x', 'y']]

# Each result is a Series of cluster labels indexed by station label.
# The 4500 radius is in coordinate units (presumably metres -- TODO confirm).
clust_dbscan = pd.Series(
    DBSCAN(eps=4500, min_samples=4).fit(countryside_xy).labels_,
    index=countryside,
)
clust_hdbscan = pd.Series(
    HDBSCAN(cluster_selection_epsilon=4500, min_cluster_size=2, min_samples=2, alpha=75).fit(countryside_xy).labels_,
    index=countryside,
)
clust_optics = pd.Series(
    OPTICS(max_eps=4500, min_samples=4).fit(countryside_xy).labels_,
    index=countryside,
)

In [None]:
# Relabel as noise (-1) every cluster that has 10 members or fewer, in place,
# for each of the three clusterings.
for clustering in (clust_dbscan, clust_hdbscan, clust_optics):
    # Size of the cluster each station belongs to, obtained with one
    # value_counts pass instead of one full scan of the labels per cluster.
    cluster_sizes = clustering.map(clustering.value_counts())
    too_small = (clustering != -1) & (cluster_sizes <= 10)
    clustering.loc[too_small] = -1

In [None]:
# Merge the three clusterings into one label per countryside station.
# Which algorithm placed the station in a cluster (label != -1)?
in_hdbscan = (clust_hdbscan != -1).to_numpy()
in_optics = (clust_optics != -1).to_numpy()
in_dbscan = (clust_dbscan != -1).to_numpy()
in_all_three = in_hdbscan & in_optics & in_dbscan

# Same priority as the original if/elif chain, written out explicitly:
#   1. HDBSCAN label when HDBSCAN clustered the station but at least one of
#      the other two algorithms did not;
#   2. otherwise the OPTICS label when OPTICS clustered it (this includes the
#      case where all three algorithms agree it is clustered);
#   3. otherwise the DBSCAN label when DBSCAN clustered it;
#   4. otherwise -1 (noise for every algorithm).
# The result is built as an integer Series: creating an empty Series without
# a dtype and filling it element by element stored the labels as floats.
# NOTE(review): labels come from three independent fits, so equal numbers
# taken from different algorithms do not denote the same cluster.
clust = pd.Series(
    np.select(
        [in_hdbscan & ~in_all_three, in_optics, in_dbscan],
        [clust_hdbscan.to_numpy(), clust_optics.to_numpy(), clust_dbscan.to_numpy()],
        default=-1,
    ),
    index=countryside,
)

In [None]:
# Display the stations whose merged cluster label equals 1.
clust[clust == 1]

In [None]:
# Centre the map on the mean latitude/longitude of the selected stations.
# A plain [lat, lon] list is passed instead of a label-indexed Series, so that
# positional access to the two coordinates does not rely on pandas' deprecated
# integer-key fallback.
map_centre = df_extracted[['latitude', 'longitude']].mean(axis=0).tolist()

# NOTE(review): `map` shadows the Python builtin; the name is kept because
# other cells may rely on it.
map = folium.Map(location=map_centre, zoom_start=7, tiles="Cartodb Positron")
points = folium.FeatureGroup(f"Points ({len(G)})").add_to(map)

# Set of countryside labels: constant-time membership test per station instead
# of scanning the list once for every row.
countryside_ids = set(countryside)

for bs_id, latitude, longitude in df_extracted[['latitude', 'longitude']].itertuples():
    mean_dist = mean_distances.get(bs_id)
    if bs_id in countryside_ids:
        # Countryside station: gray when unclustered (-1), magenta otherwise.
        label = clust[bs_id]
        color = 'gray' if label == -1 else 'magenta'
        popup = f"{bs_id}\nclust: {label}\nmean_dist: {mean_dist}"
    else:
        # Other stations: colour looked up from the mean-distance parameters.
        color = mean_distance_choice(bs_id, mean_distances, mean_distance_params, 'colour')
        popup = f"{bs_id}\nmean_dist: {mean_dist}"
    points.add_child(folium.CircleMarker(location=[latitude, longitude], color=color, radius=1, popup=popup))

folium.LayerControl().add_to(map)

map.save("../../out/maps/city_test_hdbscan-optics-dbscan_imp.html")