In [None]:
import os
import sys

# Directory the kernel was started from (expected to be `maps`)
current_dir = os.getcwd()

# `src` sits one level above `maps`
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Register `src` on the module search path only if it is not there yet,
# so re-running this cell does not pile up duplicate entries
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)

In [None]:
import pandas as pd
import numpy as np
import folium.features
from sklearn.cluster import DBSCAN
from sklearn.cluster import HDBSCAN
from sklearn.cluster import OPTICS
from sklearn.linear_model import LinearRegression 
from sklearn.preprocessing import PolynomialFeatures
from pyproj import Transformer
import seaborn as sns
import networkx as nx
from scipy.spatial import Delaunay
from itertools import combinations
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import pyproj


from python_scripts.road_detection.road_utils import *
from python_scripts.miscellaneaous.data_processing import extract_data
from python_scripts.graphs.graphs_creation import delaunay_graph
from python_scripts.city.city_utils import plotMapWithColors, mean_distance_choice, mean_distance_to_NN
from python_scripts.ihm.maps.mapUtils import *
from python_scripts.neighbours_criteria.enhanced_criteria import km_distance
from python_scripts.neighbours_criteria.simple_criteria import distance_criterion, angle_criterion


# Detect cities

In [None]:
# Raw input table: semicolon-separated, with a comma as decimal mark
df = pd.read_csv(
    "../../database/data.csv",
    sep=";",
    decimal=',',
)

In [None]:
# Working subset for provider 'Orange' (see extract_data for the exact filtering).
# Extra keyword filters that have been tried here and are currently switched off:
#   techno='4g', region='Normandie' (also Occitanie, Île-de-France)
df_extracted = extract_data(
    df,
    provider='Orange',
)
# G, pos = delaunay_graph(df_extracted)

In [None]:
# Density-based clustering of the rows on their planar (x, y) coordinates.
# eps is expressed in the units of x / y (presumably metres — TODO confirm).
cityClustering = DBSCAN(eps=1000, min_samples=11).fit(df_extracted[['x', 'y']])
cityLabels = pd.Series(cityClustering.labels_, index=df_extracted.index)

# DBSCAN marks noise points with the label -1: remove them
noiseIndex = cityLabels.index[cityLabels == -1]
cityLabels = cityLabels.drop(noiseIndex)

# Remaining labels are the detected clusters
clusters = cityLabels.unique()
num_clusters = len(clusters)

In [None]:
# Number of clustered rows carrying each cluster label (indexed by label)
cityBsStationCount = cityLabels.value_counts()

In [None]:
def getCityCenters(df_extracted, clusters, cityBsStationCount, labels=None):
    """Compute the centre of every cluster as the mean (x, y) of its members.

    Parameters
    ----------
    df_extracted : pandas.DataFrame
        Table holding at least the columns 'x' and 'y'.
    clusters : iterable
        Cluster labels to compute a centre for; the result keeps this order.
    cityBsStationCount : mapping or pandas.Series
        Number of members per cluster label, used as the divisor of the
        coordinate sums.
    labels : pandas.Series, optional
        Cluster label of each member, indexed like ``df_extracted``. When
        omitted, the notebook-level ``cityLabels`` Series is used, which is
        what this function always relied on before the parameter existed.

    Returns
    -------
    dict
        ``{cluster label: numpy array [x_centre, y_centre]}``.
    """
    if labels is None:
        # Backward-compatible fallback for the existing three-argument calls
        labels = cityLabels

    # Sum the coordinates of all members of each cluster in one vectorised pass
    memberCoords = df_extracted.loc[labels.index, ['x', 'y']]
    coordSums = memberCoords.groupby(labels).sum()

    res = {}
    for cluster in clusters:
        if cluster in coordSums.index:
            total = coordSums.loc[cluster].to_numpy(dtype=float)
        else:
            # Requested cluster without any member: the sum is zero
            total = np.zeros(2)
        # Explicit array division instead of relying on `tuple / numpy scalar`
        res[cluster] = total / cityBsStationCount[cluster]
    return res

In [None]:
# Mean (x, y) position of the members of each cluster, keyed by cluster label
cityCenters = getCityCenters(df_extracted, clusters, cityBsStationCount)

In [None]:
# Compute cities names: each cluster is named after the commune ('nom_com')
# that appears most often among its members.
# (A variant that named a cluster after the commune of the row nearest to its
# centre, via NearestNeighbors, used to be sketched here and is not used.)
cityNames = {
    cityId: df_extracted['nom_com'].loc[cityLabels.index[cityLabels == cityId]].value_counts().index[0]
    for cityId in clusters
}

# Merge clusters that have the same names: every cluster sharing a name is
# relabelled with the smallest cluster id carrying that name.
keptIdByName = {}
for cityId, cityName in cityNames.items():
    keptIdByName[cityName] = min(cityId, keptIdByName.get(cityName, cityId))
mergedId = {cityId: keptIdByName[cityName] for cityId, cityName in cityNames.items()}

# Single relabelling pass; labels without an entry are left untouched
cityLabels = cityLabels.map(lambda label: mergedId.get(label, label))

# Only the surviving cluster ids keep their name
cityNames = {cityId: cityName for cityId, cityName in cityNames.items() if mergedId[cityId] == cityId}

# Refresh everything derived from the labels
clusters = list(cityLabels.unique())
num_clusters = len(clusters)
cityBsStationCount = cityLabels.value_counts()
cityCenters = getCityCenters(df_extracted, clusters, cityBsStationCount)

# Plot cities

In [None]:
def rgb_to_hex(rgb):
    """Turn an RGB triple into a ``#rrggbb`` string.

    The first three entries of ``rgb`` are each multiplied by 255 and
    truncated to an integer (no rounding) before being written as two
    lowercase hexadecimal digits. Channels are expected to lie in [0, 1];
    values outside that range are not clamped.
    """
    channels = [int(rgb[position] * 255) for position in range(3)]
    return '#' + ''.join(format(channel, '02x') for channel in channels)

def labelToColor(clustId, clusters, palette):
    """Return the hex colour associated with the cluster ``clustId``.

    The colour is the ``palette`` entry found at the position ``clustId``
    occupies in ``clusters``; a ``ValueError`` is raised by ``list.index``
    when ``clustId`` is not part of ``clusters``.
    """
    position = list(clusters).index(clustId)
    return rgb_to_hex(palette[position])

In [None]:
# Target system: WGS84 (EPSG:4326); source system: Lambert 93 (EPSG:2154)
wgs84 = pyproj.CRS("EPSG:4326")
lambert93 = pyproj.CRS("EPSG:2154")

# Lambert 93 -> WGS84 converter. With always_xy=True the axis order is
# (x, y) on input and (longitude, latitude) on output.
transformer = pyproj.Transformer.from_crs(
    lambert93,
    wgs84,
    always_xy=True,
)

In [None]:
# One colour per cluster, computed once and then spread over the rows
palette = sns.color_palette("hsv", num_clusters)
colorByCluster = {clustId: labelToColor(clustId, clusters, palette) for clustId in clusters}
colors = cityLabels.map(colorByCluster)

# NOTE: `map` shadows the Python builtin of the same name; the variable name is
# kept because other cells may refer to it.
mapCenter = np.mean(df_extracted[['latitude','longitude']], axis=0)
map = folium.Map(location=mapCenter, zoom_start=7, tiles="Cartodb Positron")
citiesLayer = folium.FeatureGroup(f"Cities").add_to(map)
cityCenterLayer = folium.FeatureGroup(f"Cities centers").add_to(map)

# One small dot per clustered row, coloured by cluster, with the city name as popup
clusteredCoords = df_extracted.loc[cityLabels.index, ['latitude', 'longitude']]
for bs_id, latitude, longitude in clusteredCoords.itertuples():
    dot = folium.CircleMarker(
        location=[latitude, longitude],
        color=colors[bs_id],
        radius=1,
        popup=cityNames.get(cityLabels[bs_id]),
    )
    citiesLayer.add_child(dot)

# One large black dot per cluster centre
for cityLabel in clusters:
    locationXY = cityCenters[cityLabel]
    # The transformer returns (longitude, latitude); folium expects (latitude, longitude)
    locationLl = transformer.transform(locationXY[0], locationXY[1])[::-1]
    dot = folium.CircleMarker(
        location=locationLl,
        color='black',
        fill_opacity=1,
        fill=True,
        radius=5,
        popup=cityNames.get(cityLabel),
    )
    cityCenterLayer.add_child(dot)

folium.LayerControl().add_to(map)

# Make sure the output folder exists so that saving works on a fresh checkout
outputPath = "../../out/maps/CitiesDetected.html"
os.makedirs(os.path.dirname(outputPath), exist_ok=True)
map.save(outputPath)

# Import city data

In [None]:
# Population table: semicolon-separated, with a comma as decimal mark
cities_infos = pd.read_csv(
    "../../database/data_population.csv",
    sep=";",
    decimal=',',
)

In [None]:
# Show the row(s) whose 'Commune' is exactly 'Marseille'
cities_infos[cities_infos['Commune'] == 'Marseille']

In [None]:
# Names of the 45 communes with the largest 'PMUN' value
pmunRanking = cities_infos['PMUN'].sort_values(ascending=False)
biggest_cities = cities_infos['Commune'].loc[pmunRanking.index[:45]].values

# Share of the detected clusters whose name is one of those communes
isBigCity = [cityNames.get(cluster) in biggest_cities for cluster in clusters]
np.sum(isBigCity) / num_clusters

In [None]:
# Population ('PMUN') of the commune each cluster is named after.
# A cluster whose name has no exact match in the population table is kept with
# a NaN population (and reported) instead of aborting the cell with an IndexError.
populations = {}
unmatchedNames = []
for cluster in clusters:
    cityName = cityNames.get(cluster)
    matchingPmun = cities_infos.loc[cities_infos['Commune'] == cityName, 'PMUN']
    if matchingPmun.empty:
        populations[cluster] = np.nan
        unmatchedNames.append(cityName)
    else:
        populations[cluster] = matchingPmun.values[0]

if unmatchedNames:
    print(f"No population found for: {unmatchedNames}")

# Same cluster order for every series below
cityIds = list(cityNames.keys())
stationCounts = cityBsStationCount.loc[cityIds].to_numpy(dtype=float)
cityPopulations = np.array([populations.get(key, np.nan) for key in cityIds], dtype=float)

# Correlation over the clusters whose population is known
hasPopulation = np.isfinite(cityPopulations)
print(f"Coefficient de corrélation : {np.corrcoef(stationCounts[hasPopulation], cityPopulations[hasPopulation])[0,1]}")

# Both series are expressed as a fraction of their own maximum
nbBaseStationScores = stationCounts / np.max(cityBsStationCount.values)
populationScores = cityPopulations / np.nanmax(cityPopulations)

fig, ax = plt.subplots(figsize=(15, 10))

X_axis = np.arange(len(cityIds))

# Two bars per city, side by side
ax.bar(X_axis + 0.2, nbBaseStationScores, 0.4, label='NbBaseStation')
ax.bar(X_axis - 0.2, populationScores, 0.4, label='Population')

ax.set_xticks(X_axis)
ax.set_xticklabels(list(cityNames.values()), rotation=90)
ax.set_xlabel("Cities detected")
ax.set_ylabel("% of the max")
ax.set_title("Population vs nb of base station")
ax.legend()

plt.show()

In [None]:
# Normalised station count divided by normalised population, per city name,
# listed from the highest ratio to the lowest
scoreByCity = pd.Series(nbBaseStationScores / populationScores, index=cityNames.values())
representation_score = scoreByCity.sort_values(ascending=False)

print(representation_score)
print(np.median(representation_score))