## Library imports

In [None]:
import os
import sys

# Get the current working directory (expected to be the `maps` folder)
current_dir = os.getcwd()

# Get the parent directory of `maps` (which is `src`)
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Add `src` to the module search path (only once, so re-running the cell is harmless)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
import seaborn as sns
import matplotlib.pyplot as plt

from python_scripts.ihm.plots import plot_graph
from python_scripts.neighbours_criteria.simple_criteria import distance_criterion, quadrant_criterion, angle_criterion
from python_scripts.neighbours_criteria.enhanced_criteria import distance_criterion_enhanced, quadrant_criterion_enhanced, angle_criterion_enhanced
from python_scripts.graphs.graphs_creation import delaunay_graph
from python_scripts.miscellaneaous.data_processing import extract_data
from python_scripts.city.city_utils import city_detection_enhanced, mean_distance_to_NN

In [None]:
# Path to where you want the outputs to be saved.
# Keep the trailing slash: output file names are appended to it by plain string concatenation.
out_directory = "../../out/"

In [None]:
# Parameters keyed by mean-distance interval label (intervals expressed in km).
# Each entry holds a hex 'colour' together with an 'angle' and a 'distance' value.
# NOTE(review): not referenced in the cells visible here; presumably plot colours and
# thresholds for the neighbour criteria imported above — units of 'angle'/'distance' to be confirmed.
mean_distance_params = {
    ']0, 1] km': {'colour': '#030464', 'angle': 40, 'distance': 2},
    ']1, 2] km': {'colour': '#069AF3', 'angle': 30, 'distance': 5},
    ']2, 4] km': {'colour': '#02D4BB', 'angle': 25, 'distance': 10},
    ']4, inf] km': {'colour': '#0DBF75', 'angle': 15, 'distance': 15},
}

## Database import and data extraction

We will focus on Orange 4G base stations. No region filter is applied in the cells below: every department listed in `data_popArea.csv` is processed.

In [None]:
# Load the base-station database (df) and the per-department table (df_dep), which provides
# the 'nom_dep', 'Population', 'Superficie [en km²]' and 'Densite [hab. /km²]' columns used below.
# Both files are semicolon-separated and use a comma as decimal mark.
df = pd.read_csv("../../database/data.csv", sep=";", decimal=",")
df_dep = pd.read_csv("../../database/data_popArea.csv", sep=";", decimal=",")

In [None]:
# Keep the sites of the 'Orange' provider only.
# NOTE(review): `min_info=True` presumably limits the output to the essential columns — confirm in extract_data.
df_extracted = extract_data(df, provider='Orange', min_info=True)
# Preview the extracted sites
df_extracted.head()

In [None]:
# Per-site value computed from the (x, y) coordinates with 3 neighbours; presumably the mean
# distance to those neighbours — confirm in mean_distance_to_NN.
# The result is used below as a label-indexed object (label lookup and `.index`).
mean_distances = mean_distance_to_NN(df_extracted[['x', 'y']], n_neighbours=3) # 3 to have more neighbours

In [None]:
# Attach each site's mean nearest-neighbour distance as the 'mean_dist' column.
# One index-aligned assignment replaces the row-by-row `.loc` writes, which are
# very slow on large frames; values are matched on the index labels as before.
df_extracted['mean_dist'] = mean_distances

In [None]:
# A site is flagged as countryside when its mean nearest-neighbour distance is
# strictly above this threshold (same unit as `mean_distances`).
COUNTRYSIDE_MEAN_DIST_THRESHOLD = 3

# 1 = countryside, 0 = city. A single vectorised, index-aligned comparison replaces
# the per-site `.loc` writes.
df_extracted['countryside'] = (mean_distances > COUNTRYSIDE_MEAN_DIST_THRESHOLD).astype(int)

# Fail loudly if a site has no entry in `mean_distances` (NaN cannot be cast to int).
df_extracted['countryside'] = df_extracted['countryside'].astype(int)

In [None]:
# Preview the table with the added 'mean_dist' and 'countryside' columns
df_extracted.head()

In [None]:
# Empty result table: department name plus the mean nearest-neighbour distance
# for city sites, countryside sites and all sites (filled in by the cells below).
df_meanDistance_dep = pd.DataFrame(columns=['nom_dep', 'city', 'countryside', 'total'])
# df_meanDistance_dep = pd.DataFrame(columns=['nom_dep', 'total'])

In [None]:
# One row per department listed in df_dep (the still-empty frame adopts df_dep's index)
df_meanDistance_dep['nom_dep']=df_dep['nom_dep']

In [None]:
def distance_to_NN(coordsXY: pd.DataFrame, n_neighbours: int = 4) -> pd.Series:
    """Return, for every point, the mean distance to its nearest neighbours.

    Parameters
    ----------
    coordsXY : pd.DataFrame
        Point coordinates, one row per point. Its index is reused for the result,
        so a plain list is not accepted.
        NOTE(review): coordinates are presumably in metres, since the distances
        are divided by 1000 — confirm against the data source.
    n_neighbours : int, default 4
        Number of neighbours to average over, the point itself excluded.

    Returns
    -------
    pd.Series
        Mean Euclidean distance to the ``n_neighbours`` closest points, divided
        by 1000, indexed like ``coordsXY``.

    Raises
    ------
    ValueError
        Raised by scikit-learn when ``coordsXY`` holds fewer than
        ``n_neighbours + 1`` points.
    """
    # Query one extra neighbour: every point comes back as its own closest match.
    knn = NearestNeighbors(n_neighbors=n_neighbours + 1, metric='euclidean').fit(coordsXY)
    neighbour_distances, _ = knn.kneighbors(coordsXY)

    # Drop the first column (distance of each point to itself, i.e. 0) before averaging.
    per_point_mean = np.mean(neighbour_distances[:, 1:] / 1000, axis=1)

    return pd.Series(data=per_point_mean, index=coordsXY.index)

In [None]:
# Neighbour counts used for the per-class (city / countryside) and whole-department averages
N_NEIGHBOURS_PER_CLASS = 10
N_NEIGHBOURS_TOTAL = 4
# Placeholder stored when a group has too few sites for the requested neighbour count.
# NOTE(review): this -1 ends up in the result table and therefore in any statistic
# computed from the 'city' / 'countryside' columns.
NOT_ENOUGH_SITES = -1.0


def _rounded_mean_nn_distance(df_sites, n_neighbours):
    """Return the mean of `distance_to_NN` over `df_sites`, rounded to 5 decimals.

    `NOT_ENOUGH_SITES` is returned when the group is too small to give every
    site `n_neighbours` neighbours.
    """
    # distance_to_NN queries n_neighbours + 1 points (each site is its own first
    # neighbour), so strictly more than n_neighbours sites are required. The former
    # `len >= n` guard let groups of exactly n sites through and crashed.
    if len(df_sites) <= n_neighbours:
        return NOT_ENOUGH_SITES
    site_means = distance_to_NN(df_sites[['x', 'y']], n_neighbours=n_neighbours)
    return np.round(site_means.mean(), decimals=5)


# Class masks do not depend on the department: build them once.
is_city = df_extracted['countryside'] == 0
is_countryside = df_extracted['countryside'] == 1

for dep in df_dep['nom_dep']:
    in_dep = df_extracted['nom_dep'] == dep
    dep_row = df_meanDistance_dep['nom_dep'] == dep

    df_meanDistance_dep.loc[dep_row, 'city'] = _rounded_mean_nn_distance(
        df_extracted.loc[in_dep & is_city], N_NEIGHBOURS_PER_CLASS
    )
    df_meanDistance_dep.loc[dep_row, 'countryside'] = _rounded_mean_nn_distance(
        df_extracted.loc[in_dep & is_countryside], N_NEIGHBOURS_PER_CLASS
    )
    # The whole-department value is now guarded too: a department with too few
    # sites gets the placeholder instead of aborting the loop.
    df_meanDistance_dep.loc[dep_row, 'total'] = _rounded_mean_nn_distance(
        df_extracted.loc[in_dep], N_NEIGHBOURS_TOTAL
    )

# The columns were created empty (object dtype); make them numeric.
df_meanDistance_dep[['city','countryside','total']] = df_meanDistance_dep[['city','countryside','total']].astype(float)

In [None]:
# for dep in df_extracted['nom_dep'].unique():
#     dep_distance_city = np.round(df_extracted.loc[((df_extracted['nom_dep']==dep) & (df_extracted['countryside']==0)), 'mean_dist'].mean(), decimals=5)
#     dep_distance_coun = np.round(df_extracted.loc[((df_extracted['nom_dep']==dep) & (df_extracted['countryside']==1)), 'mean_dist'].mean(), decimals=5)
#     dep_distance = np.round(df_extracted.loc[(df_extracted['nom_dep']==dep), 'mean_dist'].mean(), decimals=5)

#     df_new_row = pd.DataFrame(data=np.array([[dep,dep_distance_city,dep_distance_coun,dep_distance]]), columns=['nom_dep','city','countryside','total'])
#     df_meanDistance_dep = pd.concat([df_meanDistance_dep, df_new_row], ignore_index=True)
# df_meanDistance_dep[['city','countryside','total']] = df_meanDistance_dep[['city','countryside','total']].astype(float)

In [None]:
# Copy the demographic columns of df_dep right after 'nom_dep'. Values are matched on
# the index, which both frames share, so no per-department lookup is needed.
demographic_cols = ['Population', 'Superficie [en km²]', 'Densite [hab. /km²]']

# Inserting at position 1 in reverse order leaves the columns in the order listed above.
for col in reversed(demographic_cols):
    df_meanDistance_dep.insert(1, col, df_dep[col])

# Store the three columns as integers (any fractional part is truncated).
df_meanDistance_dep[demographic_cols] = df_meanDistance_dep[demographic_cols].astype(int)

In [None]:
# Preview the per-department table
df_meanDistance_dep.head()

In [None]:
# Sorted list of the departments that appear in the extracted sites
departments = list(np.unique(df_extracted["nom_dep"]))
nb_dep = len(departments)

# Number of extracted sites per department, counted in a single pass
# (previously the full site list was rescanned once per department).
sites_per_dep = df_extracted["nom_dep"].value_counts()

# The 'Total' column is built directly from the counts instead of being
# pre-filled with the `int` type object as a placeholder.
df_proPerDep = pd.DataFrame({
    "nom_dep": departments,
    "Total": [int(sites_per_dep[dep]) for dep in departments],
})

### ... Bidouillage

In [None]:
# 'normalized' = overall mean distance divided by the department area, times 100.
# (An alternative denominator, site count x area, was tried earlier and dropped.)
for dep in df_meanDistance_dep['nom_dep']:
    dep_row = df_meanDistance_dep['nom_dep'] == dep

    total_dist = df_meanDistance_dep.loc[dep_row, 'total'].astype(float).values
    dep_area = df_dep.loc[df_dep['nom_dep'] == dep, 'Superficie [en km²]'].astype(int).values

    # Both arrays hold one value per matching row; only the first is used.
    ratio = total_dist / dep_area
    df_meanDistance_dep.loc[dep_row, 'normalized'] = np.round(ratio[0] * 100, decimals=5)

In [None]:
# Display the table ordered by 'total' (a sorted copy is shown; the frame itself is not modified)
df_meanDistance_dep.sort_values(by=['total'])

In [None]:
# Export the table, ordered by 'normalized', as a Markdown file.
# The context manager closes the file even if `to_markdown` raises, and UTF-8 is set
# explicitly because the column headers contain non-ASCII characters (e.g. "km²").
with open(out_directory + "meanDistDeptClas.md", "w", encoding="utf-8") as results:
    results.write(df_meanDistance_dep.sort_values(by=['normalized']).to_markdown(index=False))

In [None]:
# Correlation coefficient between the overall mean distance ('total')
# and each demographic column, printed one per line.
for label, column in (
    ("Superficie", 'Superficie [en km²]'),
    ("Population", 'Population'),
    ("Densite", 'Densite [hab. /km²]'),
):
    coefficient = np.corrcoef(df_meanDistance_dep['total'], df_meanDistance_dep[column])[0, 1]
    print(label + " : " + str(coefficient))

In [None]:
# Correlation coefficient between the countryside mean distance
# and each demographic column, printed one per line.
for label, column in (
    ("Superficie", 'Superficie [en km²]'),
    ("Population", 'Population'),
    ("Densite", 'Densite [hab. /km²]'),
):
    coefficient = np.corrcoef(df_meanDistance_dep['countryside'], df_meanDistance_dep[column])[0, 1]
    print(label + " : " + str(coefficient))

In [None]:
# Correlation coefficient between the city mean distance
# and each demographic column, printed one per line.
for label, column in (
    ("Superficie", 'Superficie [en km²]'),
    ("Population", 'Population'),
    ("Densite", 'Densite [hab. /km²]'),
):
    coefficient = np.corrcoef(df_meanDistance_dep['city'], df_meanDistance_dep[column])[0, 1]
    print(label + " : " + str(coefficient))

## Experimentation on only 4 departments
Mountains, Flat/Sea, Flat/City, Flat/Countryside

In [None]:
# Restrict the table to the four departments studied in this section
selected_deps = ['Yvelines', 'Loire-Atlantique', 'Haute-Savoie', 'Indre-et-Loire']
df_tmp = df_meanDistance_dep.loc[df_meanDistance_dep['nom_dep'].isin(selected_deps)]

In [None]:
def pretty_barPlot(df, x_col, y_col, **kwargs):
    """Draw a seaborn bar plot of `y_col` against `x_col` and optionally save it.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot.
    x_col : str
        Column shown on the x-axis (one bar per value).
    y_col : str
        Column giving the bar heights.
    **kwargs
        save_as : str, optional
            File name without extension. When given, the figure is written to
            ``out_directory + save_as + ".png"``.
        y_label : str, optional
            Label of the y-axis. Defaults to ``'distance [km]'``; pass another
            label when `y_col` is not a distance.
    """
    plot_title = kwargs.get('save_as', None)
    y_label = kwargs.get('y_label', 'distance [km]')

    # set_theme() resets the plotting context to its own default, so a separate
    # set_context('paper') call placed before it has no effect. Passing the context
    # here is what actually applies the 'paper' tick-label size.
    sns.set_theme(context='paper', style="whitegrid")  # tick-label size + background style
    g = sns.catplot(
        data=df,
        kind='bar',
        x=x_col,
        y=y_col,
    )
    # Slightly rotate the category names so they do not overlap
    g.tick_params(axis='x', rotation=15)
    plt.ylabel(y_label)

    if plot_title:
        plt.savefig(out_directory + plot_title + ".png", dpi=100)

In [None]:
# Overall mean distance ('total') for the selected departments
pretty_barPlot(df_tmp,'nom_dep','total')

In [None]:
# Countryside mean distance for the selected departments
pretty_barPlot(df_tmp,'nom_dep','countryside')

In [None]:
# Demographic column for the selected departments; candidates are
# 'Population', 'Superficie [en km²]' and 'Densite [hab. /km²]'.
# NOTE(review): without an explicit label, pretty_barPlot titles the y-axis 'distance [km]',
# which does not match a demographic column.
pretty_barPlot(df_tmp,'nom_dep','Densite [hab. /km²]')