# Analyse des voies

In [1]:
import json
import pandas as pd

In [2]:
with open("../data/code_voie_reference.json", "r", encoding="utf-8") as file:
    code_nature_voie_reference = json.load(file)
    
with open("../data/type_voie_reference.json", "r", encoding="utf-8") as file:
    type_voie_reference = json.load(file)
    type_voie_reference = {int(k): v for k, v in type_voie_reference.items()}

In [3]:
df = pd.read_csv("../data/parsed_voies.csv", dtype={"code_departement": "str", "code_commune": "str"})
df.head()

Unnamed: 0,code_departement,code_direction,code_commune,identifiant_voie,cle_rivoli,code_nature_voie,libelle_voie,type_commune,caractere_rur,caractere_voie,caractere_population,population_a_part,population_fictive,caractere_annulation,date_annulation,date_annulation.1,code_identifiant_majic,type_voie,caractere_lieu_dit,dernier_mot
0,1,0,1,A008,W,LOT,BELLEVUE,N,3.0,0,,0,0,,0,2001351,59,2,,BELLEVUE
1,1,0,1,A015,D,LOT,LES CHARMILLES,N,3.0,0,,0,0,,0,1998274,56,2,,CHARMILL
2,1,0,1,A025,P,LOT,LES COQUELICOTS,N,3.0,0,,0,0,,0,1999300,57,2,,COQUELIC
3,1,0,1,A028,T,LOT,LES LILAS,N,3.0,0,,0,0,,0,2001025,58,2,,LILAS
4,1,0,1,A030,V,LOT,MUNETVILLE,N,3.0,0,,0,0,,0,1991302,52,2,,MUNETVIL


In [4]:
df["code_nature_voie"].value_counts()

RUE    833962
CHE    261810
IMP    194602
RTE    161868
ALL     95526
        ...  
GBD         1
TPL         1
TRN         1
AGL         1
LAC         1
Name: code_nature_voie, Length: 120, dtype: int64

In [12]:
df["nature_voie"] = df["code_nature_voie"].apply(code_nature_voie_reference.get)
(df["nature_voie"].value_counts(normalize=True, dropna=False) * 100)

None               76.510474
RUE                10.259551
CHEMIN              3.220834
IMPASSE             2.394029
ROUTE               1.991329
                     ...    
GRAND BOULEVARD     0.000012
TERRE-PLEIN         0.000012
TERRAIN             0.000012
AGGLOMERATION       0.000012
LAC                 0.000012
Name: nature_voie, Length: 119, dtype: float64

In [7]:
df.isna().mean()

code_departement          0.000000e+00
code_direction            0.000000e+00
code_commune              0.000000e+00
identifiant_voie          0.000000e+00
cle_rivoli                0.000000e+00
code_nature_voie          7.648042e-01
libelle_voie              1.230218e-07
type_commune              0.000000e+00
caractere_rur             1.209945e-01
caractere_voie            0.000000e+00
caractere_population      1.000000e+00
population_a_part         0.000000e+00
population_fictive        0.000000e+00
caractere_annulation      9.240174e-01
date_annulation           0.000000e+00
date_annulation.1         0.000000e+00
code_identifiant_majic    0.000000e+00
type_voie                 0.000000e+00
caractere_lieu_dit        2.416345e-01
dernier_mot               7.504330e-06
nature_voie               7.651047e-01
dtype: float64

In [8]:
df[df["code_nature_voie"].isna()][["libelle_voie", "type_voie", "caractere_lieu_dit"]]

Unnamed: 0,libelle_voie,type_voie,caractere_lieu_dit
8,LES BAILLERES,3,1.0
9,LES BASQUES,3,1.0
10,AUX BASQUES,3,1.0
11,BRODY,3,1.0
12,SUR BRODY,3,1.0
...,...,...,...
8128413,TSINGONI,3,0.0
8128414,VALBOEUF,3,1.0
8128415,VILLAGE DE TSINGONI,3,0.0
8128416,ZIDAKANI,3,1.0


In [13]:
df["type_voie"].apply(type_voie_reference.get).value_counts(dropna=False, normalize=True)

lieu-dit               0.758365
voie                   0.227059
ensemble immobilier    0.012951
pseudo-voie            0.001583
voie provisoire        0.000042
Name: type_voie, dtype: float64

In [10]:
pd.crosstab(df["type_voie"].apply(type_voie_reference.get), df["nature_voie"].isna())

nature_voie,False,True
type_voie,Unnamed: 1_level_1,Unnamed: 2_level_1
ensemble immobilier,87242,18031
lieu-dit,0,6164480
pseudo-voie,6188,6678
voie,1815694,29989
voie provisoire,255,83


In [11]:
pd.crosstab(
    df["nature_voie"],
    df["type_voie"].apply(type_voie_reference.get),
)

type_voie,ensemble immobilier,pseudo-voie,voie,voie provisoire
nature_voie,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AERODROME,0,3,0,0
AGGLOMERATION,1,0,0,0
ALLEE,0,17,95501,8
ANCIEN CHEMIN,0,15,524,0
ANCIENNE ROUTE,0,5,301,0
...,...,...,...,...
ZA,1082,59,0,2
ZAC,479,26,0,1
ZAD,7,0,0,0
ZI,432,42,0,4


In [22]:
df_lib_voies = (
    df
    .loc[df["type_voie"].isin([1, 4]), "libelle_voie"]
    .value_counts(normalize=False).reset_index()
    .rename(columns={"index": "libelle_voie", "libelle_voie": "frequence"})
)
df_lib_voies.head()

Unnamed: 0,libelle_voie,frequence
0,DE L EGLISE,14833
1,DE LA MAIRIE,10401
2,DU MOULIN,7958
3,DE LA GARE,7256
4,DU CHATEAU,7182


In [24]:
df_type_voies = (
    df
    .loc[:, "nature_voie"]
    .value_counts(normalize=False, dropna=False)
    .reset_index()
    .fillna("LIEU DIT")
    .rename({"index": "nature_voie", "nature_voie": "frequence"})
)
df_type_voies.head()

Unnamed: 0,index,nature_voie
0,LIEU DIT,6219261
1,RUE,833962
2,CHEMIN,261810
3,IMPASSE,194602
4,ROUTE,161868


In [25]:
df_lib_voies.to_csv("../data/libelles_voies.csv", index=False)

In [26]:
df_type_voies.to_csv("../data/natures_voies.csv", index=False)