## Manage packages

In [317]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Install packages

In [318]:
# pip install psycopg2-binary
# pip install geoalchemy2

Load packages

In [2]:
# system tools
from pathlib import Path
import getpass

# data tools
import pandas as pd
import geopandas as gpd

# geo tools
import json
from shapely.geometry import mapping

# map tools
from keplergl import KeplerGl

# DB tools
import sqlalchemy 

---
## Setup PostGIS database

Create file docker-compose.yaml

In [3]:
# version: '3.8' # version docker-compose
#  
# services:
#   db_atelier_1:
#     image: postgis/postgis:17-3.5
#     ports:
#       - 7654:5432 # host:container
#     env_file:
#       - ./db.env  # Fichier db.env pour la base de données
#     volumes:
#       - ./postgis_A420_atelier_1:/var/lib/postgresql/data  # Volume persistant pour la base de données


Create file db.env

In [4]:
# POSTGRES_USER=tdubois
# POSTGRES_PASSWORD=tdubois
# POSTGRES_DB=atelier_1

Run docker-compose
- Set configuration (image_path, port, env, volumes)
- Retrieve default POSTGIS docker-image
- Instanciate a new container based on the docker image and the specified configuration

In [5]:
# ! cd /home/tdubois/dev/python/INSERM_plateforme_donnees_sante/
# ! nohup docker-compose up

# ! docker volume ls | grep atelier_1 # check persisted data on the server

Expose DB on port 7654
- DB : 'atelier_1'
- User : tdubois

In [6]:
# ! export $(grep -v '^#' db.env | xargs)
# ! psql -h localhost -p 7654 -U $POSTGRES_USER -d $POSTGRES_DB

---

## Organize project repos

In [7]:
path_project = Path.cwd().parent.parent
path_data = f"{path_project}/data/"

---

## First tests with PostGIS

Load and check geodata airports.parquet

In [8]:
data_airport = gpd.read_parquet(f"{path_data}/parquet/airports/part-00000-b950f0ae-c815-4a20-b44d-5556dcd46339-c000.snappy.parquet")

In [9]:
data_airport.head()

Unnamed: 0,geometry,scalerank,featurecla,type,name,abbrev,location,gps_code,iata_code,wikipedia,natlscale
0,POINT (113.93502 22.31533),2,Airport ...,major ...,Hong Kong Int'l ...,HKG,terminal ...,VHHH ...,HKG ...,http://en.wikipedia.org/wiki/Hong_Kong_Interna...,150.0
1,POINT (121.23137 25.07674),2,Airport ...,major ...,Taoyuan ...,TPE,terminal ...,RCTP ...,TPE ...,http://en.wikipedia.org/wiki/Taiwan_Taoyuan_In...,150.0
2,POINT (4.76438 52.30893),2,Airport ...,major ...,Schiphol ...,AMS,terminal ...,EHAM ...,AMS ...,http://en.wikipedia.org/wiki/Amsterdam_Schipho...,150.0
3,POINT (103.98641 1.35616),2,Airport ...,major ...,Singapore Changi ...,SIN,terminal ...,WSSS ...,SIN ...,http://en.wikipedia.org/wiki/Singapore_Changi_...,150.0
4,POINT (-0.45316 51.471),2,Airport ...,major ...,London Heathrow ...,LHR,parking ...,EGLL ...,LHR ...,http://en.wikipedia.org/wiki/London_Heathrow_A...,150.0


---

Populate DB with one .parquet table

In [10]:
engine = sqlalchemy.create_engine(
    f"postgresql+psycopg2://tdubois:{getpass.getpass('pwd')}@localhost:{7654}/{'atelier_1'}"
)

In [11]:
data_airport.to_postgis('airports', engine, if_exists='replace', index=False)

  srid = _get_srid_from_crs(gdf)


Check database

In [12]:
pd.read_sql_query("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';", engine)

Unnamed: 0,table_name
0,geography_columns
1,geometry_columns
2,spatial_ref_sys
3,osm_idf
4,osm_data_idf
5,municipalities
6,osm_idf_1000
7,airports
8,hospitals
9,doctors


---

## Check perfs of PostGIS

### Compute distances on municipalities data

Load municipalities data

In [13]:
# not in the git folder
data_municipalities = gpd.read_parquet(f"{path_data}/data_municipalities/communes_fr_geoparquet/")

In [14]:
data_municipalities.head()

Unnamed: 0,geometry,wikipedia,surf_ha,nom,insee
0,"POLYGON ((9.32017 42.38507, 9.32028 42.3851, 9...",fr:Pie-d'Orezza ...,573.0,Pie-d'Orezza,2B222
1,"POLYGON ((9.2001 42.39013, 9.20014 42.39014, 9...",fr:Lano ...,824.0,Lano,2B137
2,"POLYGON ((9.27757 42.37509, 9.27758 42.37512, ...",fr:Cambia ...,833.0,Cambia,2B051
3,"POLYGON ((9.2512 42.37605, 9.25132 42.37603, 9...",fr:Érone ...,393.0,Érone,2B106
4,"POLYGON ((9.2834 42.66273, 9.28345 42.66273, 9...",fr:Oletta ...,2674.0,Oletta,2B185


In [15]:
data_municipalities.dtypes

geometry     geometry
wikipedia      object
surf_ha        object
nom            object
insee          object
dtype: object

Populate PostGIS DB

In [16]:
data_municipalities.to_postgis('municipalities', engine, if_exists='replace', index=False, dtype={'geometry': 'Geometry'})

  srid = _get_srid_from_crs(gdf)


In [17]:
pd.read_sql_query("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';", engine)

Unnamed: 0,table_name
0,geography_columns
1,geometry_columns
2,spatial_ref_sys
3,osm_idf
4,osm_data_idf
5,osm_idf_1000
6,airports
7,hospitals
8,doctors
9,municipalities


In [31]:
# with engine.connect() as connection:
#     connection.execute(sqlalchemy.text(f"DROP TABLE IF EXISTS municipalities;"))
#     connection.commit()

Compute distance from CASD to all french municipalities

In [35]:
def get_nearest_commune(latitude:str, longitude:str, table_name:str, max_commune_number:int):
    """
    This function calculates the nearest commune to the given latitude and longitude.
    :param latitude: latitude of the given point
    :param longitude: longitude of the given point
    :parm table_name: name of PostGIS table of municipalities coordinates 
    :param max_commune_number: Specify the max number of commune in the result
    :return: 
    """
    
    query = f"""
            SELECT 
                z.geometry,
                z.nom AS commune_name, 
                z.insee, 
                ST_DistanceSphere(
                    ST_SetSRID(ST_MakePoint({longitude}, {latitude}), 4326), 
                    ST_Centroid(z.geometry)
                ) AS distance
            FROM {table_name} AS z
            ORDER BY distance ASC
            LIMIT {max_commune_number};
            """

    # Execute and load into a GeoDataFrame
    nearest_commune_df = gpd.read_postgis(query, con=engine, geom_col='geometry')
    
    return nearest_commune_df

In [265]:
# the gps coordinates for casd is 48.8190155° N, 2.3081911° E
casd_latitude = "48.8190155"
casd_longitude = "2.3081911"

casd_geo = f"POINT({casd_longitude} {casd_latitude})"

In [37]:
%%time

nearest_municipalities_casd = get_nearest_commune(casd_latitude, casd_longitude, "municipalities", 36000)

display(nearest_municipalities_casd.head(10))
display(nearest_municipalities_casd.count())

Unnamed: 0,geometry,commune_name,insee,distance
0,"POLYGON ((2.30002 48.81132, 2.30023 48.81151, ...",Montrouge,92049,782.422647
1,"POLYGON ((2.2744 48.81346, 2.27812 48.81433, 2...",Malakoff,92046,931.076174
2,"POLYGON ((2.27262 48.81439, 2.27282 48.81478, ...",Vanves,92075,1548.46556
3,"POLYGON ((2.2714 48.79494, 2.27167 48.79504, 2...",Châtillon,92020,2244.229981
4,"POLYGON ((2.29229 48.796, 2.29235 48.79611, 2....",Bagneux,92007,2306.689819
5,"POLYGON ((2.31787 48.8084, 2.31797 48.80854, 2...",Arcueil,94003,2420.722466
6,"POLYGON ((2.32906 48.81378, 2.32993 48.81481, ...",Gentilly,94037,2712.777013
7,"POLYGON ((2.23571 48.82164, 2.23604 48.82157, ...",Issy-les-Moulineaux,92040,3212.543985
8,"POLYGON ((2.31868 48.788, 2.31877 48.78826, 2....",Cachan,94016,3506.334937
9,"POLYGON ((2.27167 48.79049, 2.27171 48.79051, ...",Fontenay-aux-Roses,92032,3619.864413


geometry        34955
commune_name    34955
insee           34955
distance        34955
dtype: int64

CPU times: user 16.2 s, sys: 1.44 s, total: 17.7 s
Wall time: 20.3 s


In [266]:
# the gps coordinates for Paul-Brousse is 48.7951606539 N, 2.3636935981 E
pb_latitude = "48.7951606539"
pb_longitude = "2.3636935981"

pb_geo = f"POINT({pb_longitude} {pb_latitude})"

In [40]:
%%time

nearest_municipalities_pb = get_nearest_commune(pb_latitude, pb_longitude, "municipalities", 36000)

display(nearest_municipalities_pb.head(10))
display(nearest_municipalities_pb.count())

Unnamed: 0,geometry,commune_name,insee,distance
0,"POLYGON ((2.34376 48.79769, 2.34472 48.79718, ...",Villejuif,94076,417.138787
1,"POLYGON ((2.3438 48.80323, 2.34411 48.80419, 2...",Le Kremlin-Bicêtre,94043,1616.495228
2,"POLYGON ((2.31868 48.788, 2.31877 48.78826, 2....",Cachan,94016,2350.137022
3,"POLYGON ((2.36735 48.77943, 2.37077 48.78062, ...",Vitry-sur-Seine,94081,2391.395349
4,"POLYGON ((2.31787 48.8084, 2.31797 48.80854, 2...",Arcueil,94003,2455.207841
5,"POLYGON ((2.32906 48.81378, 2.32993 48.81481, ...",Gentilly,94037,2469.389409
6,"POLYGON ((2.36425 48.81633, 2.36473 48.81648, ...",Ivry-sur-Seine,94041,2592.684734
7,"POLYGON ((2.31588 48.7667, 2.31625 48.7671, 2....",L'Haÿ-les-Roses,94038,2842.835241
8,"POLYGON ((2.33265 48.76464, 2.33293 48.76484, ...",Chevilly-Larue,94021,3239.031087
9,"POLYGON ((2.30684 48.77835, 2.30686 48.77838, ...",Bourg-la-Reine,92014,3788.693706


geometry        34955
commune_name    34955
insee           34955
distance        34955
dtype: int64

CPU times: user 15.5 s, sys: 1.58 s, total: 17.1 s
Wall time: 18 s


### Perform geospatial operations on hospitals and doctors in Île-de-France

To get the list of hospitals and doctors in Île-de-France, we use the (OSM)Open Street Map sample data.

The sample data which I will use in this notebook can be downloaded from this page: 
https://download.geofabrik.de/europe/france.html. I use the `Ile-de-France` map (`ile-de-france-latest.osm.pbf`)

Load osm data of IDF 

In [12]:
# not in the git folder
osm_data_idf = pd.read_parquet(f"{path_data}/data_municipalities/ile-de-france-geo-parquet/")

In [13]:
display(osm_data_idf.head())
display(osm_data_idf.dtypes)
display(osm_data_idf.shape)

Unnamed: 0,id,type,latitude,longitude,nodes,relations,tags,info
0,122626,0,49.115966,2.554912,[],[],[],"{'version': 3, 'timestamp': 158910315600000000..."
1,122627,0,49.110294,2.552173,[],[],[],"{'version': 4, 'timestamp': 123454962800000000..."
2,122631,0,49.083439,2.551138,[],[],[],"{'version': 15, 'timestamp': 16250652720000000..."
3,122632,0,49.067523,2.552468,[],[],[],"{'version': 17, 'timestamp': 15548895360000000..."
4,122633,0,49.063616,2.552241,[],[],[],"{'version': 17, 'timestamp': 12345496290000000..."


id             int64
type            int8
latitude     float64
longitude    float64
nodes         object
relations     object
tags          object
info          object
dtype: object

(33402307, 8)

Add geometry column

In [14]:
osm_geodata_idf = gpd.GeoDataFrame(
    osm_data_idf,
    geometry=gpd.points_from_xy(osm_data_idf.longitude, osm_data_idf.latitude),
    crs="EPSG:4326"
)

In [15]:
osm_geodata_idf = osm_geodata_idf[["geometry", "id", "tags"]]

In [16]:
display(osm_geodata_idf.head())
display(osm_geodata_idf.dtypes)

Unnamed: 0,geometry,id,tags
0,POINT (2.55491 49.11597),122626,[]
1,POINT (2.55217 49.11029),122627,[]
2,POINT (2.55114 49.08344),122631,[]
3,POINT (2.55247 49.06752),122632,[]
4,POINT (2.55224 49.06362),122633,[]


geometry    geometry
id             int64
tags          object
dtype: object

Populate PostGIS DB

In [51]:
osm_geodata_idf.to_postgis('osm_idf', engine, if_exists='replace', index=False, dtype={'geometry': 'Geometry'})

In [41]:
pd.read_sql_query("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';", engine)

Unnamed: 0,table_name
0,geography_columns
1,geometry_columns
2,spatial_ref_sys
3,osm_data_idf
4,airports
5,municipalities
6,osm_idf
7,osm_idf_1000


Add column **amenity**

In [221]:
with engine.connect() as connection:
    result = connection.execute(sqlalchemy.text("""
                                                ALTER TABLE osm_idf
                                                ADD COLUMN amenity TEXT;

                                                UPDATE osm_idf
                                                SET amenity = (
                                                    CASE
                                                        WHEN tags LIKE '%amenity%' THEN
                                                            (REGEXP_MATCH(tags, '\(amenity'', ''(\w+)'))[1]
                                                        ELSE 'Raté'
                                                    END
                                                );
                                                """))
    connection.commit()

Compute hospitals table and save it to PostGIS database

In [255]:
data_hospitals = gpd.read_postgis("""
                                    SELECT 
                                        id, 
                                        geometry, 
                                        tags
                                    FROM 
                                        osm_idf
                                    WHERE 
                                        amenity = 'hospital' OR amenity = 'clinic' ;
                                    """, con=engine, geom_col='geometry')

In [256]:
display(data_hospitals.head())
data_hospitals.shape

Unnamed: 0,id,geometry,tags
0,476313165,POINT (2.4146 48.87852),"[('name', 'Maternité des Lilas'), ('type:FR:FI..."
1,783760856,POINT (2.24429 48.83526),"[('name', ""Centre de Pneumologie de l'Enfant"")..."
2,1362787029,POINT (2.35781 48.82342),"[('amenity', 'clinic'), ('healthcare', 'clinic..."
3,1763282456,POINT (2.26171 48.79047),"[('name', 'Clinique du Plateau'), ('source', '..."
4,1768419851,POINT (2.34639 48.90397),"[('name', 'CMS et PMI Bauer'), ('source', 'Le ..."


(574, 3)

In [257]:
data_hospitals.to_postgis('hospitals', engine, if_exists='replace', index=False, dtype={'geometry': 'Geometry'})

In [309]:
pd.read_sql_query("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';", engine)

Unnamed: 0,table_name
0,geography_columns
1,geometry_columns
2,spatial_ref_sys
3,osm_idf
4,osm_data_idf
5,airports
6,municipalities
7,osm_idf_1000
8,hospitals
9,doctors


Compute doctors table and save it to PostGIS database

In [258]:
data_doctors = gpd.read_postgis("""
                                SELECT 
                                    id, 
                                    geometry, 
                                    tags
                                FROM 
                                    osm_idf
                                WHERE 
                                    amenity = 'doctors';
                                """, con=engine, geom_col='geometry')

In [259]:
display(data_doctors.head())
data_doctors.shape

Unnamed: 0,id,geometry,tags
0,302305751,POINT (2.24142 48.58334),"[('amenity', 'doctors'), ('healthcare', 'docto..."
1,627223341,POINT (2.19607 48.8945),"[('name', 'Groupe Médical du Centre'), ('sourc..."
2,704816193,POINT (2.16825 48.99164),"[('amenity', 'doctors'), ('healthcare', 'docto..."
3,705016941,POINT (2.16768 48.99259),"[('amenity', 'doctors'), ('healthcare', 'docto..."
4,783285280,POINT (2.37118 48.53282),"[('addr:street', ""Rue de l'Essonne""), ('addr:p..."


(1301, 3)

In [260]:
data_doctors.to_postgis('doctors', engine, if_exists='replace', index=False, dtype={'geometry': 'Geometry'})

In [398]:
pd.read_sql_query("SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';", engine)

Unnamed: 0,table_name
0,geography_columns
1,geometry_columns
2,spatial_ref_sys
3,osm_idf
4,osm_data_idf
5,municipalities
6,osm_idf_1000
7,hospitals
8,doctors
9,airports


#### Find nearest hospitals from a given POINT

In [348]:
def get_nearest_hospitals(patient_loc:str, table_name:str, distance:float):
    """
    This function get the nearest hospital based on distance with a given patient location
    :param patient_loc: gps coordinates in format "POINT(longitude, latitude)"
    :parm table_name: name of PostGIS table of municipalities coordinates 
    :param distance: the max distance between hospital and patient
    :return: 
    """
    query = f"""
            SELECT * 
            FROM (
                SELECT 
                    geometry,
                    id, 
                    tags,
                    ST_DistanceSphere(
                        ST_GeomFromText('{patient_loc}', 4326),
                        geometry  
                    ) AS distance_meter
                FROM 
                    {table_name}
            ) AS subquery
            WHERE 
                distance_meter < {distance}
            ORDER BY 
                distance_meter ASC;
            """
    
    # Execute and load into a GeoDataFrame
    nearest_hospital_df = gpd.read_postgis(query, con=engine, geom_col='geometry')
        
    return nearest_hospital_df

Nearest hospitals from CASD

In [349]:
%%time

# get all hospitals within 5000 meter of CASD
nearest_hospitals_casd = get_nearest_hospitals(casd_geo, "hospitals", 5000)

display(nearest_hospitals_casd.head(10))
display(nearest_hospitals_casd.count())

Unnamed: 0,geometry,id,tags,distance_meter
0,POINT (2.31574 48.81153),2506232459,"[('website', 'http://www.centrelafontaine.fr/i...",999.45156
1,POINT (2.31413 48.82755),4936398378,"[('amenity', 'clinic'), ('healthcare', 'clinic...",1043.571591
2,POINT (2.31193 48.83105),9073838043,"[('healthcare:speciality', 'kinesitherapy'), (...",1365.466904
3,POINT (2.32938 48.82623),10883369653,"[('name', 'Clinique Sainte-Geneviève'), ('amen...",1746.047959
4,POINT (2.32946 48.81081),7703266191,"[('healthcare:speciality', 'ophtalmology;denta...",1804.989201
5,POINT (2.27683 48.82354),483569726,"[('website', 'https://www.hopitalsuissedeparis...",2350.259688
6,POINT (2.30366 48.84038),11573359157,"[('addr:housenumber', '236'), ('addr:street', ...",2398.574115
7,POINT (2.31791 48.79827),4751279276,"[('amenity', 'clinic'), ('name', 'Centre de Ra...",2414.455118
8,POINT (2.33468 48.83393),10061970658,"[('healthcare:speciality', 'child_psychiatry')...",2551.240536
9,POINT (2.30965 48.84199),10736464005,"[('website', 'https://www.pasteur.fr'), ('name...",2556.500801


geometry          39
id                39
tags              39
distance_meter    39
dtype: int64

CPU times: user 31.2 ms, sys: 0 ns, total: 31.2 ms
Wall time: 56 ms


Nearest hospitals from Paul-Brousse

In [287]:
%%time

# get all hospital within 5000 meter of Paul-Brousse
nearest_hospitals_pb = get_nearest_hospitals(pb_geo, "hospitals", 5000)

display(nearest_hospitals_pb.head(10))
display(nearest_hospitals_pb.count())

Unnamed: 0,geometry,id,tags,distance_meter
0,POINT (2.36662 48.78588),10926295628,"[('amenity', 'clinic'), ('healthcare', 'clinic...",1054.161414
1,POINT (2.37687 48.80541),8860878651,"[('amenity', 'hospital'), ('name', 'Pôle Santé...",1493.186834
2,POINT (2.38436 48.81202),9247842729,"[('amenity', 'clinic'), ('healthcare', 'clinic...",2409.751586
3,POINT (2.34997 48.77454),9371510917,"[('website', 'https://www.lhaylesroses.fr/sant...",2503.199902
4,POINT (2.34936 48.77099),9371501687,"[('name', ""Service d'accueil médical initial (...",2885.793018
5,POINT (2.37159 48.82208),8269359942,"[('name', 'Cabinet paramédical'), ('addr:stree...",3048.269969
6,POINT (2.32946 48.81081),7703266191,"[('healthcare:speciality', 'ophtalmology;denta...",3051.847927
7,POINT (2.35781 48.82342),1362787029,"[('amenity', 'clinic'), ('healthcare', 'clinic...",3171.286978
8,POINT (2.31791 48.79827),4751279276,"[('amenity', 'clinic'), ('name', 'Centre de Ra...",3371.25784
9,POINT (2.37802 48.82526),9247842736,"[('healthcare:speciality', 'general'), ('name'...",3507.607601


geometry          25
id                25
tags              25
distance_meter    25
dtype: int64

CPU times: user 21.8 ms, sys: 0 ns, total: 21.8 ms
Wall time: 26.1 ms


In [306]:
kepler_map_hospitals_pb = KeplerGl()
kepler_map_hospitals_pb.add_data(data=nearest_hospitals_pb, name="nearest_hospitals_pb")
kepler_map_hospitals_pb.save_to_html(file_name= f"{path_project}/data/tmp/map_nearest_hospitals_pb.html")

kepler_map_hospitals_pb

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to /home/tdubois/dev/python/INSERM_plateforme_donnees_sante/data/tmp/map_nearest_hospitals_pb.html!


KeplerGl(data={'nearest_hospitals_pb': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…

#### Find nearest doctors from a given POINT

Nearest doctors from CASD

In [290]:
%%time

# get all hospitals within 5000 meter of CASD
nearest_doctors_casd = get_nearest_hospitals(casd_geo, "doctors", 5000)

display(nearest_doctors_casd.head(10))
display(nearest_hospitals_casd.count())

Unnamed: 0,geometry,id,tags,distance_meter
0,POINT (2.30665 48.81661),6278834900,"[('name', 'Cabinet médical'), ('amenity', 'doc...",290.068496
1,POINT (2.30924 48.8216),2985005476,"[('amenity', 'doctors'), ('name', 'Centre de S...",297.26054
2,POINT (2.30491 48.82065),3089193717,"[('healthcare:speciality', 'biology'), ('websi...",301.378169
3,POINT (2.31509 48.81591),7038200207,"[('healthcare:speciality', 'general'), ('name'...",612.192838
4,POINT (2.3173 48.81866),6024677863,"[('healthcare:speciality', 'general'), ('name'...",668.336023
5,POINT (2.31856 48.8184),4927250021,"[('amenity', 'doctors'), ('healthcare:speciali...",762.140342
6,POINT (2.31843 48.81697),6129113837,"[('healthcare:speciality', 'general'), ('name'...",783.637307
7,POINT (2.31843 48.81634),9756403885,"[('amenity', 'doctors'), ('healthcare', 'docto...",806.493839
8,POINT (2.30619 48.82644),3906875692,"[('website', 'https://www.doctolib.fr/maison-d...",838.565765
9,POINT (2.30829 48.81122),3686959643,"[('name:signed', 'no'), ('healthcare:specialit...",866.57475


geometry          39
id                39
tags              39
distance_meter    39
dtype: int64

CPU times: user 32.5 ms, sys: 47 μs, total: 32.6 ms
Wall time: 42.9 ms


Nearest doctors from Paul-Brousse

In [292]:
%%time

# get all hospitals within 5000 meter of Paul-Brousse
nearest_doctors_pb = get_nearest_hospitals(pb_geo, "doctors", 5000)

display(nearest_doctors_pb.head(10))
display(nearest_doctors_pb.count())

Unnamed: 0,geometry,id,tags,distance_meter
0,POINT (2.36462 48.78441),8852847105,"[('amenity', 'doctors'), ('healthcare', 'docto...",1197.886052
1,POINT (2.36728 48.78361),10235278017,"[('name', 'Cabinet médical'), ('addr:street', ...",1310.803105
2,POINT (2.37528 48.80751),9402522413,"[('addr:housenumber', '161'), ('addr:street', ...",1614.021815
3,POINT (2.33633 48.79336),11147009623,"[('healthcare:speciality', 'general'), ('name'...",2014.413515
4,POINT (2.33633 48.79335),11147009622,"[('healthcare:speciality', 'general'), ('name'...",2014.869298
5,POINT (2.33632 48.79333),9045375319,"[('healthcare:speciality', 'general'), ('name'...",2015.547492
6,POINT (2.33432 48.79398),8243636860,"[('healthcare:speciality', 'psychiatry'), ('na...",2155.677552
7,POINT (2.33333 48.79284),8482661011,"[('healthcare:speciality', 'general'), ('name'...",2238.818672
8,POINT (2.33334 48.79281),8482661013,"[('healthcare:speciality', 'osteopathy'), ('na...",2238.908633
9,POINT (2.33333 48.79282),8482661012,"[('healthcare:speciality', 'optometrist'), ('n...",2238.968727


geometry          132
id                132
tags              132
distance_meter    132
dtype: int64

CPU times: user 38.1 ms, sys: 315 μs, total: 38.4 ms
Wall time: 47.8 ms


Display results on a map

In [307]:
kepler_map_doctors_pb = KeplerGl()
kepler_map_doctors_pb.add_data(data=nearest_doctors_pb, name="nearest_doctors_pb")
kepler_map_doctors_pb.save_to_html(file_name= f"{path_project}/data/tmp/map_nearest_doctors_pb.html")

kepler_map_doctors_pb

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to /home/tdubois/dev/python/INSERM_plateforme_donnees_sante/data/tmp/map_nearest_doctors_pb.html!


KeplerGl(data={'nearest_doctors_pb': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …

#### Count hospitals in each municipallity

In [20]:
data_municipalities_near_paris = gpd.read_postgis("""
                                                    WITH paris AS (
                                                        SELECT 
                                                            ST_Transform(
                                                                ST_Buffer(
                                                                    ST_Transform(ST_Centroid(ST_SetSRID(geometry, 4326)), 2154),
                                                                    8000
                                                                ),
                                                                4326
                                                            ) AS buffer
                                                        FROM 
                                                            municipalities
                                                        WHERE 
                                                            nom = 'Paris'
                                                    )
                                                    SELECT 
                                                        m.* 
                                                    FROM 
                                                        municipalities AS m, paris AS p
                                                    WHERE 
                                                        ST_Intersects(ST_Centroid(ST_SetSRID(m.geometry, 4326)), p.buffer);
                                        """, con=engine, geom_col='geometry')

In [21]:
display(data_municipalities_near_paris.head())
data_municipalities_near_paris.shape

Unnamed: 0,geometry,wikipedia,surf_ha,nom,insee
0,"POLYGON ((2.41844 48.8464, 2.4187 48.84753, 2....",fr:Vincennes ...,191.0,Vincennes,94080
1,"POLYGON ((2.41345 48.87315, 2.41432 48.87388, ...",fr:Bagnolet ...,257.0,Bagnolet,93006
2,"POLYGON ((2.42415 48.8916, 2.4243 48.89214, 2....",fr:Romainville ...,343.0,Romainville,93063
3,"POLYGON ((2.24562 48.87636, 2.24585 48.87659, ...",fr:Neuilly-sur-Seine ...,373.0,Neuilly-sur-Seine,92051
4,"POLYGON ((2.41084 48.87842, 2.4118 48.87871, 2...",fr:Les Lilas ...,126.0,Les Lilas,93045


(30, 5)

In [43]:
code_insee_municipalities = ', '.join(f"'{name}'" for name in data_municipalities_near_paris.insee)
code_insee_municipalities

"'94080', '93006', '93063', '92051', '93045', '94067', '93061', '92040', '92026', '93048', '93001', '93070', '94018', '93055', '94041', '92004', '94037', '94043', '94003', '94076', '94016', '92046', '92049', '92024', '92007', '92012', '92020', '92044', '92075', '75056'"

In [46]:
nb_hospitals_in_municipalities = gpd.read_postgis(f"""
                                                    WITH sel_municipalities AS (
                                                        SELECT 
                                                            *
                                                        FROM 
                                                            municipalities
                                                        WHERE 
                                                            insee IN ({code_insee_municipalities})
                                                    )
                                                    SELECT 
                                                        sel_municipalities.nom, 
                                                        sel_municipalities.geometry, 
                                                        COUNT(hospitals.id) AS hospitals_count
                                                    FROM 
                                                        sel_municipalities
                                                    LEFT JOIN 
                                                        hospitals
                                                    ON 
                                                        ST_Contains(ST_SetSRID(sel_municipalities.geometry, 4326), hospitals.geometry)
                                                    GROUP BY 
                                                        sel_municipalities.nom, sel_municipalities.geometry
                                                    ORDER BY hospitals_count;
                                        """, con=engine, geom_col='geometry')

In [47]:
nb_hospitals_in_municipalities

Unnamed: 0,nom,geometry,hospitals_count
0,Montreuil,"POLYGON ((2.41528 48.85518, 2.4168 48.85588, 2...",0
1,Gentilly,"POLYGON ((2.32906 48.81378, 2.32993 48.81481, ...",0
2,Malakoff,"POLYGON ((2.2744 48.81346, 2.27812 48.81433, 2...",0
3,Le Kremlin-Bicêtre,"POLYGON ((2.3438 48.80323, 2.34411 48.80419, 2...",0
4,Asnières-sur-Seine,"POLYGON ((2.26493 48.91027, 2.265 48.91029, 2....",0
5,Vanves,"POLYGON ((2.27262 48.81439, 2.27282 48.81478, ...",0
6,Cachan,"POLYGON ((2.31868 48.788, 2.31877 48.78826, 2....",0
7,Charenton-le-Pont,"POLYGON ((2.39026 48.82573, 2.39095 48.82603, ...",0
8,Châtillon,"POLYGON ((2.2714 48.79494, 2.27167 48.79504, 2...",0
9,Saint-Mandé,"POLYGON ((2.41123 48.83387, 2.41132 48.83392, ...",0


In [48]:
kepler_map_doctors_pb = KeplerGl()
kepler_map_doctors_pb.add_data(data=nb_hospitals_in_municipalities, name="nb_hospitals_in_municipalities")
kepler_map_hospitals_pb.save_to_html(file_name= f"{path_project}/data/tmp/map_nb_hospitals_in_municipalities.html")

kepler_map_doctors_pb

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'nb_hospitals_in_municipalities': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, …