In [29]:
import pandas as pd 
import geopandas as gpd 
from shapely.geometry import Point
import numpy as np 

In [30]:
# Load Wilayas Features and calculate polygon centroids
wilayas_gjson = '../dz-admin/wilayas_48.geojson'
wilayas_gdf = gpd.read_file(wilayas_gjson)
wilayas_gdf = wilayas_gdf[['code', 'nom', 'geometry']]
wilayas_gdf = wilayas_gdf.rename(columns={'nom': 'nom_wil', 'code': 'code_wil'})
wilayas_gdf['centroid'] = wilayas_gdf['geometry'].centroid
wilayas_gdf.head()


  wilayas_gdf['centroid'] = wilayas_gdf['geometry'].centroid


Unnamed: 0,code_wil,nom_wil,geometry,centroid
0,1,Adrar,"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7...",POINT (-0.62174 26.01400)
1,2,Chlef,"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0...",POINT (1.25669 36.23826)
2,3,Laghouat,"MULTIPOLYGON (((2.88464 32.88137, 2.87848 32.8...",POINT (2.72955 33.60757)
3,4,Oum El Bouaghi,"MULTIPOLYGON (((7.47320 35.52005, 7.47227 35.5...",POINT (7.06527 35.83646)
4,5,Batna,"MULTIPOLYGON (((6.10102 35.05459, 6.10090 35.0...",POINT (5.87455 35.41969)


In [31]:
wilayas_gdf = wilayas_gdf.set_crs("EPSG:4326")
wilayas_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [32]:
# Load Tagreted dataset and set to_check to False assuming all geocoded coordinates are OK 
pois_path = '../dz-datasets/notaries/'
pois = pd.read_csv(pois_path + 'notaries_geocoded.csv')
pois['to_check'] = False
pois.head()

Unnamed: 0,num,name,address,code_wil,court,tel,status,lat,lon,to_check
0,1,BARIK BOUDJEMAA,CITE 103 LOGTS BP 575 Adrar,1,Adrar,049 96 53 89,physical,28.01744,-0.26425,False
1,2,BENHADJ AHMED,CITE DES 40 LOGTS LOCAL 25 Adrar,1,Adrar,049 96 67 26,physical,28.01744,-0.26425,False
2,3,AZZOU ABD ERRAHMANE,Rue Amrad Cité OULED ANKAL Adrar,1,Adrar,049 96 53 19,physical,36.102009,2.712287,False
3,4,MIMOUNI ABDELKADER,"Rue Mokadem El Arbi, lot 30 Group 191 Adrar",1,Adrar,0665 01 20 18,physical,28.01744,-0.26425,False
4,5,OULED ALI AHMED,Cité 60 Logements Timimoun Adrar,1,Timimoun,0555 40 62 94,physical,35.261165,6.368815,False


In [33]:
# Convert pandas DataFrame to GeoPandas 
pois['geometry'] = pois.apply(lambda row: Point(row.lon, row.lat), axis=1)
pois_gdf  = gpd.GeoDataFrame(pois)
pois_gdf.head()

Unnamed: 0,num,name,address,code_wil,court,tel,status,lat,lon,to_check,geometry
0,1,BARIK BOUDJEMAA,CITE 103 LOGTS BP 575 Adrar,1,Adrar,049 96 53 89,physical,28.01744,-0.26425,False,POINT (-0.26425 28.01744)
1,2,BENHADJ AHMED,CITE DES 40 LOGTS LOCAL 25 Adrar,1,Adrar,049 96 67 26,physical,28.01744,-0.26425,False,POINT (-0.26425 28.01744)
2,3,AZZOU ABD ERRAHMANE,Rue Amrad Cité OULED ANKAL Adrar,1,Adrar,049 96 53 19,physical,36.102009,2.712287,False,POINT (2.71229 36.10201)
3,4,MIMOUNI ABDELKADER,"Rue Mokadem El Arbi, lot 30 Group 191 Adrar",1,Adrar,0665 01 20 18,physical,28.01744,-0.26425,False,POINT (-0.26425 28.01744)
4,5,OULED ALI AHMED,Cité 60 Logements Timimoun Adrar,1,Timimoun,0555 40 62 94,physical,35.261165,6.368815,False,POINT (6.36881 35.26116)


In [34]:
pois_gdf.count()

num         892
name        892
address     892
code_wil    892
court       335
tel         296
status      310
lat         883
lon         883
to_check    892
geometry    892
dtype: int64

In [35]:
# Set the Coordinate reference system
pois_gdf = pois_gdf.set_crs("EPSG:4326")
pois_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [36]:
# Check for rows with no coordiantes
no_coords_gdf = pois_gdf[(pois_gdf['lat'].isnull() | pois_gdf['lat'].isnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
no_coords_gdf = no_coords_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
no_coords_gdf['geometry'] = no_coords_gdf['centroid']
no_coords_gdf['to_check'] = True

with_coords_gdf = no_coords_gdf.drop(columns=['geometry_x'], axis=1)

with_coords_gdf.count() 

num           9
name          9
address       9
code_wil      9
court         2
tel           2
status        1
lat           0
lon           0
to_check      9
nom_wil       9
geometry_y    9
centroid      9
geometry      9
dtype: int64

In [37]:
# Update the Targeted DataFrame by adding Wilayas geometries and centroids 
pois_gdf = pois_gdf[(pois_gdf['lat'].notnull() & pois_gdf['lat'].notnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
pois_gdf = pois_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
pois_gdf.rename(columns={"geometry_x": "geometry"}, inplace=True)
pois_gdf.count()

num           883
name          883
address       883
code_wil      883
court         333
tel           294
status        309
lat           883
lon           883
to_check      883
geometry      883
nom_wil       883
geometry_y    883
centroid      883
dtype: int64

In [38]:
# Concat the Targeted DataFrame with rows without coordinates and approximated with Wilayas centroids 
pois_gdf = pd.concat([pois_gdf, with_coords_gdf])

# Drop unecessary columns
pois_gdf = pois_gdf.drop(columns=['centroid', 'nom_wil'], axis=1)

pois_gdf[(pois_gdf['lat'].isnull() | pois_gdf['lon'].isnull())]
pois_gdf.count()

num           892
name          892
address       892
code_wil      892
court         335
tel           296
status        310
lat           883
lon           883
to_check      892
geometry      892
geometry_y    892
dtype: int64

In [39]:
# Sort the DataFrame by code (Wilaya number) and reset the index
pois_gdf = pois_gdf.sort_values(by=['code_wil'],ignore_index=True)
pois_gdf.reset_index(drop=True)
pois_gdf.head()

Unnamed: 0,num,name,address,code_wil,court,tel,status,lat,lon,to_check,geometry,geometry_y
0,1,BARIK BOUDJEMAA,CITE 103 LOGTS BP 575 Adrar,1,Adrar,049 96 53 89,physical,28.01744,-0.26425,False,POINT (-0.26425 28.01744),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
1,2,BENHADJ AHMED,CITE DES 40 LOGTS LOCAL 25 Adrar,1,Adrar,049 96 67 26,physical,28.01744,-0.26425,False,POINT (-0.26425 28.01744),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
2,3,AZZOU ABD ERRAHMANE,Rue Amrad Cité OULED ANKAL Adrar,1,Adrar,049 96 53 19,physical,36.102009,2.712287,False,POINT (2.71229 36.10201),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
3,4,MIMOUNI ABDELKADER,"Rue Mokadem El Arbi, lot 30 Group 191 Adrar",1,Adrar,0665 01 20 18,physical,28.01744,-0.26425,False,POINT (-0.26425 28.01744),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
4,5,OULED ALI AHMED,Cité 60 Logements Timimoun Adrar,1,Timimoun,0555 40 62 94,physical,35.261165,6.368815,False,POINT (6.36881 35.26116),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."


In [40]:
# Check if coordinates are within Wilayas polygons
pois_gdf['coords_in_wil'] = [pois_gdf.geometry[i].within(pois_gdf.geometry_y[i]) for i in range(pois_gdf.geometry.count())] 
pois_gdf[pois_gdf.coords_in_wil == False]

Unnamed: 0,num,name,address,code_wil,court,tel,status,lat,lon,to_check,geometry,geometry_y,coords_in_wil
2,3,AZZOU ABD ERRAHMANE,Rue Amrad Cité OULED ANKAL Adrar,1,Adrar,049 96 53 19,physical,36.102009,2.712287,False,POINT (2.71229 36.10201),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7...",False
4,5,OULED ALI AHMED,Cité 60 Logements Timimoun Adrar,1,Timimoun,0555 40 62 94,physical,35.261165,6.368815,False,POINT (6.36881 35.26116),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7...",False
12,17,HADJ SADOUK MALIKA,RUE EMIR ABDELKADER Chlef,2,,,,35.731771,-0.577258,False,POINT (-0.57726 35.73177),"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0...",False
16,13,SAMET BOUHAYEK DJILLALI,26 RUE EL MOUSSAAD AZZOUN Chlef,2,,,,32.177101,35.055957,False,POINT (35.05596 32.17710),"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0...",False
25,26,EL TAHER MAHFOUD,RUE AKKARI BELKACEM Aflou Laghouat,3,,,,50.272480,1.667315,False,POINT (1.66732 50.27248),"MULTIPOLYGON (((2.88464 32.88137, 2.87848 32.8...",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
857,860,TOUAMI ETTAIB,06 Cité 90 Logements El Baraka Hammam Bouhdjar...,46,Hamam Bouhdjar,043 73 61 73 / 0696 54 45 09,,43.946679,7.179026,False,POINT (7.17903 43.94668),"MULTIPOLYGON (((-1.06748 35.12752, -1.06791 35...",False
861,856,BELKHCHI FATIHA,41 CITE ABOU BEKR ESSEDIK Ain Temouchent,46,Ain Temouchent,043 79 59 05 / 043 79 32 41 / 0661 22 71 00,physical,47.676190,1.415907,False,POINT (1.41591 47.67619),"MULTIPOLYGON (((-1.06748 35.12752, -1.06791 35...",False
865,871,SIROUKANE OMAR BEN MOUSSA,RUE CHEIKH ABDELAZIZ ETHAMNI BP 138 Beni Isgue...,47,,,,49.929941,2.191623,False,POINT (2.19162 49.92994),"MULTIPOLYGON (((4.12740 33.01380, 4.14386 33.0...",False
871,888,KROUOUDRLI MOHAMED,Rue Abid Ahmed Bousrour Mazouna Relizane,48,Mazouna,046 94 73 82 / 0665 50 14 71,physical,36.166763,1.336110,False,POINT (1.33611 36.16676),"MULTIPOLYGON (((0.69164 35.48482, 0.69160 35.4...",False


In [41]:
# Update to_check flag to take into account points outside Wilays and without geocoding coordinates
final_gdf = pois_gdf
final_gdf['to_check'] = (pois_gdf['to_check'] == True) | (pois_gdf['coords_in_wil'] == False)   

# Remove unecessary columns
final_gdf = final_gdf.drop(['geometry_y', 'coords_in_wil'], axis=1)

final_gdf[final_gdf.to_check == True]

Unnamed: 0,num,name,address,code_wil,court,tel,status,lat,lon,to_check,geometry
2,3,AZZOU ABD ERRAHMANE,Rue Amrad Cité OULED ANKAL Adrar,1,Adrar,049 96 53 19,physical,36.102009,2.712287,True,POINT (2.71229 36.10201)
4,5,OULED ALI AHMED,Cité 60 Logements Timimoun Adrar,1,Timimoun,0555 40 62 94,physical,35.261165,6.368815,True,POINT (6.36881 35.26116)
12,17,HADJ SADOUK MALIKA,RUE EMIR ABDELKADER Chlef,2,,,,35.731771,-0.577258,True,POINT (-0.57726 35.73177)
16,13,SAMET BOUHAYEK DJILLALI,26 RUE EL MOUSSAAD AZZOUN Chlef,2,,,,32.177101,35.055957,True,POINT (35.05596 32.17710)
25,26,EL TAHER MAHFOUD,RUE AKKARI BELKACEM Aflou Laghouat,3,,,,50.272480,1.667315,True,POINT (1.66732 50.27248)
...,...,...,...,...,...,...,...,...,...,...,...
857,860,TOUAMI ETTAIB,06 Cité 90 Logements El Baraka Hammam Bouhdjar...,46,Hamam Bouhdjar,043 73 61 73 / 0696 54 45 09,,43.946679,7.179026,True,POINT (7.17903 43.94668)
861,856,BELKHCHI FATIHA,41 CITE ABOU BEKR ESSEDIK Ain Temouchent,46,Ain Temouchent,043 79 59 05 / 043 79 32 41 / 0661 22 71 00,physical,47.676190,1.415907,True,POINT (1.41591 47.67619)
865,871,SIROUKANE OMAR BEN MOUSSA,RUE CHEIKH ABDELAZIZ ETHAMNI BP 138 Beni Isgue...,47,,,,49.929941,2.191623,True,POINT (2.19162 49.92994)
871,888,KROUOUDRLI MOHAMED,Rue Abid Ahmed Bousrour Mazouna Relizane,48,Mazouna,046 94 73 82 / 0665 50 14 71,physical,36.166763,1.336110,True,POINT (1.33611 36.16676)


In [42]:
final_gdf.count()

num         892
name        892
address     892
code_wil    892
court       335
tel         296
status      310
lat         883
lon         883
to_check    892
geometry    892
dtype: int64

In [43]:
# Write to a GeoJSON file

gpd.GeoDataFrame(final_gdf).to_file(pois_path + "pois_checked_in_out_wilayas.geojson", driver='GeoJSON')