In [29]:
import pandas as pd 
import geopandas as gpd 
from shapely.geometry import Point
import numpy as np 

In [30]:
# Load Wilayas Features and calculate polygon centroids
wilayas_gjson = '../dz-admin/wilayas_48.geojson'
wilayas_gdf = gpd.read_file(wilayas_gjson)
wilayas_gdf = wilayas_gdf[['code', 'nom', 'geometry']]
wilayas_gdf = wilayas_gdf.rename(columns={'nom': 'nom_wil', 'code': 'code_wil'})
wilayas_gdf['centroid'] = wilayas_gdf['geometry'].centroid
wilayas_gdf.head()


  wilayas_gdf['centroid'] = wilayas_gdf['geometry'].centroid


Unnamed: 0,code_wil,nom_wil,geometry,centroid
0,1,Adrar,"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7...",POINT (-0.62174 26.01400)
1,2,Chlef,"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0...",POINT (1.25669 36.23826)
2,3,Laghouat,"MULTIPOLYGON (((2.88464 32.88137, 2.87848 32.8...",POINT (2.72955 33.60757)
3,4,Oum El Bouaghi,"MULTIPOLYGON (((7.47320 35.52005, 7.47227 35.5...",POINT (7.06527 35.83646)
4,5,Batna,"MULTIPOLYGON (((6.10102 35.05459, 6.10090 35.0...",POINT (5.87455 35.41969)


In [31]:
wilayas_gdf = wilayas_gdf.set_crs("EPSG:4326")
wilayas_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [32]:
# Load Tagreted dataset and set to_check to False assuming all geocoded coordinates are OK 
pois_path = '../dz-datasets/notaries/'
pois = pd.read_csv(pois_path + 'notaries_to_check.csv')
pois['to_check'] = False
pois.head()

Unnamed: 0,num,name,address,lat,lon,code_wil,to_check
0,1,SAADI ABDELLAH,CITE 180 200 BAT 8 APPART 01 Bab Ezzouar,36.722018,3.185681,16,False
1,2,CHAMBRE NATIONALE DES NOTAIRES,2 RUE EL BAKRI Ben Aknoun Rp,36.764634,3.015649,16,False
2,3,BOUCHALI FOUZIA,12 RUE MOULOUD ZADI ET LE 110 ANGLE DIDOUCHE M...,36.763738,3.048095,16,False
3,4,CHIALI HIND,157 CITE AISSAT IDIR Cheraga Rp,36.757828,2.957272,16,False
4,5,OUBAD NADJET,62 R.N. Bainem,36.812723,2.962592,16,False


In [33]:
# Convert pandas DataFrame to GeoPandas 
pois['geometry'] = pois.apply(lambda row: Point(row.lon, row.lat), axis=1)
pois_gdf  = gpd.GeoDataFrame(pois)
pois_gdf.head()

Unnamed: 0,num,name,address,lat,lon,code_wil,to_check,geometry
0,1,SAADI ABDELLAH,CITE 180 200 BAT 8 APPART 01 Bab Ezzouar,36.722018,3.185681,16,False,POINT (3.18568 36.72202)
1,2,CHAMBRE NATIONALE DES NOTAIRES,2 RUE EL BAKRI Ben Aknoun Rp,36.764634,3.015649,16,False,POINT (3.01565 36.76463)
2,3,BOUCHALI FOUZIA,12 RUE MOULOUD ZADI ET LE 110 ANGLE DIDOUCHE M...,36.763738,3.048095,16,False,POINT (3.04809 36.76374)
3,4,CHIALI HIND,157 CITE AISSAT IDIR Cheraga Rp,36.757828,2.957272,16,False,POINT (2.95727 36.75783)
4,5,OUBAD NADJET,62 R.N. Bainem,36.812723,2.962592,16,False,POINT (2.96259 36.81272)


In [34]:
pois_gdf.count()

num         665
name        665
address     665
lat         665
lon         665
code_wil    665
to_check    665
geometry    665
dtype: int64

In [35]:
# Set the Coordinate reference system
pois_gdf = pois_gdf.set_crs("EPSG:4326")
pois_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [36]:
# Check for rows with no coordiantes
no_coords_gdf = pois_gdf[(pois_gdf['lat'].isnull() | pois_gdf['lat'].isnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
no_coords_gdf = no_coords_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
no_coords_gdf['geometry'] = no_coords_gdf['centroid']
no_coords_gdf['to_check'] = True

with_coords_gdf = no_coords_gdf.drop(columns=['geometry_x'], axis=1)

with_coords_gdf.count() 

num           0
name          0
address       0
lat           0
lon           0
to_check      0
code_wil      0
nom_wil       0
geometry_y    0
centroid      0
geometry      0
dtype: int64

In [37]:
# Update the Targeted DataFrame by adding Wilayas geometries and centroids 
pois_gdf = pois_gdf[(pois_gdf['lat'].notnull() & pois_gdf['lat'].notnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
pois_gdf = pois_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
pois_gdf.rename(columns={"geometry_x": "geometry"}, inplace=True)
pois_gdf.count()

num           665
name          665
address       665
lat           665
lon           665
code_wil      665
to_check      665
geometry      665
nom_wil       665
geometry_y    665
centroid      665
dtype: int64

In [38]:
# Concat the Targeted DataFrame with rows without coordinates and approximated with Wilayas centroids 
pois_gdf = pd.concat([pois_gdf, with_coords_gdf])

# Drop unecessary columns
pois_gdf = pois_gdf.drop(columns=['centroid', 'nom_wil'], axis=1)

pois_gdf[(pois_gdf['lat'].isnull() | pois_gdf['lon'].isnull())]
pois_gdf.count()

num           665
name          665
address       665
lat           665
lon           665
code_wil      665
to_check      665
geometry      665
geometry_y    665
dtype: int64

In [39]:
# Sort the DataFrame by code (Wilaya number) and reset the index
pois_gdf = pois_gdf.sort_values(by=['code_wil'],ignore_index=True)
pois_gdf.reset_index(drop=True)
pois_gdf.head()

Unnamed: 0,num,name,address,lat,lon,code_wil,to_check,geometry,geometry_y
0,58,AROUCHE HOCINE,CITE BEN HAMMADI ALLAL Timimoun,29.261691,0.241596,1,False,POINT (0.24160 29.26169),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
1,55,BENHADJ AHMED,CITE DES 40 LOGTS LOCAL 25 Adrar,28.01744,-0.26425,1,False,POINT (-0.26425 28.01744),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
2,56,BARIK BOUDJEMAA,CITE 103 LOGTS BP 575 Adrar,28.01744,-0.26425,1,False,POINT (-0.26425 28.01744),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
3,17,BOUDJELTIA ABDELKADER,42 RUE DES MARTYRS Chlef,36.157966,1.337282,2,False,POINT (1.33728 36.15797),"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0..."
4,202,BEN DAOUDIA DRISS,1 CITE 136 LOGTS BT I Chlef,36.157966,1.337282,2,False,POINT (1.33728 36.15797),"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0..."


In [40]:
# Check if coordinates are within Wilayas polygons
pois_gdf['coords_in_wil'] = [pois_gdf.geometry[i].within(pois_gdf.geometry_y[i]) for i in range(pois_gdf.geometry.count())] 
pois_gdf[pois_gdf.coords_in_wil == False]

Unnamed: 0,num,name,address,lat,lon,code_wil,to_check,geometry,geometry_y,coords_in_wil
92,189,DRAFLI HOURIA,RUE MOHAMED BOURAS Boufarik,36.663154,3.106977,9,False,POINT (3.10698 36.66315),"MULTIPOLYGON (((2.57442 36.37632, 2.57435 36.3...",False
225,527,BELGHERBI FARIDA,RESIDENCE DES ROSIERS BT A 1 Hydra,36.15831,-95.838298,16,False,POINT (-95.83830 36.15831),"MULTIPOLYGON (((2.95544 36.61091, 2.95366 36.6...",False
318,75,BOURABA MOHAMED,PL 1ER MAI GROUPE 10 BT B 1er Mai,36.048161,-95.886876,16,False,POINT (-95.88688 36.04816),"MULTIPOLYGON (((2.95544 36.61091, 2.95366 36.6...",False
321,172,ABIDELAH MESSAOUD,2EME GROUPE BT I 1er Mai,36.048161,-95.886876,16,False,POINT (-95.88688 36.04816),"MULTIPOLYGON (((2.95544 36.61091, 2.95366 36.6...",False
322,436,BEN KABOU MOHAMED FERIH,02 RUE KLAYLYA MOULAY BP 1009 Mohammadia,35.59191,0.06542,16,False,POINT (0.06542 35.59191),"MULTIPOLYGON (((2.95544 36.61091, 2.95366 36.6...",False
453,462,FANTAZI HAMID,CITE EL DAKSI ABDELSALEM BT KQ BLOC 1 LOCAL 5 ...,41.841141,-85.669719,25,False,POINT (-85.66972 41.84114),"MULTIPOLYGON (((6.53679 36.17123, 6.53731 36.1...",False
454,450,FILALI ABDERRAZAK,2 CITE DES MAGISTRATS Boussouf (cite),44.836643,-0.579026,25,False,POINT (-0.57903 44.83664),"MULTIPOLYGON (((6.53679 36.17123, 6.53731 36.1...",False
455,473,KOUADRI YOUSEF,148 RUE ALI GHACHI Didouche Mourad,48.837079,2.365043,25,False,POINT (2.36504 48.83708),"MULTIPOLYGON (((6.53679 36.17123, 6.53731 36.1...",False
457,208,DJAAFAR SAAD,2 RUE ABED EL KRIM BOUBERTAKH Constantine,28.033886,1.659626,25,False,POINT (1.65963 28.03389),"MULTIPOLYGON (((6.53679 36.17123, 6.53731 36.1...",False
517,356,BEN HAOUA ABED EL DJELIL,8 RUE MHOUR IDRISS Mascara,35.083432,-89.807304,29,False,POINT (-89.80730 35.08343),"MULTIPOLYGON (((0.15620 35.07143, 0.15508 35.0...",False


In [42]:
# Update to_check flag to take into account points outside Wilays and without geocoding coordinates
final_gdf = pois_gdf
final_gdf['to_check'] = (pois_gdf['to_check'] == True) | (pois_gdf['coords_in_wil'] == False)   

# Remove unecessary columns
final_gdf = final_gdf.drop(['geometry_y', 'coords_in_wil'], axis=1)

final_gdf[final_gdf.to_check == True]

Unnamed: 0,num,name,address,lat,lon,code_wil,to_check,geometry
92,189,DRAFLI HOURIA,RUE MOHAMED BOURAS Boufarik,36.663154,3.106977,9,True,POINT (3.10698 36.66315)
225,527,BELGHERBI FARIDA,RESIDENCE DES ROSIERS BT A 1 Hydra,36.15831,-95.838298,16,True,POINT (-95.83830 36.15831)
318,75,BOURABA MOHAMED,PL 1ER MAI GROUPE 10 BT B 1er Mai,36.048161,-95.886876,16,True,POINT (-95.88688 36.04816)
321,172,ABIDELAH MESSAOUD,2EME GROUPE BT I 1er Mai,36.048161,-95.886876,16,True,POINT (-95.88688 36.04816)
322,436,BEN KABOU MOHAMED FERIH,02 RUE KLAYLYA MOULAY BP 1009 Mohammadia,35.59191,0.06542,16,True,POINT (0.06542 35.59191)
453,462,FANTAZI HAMID,CITE EL DAKSI ABDELSALEM BT KQ BLOC 1 LOCAL 5 ...,41.841141,-85.669719,25,True,POINT (-85.66972 41.84114)
454,450,FILALI ABDERRAZAK,2 CITE DES MAGISTRATS Boussouf (cite),44.836643,-0.579026,25,True,POINT (-0.57903 44.83664)
455,473,KOUADRI YOUSEF,148 RUE ALI GHACHI Didouche Mourad,48.837079,2.365043,25,True,POINT (2.36504 48.83708)
457,208,DJAAFAR SAAD,2 RUE ABED EL KRIM BOUBERTAKH Constantine,28.033886,1.659626,25,True,POINT (1.65963 28.03389)
517,356,BEN HAOUA ABED EL DJELIL,8 RUE MHOUR IDRISS Mascara,35.083432,-89.807304,29,True,POINT (-89.80730 35.08343)


In [43]:
final_gdf.count()

num         665
name        665
address     665
lat         665
lon         665
code_wil    665
to_check    665
geometry    665
dtype: int64

In [44]:
# Write to a GeoJSON file

gpd.GeoDataFrame(final_gdf).to_file(pois_path + "pois_checked_in_out_wilayas.geojson", driver='GeoJSON')