In [34]:
import pandas as pd 
import geopandas as gpd 
from shapely.geometry import Point
import numpy as np 

In [35]:
# Load Wilayas Features and calculate polygon centroids
wilayas_gjson = '../dz-admin/wilayas_48.geojson'
wilayas_gdf = gpd.read_file(wilayas_gjson)
wilayas_gdf = wilayas_gdf[['code', 'nom', 'geometry']]
wilayas_gdf = wilayas_gdf.rename(columns={'nom': 'nom_wil', 'code': 'code_wil'})
wilayas_gdf['centroid'] = wilayas_gdf['geometry'].centroid
wilayas_gdf.head()


  wilayas_gdf['centroid'] = wilayas_gdf['geometry'].centroid


Unnamed: 0,code_wil,nom_wil,geometry,centroid
0,1,Adrar,"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7...",POINT (-0.62174 26.01400)
1,2,Chlef,"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0...",POINT (1.25669 36.23826)
2,3,Laghouat,"MULTIPOLYGON (((2.88464 32.88137, 2.87848 32.8...",POINT (2.72955 33.60757)
3,4,Oum El Bouaghi,"MULTIPOLYGON (((7.47320 35.52005, 7.47227 35.5...",POINT (7.06527 35.83646)
4,5,Batna,"MULTIPOLYGON (((6.10102 35.05459, 6.10090 35.0...",POINT (5.87455 35.41969)


In [36]:
wilayas_gdf = wilayas_gdf.set_crs("EPSG:4326")
wilayas_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [37]:
# Load Tagreted dataset and set to_check to False assuming all geocoded coordinates are OK 
pois_path = '../dz-datasets/agriculture/'
pois = pd.read_csv(pois_path + 'structures_mard.csv')
pois['to_check'] = False
pois.head()

Unnamed: 0,num,name,address,tel,fax,website,lon,lat,acronym,code_wil,to_check
0,0,"Ministère de l’Agriculture, du Développement R...","12 Boulevard Colonel Amirouche, Alger",023 50 32 38,023 50 31 17,madrp.gov.dz,3.058677,36.770581,MADR,16,False
1,1,Direction Générale des Forêts,"Chemin Doudou Mokhtar, BP N° 232, Ben Aknoun, ...",023 78 58 96,023 78 54 56,madrp.gov.dz/dgf,3.012842,36.75064,DGF,16,False
2,2,Direction des Services Agricole ADRAR,RUE TALEB MOHAMED,049 36 48 28,049364811,,-0.28185,27.87745,DSA ADRAR,1,False
3,3,Direction des Services Agricole Chlef,CITE BELHAMRI DJILALI CHLEF,027 77 59 00,027 79 05 75,,1.329005,36.161627,DSA Chlef,2,False
4,4,Direction des Services Agricole Laghouat,MHAFIR,029 14 54 12,029 14 64 84,,2.854042,33.79567,DSA Laghouat,3,False


In [38]:
# Convert pandas DataFrame to GeoPandas 
pois['geometry'] = pois.apply(lambda row: Point(row.lon, row.lat), axis=1)
pois_gdf  = gpd.GeoDataFrame(pois)
pois_gdf.head()

Unnamed: 0,num,name,address,tel,fax,website,lon,lat,acronym,code_wil,to_check,geometry
0,0,"Ministère de l’Agriculture, du Développement R...","12 Boulevard Colonel Amirouche, Alger",023 50 32 38,023 50 31 17,madrp.gov.dz,3.058677,36.770581,MADR,16,False,POINT (3.05868 36.77058)
1,1,Direction Générale des Forêts,"Chemin Doudou Mokhtar, BP N° 232, Ben Aknoun, ...",023 78 58 96,023 78 54 56,madrp.gov.dz/dgf,3.012842,36.75064,DGF,16,False,POINT (3.01284 36.75064)
2,2,Direction des Services Agricole ADRAR,RUE TALEB MOHAMED,049 36 48 28,049364811,,-0.28185,27.87745,DSA ADRAR,1,False,POINT (-0.28185 27.87745)
3,3,Direction des Services Agricole Chlef,CITE BELHAMRI DJILALI CHLEF,027 77 59 00,027 79 05 75,,1.329005,36.161627,DSA Chlef,2,False,POINT (1.32900 36.16163)
4,4,Direction des Services Agricole Laghouat,MHAFIR,029 14 54 12,029 14 64 84,,2.854042,33.79567,DSA Laghouat,3,False,POINT (2.85404 33.79567)


In [39]:
pois_gdf.count()

num         207
name        207
address     189
tel         196
fax         181
website      21
lon         207
lat         207
acronym     207
code_wil    207
to_check    207
geometry    207
dtype: int64

In [40]:
# Set the Coordinate reference system
pois_gdf = pois_gdf.set_crs("EPSG:4326")
pois_gdf.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [41]:
# Check for rows with no coordiantes
no_coords_gdf = pois_gdf[(pois_gdf['lat'].isnull() | pois_gdf['lat'].isnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
no_coords_gdf = no_coords_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
no_coords_gdf['geometry'] = no_coords_gdf['centroid']
no_coords_gdf['to_check'] = True

with_coords_gdf = no_coords_gdf.drop(columns=['geometry_x'], axis=1)

with_coords_gdf.count() 

num           0
name          0
address       0
tel           0
fax           0
website       0
lon           0
lat           0
acronym       0
to_check      0
code_wil      0
nom_wil       0
geometry_y    0
centroid      0
geometry      0
dtype: int64

In [42]:
# Update the Targeted DataFrame by adding Wilayas geometries and centroids 
pois_gdf = pois_gdf[(pois_gdf['lat'].notnull() & pois_gdf['lat'].notnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
pois_gdf = pois_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
pois_gdf.rename(columns={"geometry_x": "geometry"}, inplace=True)
pois_gdf.count()

num           207
name          207
address       189
tel           196
fax           181
website        21
lon           207
lat           207
acronym       207
code_wil      207
to_check      207
geometry      207
nom_wil       207
geometry_y    207
centroid      207
dtype: int64

In [43]:
# Concat the Targeted DataFrame with rows without coordinates and approximated with Wilayas centroids 
pois_gdf = pd.concat([pois_gdf, with_coords_gdf])

# Drop unecessary columns
pois_gdf = pois_gdf.drop(columns=['centroid', 'nom_wil'], axis=1)

pois_gdf[(pois_gdf['lat'].isnull() | pois_gdf['lon'].isnull())]
pois_gdf.count()

num           207
name          207
address       189
tel           196
fax           181
website        21
lon           207
lat           207
acronym       207
code_wil      207
to_check      207
geometry      207
geometry_y    207
dtype: int64

In [44]:
# Sort the DataFrame by code (Wilaya number) and reset the index
pois_gdf = pois_gdf.sort_values(by=['code_wil'],ignore_index=True)
pois_gdf.reset_index(drop=True)
pois_gdf.head()

Unnamed: 0,num,name,address,tel,fax,website,lon,lat,acronym,code_wil,to_check,geometry,geometry_y
0,136,Commissariat au Développement de l'Agriculture...,,,,,0.276064,27.873386,CDARS Antenne Adrar,1,False,POINT (0.27606 27.87339),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
1,2,Direction des Services Agricole ADRAR,RUE TALEB MOHAMED,049 36 48 28,049364811,,-0.28185,27.87745,DSA ADRAR,1,False,POINT (-0.28185 27.87745),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
2,115,Institut deTechnologie Moyen Agricole Spéciali...,,049 90 49 50/049 90-49-51,,,0.236256,29.253941,ITMAS Adrar,1,False,POINT (0.23626 29.25394),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
3,50,Conservation des Forêts ADRAR,CITE OULED OUNGAL A ADRAR (ANCIEN SIEGE DE LA ...,049 36 12 32,049 36 12 34,,-0.287901,27.899426,CF ADRAR,1,False,POINT (-0.28790 27.89943),"MULTIPOLYGON (((1.33461 20.72282, 1.32667 20.7..."
4,3,Direction des Services Agricole Chlef,CITE BELHAMRI DJILALI CHLEF,027 77 59 00,027 79 05 75,,1.329005,36.161627,DSA Chlef,2,False,POINT (1.32900 36.16163),"MULTIPOLYGON (((1.55326 36.02990, 1.55325 36.0..."


In [45]:
# Check if coordinates are within Wilayas polygons
pois_gdf['coords_in_wil'] = [pois_gdf.geometry[i].within(pois_gdf.geometry_y[i]) for i in range(pois_gdf.geometry.count())] 
pois_gdf[pois_gdf.coords_in_wil == False]

Unnamed: 0,num,name,address,tel,fax,website,lon,lat,acronym,code_wil,to_check,geometry,geometry_y,coords_in_wil
36,11,Direction des Services Agricole Bouira,RUE BOUKAROU MAKHLOUF,026 73 02 10,026 73 01 77,,3.151952,36.564227,DSA Bouira,10,False,POINT (3.15195 36.56423),"MULTIPOLYGON (((3.77427 35.90221, 3.77327 35.9...",False
70,138,OFFICE NATIONAL INTERPROFESSIONNEL DU LAIT ET ...,"Zone industrielle, route nationale N° 01 Boufa...",025 28 37 87/ 025 28 37 88,021 28 36 32,www.onab.dz,2.915039,36.58804,ONIL,16,False,POINT (2.91504 36.58804),"MULTIPOLYGON (((2.95544 36.61091, 2.95366 36.6...",False
148,113,Centre de Formation et de Vulgarisation Agrico...,sidi mahdi bp 04 30001 nezla,029 69 31 74/(029) 69 32 79,,,0.236256,29.253941,CFVA Touggourt,30,False,POINT (0.23626 29.25394),"MULTIPOLYGON (((4.46283 32.21564, 4.46327 32.2...",False
172,38,Direction des Services Agricole Tindouf,RUE DE LA REVOLUTION AGRAIRE - CITE KSABI - T...,049 92 20 29,049 37 67 56,,-0.973435,29.096644,DSA Tindouf,37,False,POINT (-0.97344 29.09664),"MULTIPOLYGON (((-8.30703 28.92607, -8.29929 28...",False
188,156,GGR,,,,,2.866187,36.75586,EHEV,42,False,POINT (2.86619 36.75586),"MULTIPOLYGON (((1.65518 36.44814, 1.65519 36.4...",False


In [46]:
# Update to_check flag to take into account points outside Wilays and without geocoding coordinates
final_gdf = pois_gdf
final_gdf['to_check'] = (pois_gdf['to_check'] == True) | (pois_gdf['coords_in_wil'] == False)   

# Remove unecessary columns
final_gdf = final_gdf.drop(['geometry_y', 'coords_in_wil'], axis=1)

final_gdf[final_gdf.to_check == True]

Unnamed: 0,num,name,address,tel,fax,website,lon,lat,acronym,code_wil,to_check,geometry
36,11,Direction des Services Agricole Bouira,RUE BOUKAROU MAKHLOUF,026 73 02 10,026 73 01 77,,3.151952,36.564227,DSA Bouira,10,True,POINT (3.15195 36.56423)
70,138,OFFICE NATIONAL INTERPROFESSIONNEL DU LAIT ET ...,"Zone industrielle, route nationale N° 01 Boufa...",025 28 37 87/ 025 28 37 88,021 28 36 32,www.onab.dz,2.915039,36.58804,ONIL,16,True,POINT (2.91504 36.58804)
148,113,Centre de Formation et de Vulgarisation Agrico...,sidi mahdi bp 04 30001 nezla,029 69 31 74/(029) 69 32 79,,,0.236256,29.253941,CFVA Touggourt,30,True,POINT (0.23626 29.25394)
172,38,Direction des Services Agricole Tindouf,RUE DE LA REVOLUTION AGRAIRE - CITE KSABI - T...,049 92 20 29,049 37 67 56,,-0.973435,29.096644,DSA Tindouf,37,True,POINT (-0.97344 29.09664)
188,156,GGR,,,,,2.866187,36.75586,EHEV,42,True,POINT (2.86619 36.75586)


In [47]:
final_gdf.count()

num         207
name        207
address     189
tel         196
fax         181
website      21
lon         207
lat         207
acronym     207
code_wil    207
to_check    207
geometry    207
dtype: int64

In [48]:
# Write to a GeoJSON file

gpd.GeoDataFrame(final_gdf).to_file(pois_path + "madr_struct_checked_in_out_wilayas.geojson", driver='GeoJSON')