In [None]:
import pandas as pd 
import geopandas as gpd 
from shapely.geometry import Point
import numpy as np 

In [None]:
# Read the Wilayas polygons, keep only the code, name and geometry columns
# (code and name get a `_wil` suffix), then attach each polygon's centroid.
wilayas_gjson = '../dz-admin/wilayas_48.geojson'
wilayas_gdf = (
    gpd.read_file(wilayas_gjson)[['code', 'nom', 'geometry']]
    .rename(columns={'code': 'code_wil', 'nom': 'nom_wil'})
)
wilayas_gdf['centroid'] = wilayas_gdf['geometry'].centroid
wilayas_gdf.head()

In [None]:
# Assign EPSG:4326 as the CRS of the Wilayas GeoDataFrame, then display it
wilayas_gdf = wilayas_gdf.set_crs("EPSG:4326")
wilayas_gdf.crs

In [None]:
# Read the targeted dataset (geocoded notaries). Every row starts with
# to_check=False, i.e. its geocoded coordinates are assumed to be OK.
pois_path = '../dz-datasets/notaries/'
pois = pd.read_csv(f'{pois_path}notaries_geocoded.csv').assign(to_check=False)
pois.head()

In [None]:
# Build one shapely Point(lon, lat) per row, store it in a `geometry` column,
# then wrap the pandas DataFrame in a GeoDataFrame.
pois['geometry'] = [Point(lon, lat) for lon, lat in zip(pois['lon'], pois['lat'])]
pois_gdf = gpd.GeoDataFrame(pois)
pois_gdf.head()

In [None]:
# Per-column count of non-null values in the points GeoDataFrame
pois_gdf.count()

In [None]:
# Set the coordinate reference system of the points to EPSG:4326 and display it
pois_gdf = pois_gdf.set_crs("EPSG:4326")
pois_gdf.crs

In [None]:
# Check for rows with no coordiantes
no_coords_gdf = pois_gdf[(pois_gdf['lat'].isnull() | pois_gdf['lat'].isnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
no_coords_gdf = no_coords_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
no_coords_gdf['geometry'] = no_coords_gdf['centroid']
no_coords_gdf['to_check'] = True

with_coords_gdf = no_coords_gdf.drop(columns=['geometry_x'], axis=1)

with_coords_gdf.count() 

In [None]:
# Update the Targeted DataFrame by adding Wilayas geometries and centroids 
pois_gdf = pois_gdf[(pois_gdf['lat'].notnull() & pois_gdf['lat'].notnull())]

# Merge with Wialyas DataFrame and set geometry to centroids
# The merge may be done either with Wilayas code or wilayas names   
pois_gdf = pois_gdf.merge(wilayas_gdf, left_on='code_wil', right_on='code_wil', how='left')
pois_gdf.rename(columns={"geometry_x": "geometry"}, inplace=True)
pois_gdf.count()

In [None]:
# Stack the targeted DataFrame on top of the rows that had no coordinates and
# were approximated with Wilayas centroids, then drop the unnecessary columns.
pois_gdf = pd.concat([pois_gdf, with_coords_gdf]).drop(columns=['centroid', 'nom_wil'])

# Rows lacking a latitude or a longitude.
# NOTE: this is not the last expression of the cell, so its result is
# evaluated and then discarded (nothing is displayed for it).
pois_gdf[pois_gdf[['lat', 'lon']].isnull().any(axis=1)]
pois_gdf.count()

In [None]:
# Sort the DataFrame by code (Wilaya number). ignore_index=True already
# relabels the rows 0..n-1, so no separate reset_index call is needed.
pois_gdf = pois_gdf.sort_values(by=['code_wil'], ignore_index=True)
pois_gdf.head()

In [None]:
# Check whether each point lies within the polygon of its own Wilaya.
# geometry_y is the Wilaya polygon brought in by the merge; wrapping it in a
# GeoSeries lets `within` run row by row over the whole column instead of a
# Python loop sized with count(), which only counts non-null geometries and
# therefore breaks as soon as one geometry is missing.
# Rows with a missing point or polygon come out as False (see the geopandas
# notes on missing geometries), so they get flagged for review downstream.
wilaya_polygons = gpd.GeoSeries(pois_gdf['geometry_y'])
pois_gdf['coords_in_wil'] = pois_gdf.geometry.within(wilaya_polygons)

# Points that fall outside their Wilaya
pois_gdf[~pois_gdf['coords_in_wil']]

In [None]:
# Update the to_check flag so that it covers both the rows without geocoded
# coordinates (to_check already True) and the points outside their Wilaya.
# assign() returns a new frame, so pois_gdf is left untouched; a plain
# `final_gdf = pois_gdf` would only alias it and rewrite pois_gdf['to_check'] too.
# The explicit == True / == False comparisons always produce plain booleans.
final_gdf = pois_gdf.assign(
    to_check=(pois_gdf['to_check'] == True) | (pois_gdf['coords_in_wil'] == False)
)

# Remove the helper columns that are no longer necessary
final_gdf = final_gdf.drop(columns=['geometry_y', 'coords_in_wil'])

# Rows that need a manual check
final_gdf[final_gdf['to_check']]

In [None]:
# Per-column count of non-null values in the final GeoDataFrame
final_gdf.count()

In [None]:
# Export the checked points to a GeoJSON file in the dataset directory
output_path = pois_path + "pois_checked_in_out_wilayas.geojson"
gpd.GeoDataFrame(final_gdf).to_file(output_path, driver='GeoJSON')