In [75]:
import pandas as pd 
import geopandas as gpd 
from shapely.geometry import Point
import numpy as np 
import jellyfish

In [76]:
# Load the commune features from the GeoJSON file into a GeoDataFrame
# (columns seen in the preview: CODE, COMMUNE, NATURE, CODE_W, geometry).
communes_json_gdf = gpd.read_file('communes.geojson')
communes_json_gdf.head()

Unnamed: 0,CODE,COMMUNE,NATURE,CODE_W,geometry
0,2132,KANOUA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47359 37.0..."
1,2131,CHERAIA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47296 37.0..."
2,2138,HAMADI KROUMA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((7.28831 37.07547, 7.28840 37.0..."
3,2310,CHETAIBI,COMMUNE-COTIERE,23,"MULTIPOLYGON (((7.28831 37.07547, 7.28824 37.0..."
4,2113,OULED ATTIA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.30451 37.03805, 6.30491 37.0..."


In [77]:
# Load the communes attribute table from the CSV file
# (columns seen in the preview: num, code, nom, code_wil, nom_wil).
communes_csv_df = pd.read_csv('communes.csv')
communes_csv_df.head()

Unnamed: 0,num,code,nom,code_wil,nom_wil
0,1,1001,Adrar,1,Adrar
1,2,1002,Tamest,1,Adrar
2,3,1003,Charouine,1,Adrar
3,4,1004,Reggane,1,Adrar
4,5,1005,Inzegmir,1,Adrar


In [78]:
def _to_code_5(code):
    """Return `code` with a '0' spliced in after its leading prefix digits.

    The prefix is the first digit when `code` is below 1000 and the first two
    digits otherwise, e.g. 101 -> 1001 and 2132 -> 21032. The result uses the
    same layout as the `code` column of the CSV table (1001, 1002, ...).
    """
    digits = str(code)
    prefix_len = 1 if code < 1000 else 2
    return int(digits[:prefix_len] + '0' + digits[prefix_len:])


# Derive the join key that lines up with the CSV `code` column.
communes_json_gdf['code_5'] = [_to_code_5(code) for code in communes_json_gdf.CODE]
communes_json_gdf.head()

Unnamed: 0,CODE,COMMUNE,NATURE,CODE_W,geometry,code_5
0,2132,KANOUA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47359 37.0...",21032
1,2131,CHERAIA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47296 37.0...",21031
2,2138,HAMADI KROUMA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((7.28831 37.07547, 7.28840 37.0...",21038
3,2310,CHETAIBI,COMMUNE-COTIERE,23,"MULTIPOLYGON (((7.28831 37.07547, 7.28824 37.0...",23010
4,2113,OULED ATTIA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.30451 37.03805, 6.30491 37.0...",21013


In [79]:
# Merge Communes CSV and GeoJSON 
merged_gdf = communes_json_gdf.merge(communes_csv_df, left_on='code_5', right_on='code', how='left')
merged_gdf.count()

CODE        1541
COMMUNE     1541
NATURE      1541
CODE_W      1541
geometry    1541
code_5      1541
num         1541
code        1541
nom         1541
code_wil    1541
nom_wil     1541
dtype: int64

In [80]:
# Preview the merged frame to check that each feature picked up its CSV attributes.
merged_gdf.head()

Unnamed: 0,CODE,COMMUNE,NATURE,CODE_W,geometry,code_5,num,code,nom,code_wil,nom_wil
0,2132,KANOUA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47359 37.0...",21032,782,21032,Kanoua,21,Skikda
1,2131,CHERAIA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47296 37.0...",21031,781,21031,Cheraia,21,Skikda
2,2138,HAMADI KROUMA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((7.28831 37.07547, 7.28840 37.0...",21038,787,21038,Hamadi Krouma,21,Skikda
3,2310,CHETAIBI,COMMUNE-COTIERE,23,"MULTIPOLYGON (((7.28831 37.07547, 7.28824 37.0...",23010,850,23010,Chetaibi,23,Annaba
4,2113,OULED ATTIA,COMMUNE-COTIERE,21,"MULTIPOLYGON (((6.30451 37.03805, 6.30491 37.0...",21013,763,21013,Ouled Attia,21,Skikda


In [81]:
def _name_distance(row):
    """Case-insensitive Levenshtein distance between the CSV and GeoJSON names of a row."""
    csv_name = str(row.nom).lower()
    geojson_name = str(row.COMMUNE).lower()
    return jellyfish.levenshtein_distance(csv_name, geojson_name)


# Sanity-check the join by comparing the two name columns of each row.
# Note: despite its name, `similarity` holds an edit distance (0 = identical).
merged_gdf['similarity'] = merged_gdf.apply(_name_distance, axis=1)

# Show rows whose names differ by more than two edits (possible mismatches).
merged_gdf.loc[merged_gdf['similarity'] > 2]

Unnamed: 0,CODE,COMMUNE,NATURE,CODE_W,geometry,code_5,num,code,nom,code_wil,nom_wil,similarity


In [89]:
# Keep only the columns to publish, in their final order, then rename the two
# GeoJSON columns. Selecting *before* renaming matters: the merged frame also
# carries the CSV `code` column, so renaming `CODE` -> `code` first would
# produce two columns called `code`. Everything not listed here (CODE_W, num,
# CSV code, nom_wil, NATURE, similarity) is left behind by the selection, so no
# separate drop is needed and `merged_gdf` itself is never mutated.
communes_gdf = merged_gdf[
    ['CODE', 'code_5', 'nom', 'COMMUNE', 'code_wil', 'geometry']
].rename(columns={'CODE': 'code', 'COMMUNE': 'nom_maj'})
communes_gdf.count()

Unnamed: 0,code,code_5,nom,nom_maj,code_wil,geometry
0,2132,21032,Kanoua,KANOUA,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47359 37.0..."
1,2131,21031,Cheraia,CHERAIA,21,"MULTIPOLYGON (((6.47321 37.08773, 6.47296 37.0..."
2,2138,21038,Hamadi Krouma,HAMADI KROUMA,21,"MULTIPOLYGON (((7.28831 37.07547, 7.28840 37.0..."
3,2310,23010,Chetaibi,CHETAIBI,23,"MULTIPOLYGON (((7.28831 37.07547, 7.28824 37.0..."
4,2113,21013,Ouled Attia,OULED ATTIA,21,"MULTIPOLYGON (((6.30451 37.03805, 6.30491 37.0..."


In [94]:
# Sort by commune code. `ignore_index=True` already relabels the rows 0..n-1,
# so no separate reset_index call is needed (the previous one discarded its
# result and therefore had no effect).
communes_gdf = communes_gdf.sort_values(by=['code'], ignore_index=True)
communes_gdf.head()

Unnamed: 0,code,code_5,nom,nom_maj,code_wil,geometry
0,101,1001,Adrar,ADRAR,1,"MULTIPOLYGON (((-0.22565 28.08656, -0.04751 28..."
1,102,1002,Tamest,TAMEST,1,"MULTIPOLYGON (((-2.98197 27.50450, -2.49629 27..."
2,103,1003,Charouine,GHAROUINE,1,"MULTIPOLYGON (((-0.16269 28.96347, -0.21556 28..."
3,104,1004,Reggane,REGGANE,1,"MULTIPOLYGON (((0.51798 26.92294, 0.90476 26.2..."
4,105,1005,Inzegmir,INZEGMIR,1,"MULTIPOLYGON (((0.47629 27.06526, 0.28392 27.0..."


In [97]:
# Export the result twice: the full frame as GeoJSON and the attribute columns as CSV.
# Rows are renumbered from 1 first; `to_csv` writes the index by default, so this
# numbering becomes the first (unnamed) column of the CSV file.
row_count = len(communes_gdf)
communes_gdf.index = np.arange(1, row_count + 1)
communes_gdf.to_file("communes_48.geojson", driver='GeoJSON')

# Attribute-only copy (geometry column left out) for the CSV export.
attribute_columns = ['code', 'code_5', 'nom', 'nom_maj', 'code_wil']
communes_df = pd.DataFrame(communes_gdf[attribute_columns])
communes_df.to_csv("communes_48.csv")