In [7]:
import json
import pandas as pd
import re

# Read the per-territory population table and build a lookup keyed by the
# whitespace-stripped territory name. If a name occurs more than once, the
# last row wins.
pop_df = pd.read_csv("/home/ouyassine/Documents/projects/RMA_SIG_app/data_ingestion/raw_data/population_per_province.csv")
territory_names = pop_df["Collectivités territoriales"].str.strip()
population_map = {
    territory: inhabitants
    for territory, inhabitants in zip(territory_names, pop_df["Population"])
}

# Unicode blocks removed by strip_arabic: Arabic, Arabic Supplement,
# Arabic Extended-A, and Arabic Presentation Forms A and B.
# Compiled once so the pattern is not re-parsed on every call.
_ARABIC_CHARS = re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]+')


def strip_arabic(text):
    """Return ``text`` with every Arabic-script character removed.

    Whitespace left behind by the removal is normalised: leading and
    trailing whitespace is dropped and internal runs collapse to a single
    space, so an Arabic word between two Latin words does not leave a
    double space that would break exact-name lookups.

    Args:
        text: The string to clean, or ``None``.

    Returns:
        The cleaned string; ``""`` when ``text`` is ``None`` or contains
        nothing but Arabic characters and whitespace.
    """
    if text is None:
        return ""
    without_arabic = _ARABIC_CHARS.sub('', text)
    # split() with no argument splits on any whitespace run and discards
    # empty pieces, which both trims the ends and collapses inner gaps.
    return ' '.join(without_arabic.split())

# Parse the province boundaries file; geojson_data holds the decoded JSON object.
with open("/home/ouyassine/Documents/projects/RMA_SIG_app/RMA_SIG/frontend/static/geojson/provinces.geojson", encoding="utf-8") as geojson_file:
    geojson_data = json.load(geojson_file)

# Print the French part of every feature name, followed by every CSV key,
# so the two naming schemes can be compared by eye.
print("GeoJSON French names:")
for feature in geojson_data["features"]:
    properties = feature["properties"]
    french_name = strip_arabic(properties.get("shapeName", "")).strip()
    print(f"- '{french_name}'")

print("\nPopulation CSV keys:")
for key in population_map:
    print(f"- '{key}'")




GeoJSON French names:
- 'Province de Khémisset'
- 'Province de Rhamna'
- 'Province de Sidi Slimane'
- 'Province de Kénitra'
- 'Province de Taroudant'
- 'Province de Sidi Ifni'
- 'Province de Tata'
- 'Province d'Errachidia'
- 'Province de Boujdour'
- 'Province de Safi'
- 'Province de Youssoufia'
- 'Province de Moulay Yacoub'
- 'Province de Nador'
- 'Province de Chefchaouen'
- 'Préfecture de Skhirate-Témara'
- 'Province de Settat'
- 'Province de Chtouka-Ait Baha'
- 'Préfecture de M'diq-Fnideq'
- 'Province de Taza'
- 'Province d'Ouezzane'
- 'Province de Fahs-Anjra'
- 'Province d'Al Hoceima'
- 'Province de Benslimane'
- 'Province de Tarfaya'
- 'Province de Figuig'
- 'Province de Sidi Kacem'
- 'Province de Médiouna'
- 'Préfecture d'Oujda-Angad'
- 'Province de Khouribga'
- 'Province de Tan-Tan'
- 'Province de Tiznit'
- 'Province de Chichaoua'
- 'Province de Sidi Bennour'
- 'Province de Guelmim'
- 'Préfecture d'Agadir Ida-Outanane'
- 'Province d'Al Haouz'
- 'Préfecture d'Inezgane-Ait Melloul'

In [8]:
# Reduce every feature's shapeName to its French part and attach a
# "population" property looked up in population_map.
unmatched_names = []
for feature in geojson_data["features"]:
    properties = feature["properties"]

    # `or ""` also covers a shapeName that is present but null, which the
    # default argument of .get() alone would not.
    french_name = strip_arabic(properties.get("shapeName") or "").strip()

    # Keep only the French name in the output file
    properties["shapeName"] = french_name

    # 1) Exact match on the CSV key
    population = population_map.get(french_name)

    # 2) Fallback: a CSV key that appears inside the feature name.
    #    Non-string keys (e.g. NaN from an empty CSV cell) and empty keys are
    #    skipped: the former would raise on `in`, the latter would match
    #    everything. When several keys match, the longest one is the most
    #    specific, so it wins regardless of dict order.
    if population is None:
        candidates = [
            key for key in population_map
            if isinstance(key, str) and key and key in french_name
        ]
        if candidates:
            population = population_map[max(candidates, key=len)]

    # A missing value is None (no key matched) or NaN (empty CSV cell).
    # Testing for those explicitly keeps a genuine 0 and avoids int(NaN),
    # which raises ValueError.
    if population is None or pd.isna(population):
        properties["population"] = None
        unmatched_names.append(french_name)
    else:
        properties["population"] = int(population)

# Surface features that ended up without a population instead of silently
# writing null for them.
if unmatched_names:
    print(f"No population found for {len(unmatched_names)} feature(s):")
    for name in unmatched_names:
        print(f"- '{name}'")

# Save updated GeoJSON (ensure_ascii=False keeps accented characters readable)
with open("provinces_with_population.geojson", "w", encoding="utf-8") as f:
    json.dump(geojson_data, f, ensure_ascii=False, indent=2)
