# Map of Africa

## Install the missing libraries

In [None]:
!pip install folium geopandas pandas openpyxl



## Import necessary libraries




In [None]:
# Import all libraries
import html
import os
import urllib.parse
import zipfile

import folium
import geopandas as gpd
import pandas as pd
import requests
from folium.plugins import FastMarkerCluster
from folium.plugins import MarkerCluster
from geopy.geocoders import ArcGIS # Import the necessary libraries for geocoding using ArcGIS and for data manipulation with pandas.

## Dataset

In [None]:
# https://docs.google.com/spreadsheets/d/1Hv5DbsglCMoCqKgdaCxJebCLHDUqN4wt/edit?usp=drive_link&ouid=102628204681332860378&rtpof=true&sd=true

# Workbook identifier and the tab that holds the per-country lists
spreadsheet_id = "1Hv5DbsglCMoCqKgdaCxJebCLHDUqN4wt"
sheet_name = "Regions"

# Percent-encode the tab name so it can be placed in a query string
encoded_sheet_name = urllib.parse.quote(sheet_name)

# Assemble the gviz endpoint that serves the tab as CSV
url = (
    "https://docs.google.com/spreadsheets/d/"
    + spreadsheet_id
    + "/gviz/tq?tqx=out:csv&sheet="
    + encoded_sheet_name
)

# Load the CSV straight from the URL into a DataFrame
countries_df = pd.read_csv(url)

# Preview the first five rows
print(countries_df.head(5))

                                       Ghana       Cameroon      Mali   \
0  Central (Yaoundé, Elig-Mfomo, Biyem-Assi)        Yaounde      KAYES   
1            Littoral (Douala, Bota-Limbe),          Melong  KOULIKORO   
2              Adamawa (Ngaoundal, Djohong),   Extrême-Nord    SIKASSO   
3                   Far North (Bogo, Mokolo)  Extrême nord       SEGOU   
4                           East (Messamena)       Yaoundé      BAMAKO   

                     Cote d'Iviore  
0                       Bongouanou  
1  District Sanitaire d'Abengourou  
2                        d'Abidjan  
3                         tengrela  
4                      Jacqueville  


In [None]:
# (rows, columns) of the frame loaded from the sheet
countries_df.shape

(16, 4)

In [None]:
# Count the missing cells in each column
countries_df.isna().sum()

Unnamed: 0,0
Ghana,6
Cameroon,0
Mali,6
Cote d'Iviore,1


### Capitalise the countries with their cities.

In [None]:
# Normalise the upper-case Mali entries so only the first letter is capitalised
# ("KAYES" -> "Kayes"). The column header in the sheet has a trailing space.
#
# The .str accessor leaves missing cells as NaN. Casting with astype(str) first
# would turn each NaN into the text "nan" (then "Nan"), which passes the later
# pd.notna() checks and ends up being treated as a town called "Nan".
countries_df['Mali '] = countries_df['Mali '].str.capitalize()

# Display the modified DataFrame
print("\nModified DataFrame with capitalized words in 'Mali' column:")
countries_df.head()


Modified DataFrame with capitalized words in 'Mali' column:


Unnamed: 0,Ghana,Cameroon,Mali,Cote d'Iviore
0,"Central (Yaoundé, Elig-Mfomo, Biyem-Assi)",Yaounde,Kayes,Bongouanou
1,"Littoral (Douala, Bota-Limbe),",Melong,Koulikoro,District Sanitaire d'Abengourou
2,"Adamawa (Ngaoundal, Djohong),",Extrême-Nord,Sikasso,d'Abidjan
3,"Far North (Bogo, Mokolo)",Extrême nord,Segou,tengrela
4,East (Messamena),Yaoundé,Bamako,Jacqueville


## Check each column for entries that start with a lowercase letter.

In [None]:
# For every column, gather the non-null values (as text) whose first
# character is a lowercase letter.
lowercase_entries = {
    col: [
        value
        for value in countries_df[col].dropna().astype(str).tolist()
        if value[:1].islower()
    ]
    for col in countries_df.columns
}

# Report the findings column by column
for col, entries in lowercase_entries.items():
    if not entries:
        print(f"Column '{col}' has no entries starting with lowercase letters.")
        continue
    print(f"Column '{col}' has entries starting with lowercase letters:")
    for entry in entries:
        print(f"- {entry}")

Column 'Ghana' has no entries starting with lowercase letters.
Column 'Cameroon' has no entries starting with lowercase letters.
Column 'Mali ' has no entries starting with lowercase letters.
Column 'Cote d'Iviore' has entries starting with lowercase letters:
- d'Abidjan
- tengrela


In [None]:
# Capitalize entries in 'Cote d'Iviore' that start with lowercase letters
def capitalize_if_lowercase(entry):
    """Upper-case the first character of ``entry`` when it is a lowercase letter.

    Only the first character is changed. ``str.capitalize()`` is deliberately
    not used because it also lowercases every following character, which
    mangles names such as "d'Abidjan" into "D'abidjan".

    Parameters
    ----------
    entry : Any
        A cell value. Non-string values (e.g. NaN/None) and empty strings are
        returned unchanged.

    Returns
    -------
    Any
        The adjusted string, or ``entry`` itself when no change applies.
    """
    if isinstance(entry, str) and entry and entry[0].islower():
        return entry[0].upper() + entry[1:]
    return entry

# Run the helper over every cell of the column (header spelled as in the sheet)
ivory_coast_column = "Cote d'Iviore"
countries_df[ivory_coast_column] = countries_df[ivory_coast_column].apply(capitalize_if_lowercase)

# Show the first rows after the fix-up
print("\nModified DataFrame with capitalized entries in 'Cote d'Iviore':")
print(countries_df.head())


Modified DataFrame with capitalized entries in 'Cote d'Iviore':
                                       Ghana       Cameroon      Mali   \
0  Central (Yaoundé, Elig-Mfomo, Biyem-Assi)        Yaounde      Kayes   
1            Littoral (Douala, Bota-Limbe),          Melong  Koulikoro   
2              Adamawa (Ngaoundal, Djohong),   Extrême-Nord    Sikasso   
3                   Far North (Bogo, Mokolo)  Extrême nord       Segou   
4                           East (Messamena)       Yaoundé      Bamako   

                     Cote d'Iviore  
0                       Bongouanou  
1  District Sanitaire d'Abengourou  
2                        D'abidjan  
3                         Tengrela  
4                      Jacqueville  



Explode the entries in the 'Ghana' column of the `countries_df` DataFrame, creating a new column for the text before the parentheses and a new column with each word inside the parentheses as a separate entry. Display the modified DataFrame.

## Create a new DataFrame with the exploded Ghana column and display the country and town.

Use a regular expression to separate the text before the parentheses (the region) from the comma-separated names inside the parentheses (the towns).

In [None]:
import re
import pandas as pd

# (sheet column header, country label, whether cells look like "Region (Town A, Town B)")
# Only the Ghana column is parsed for a parenthesised town list; the other
# columns are taken verbatim as one town per cell.
# NOTE: the 'Mali ' header carries a trailing space in the sheet.
COLUMN_SPECS = [
    ('Ghana', 'Ghana', True),
    ('Cameroon', 'Cameroon', False),
    ('Mali ', 'Mali', False),
    ("Cote d'Iviore", "Cote d'Iviore", False),
]

# "<region> (<town>, <town>, ...)" anchored at the start of the cell; anything
# after the closing parenthesis (e.g. a trailing comma) is ignored.
REGION_WITH_TOWNS = re.compile(r'([^()]+)\s*\(([^)]+)\)')


def _is_missing(value):
    """Return True for NaN/None, blank text, and the placeholder text 'nan'.

    The 'nan' check guards against cells that were cast with astype(str)
    earlier in the notebook, which turns NaN into the text "nan"/"Nan".
    NOTE(review): this would also drop a genuine town literally named "Nan" -
    confirm no such place is expected in the sheet.
    """
    if pd.isna(value):
        return True
    return str(value).strip().lower() in ('', 'nan')


def _split_region_and_towns(text):
    """Split 'Region (Town A, Town B)' into ``(region, [towns])``.

    Text without a parenthesised list is returned as ``(None, [text])``.
    Empty fragments between commas are discarded.
    """
    match = REGION_WITH_TOWNS.match(text)
    if not match:
        return None, [text.strip()]
    region = match.group(1).strip()
    towns = [town.strip() for town in match.group(2).split(',') if town.strip()]
    return region, towns


# One record per (country, region, town), in sheet order: row by row, then
# column by column in the order of COLUMN_SPECS.
restructured_data = []
for _, row in countries_df.iterrows():
    for column, country, has_regions in COLUMN_SPECS:
        entry = row.get(column)
        if _is_missing(entry):
            continue
        text = str(entry)
        if has_regions:
            region, towns = _split_region_and_towns(text)
        else:
            region, towns = None, [text.strip()]
        restructured_data.extend(
            {'country': country, 'region': region, 'town': town} for town in towns
        )

# Fix the column order explicitly so the frame has these columns even when no
# record was produced (the selection below would otherwise raise KeyError).
restructured_df = pd.DataFrame(restructured_data, columns=['country', 'region', 'town'])

# Display the new DataFrame
display(restructured_df[['country', 'town']])

Unnamed: 0,country,town
0,Ghana,Yaoundé
1,Ghana,Elig-Mfomo
2,Ghana,Biyem-Assi
3,Cameroon,Yaounde
4,Mali,Kayes
...,...,...
59,Cameroon,South West
60,Mali,Nan
61,Cote d'Iviore,Seguela
62,Cameroon,Ebolowa


In [None]:
# (rows, columns) of the long-format frame: one row per country/region/town record
restructured_df.shape

(64, 3)

Use a geocoding service (ArcGIS) to look up coordinates for every town listed in the `restructured_df` DataFrame.

## Initialize arcgis geocoder
Initialize the ArcGIS geocoder.


In [None]:
# Shared ArcGIS geocoder instance, called by geocode_location_arcgis.
# timeout=10 is forwarded to geopy's ArcGIS constructor (presumably seconds - confirm in the geopy docs).
arcgis_geolocator = ArcGIS(timeout=10)

## Define geocoding function
Define a geocoding function with a retry mechanism using the initialized ArcGIS geocoder.


In [None]:
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_exception_type

# reraise=True makes tenacity re-raise the last underlying exception once the
# attempts are exhausted, instead of wrapping it in tenacity.RetryError, so the
# caller's error message shows the actual geocoder failure.
@retry(
    stop=stop_after_attempt(5),
    wait=wait_fixed(5),
    retry=retry_if_exception_type(Exception),
    reraise=True,
)
def geocode_location_arcgis(location_name):
    """Geocode ``location_name`` with the shared ArcGIS geocoder, retrying on errors.

    Any exception raised by the lookup triggers a retry: up to 5 attempts in
    total, with a fixed 5-second wait between attempts.

    Parameters
    ----------
    location_name : str
        Free-text query, e.g. "Kayes, Mali".

    Returns
    -------
    The value returned by ``arcgis_geolocator.geocode``. Callers read
    ``.latitude``/``.longitude`` from it and treat a falsy value as "no match".

    Raises
    ------
    Exception
        The last error raised by the geocoder once all attempts have failed.
    """
    return arcgis_geolocator.geocode(location_name)

## Geocode locations

Geocode the towns in the `restructured_df` DataFrame using the `geocode_location_arcgis` function, and store the results back into `restructured_df` by adding new columns for the geocoding results.


Enter a search term to find matching countries, regions, or towns.

In [None]:
# Add the result columns once. Coordinates start as float NaN rather than None
# so Latitude/Longitude are numeric columns from the outset (None would create
# object-dtype columns). Geometry holds a "POINT(lon lat)" text string.
if 'Latitude' not in restructured_df.columns:
    restructured_df['Latitude'] = float('nan')
    restructured_df['Longitude'] = float('nan')
    restructured_df['Geometry'] = None

# Identical query strings are sent to the service only once and then reused.
geocode_cache = {}

# Geocode each town in restructured_df and store the results
for index, row in restructured_df.iterrows():
    # Build "town[, region], country" as the query string
    location_parts = [row['town']]
    if pd.notna(row['region']):
        location_parts.append(row['region'])
    location_parts.append(row['country'])
    location_name = ', '.join(str(part) for part in location_parts)

    try:
        if location_name not in geocode_cache:
            geocode_cache[location_name] = geocode_location_arcgis(location_name)
        location_data = geocode_cache[location_name]
    except Exception as e:
        # Failures are reported and not cached; the row keeps its empty coordinates.
        print(f"Geocoding failed for {location_name} at index {index} after multiple retries: {e}")
        continue

    if not location_data:
        # The service answered but found nothing; make that visible instead of silent.
        print(f"No geocoding result for {location_name} at index {index}")
        continue

    restructured_df.loc[index, 'Latitude'] = location_data.latitude
    restructured_df.loc[index, 'Longitude'] = location_data.longitude
    restructured_df.loc[index, 'Geometry'] = f"POINT({location_data.longitude} {location_data.latitude})"

# Display the updated restructured_df with geocoding results
print(restructured_df.head())

# Display the shape of the updated restructured_df
print("\nShape of the updated restructured_df:", restructured_df.shape)

# Check for rows where geometry is present (successful geocoding)
print("\nNumber of rows with geometry:", restructured_df['Geometry'].notna().sum())

    country   region        town  Latitude  Longitude  \
0     Ghana  Central     Yaoundé  5.568071  -1.213654   
1     Ghana  Central  Elig-Mfomo  5.568071  -1.213654   
2     Ghana  Central  Biyem-Assi  5.662009  -1.312553   
3  Cameroon     None     Yaounde  3.861768   11.51875   
4      Mali     None       Kayes  14.44682 -11.437937   

                                Geometry  
0  POINT(-1.213654385693 5.568071442516)  
1  POINT(-1.213654385693 5.568071442516)  
2   POINT(-1.31255297924 5.662008976147)  
3            POINT(11.5187504 3.8617683)  
4          POINT(-11.4379366 14.4468202)  

Shape of the updated restructured_df: (64, 6)

Number of rows with geometry: 64


## Display results

Display the resulting DataFrame containing the geocoded locations and their coordinates.


In [None]:
# Wrap the geocoded table in a DataFrame bound to the shorter name `df`,
# which the map cell reads from.
df = pd.DataFrame(data=restructured_df)
display(df.head(n=5))

Unnamed: 0,country,region,town,Latitude,Longitude,Geometry
0,Ghana,Central,Yaoundé,5.568071,-1.213654,POINT(-1.213654385693 5.568071442516)
1,Ghana,Central,Elig-Mfomo,5.568071,-1.213654,POINT(-1.213654385693 5.568071442516)
2,Ghana,Central,Biyem-Assi,5.662009,-1.312553,POINT(-1.31255297924 5.662008976147)
3,Cameroon,,Yaounde,3.861768,11.51875,POINT(11.5187504 3.8617683)
4,Mali,,Kayes,14.44682,-11.437937,POINT(-11.4379366 14.4468202)


**Reasoning**:
Plot every geocoded location from the DataFrame on an interactive folium map, with one marker per town, colour-coded by country.



In [None]:
# Keep only rows with usable numeric coordinates. The coordinate columns may be
# object dtype, so coerce them first; anything non-numeric becomes NaN and is dropped.
coords = df[['Latitude', 'Longitude']].apply(pd.to_numeric, errors='coerce')
plot_df = df.assign(
    Latitude=coords['Latitude'],
    Longitude=coords['Longitude'],
).dropna(subset=['Latitude', 'Longitude'])

# Centre the map on the mean of the plotted points. With no geocoded rows the
# mean would be NaN, so fall back to a fixed point roughly over central Africa.
FALLBACK_CENTER = [0.0, 20.0]
if plot_df.empty:
    map_center = FALLBACK_CENTER
else:
    map_center = [plot_df['Latitude'].mean(), plot_df['Longitude'].mean()]

# Create a base map
m = folium.Map(location=map_center, zoom_start=5)

# Define a dictionary to map countries to colors and emojis
country_colors = {
    'Ghana': {'color': 'blue', 'emoji': '🇬🇭'},
    'Cameroon': {'color': 'green', 'emoji': '🇨🇲'},
    'Mali': {'color': 'orange', 'emoji': '🇲🇱'},
    "Cote d'Iviore": {'color': 'purple', 'emoji': '🇨🇮'}
}
# Style used for any country missing from the dictionary above
default_style = {'color': 'gray', 'emoji': ''}

# Add one marker per geocoded location
for _, row in plot_df.iterrows():
    country = row['country']
    style = country_colors.get(country, default_style)

    # Popup text comes from the spreadsheet, so escape it before embedding it
    # in HTML; only the <b>/<br> tags written here are meant to be markup.
    popup_lines = [f"<b>Country:</b> {html.escape(str(country))} {style['emoji']}"]
    if pd.notna(row.get('region')):  # Add region only if it exists and is not None
        popup_lines.append(f"<b>Region:</b> {html.escape(str(row['region']))}")
    popup_lines.append(f"<b>Town:</b> {html.escape(str(row['town']))}")

    folium.Marker(
        location=[row['Latitude'], row['Longitude']],
        popup='<br>'.join(popup_lines),
        icon=folium.Icon(color=style['color'])  # Marker colour follows the country
    ).add_to(m)

# Display the map
m