---
title: "30 Day Map Challenge 2024 - Day 8: Humanitarian Data Exchange (HDX)"
categories:
  - Project
tags:
  - 30daymapchallenge
  - python
  - programming
classes: wide
header:
  teaser: /assets/images/30daymapchallenge2024-day8.png
---

The eighth day requires the use of a specific data source, the _Humanitarian Data Exchange (HDX)_:
> Use data from HDX to map humanitarian topics. Explore the datasets from the Humanitarian Data Exchange, covering disaster response, health, population, and development. Map for social good.

### Data


## Implementation

The necessary imports include the usual mapping and data handling libraries.
`geopy` provides an abstraction to conveniently access the [Nominatim](https://nominatim.org/) API, which uses OpenStreetMap data to retrieve a country's coordinates by its code or name.

In [None]:
from pathlib import Path
import cartopy.crs as ccrs
import cartopy.io.img_tiles as cimgt
import cartopy.feature as cfeature
import pycountry
import matplotlib.pyplot as plt
import pandas as pd
from geopy.geocoders import Nominatim
from matplotlib.patches import FancyArrowPatch

First, we load the HDX dataset into a pandas `DataFrame`, keep only the rows for the year 2023, and index them by the country of origin code.
Additionally, we add a column that holds the alpha-2 code of each country of origin, which is used later to look up the country's location when creating the map.

In [None]:
data_path = Path("data/hdx/end_year_population_totals_residing_deu.csv")


def _to_alpha_2(alpha_3):
    """Return the alpha-2 code for an alpha-3 code, or None if pycountry has no match."""
    match = pycountry.countries.get(alpha_3=alpha_3)
    return match.alpha_2 if match else None


# The CSV is read with a two-row header; the second header level is dropped
# so that columns can be addressed by their plain names.
df = (
    pd.read_csv(data_path, header=[0, 1])
    .droplevel(1, axis="columns")
    .query("Year == 2023")
    .set_index("Country of Origin Code")
    # The index now holds the origin codes, so the alpha-2 column can be
    # derived from the index labels directly.
    .assign(alpha_2=lambda frame: [_to_alpha_2(code) for code in frame.index])
)

In [None]:
# Preview the first three rows of the prepared data.
df.head(n=3)

In [None]:
# Directory and file used to cache geocoding results; the cache file is
# named after the input data set.
cache_path = Path("data/cache")
cache_path.mkdir(exist_ok=True, parents=True)
cache_file_path = cache_path / f"{data_path.stem}_locations.csv"

# Nominatim client, identified by this project's user agent string.
geolocator = Nominatim(user_agent="30daymapchallenge")

def locate(alpha_2: str):
    """Geolocate a country via Nominatim, trying progressively looser queries.

    The queries are attempted in this order, stopping at the first hit:

    1. structured ``country`` query with the alpha-2 code,
    2. structured ``country`` query with the alpha-3 code,
    3. free-text query with the official name (if pycountry has one),
    4. free-text query with the generic name.

    Args:
        alpha_2: Alpha-2 code of the country to locate.

    Returns:
        The result of ``geolocator.geocode`` for the first query that
        matched, or ``None`` if no query matched, the request timed out, or
        ``pycountry`` could not resolve the code.
    """
    # Local import: geopy reports timeouts with its own exception type, which
    # is not a subclass of the built-in ``TimeoutError``.
    from geopy.exc import GeocoderTimedOut

    try:
        result = geolocator.geocode({"country": alpha_2})
        if result is not None:
            return result

        print(f"Failed to geolocate country {alpha_2} by alpha 2 code, trying alpha 3 ...")
        # Resolve the country record once and reuse it for all fallbacks.
        country = pycountry.countries.get(alpha_2=alpha_2)
        if country is None:
            # Unknown code: handled like any other failed lookup below.
            raise LookupError(alpha_2)

        result = geolocator.geocode({"country": country.alpha_3})
        if result is not None:
            return result

        print(f"Failed to geolocate country {alpha_2} by alpha 3 code, trying by official name ...")
        # Not every pycountry record carries an official name; skip this
        # attempt instead of failing on the missing attribute.
        official_name = getattr(country, "official_name", None)
        if official_name:
            result = geolocator.geocode(official_name)
        if result is not None:
            return result

        print(f"Failed to geolocate country {alpha_2} by official name code, trying by generic name ...")
        result = geolocator.geocode(country.name)
        if result is None:
            print(f"Finally failed to geolocate country {alpha_2}")
        return result
    except (TimeoutError, GeocoderTimedOut):
        print(f"Timeout while geolocating country {alpha_2}")
        return None
    except LookupError:
        print(f"Lookup error while geolocating country {alpha_2}")
        return None


# use_cache: load previously geocoded locations from the cache file if it exists.
# override_cache: (re)write the cache file whenever locations are freshly geocoded.
use_cache = True
override_cache = True

if use_cache and cache_file_path.exists():
    # Disable pandas' default NA markers: otherwise the alpha-2 code "NA"
    # (Namibia) is parsed as a missing value and its index label is lost.
    locations = pd.read_csv(
        cache_file_path,
        keep_default_na=False,
        na_values=[""],
    ).set_index("country")
else:
    locations = pd.DataFrame(
        [
            {"country": country, "latitude": location.latitude, "longitude": location.longitude}
            # dropna() removes codes that could not be resolved (None/NaN).
            for country in df["alpha_2"].dropna().unique()
            if country and (location := locate(country)) is not None
        ],
        # Explicit columns keep set_index() working even when no country
        # could be geolocated and the record list is empty.
        columns=["country", "latitude", "longitude"],
    ).set_index("country")
    if override_cache or not cache_file_path.exists():
        locations.to_csv(cache_file_path)

In [None]:
# Show the data joined with the coordinates of each country of origin
# (matched on the alpha-2 code against the index of `locations`).
df.merge(locations, left_on="alpha_2", right_index=True)

In [None]:
# Germany is the common end point of every connection drawn on the map.
position_ger = geolocator.geocode({"country": "DE"})
if position_ger is None:
    raise RuntimeError("Failed to geolocate Germany, cannot draw the map")

# EuroPP is used here; an AzimuthalEquidistant projection centred on
# Germany's coordinates is a possible alternative.
projection = ccrs.EuroPP()

fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(1, 1, 1, projection=projection)
ax.set_global()

for feature in (cfeature.OCEAN, cfeature.LAND, cfeature.COASTLINE, cfeature.BORDERS):
    ax.add_feature(feature)

# Draw one line from Germany to every country of origin with known coordinates.
for alpha_2 in df["alpha_2"]:
    # Skip rows without an alpha-2 code as well as countries that could not
    # be geolocated (they have no entry in `locations`).
    if pd.isna(alpha_2) or not alpha_2 or alpha_2 not in locations.index:
        continue

    ax.plot(
        [position_ger.longitude, locations.loc[alpha_2, "longitude"]],
        [position_ger.latitude, locations.loc[alpha_2, "latitude"]],
        lw=2,
        # Coordinates are longitude/latitude pairs, so they are declared as
        # geodetic and transformed into the map projection by cartopy.
        transform=ccrs.Geodetic(),
    )

plt.show()