# Geocode disasters in Uganda collected from EM-DAT database

## Environment Setup

In [2]:
import re
from pathlib import Path

import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, Point

from osmnx.geocoder import geocode

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

## Folder Structure

In [3]:
data_dir = Path("../data/processed")

## Functions

In [129]:
def create_list_of_locations(locations_string):
    remove_par = re.sub(r"\([^()]*\)", "", locations_string)
    # remove_comment = re.sub(", creating a humanitarian disaster. Nord and Sud-Ubangi, Mongala and Kinshasa", "", remove_par)
    remove_symb = re.sub(r"[?!]", "", remove_par)
    # remove_num = re.sub(r"\d+", "", remove_comment)
    remove_and = re.sub(r"\band\b", ",", remove_symb)
    remove_et = re.sub(r"\bet\b", ",", remove_and)
    remove_of = re.sub(r"\bof\b", ",", remove_et)
    remove_empty = re.sub(r"\,\s+\,", "", remove_of)
    remove_in = re.sub(r"\bin\b", ",", remove_empty)
    return (
        remove_in
        .replace("Girigiri", "Girigiroi")
        .replace("Himumtu", "Himutu")
        .replace("Kubehwo", "Bukalasi")
        .replace("Lubowaa", "Lubowa")
        .replace("Kigez", "Kigezi")
        .replace("Kigezii", "Kigezi")
        .replace("Ntokoro", "Ntoroko")
        .replace("Kanabulemu", "Kyebe")
        .replace("Nabeyo", "Nabweyo")
        # .replace("area", "")
        .replace(".", ",")
        .replace(";", ",")
        .split(",")
    )

def remove_loc_types(location):
    loc_title = location.title()
    remove_district = re.sub(r"Distric\w+", "", loc_title)
    remove_province = re.sub(r"Provinc\w+", "", remove_district)
    remove_territory = re.sub(r"Territor\w+", "", remove_province)
    remove_city = re.sub(r"Cit\w+", "", remove_territory)
    remove_town = re.sub(r"Tow\w+", "", remove_city)
    remove_commune = re.sub(r"Commun\w+", "", remove_town)
    remove_municipality = re.sub(r"Municipalit\w+", "", remove_commune)
    remove_plains = re.sub("Lower Plains", "", remove_municipality)
    remove_subregion = re.sub("Sub-Region", "", remove_plains)
    remove_rural = re.sub("Rural", "", remove_subregion)
    remove_areas = re.sub(r"Are\w+", "", remove_rural)
    remove_villages = re.sub(r"Villag\w+", "", remove_areas)
    remove_suburbs = re.sub(r"Subur\w+", "", remove_villages)
    remove_parishes = re.sub(r"Paris\w+", "", remove_suburbs)
    remove_subcounties = re.sub(r"Sub-Count\w+", "", remove_parishes)
    remove_subcounties_2 = re.sub(r"Sub Count\w+", "", remove_subcounties)
    remove_council = re.sub(r"Counc\w+", "", remove_subcounties_2)
    return (
        remove_council
        .strip()
    )

## Import Data

In [1]:
df_dis_uga = (
    pd.read_csv(
        data_dir / "uganda_relevant_disasters_2010_2025_cleaned.csv"
    )
)

NameError: name 'pd' is not defined

In [42]:
df_dis_uga.head()

Unnamed: 0,disno,start_date,end_date,country,location,disaster_type,disaster_subtype,origin,associated_types,total_deaths,no_injured,no_homeless,no_affected,total_affected,aid_contribution_000_usd,total_damage_adjusted_000_usd
0,2010-9082-UGA,2011-03-01,,Uganda,"Kaabong, Kotido, Abim, Moroto, Nakapiripirit p...",Drought,Drought,,,,,,669000.0,669000.0,,
1,2022-9436-UGA,2022-07-01,2022-12-01,Uganda,"Napak, Kaabong, Kotido, Moroto districts (Kara...",Drought,Drought,,Famine,2465.0,,,518000.0,518000.0,,
2,2019-0227-UGA,2019-06-04,2019-06-08,Uganda,"Bududa, Bushenyi, Rukiga, Sironko, Mbale, Buta...",Mass movement (wet),Landslide (wet),Heavy rains,Flood|Lightening,61.0,25.0,,129928.0,129953.0,,
3,2020-0182-UGA,2020-05-08,2020-05-08,Uganda,"Kween District (central-east Uganda), Girigiri...",Flood,Flash flood,Heavy rains,"Slide (land, mud, snow, rock)",3.0,,,100000.0,100000.0,,
4,2022-0481-UGA,2022-07-30,2022-08-05,Uganda,"Mbale, Kapchorwa, Sironko, Bulambuli, Bukedea,...",Flood,Flood (General),Heavy rains,,32.0,10.0,,78328.0,78338.0,,


## Geocode

In [133]:
df_emdat_geocoded = (
    df_dis_uga
    .dropna(subset = ["location"])
    .assign(
        location_clean = lambda x: x.apply(lambda y: [remove_loc_types(l) for l in create_list_of_locations(y["location"])], axis = 1),
        affected = lambda x: x[["no_injured", "no_affected", "no_homeless", "total_deaths", "total_affected"]].ffill(axis = 1).iloc[:, -1],
    )
    .explode("location_clean")
    .loc[lambda x: ~x["location_clean"].isin(["Namanga", "Kalingatha  To Kasika", "Humya Bumadu", "Eastern Regions", "The Lake Kyoga Regions", "Kabugundo", "Itanwa", "Nfasha"])]
    .assign(
        affected = lambda x: x.groupby(["disno"])["affected"].transform(lambda y: y / len(y)),
        address = lambda x: x["location_clean"] + ", " + x["country"],
        address_geocoded = lambda x: x.apply(lambda y: geocode(y["address"]), axis = 1),
        geometry = lambda x: x["address_geocoded"].apply(lambda y: Point(y[1], y[0]))
    )   
    .reset_index(drop = True)
)

InsufficientResponseError: Nominatim could not geocode query 'Gimadu Bugatiisa, Uganda'.

In [119]:
test = (
    df_emdat_geocoded
    .loc[lambda x: x["location"] == 'Bubukwanga, Bukhonzo, Humya, and Bumadu village in Bundibugyo town council (Bundinbugyo district)']
    ["location_clean"].values
)

In [120]:
test

array(['Bubukwanga', 'Bukhonzo', 'Humya Bumadu', 'Bundibugyo'],
      dtype=object)

In [31]:
df_emdat_geocoded.head()

Unnamed: 0,disno,start_date,end_date,country,location,disaster_type,disaster_subtype,origin,associated_types,total_deaths,no_injured,no_homeless,no_affected,total_affected,aid_contribution_000_usd,total_damage_adjusted_000_usd,location_clean,affected
0,2010-9082-UGA,2011-03-01,,Uganda,"Kaabong, Kotido, Abim, Moroto, Nakapiripirit p...",Drought,Drought,,,,,,669000.0,669000.0,,,Kaabong,669000.0
0,2010-9082-UGA,2011-03-01,,Uganda,"Kaabong, Kotido, Abim, Moroto, Nakapiripirit p...",Drought,Drought,,,,,,669000.0,669000.0,,,Kotido,669000.0
0,2010-9082-UGA,2011-03-01,,Uganda,"Kaabong, Kotido, Abim, Moroto, Nakapiripirit p...",Drought,Drought,,,,,,669000.0,669000.0,,,Abim,669000.0
0,2010-9082-UGA,2011-03-01,,Uganda,"Kaabong, Kotido, Abim, Moroto, Nakapiripirit p...",Drought,Drought,,,,,,669000.0,669000.0,,,Moroto,669000.0
0,2010-9082-UGA,2011-03-01,,Uganda,"Kaabong, Kotido, Abim, Moroto, Nakapiripirit p...",Drought,Drought,,,,,,669000.0,669000.0,,,Nakapiripirit,669000.0


In [None]:
gdf_uvira_disasters = (
    gpd.GeoDataFrame(
        df_emdat_geocoded,
        geometry = "geometry",
        crs = "EPSG:4326"
    )
    # .loc[lambda x: x["geometry"].within(east_drc.loc[0, "geometry"])]
    .loc[lambda x: x["geometry"].within(uvira_area.loc[0, "geometry"])]
    .reset_index(drop = True)
    # .to_crs("EPSG:3857")
    .assign(
        start_day = lambda x: x["start_day"].fillna(15).astype(int),
        start_month = lambda x: x["start_month"].astype(int),
        start_year = lambda x: x["start_year"].astype(int),
        end_day = lambda x: x["end_day"].fillna(16).astype(int),
        end_month = lambda x: x["start_month"].astype(int),
        end_year = lambda x: x["start_year"].astype(int),
        start_date = lambda x: pd.to_datetime(x[["start_year", "start_month", "start_day"]].astype(str).agg("-".join, axis = 1)),
        end_date = lambda x: pd.to_datetime(x[["end_year", "end_month", "end_day"]].astype(str).agg("-".join, axis = 1)),
        # distance = lambda x: x["geometry"].distance(uvira_point.to_crs("EPSG:3857").loc[0, "geometry"]) / 1000,
        # disaster_affected = lambda x: x["disaster_affected"] * np.exp(-x["distance"]/(20/0.693)),  # affected numbers half at 20 km
        # num_disasters = lambda x: np.exp(-x["distance"]/(20/0.693))  # Conflict weight half at 20 km
        num_disasters = 1
    )
    .to_crs("EPSG:4326")   
    [[
        "disno",
        "start_date",
        "end_date",
        "disaster_address",
        "disaster_subtype",
        "disaster_affected",
        "num_disasters",
        "geometry"
    ]]
)

In [None]:
gdf_uvira_disasters.to_file(processed_data_dir / "gdf_uvira_disasters.geojson", driver = "GeoJSON")