In [None]:
# Imports
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objects as go

from geopy.geocoders import Nominatim

import folium

# hide warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the Kenya health facilities workbook and preview its first rows
health_facilities_df = pd.read_excel("data/health-facilities-data-kenya.xlsx")
health_facilities_df.head()

In [None]:
# Read the population-by-sex-and-county data and preview its first rows
population_df = pd.read_csv(
    filepath_or_buffer="data/kenya-population-by-sex-and-county.csv"
)
population_df.head()

### Exploratory Data Analysis

In [None]:
# Inspect the dimensions of the health facilities data as (rows, columns)
health_facilities_df.shape

In [None]:
# Summarize the health facilities dataframe: column names, non-null counts and dtypes
health_facilities_df.info()

In [None]:
# List the distinct provinces that appear in the health facilities data
provinces = health_facilities_df["Province"].unique()
provinces

In [None]:
# List the distinct counties that appear in the health facilities data
counties = health_facilities_df["County"].unique()
counties

In [None]:
# Restrict the analysis to the facilities whose county is Nairobi
health_facilities_nairobi_df = health_facilities_df[
    health_facilities_df["County"] == "Nairobi"
]
health_facilities_nairobi_df.head()

In [None]:
# Report the dimensions and the column summary of the Nairobi facilities dataframe
print(f"Data shape: {health_facilities_nairobi_df.shape}")
print("---------------------------------")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- doing so appends a stray "None" line.
health_facilities_nairobi_df.info()

The Nairobi dataset contains a total of 942 facilities. The row counts for the facility code and facility name columns match, meaning the facilities are all unique and there are no null values.

### Explore facility locations and distribution in Nairobi County

In [None]:
# Count the distinct values at each administrative level of the Nairobi data
districts, divisions, locations, sub_locations = (
    health_facilities_nairobi_df[column].nunique()
    for column in ("District", "Division", "Location", "Sub Location")
)

# Emit the header and the four counts, one per line
print(
    "Number of individual administrative areas in the Nairobi dataframe",
    f"Number of districts: {districts}",
    f"Number of divisions: {divisions}",
    f"Number of locations: {locations}",
    f"Number of sub_locations: {sub_locations}",
    sep="\n",
)

In [None]:
# Count the facilities in each Nairobi district, largest district first
facilities_per_district = health_facilities_nairobi_df.groupby("District").size()
district_health_facilities_nbi = facilities_per_district.reset_index(
    name="Number of Facilities"
).sort_values(by="Number of Facilities", ascending=False)
district_health_facilities_nbi

In [None]:
# Visualize this data on a map of Kenya
# First we get the latitudes and longitudes of each district
def get_coordinates(row, geolocator=None):
    """Geocode a district to a ``(latitude, longitude)`` pair.

    The lookup address is built as ``"<District>, Kenya"``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Record exposing a ``"District"`` entry.
    geolocator : object, optional
        Any object with a ``geocode(address)`` method. When omitted, a
        ``Nominatim`` client with the ``"kenya_healthcare"`` user agent is
        created for the call. Passing one in lets callers share a single
        client across many rows.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the match, or ``(None, None)`` when the
        lookup finds nothing or fails.
    """
    if geolocator is None:
        geolocator = Nominatim(user_agent="kenya_healthcare")

    address = f"{row['District']}, Kenya"

    # The request itself is the step that can fail (timeouts, service
    # errors), so it has to sit inside the try block. A failure is reported
    # and turned into a missing coordinate instead of aborting the whole
    # DataFrame.apply run.
    try:
        location = geolocator.geocode(address)
    except Exception as error:
        print(f"Geocoding failed for {address!r}: {error}")
        return None, None

    if not location:
        return None, None
    return location.latitude, location.longitude

# Geocode every district; each returned (lat, lon) pair is expanded into two columns
district_coordinates = district_health_facilities_nbi.apply(
    get_coordinates,
    axis=1,
    result_type="expand",
)
district_health_facilities_nbi[["Latitude", "Longitude"]] = district_coordinates

# Keep a copy of the geocoded table on disk
district_health_facilities_nbi.to_csv(
    "data/nairobi-health-facilities-locations.csv"
)

district_health_facilities_nbi.head()

In [None]:
# Hover label for each district: its name followed by its facility count
facility_totals = district_health_facilities_nbi["Number of Facilities"].astype(str)
district_health_facilities_nbi["Text"] = (
    district_health_facilities_nbi["District"]
    + "<br>Total health facilities: "
    + facility_totals
)

# Base map centred on the Nairobi coordinates (no markers are added here)
nairobi_lat = -1.28333
nairobi_long = 36.81667
nairobi_map = folium.Map(
    location=[nairobi_lat, nairobi_long],
    zoom_start=10,
    control_scale=True,
)
nairobi_map

In [None]:
# Plot one bubble per district on an OpenStreetMap base layer,
# sized by the number of facilities in that district
fig = px.scatter_mapbox(
    data_frame=district_health_facilities_nbi,
    lat="Latitude",
    lon="Longitude",
    size="Number of Facilities",
    hover_name="District",
    mapbox_style="open-street-map",
    zoom=10,
)
fig.show()