## **Imports and Paths**

In [1]:
# Mount Google Drive into the Colab filesystem; the project paths used by this
# notebook all live under /content/drive.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [7]:
import geopandas as gpd
import pandas as pd

# Project root on the mounted Drive; every path below is derived from it.
BASE_DIR = "/content/drive/MyDrive/Colab Notebooks/Kenya_Cholera_Climate_Risk_Model"

# Inputs: the predictions table (CSV) and the GADM level-2 shapefile for Kenya.
PRED_PATH = f"{BASE_DIR}/data/processed/kenya_cholera_predictions.csv"
GADM_PATH = f"{BASE_DIR}/data/raw/gadm/gadm41_KEN_2.shp"

# Output: the GeoJSON written by the export cell.
OUTPUT_GEOJSON = f"{BASE_DIR}/data/processed/gadm_kenya_admin2_cholera.geojson"

### **Loading Predictions Table**

In [8]:
# Read the predictions table (one row per GID_2 / year / month in the preview)
# and show the first rows as a sanity check.
pred = pd.read_csv(filepath_or_buffer=PRED_PATH)
pred.head(5)

Unnamed: 0,GID_2,location_clean,year,month,cholera_incidence,predicted_incidence
0,KEN.10.2_1,kajiado east,2023,3,826874.3,6871783.0
1,KEN.10.2_1,kajiado east,2023,4,620155.7,22301270.0
2,KEN.10.2_1,kajiado east,2023,5,1033593.0,3882410.0
3,KEN.10.2_1,kajiado east,2023,6,206718.6,299121.6
4,KEN.10.3_1,kajiado north,2023,1,18049.67,831481.9


In [10]:
# Fail early if any row lacks the GID_2 join key or a predicted value;
# both are needed by the merge and the export further down.
for required_col in ("GID_2", "predicted_incidence"):
    assert pred[required_col].notna().all()

### **Loading GADM Admin-2 Geometry**

In [11]:
# Load the GADM shapefile and keep only the join key, the display name and
# the polygon geometry.
# NOTE(review): the preview shows NAME_2 == "805" for KEN.1.1_1, which looks
# like a placeholder in the source data; confirm before showing it as a name.
gadm = gpd.read_file(GADM_PATH)[["GID_2", "NAME_2", "geometry"]]
gadm.head()

Unnamed: 0,GID_2,NAME_2,geometry
0,KEN.1.1_1,805,"POLYGON ((35.87727 -0.02973, 35.87699 -0.02947..."
1,KEN.1.2_1,Baringo Central,"POLYGON ((35.7977 0.3395, 35.79765 0.33912, 35..."
2,KEN.1.3_1,Baringo North,"POLYGON ((35.81346 0.60276, 35.81318 0.602, 35..."
3,KEN.1.4_1,Baringo South,"POLYGON ((36.22934 0.42293, 36.22926 0.42207, ..."
4,KEN.1.5_1,Eldama Ravine,"POLYGON ((35.82341 0.0244, 35.8237 0.02429, 35..."


### **CRS Check: Reproject to EPSG:4326 (WGS84)**

In [12]:
# Reproject the boundaries to EPSG:4326 (WGS84 longitude/latitude) before
# they are merged and exported.
gadm = gadm.to_crs(crs="EPSG:4326")

## **Merging Geometry with Predictions**

In [13]:
# Attach each prediction row to its polygon via GID_2.
# gadm is the left frame, so the merge goes through GeoDataFrame.merge and the
# result can be written with to_file later on.
# how="inner" keeps only GID_2 values present in both tables.
# validate="one_to_many" makes pandas raise MergeError if GID_2 is repeated in
# gadm, which would otherwise silently duplicate prediction rows.
gdf = gadm.merge(
    pred,
    on="GID_2",
    how="inner",
    validate="one_to_many",
)

In [15]:
# Report the row counts before and after the merge.
print("Rows in predictions: ",len(pred))
print("Rows after merge: ",len(gdf))

# Enforce what the printout is meant to show: with an inner join, fewer rows
# means some predictions had no matching GID_2 in the boundaries, and more rows
# means a GID_2 matched several polygons. Either way the export would be wrong.
assert len(gdf) == len(pred), (
    f"Merge changed the row count: {len(pred)} prediction rows vs "
    f"{len(gdf)} merged rows; check for unmatched or duplicated GID_2 values."
)

Rows in predictions:  235
Rows after merge:  235


## **Cleaning Columns for Streamlit**

In [17]:
# Rename the GADM and model columns to the names used in the exported file.
streamlit_names = {
    "NAME_2": "district",
    "cholera_incidence": "observed_incidence",
}
gdf = gdf.rename(columns=streamlit_names)

In [18]:
# Keep only the columns that go into the export, in this order; every other
# column (e.g. location_clean) is dropped.
export_columns = [
    "GID_2",
    "district",
    "year",
    "month",
    "observed_incidence",
    "predicted_incidence",
    "geometry",
]
gdf = gdf[export_columns]

## **Exporting GeoJSON**

In [19]:
# Write the merged table as GeoJSON, then print the destination path.
gdf.to_file(OUTPUT_GEOJSON, driver="GeoJSON")

print("GeoJSON saved to:", OUTPUT_GEOJSON, sep="\n")

GeoJSON saved to:
/content/drive/MyDrive/Colab Notebooks/Kenya_Cholera_Climate_Risk_Model/data/processed/gadm_kenya_admin2_cholera.geojson
