<a href="https://www.kaggle.com/code/joshuaokolo/geospatial-data-analytics-with-folium-and-rasterio?scriptVersionId=104089483" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## Import libraries

Learning GDA with [this tutorial](https://omdena.com/blog/geospatial-data-analytics/).

In [1]:
# Import required packages
# --------------------
import wget
import rasterio
from rasterio import mask

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import geopandas as gpd
import pandas as pd

import folium


# Download raster/tif file (download gridded population data from WorldPop)
# --------------------
FILE_DIR = "data/worldpop"
# d/l yr 2020 data
url2020 = "ftp://ftp.worldpop.org.uk/GIS/Population/Global_2000_2020/2020/UGA/uga_ppp_2020.tif"   
wget.download(url2020, FILE_DIR)

# also d/l yr 2019 data
url2019 = "ftp://ftp.worldpop.org.uk/GIS/Population/Global_2000_2020/2019/UGA/uga_ppp_2019.tif"   
wget.download(url2019, FILE_DIR)


# Read raster/tif file
# --------------------
uga_tif = 'data/worldpop/uga_ppp_2020.tif'
raster_uga = rasterio.open(uga_tif)
pop_uga_data = raster_uga.read(1)

# A crude way to count the population size represented in the image
pop_uga_count = pop_uga_data[pop_uga_data > 0].sum()


## Plot raster/tif image
# --------------------
def plot_raster(rast_data, title='', figsize=(10,10)):
  """
  Plots population count in log scale(+1)
  """
    plt.figure(figsize = figsize)
    im1 = plt.imshow(np.log1p(rast_data),) # vmin=0, vmax=2.1)

    plt.title("{}".format(title), fontdict = {'fontsize': 20})  
    plt.axis('off')
plt.colorbar(im1, fraction=0.03)

IndentationError: unexpected indent (2647915961.py, line 45)

In [None]:
# A crude way to aggregate the population size represented in the image

pop_uga_count = pop_uga_data[pop_uga_data > 0].sum()

## Aggregate Regional Estimates at District Levels

In [2]:
# the shape files for Uganda were downloaded here:

# https://gadm.org/

uga_gdf = gpd.GeoDataFrame.from_file("data/shpfiles/UGA/gadm36_UGA_2.shp")


# plot the shapefile
plt.rcParams['figure.figsize'] = 5,5
uga_gdf.plot(color="white", edgecolor="#2e3131")
plt.title('Uganda: level 2 regions')

NameError: name 'gpd' is not defined

In [None]:
uga_gdf.head(3) #check the first few rows of the shapefile dataframe

In [None]:
uga_gdf.crs   #to see the CRS format of the file

In [None]:
# using mask.mask function from Rasterio to define the region of interest
gtraster, bound = mask.mask(raster_uga, uga_gdf[uga_gdf.GID_1 == “Apac”].geometry, crop=True)

gtraster[0][gtraster[0]>0].sum()

In [None]:
# the shapefiles for Uganda were downloaded here: 
# https://gadm.org/

# Load in the shapefile
# --------------------
uga_gdf = gpd.GeoDataFrame.from_file("data/shpfiles/UGA/gadm36_UGA_2.shp") 


# Estimate the population size per defined district for each year (from the .tif image available for each year)
# --------------------
for year in range(2019, 2021):
    # Read raster/tif file
    raster_uga = 'data/worldpop/UGA/uga_ppp_{}.tif'.format(year) 
    pop_raster_uga = rasterio.open(raster_uga)   
    pop_uga_data = pop_raster_uga.read(1)
    
    # loop through each defined district contained in the shapefile and use it as the mask to extract values
    _results = []
    for i in uga_gdf['GID_1']:
        roi = uga_gdf[uga_gdf.GID_1 == i]

        # using the mask.mask module from Rasterio to specify the ROI
        gtraster, bound = mask.mask(pop_raster_uga, roi["geometry"], crop=True) 
        
        # values greater than 0 represent the estimated population count for that pixel
        _results.append(gtraster[0][gtraster[0]>0].sum())
     
    # save the estimated counts for each year in a new column
    uga_gdf[str(year)] = _results
    
# also, compute the percentage change in estimated counts across years
uga_gdf['growth_rate'] = uga_gdf[['2019', '2020']].pct_change(axis=1)['2020']*100

## Create interactive choropleth maps with `mask.mask` Folium

In [None]:
# Create a map object using Folium
# --------------------
map_uga_popdist = folium.Map(location=[cent_y, cent_x], zoom_start=7, tiles='OpenStreetMap')  

# Create the choropleth map
# --------------------
choro = folium.Choropleth(geo_data=uga_gdf,
                          name='choropleth',     
                          data=uga_gdf,
                          columns=['NAME_1', 'growth_rate'], 
                          key_on='feature.NAME_1', 
                          fill_color='YlOrRd',
                          fill_opacity=0.6, 
                          line_opacity=0.8,
                          legend_name= "Population size across Uganda's subregion"
                          ).add_to(map_uga_popdist) 

# add labels to map
choro.geojson.add_child(folium.features.GeoJsonTooltip(fields=['NAME_1', '2020', 'growth_rate'],
                                                       aliases=['District', 'Est Population in 2020', 'Est growth_rate'], 
                                                       labels = False))
folium.LayerControl().add_to(uga_popdist_map)


uga_popdist_map

In [None]:
import pandas as pd
import geopandas as gpd

"""
assume train_df, a pandas dataframe, exists with longitude and latitude columns
"""
# create geopandas dataframe
gpd_df = gpd.GeoDataFrame(train_df,
                          geometry = gpd.points_from_xy(train_df['longitude'], train_df['latitude']))

# approximately 10km radius
gpd_df['geometry_1km'] = gp_df['geometry'].apply(lambda x: x.buffer(0.1))

## References

https://www.southampton.ac.uk/news/2020/10/worldpop-census-calculation-nigeria.page

African Development Bank (AfDB). 2018. African economic outlook 2018: Macroeconomic Developments and Structural Change: Infrastructure and its Financing. Abidjan: African Development Bank.

https://omdena.com/blog/geospatial-data-analytics/