In [1]:
%%html
<script>
    // When the kernel reports ready: run every cell below the current one,
    // scroll back to the top, then save the notebook.
    require(['base/js/namespace', 'jquery'], function (Jupyter, jq) {
        function autoRun() {
            console.log("Auto-running all cells-below...");
            Jupyter.actions.call('jupyter-notebook:run-all-cells-below');
            Jupyter.notebook.scroll_to_top();
            Jupyter.actions.call('jupyter-notebook:save-notebook');
        }

        jq(Jupyter.events).on("kernel_ready.Kernel", autoRun);
    });

    // Once the DOM is ready, hide every code-input area.
    // `code_shown` is deliberately left undeclared so it becomes a page-level
    // global (presumably read by the toggle_code helpers — TODO confirm).
    $(document).ready(function () {
        code_shown = false;
        $('div.input').hide();
    });
</script>

Note: Above this cell is a hidden cell that hides and runs all code in the file. This is intended for those who do not want to see or interact with the code. It can be seen by converting the cell to markdown (see toolbar above) and then back to code.

# Overview

The following steps walk users through converting their WorldPop data of interest from decimal-based numbers to integers. This is necessary for discrete agent object creation. 

- It also produces a heat map of population density for the country of interest.

- Due to the potential size of the population files, syntheticpopulation uses the HDF5 file format (saved as .h5) to keep the data on disk rather than in RAM. **Please be aware that, depending on the country, this may take up substantial memory.** As an example, a country like Niger, because of its geographic size, takes up approximately 70 gigabytes. 

- The next step is the density and demographic merge files, if the user has done demographic exploration and conversion. 

## 0: Import the Dependencies

In [2]:
# Notebook helpers; hide_code() and run_code() are called at the top of each code cell below.
from toggle_code import toggle_code as hide_code
from toggle_code import run_code as run_code

import glob 
import os
from netCDF4 import Dataset
import rasterio
import pandas as pd
import numpy as np
import math
import datetime
import ipywidgets as widgets
from ipywidgets import interact
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure
from bokeh.tile_providers import get_provider, Vendors
from bokeh.palettes import RdYlGn, Plasma256
from bokeh.models import Legend, BoxAnnotation, Toggle, CustomJS,ColumnDataSource,LinearColorMapper, ColorBar, BasicTicker,\
                          PrintfTickFormatter 
from bokeh.transform import transform
# NOTE(review): wildcard import; which names it contributes cannot be told from this cell.
from bokeh.models.tools import *
# Two basemap tile providers; the visible cells only pass tile_provider2 to add_tile().
tile_provider = get_provider('STAMEN_TERRAIN')
tile_provider2 = get_provider('CARTODBPOSITRON')
import tables # imported so it is available to pandas (presumably for the HDF5 store — TODO confirm)
# pyproj transformer from EPSG:4326 (lat/long) to EPSG:3857 (web mercator);
# later cells call it as transformer.transform(latitude, longitude).
from pyproj import Transformer
transformer = Transformer.from_crs('epsg:4326','epsg:3857')
#from IPython.display import HTML
# Send Bokeh output to the notebook.
output_notebook()
import warnings
# Ignore warnings with this message (presumably emitted by Bokeh's push_notebook — TODO confirm).
warnings.filterwarnings("ignore", message="Cannot find a last shown plot to update.")

# 1. Select population file

The following code looks in the data folder for a file with the WorldPop population density marker "ppp".

In [3]:
hide_code()
run_code()

# Dropdown entries: a placeholder meaning "nothing chosen yet", followed by
# every path under ./data that contains the "ppp" marker.
filepath = r"./data/*"
pot_list = ["Select File"] + [path for path in glob.glob(filepath) if "ppp" in path]

pop_file = widgets.Dropdown(
    options=pot_list,
    value=pot_list[0],
    description="File: ",
    disabled=False,
)


def update(file):
    """Return the selected dropdown entry unchanged (callback handed to interact)."""
    return file


# Later cells read the current choice through pop_file.value.
pop_file_select = interact(update, file=pop_file)

#call the function 


#pop_table

interactive(children=(Dropdown(description='File: ', options=('Select File', './data\\alb_ppp_2020.tif'), valu…

# 2. Convert file into table

The following code converts the downloaded worldpop file into a table of latitudes, longitudes and number of people.

Six decimal places in the coordinates represent an accuracy of ~0.11 meters at the equator.

In [4]:
hide_code()
run_code()
# Reads the selected WorldPop raster and flattens its first band into a table with
# one row per raster cell (longitude, latitude, Population). Cells equal to the
# no-data value are dropped and the table is written to the HDF5 store under the
# keys "density" and "visuals".

if pop_file.value == "Select File":
    print("Waiting for Input")
    # Placeholder table so `pop_table` exists before a file has been chosen.
    pop_table = pd.DataFrame({"latitude": [0], "longitude": [0], "Population": [0]})
else:
    print("Depending on the size of your country this may take awhile.....")

    def get_array(filename, data_store):
        """Flatten band 1 of a raster into a table and persist it.

        Parameters
        ----------
        filename : str
            Path of the raster file, opened with ``rasterio.open``.
        data_store : pandas.HDFStore
            Open store; receives the table under "density" and "visuals".

        Returns
        -------
        float
            Sum of the "Population" column after no-data cells are removed.
        """
        with rasterio.open(filename) as src:
            # Only the first band is used, so only that band is read.
            band = src.read(1)
            rows, cols = band.shape
            x_res, y_res = src.res
            left, top = src.bounds.left, src.bounds.top
            nodata = src.nodata

        # One coordinate per raster column/row, starting at the top-left corner.
        # Deriving the axes from the band shape guarantees the lengths match the
        # image, so no after-the-fact trimming for rounding errors is needed.
        x = left + x_res * np.arange(cols)
        y = top - y_res * np.arange(rows)

        # Expand to one (lon, lat) pair per cell, in the same row-major order
        # as the flattened band.
        lon, lat = np.meshgrid(x, y)
        pop_table = pd.DataFrame({"longitude": lon.ravel(),
                                  "latitude": lat.ravel(),
                                  "Population": band.ravel()})

        # Remove no-data cells. Prefer the value declared by the file and fall
        # back to the sentinel this notebook previously hard-coded.
        if nodata is None:
            nodata = -99999.0
        if np.isnan(nodata):
            keep = pop_table["Population"].notna()
        else:
            keep = pop_table["Population"] != nodata
        pop_table = pop_table[keep]

        # Vectorised sum accumulated in float64 (the band may be float32).
        total_peeps = float(pop_table["Population"].to_numpy().sum(dtype=np.float64))
        print("There are approximately {} people.".format(total_peeps))

        data_store["density"] = pop_table
        data_store["visuals"] = pop_table
        return total_peeps

    # Raw string: "\d" is an invalid escape sequence in a normal literal.
    # NOTE(review): this is a Windows-style path; every cell must use the same
    # literal so they all open the same file.
    with pd.HDFStore(r".\data\density.h5") as data_store:
        total = get_array(pop_file.value, data_store)

Waiting for Input


In [5]:
hide_code()
run_code()
# Aggregates the stored "visuals" table onto a 2-decimal lat/long grid, adds
# web-mercator coordinates, writes it back, and shows an interactive heat map.

if pop_file.value == "Select File":
    print("Waiting for Input")
else:
    print("Preparing Plot...")

    # Raw string: "\d" is an invalid escape sequence in a normal literal.
    # The context manager closes the store even if a step below raises.
    with pd.HDFStore(r".\data\density.h5") as data_store:
        viz_table = data_store["visuals"]

        # Extent of the data, taken before rounding; used for the plot ranges.
        lat_min = viz_table["latitude"].min()
        lat_max = viz_table["latitude"].max()
        lon_min = viz_table["longitude"].min()
        lon_max = viz_table["longitude"].max()

        # Round to 2 decimals, otherwise the plot is too dense.
        viz_table["longitude"] = viz_table["longitude"].round(2)
        viz_table["latitude"] = viz_table["latitude"].round(2)

        # Summing per (longitude, latitude) already leaves one row per pair,
        # so no separate de-duplication step is needed.
        viz_table = viz_table.groupby(['longitude', 'latitude']).sum().reset_index()

        # The transformer is given (latitude, longitude); its two outputs are
        # stored as the web-mercator x ("web_lon") and y ("web_lat").
        viz_table['web_lon'], viz_table["web_lat"] = transformer.transform(
            viz_table["latitude"].values, viz_table["longitude"].values)

        data_store["visuals"] = viz_table

    # Web-mercator corners of the data extent: [(x_min, y_min), (x_max, y_max)].
    # `bbox2` is also used by the area-selection cell further down.
    min_max_pts = [(lat_min, lon_min), (lat_max, lon_max)]
    bbox2 = list(transformer.itransform(min_max_pts))

    size = widgets.IntText(
        value=600,
        description='Plot Size',
        disabled=False
    )

    title = widgets.Text(
        value='Population Density',
        description='Title',
        disabled=False
    )

    # Reversed 8-step RdYlGn palette mapped linearly over the population range.
    colors = list(RdYlGn[8])
    colors.reverse()
    mapper = LinearColorMapper(palette=colors, low=viz_table.Population.min(),
                               high=viz_table.Population.max())

    color_bar = ColorBar(color_mapper=mapper, location=(0, 0),
                         ticker=BasicTicker(desired_num_ticks=len(colors)))

    def heatmap(size, title):
        """Show the population heat map on top of the tile_provider2 basemap.

        size  -- plot width and height in pixels.
        title -- plot title.
        """
        p2 = figure(plot_width=size, plot_height=size, title=title,
                    x_range=(bbox2[0][0], bbox2[1][0]), y_range=(bbox2[0][1], bbox2[1][1]),
                    x_axis_type="mercator", y_axis_type="mercator")
        p2.title.text_font_size = '20pt'
        map_base = p2.add_tile(tile_provider2)
        map_base.level = 'underlay'

        source = ColumnDataSource(viz_table)

        # One translucent square per grid cell, coloured by population.
        p2.rect(x='web_lon', y='web_lat', width=3500, height=3500, source=source,
                line_color=None, fill_color=transform("Population", mapper), alpha=0.07)

        p2.add_layout(color_bar, 'right')

        show(p2)

    heatmap_out = interact(heatmap, size=size, title=title)
    

Waiting for Input


# 4. Select the level of accuracy needed

After visualizing the data, we can select the level of accuracy and, if desired, narrow the final output to a specific city.

In [6]:
hide_code()
run_code()

# Coordinate-rounding choices, finest first. A later cell maps these exact
# labels to a number of decimals, so the strings must not change.
accuracy_options = [
    "6 decimals (~0.11 meters)",
    "5 decimals (~1.1 meters)",
    "4 decimals (~11 meters)",
    "3 decimals (~110 meters)",
    "2 decimals (~1.1 kilometers)",
]

accuracy = widgets.Dropdown(
    options=accuracy_options,
    value="4 decimals (~11 meters)",
    description="Accuracy",
    disabled=False,
)


def update(acc):
    """Return the selected accuracy label unchanged (callback handed to interact)."""
    return acc


acc_select = interact(update, acc=accuracy)

interactive(children=(Dropdown(description='Accuracy', index=2, options=('6 decimals (~0.11 meters)', '5 decim…

# 5: Smooth the population 

As shown in the earlier table there are many decimal people, which cannot exist. So based on the desired latitude/longitude accuracy the goal is to get close to the total population. The following code is based on Pareto distributions of populations or a rich get richer approach. In essence, if there is a high population density, then that area gets more people. 

(This is obviously somewhat coarse and we welcome contributions.)

In [7]:
hide_code()
run_code()
# Aggregates the stored "density" table at the chosen coordinate accuracy and
# converts the fractional populations to whole numbers.
# NOTE: this cell overwrites the "density" key with the aggregated table, so
# running it again aggregates the already-aggregated data.

if pop_file.value == "Select File":
    print("Waiting for Input")
else:
    # Dropdown label -> number of decimals used to round the coordinates.
    acc_dict = {"6 decimals (~0.11 meters)": 6,
                "5 decimals (~1.1 meters)": 5,
                "4 decimals (~11 meters)": 4,
                "3 decimals (~110 meters)": 3,
                "2 decimals (~1.1 kilometers)": 2}
    rd = acc_dict[accuracy.value]

    goal = round(total)
    print("The goal population is {}.\n".format(goal))
    print()
    print("Calculating......")

    # Raw string: "\d" is an invalid escape sequence in a normal literal.
    # The context manager closes the store even if a step below raises.
    with pd.HDFStore(r".\data\density.h5") as data_store:
        grouped = data_store["density"]
        grouped["longitude"] = grouped["longitude"].round(rd)
        grouped["latitude"] = grouped["latitude"].round(rd)
        grouped = grouped.groupby(['longitude', 'latitude']).sum().reset_index()

        # Cells whose population would disappear when rounded.
        # NOTE(review): the cut-off is 0.4 although rounding removes everything
        # below 0.5 — confirm which threshold is intended.
        population = grouped["Population"]
        is_small = (population > 0.0) & (population < 0.4)
        smalls = population.where(is_small, 0)

        # Whole people represented by those cells (accumulated in float64).
        mod_sum = int(smalls.to_numpy().sum(dtype=np.float64))

        # Give one person to each of the `mod_sum` largest small cells and
        # nothing to the rest. Previously the remaining cells kept their
        # fractional value in the bonus column, which doubled them before
        # rounding, and `.at` was used with a whole index instead of `.loc`.
        bonus = pd.Series(0, index=grouped.index, dtype=population.dtype)
        bonus.loc[smalls.nlargest(mod_sum).index] = 1

        grouped["Population"] = np.rint(population + bonus)

        new_total = float(grouped["Population"].to_numpy().sum(dtype=np.float64))
        diff = goal - new_total

        data_store["density"] = grouped

    # Avoid dividing by zero when the goal population is 0.
    share = round(new_total / goal * 100, 2) if goal else float("nan")

    print("The aggregated total population is: " + str(new_total))
    print()
    print("The new aggregated total accounts for: " + str(share) + "% of the population.")
    

Waiting for Input


# 6: If desired narrow your choice

The following provides sliders to narrow down to a desired geographic area.

In [8]:
hide_code()
run_code()
# Shows four sliders (min/max latitude and longitude) and draws the selected
# rectangle on a map. The slider order matters: the next cell reads them by
# position.

if pop_file.value == "Select File":
    print("Waiting for Input")
else:
    # Raw string: "\d" is an invalid escape sequence in a normal literal.
    # The store is only needed to load the table, so close it straight away
    # (it was previously left open).
    with pd.HDFStore(r".\data\density.h5") as data_store:
        viz_table = data_store["visuals"]

    lat_lo, lat_hi = viz_table['latitude'].min(), viz_table['latitude'].max()
    lon_lo, lon_hi = viz_table['longitude'].min(), viz_table['longitude'].max()

    def update(min_latitude, max_latitude, min_longitude, max_longitude):
        """Draw the selected lat/long rectangle in red on the basemap.

        All four arguments are in degrees and come from the sliders below.
        """
        p = figure(x_range=(bbox2[0][0], bbox2[1][0]), y_range=(bbox2[0][1], bbox2[1][1]),
                   x_axis_type="mercator", y_axis_type="mercator")
        p.add_tile(tile_provider2)

        # Convert the two corners straight to web mercator. Looking the slider
        # values up in the table required an exact float match and raised
        # ValueError for any value that is not itself a grid coordinate.
        x_min, y_min = transformer.transform(min_latitude, min_longitude)
        x_max, y_max = transformer.transform(max_latitude, max_longitude)

        p.patch([x_min, x_min, x_max, x_max],
                [y_min, y_max, y_max, y_min],
                color="red", alpha=0.5)

        # A new figure is shown on every change, so there is nothing to push
        # to a previously shown plot.
        show(p)

    # step=0.01 matches the 2-decimal rounding of the "visuals" grid.
    # `value` now sits inside the min_longitude slider; it used to be passed to
    # interact() itself because of a misplaced parenthesis.
    loc_input = interact(update,
                         min_latitude=widgets.FloatSlider(min=lat_lo, max=lat_hi,
                                                          value=lat_lo, step=0.01),
                         max_latitude=widgets.FloatSlider(min=lat_lo, max=lat_hi,
                                                          value=lat_hi, step=0.01),
                         min_longitude=widgets.FloatSlider(min=lon_lo, max=lon_hi,
                                                           value=lon_lo, step=0.01),
                         max_longitude=widgets.FloatSlider(min=lon_lo, max=lon_hi,
                                                           value=lon_hi, step=0.01))


Waiting for Input


# 7. New Population Size 

**You should skip this step if you did not reduce the area in step 6**

The following takes the down-selected population and updates the HDF5 file.

In [9]:
hide_code()
run_code()
# Restricts the stored "density" and "visuals" tables to the rectangle chosen
# with the sliders and writes both back to the HDF5 store.

if pop_file.value == "Select File":
    print("Waiting for Input")
else:
    print("Updating.....")

    # Slider widgets in creation order: min lat, max lat, min lon, max lon.
    # Read them through the public `value` trait instead of private internals.
    sliders = loc_input.widget.children
    # Sorting tolerates a "min" slider dragged past its "max" slider, which
    # would otherwise overwrite the store with an empty table.
    lat_min, lat_max = sorted((sliders[0].value, sliders[1].value))
    lon_min, lon_max = sorted((sliders[2].value, sliders[3].value))

    def _within_selection(table):
        """Return the rows of `table` that lie inside the selected rectangle."""
        return table[(table["latitude"] >= lat_min) & (table["latitude"] <= lat_max) &
                     (table["longitude"] <= lon_max) & (table["longitude"] >= lon_min)]

    # Raw string: "\d" is an invalid escape sequence in a normal literal.
    # The context manager closes the store even if a step below raises.
    with pd.HDFStore(r".\data\density.h5") as data_store:
        # Read the table from the store rather than relying on a variable left
        # in memory by an earlier cell.
        grouped = _within_selection(data_store["density"])
        data_store["density"] = grouped

        data_store["visuals"] = _within_selection(data_store["visuals"])

    print("Your new population total is {}".format(grouped["Population"].sum()))
    print("Your density file is now ready for merging with the demographic file")

Waiting for Input
