# Simulation

In [None]:
import random
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import geopandas as gpd
from shapely.geometry import Point, Polygon
import matplotlib.pyplot as plt
import seaborn as sns
from geovoronoi import voronoi_regions_from_coords
import palettable
import scipy.spatial as spatial
import math
from tqdm import tqdm
import gc
import time
import multiprocessing as mp
from sklearn.linear_model import LinearRegression

In [None]:
%%capture
# Execute the propagation-model notebook in this kernel with its output suppressed.
# Later cells call alpha_value and calculate_path_loss_hata_novar, which are not defined
# in this notebook -- presumably they come from the notebook run here (TODO confirm).
%run ./propagation_model_extended_hata.ipynb

In [None]:
def myround(x):
    """Snap a coordinate to the centre of the 0.1-wide grid cell that contains it.

    Cells are treated as half-open intervals [k/10, (k+1)/10), which matches the
    half-open bounds (``>= 0`` and ``< pixel``) used when the population is filtered.
    The previous ``ceil``-based form mapped ``x == 0.0`` to ``-0.05`` (outside the
    grid) and assigned points lying exactly on a cell boundary to the lower cell.

    Parameters
    ----------
    x : float
        Coordinate to snap.

    Returns
    -------
    float
        ``(floor(10 * x) + 0.5) / 10``, i.e. a value ending in ``.05``, ``.15``, ...
    """
    return (math.floor(10*x) + 0.5)/10

# Parameters

In [None]:
# Seed Python's built-in generator (the numpy draws further down create their own generators).
random.seed(123)

# Extent of the simulated map along each axis.
pixel_x = 100
pixel_y = 100

# Population sizes are defined for a 100-wide map and scaled quadratically with pixel_x.
_area_scale = (pixel_x/100)**2
pop_rural = int(500000 * _area_scale)
pop_urban = int(500000 * _area_scale)

# "ms" presumably stands for mobile subscribers (one per inhabitant) -- TODO confirm.
ms_per_pixel_rural = pop_rural * 1
ms_per_pixel_urban = pop_urban * 1

# Number of BTS sites: one per 10000 rural and one per 5000 urban subscribers (floats).
bts_count_rural = ms_per_pixel_rural / 10000
bts_count_urban = ms_per_pixel_urban / 5000

# Edge lengths of the administrative tiles.
urban_tile_size = 5
rural_tile_size = 20

# Fraction of each axis covered by the urban block of the administrative frame.
p = 0.2

In [None]:
start_time = time.time()

# Total number of BTS sites; simulation_small reads this as a module-level name.
bts_count = bts_count_rural + bts_count_urban

# Administrative frame
### Area classification: 0 = urban, 1 = rural

target_urban = int(pixel_x*p/urban_tile_size)
target_rural = int(pixel_x*(1 - p)/rural_tile_size)
# Number of rural tiles that fit next to the urban block along one axis.
rural_strip_tiles = int(pixel_x*p/rural_tile_size)


def _tile(x_lo, x_hi, y_lo, y_hi):
    """Return the rectangle [x_lo, x_hi] x [y_lo, y_hi] as a closed polygon."""
    lon = [x_lo, x_lo, x_hi, x_hi, x_lo]
    lat = [y_lo, y_hi, y_hi, y_lo, y_lo]
    return Polygon(zip(lon, lat))


def _urban_edge(k, extent):
    """Position of the k-th urban tile edge along an axis of length `extent`."""
    return k*p*extent/target_urban


def _strip_edge(k, extent):
    """Position of the k-th rural tile edge when counting from the origin."""
    return k*(1-p)*extent/target_rural


def _outer_edge(k, extent):
    """Position of the k-th rural tile edge when counting from the end of the urban block."""
    return (p + (1 - p)/target_rural*k)*extent


# Collect all tiles first and build the GeoDataFrame once; appending one-row frames
# in a loop copies the whole frame on every iteration.
tile_polygons = []
tile_types = []

# Urban block starting at the origin
for x in range(target_urban):
    for y in range(target_urban):
        tile_polygons.append(_tile(_urban_edge(x, pixel_x), _urban_edge(x + 1, pixel_x),
                                   _urban_edge(y, pixel_y), _urban_edge(y + 1, pixel_y)))
        tile_types.append(0)

# Rural strip sharing the x-range of the urban block
for x in range(rural_strip_tiles):
    for y in range(target_rural):
        tile_polygons.append(_tile(_strip_edge(x, pixel_x), _strip_edge(x + 1, pixel_x),
                                   _outer_edge(y, pixel_y), _outer_edge(y + 1, pixel_y)))
        tile_types.append(1)

# Rural strip sharing the y-range of the urban block
for x in range(target_rural):
    for y in range(rural_strip_tiles):
        tile_polygons.append(_tile(_outer_edge(x, pixel_x), _outer_edge(x + 1, pixel_x),
                                   _strip_edge(y, pixel_y), _strip_edge(y + 1, pixel_y)))
        tile_types.append(1)

# Remaining rural tiles
for x in range(target_rural):
    for y in range(target_rural):
        tile_polygons.append(_tile(_outer_edge(x, pixel_x), _outer_edge(x + 1, pixel_x),
                                   _outer_edge(y, pixel_y), _outer_edge(y + 1, pixel_y)))
        tile_types.append(1)

# `i` keeps its previous meaning: the number of administrative tiles created.
i = len(tile_polygons)
area_ids = list(range(i))

base_map = gpd.GeoDataFrame(data = {'area_id' : area_ids, 'area_type' : tile_types}, index = area_ids,
                            crs={'init': 'epsg:4326'}, geometry=tile_polygons)

# Area in squared map units (the coordinates are plain grid units, not degrees).
base_map['area_km2'] = base_map.area

# Bounding box of the "national park" in which the simulation places no rural towns.
park_x_low = int(0.5*pixel_x)
park_y_low = int(0.5*pixel_y)
park_x_high = int(0.75*pixel_x)
park_y_high = int(0.75*pixel_y)

# Grid frame

# The fine grid is built in integer tenths and divided by 10, giving 0.1 x 0.1 cells.
xmin = 0
ymin = 0
xmax = pixel_x*10
ymax = pixel_y*10

length = 1
width = 1

cols = list(range(int(np.floor(xmin)), int(np.ceil(xmax)), width))
rows = list(range(int(np.floor(ymin)), int(np.ceil(ymax)), length))
rows.reverse()

polygons = [Polygon([(x/10,y/10), ((x+width)/10, y/10), ((x+width)/10, (y+length)/10), (x/10, (y+length)/10)])
            for x in cols
            for y in rows]

grid_map = gpd.GeoDataFrame(index=[], crs={'init': 'epsg:4326'}, geometry = polygons)

# Release the polygon list; the frame holds the geometries now.
polygons = None

grid_map.insert(loc=0, column='grid_id', value=list(range(len(grid_map))))

grid_centroid = grid_map.copy()

grid_centroid['geometry'] = grid_centroid.centroid

# Assign every grid cell (via its centroid) to one administrative area.
df = gpd.tools.sjoin(
    base_map[['area_id', 'area_type', 'geometry']], grid_centroid[['grid_id', 'geometry']], how="left", op='intersects').reset_index().drop(
    columns = ['index', 'index_right', 'geometry']).astype({'grid_id' : 'uint32', 'area_id' : 'uint16', 'area_type' : 'uint8'}).drop_duplicates(subset = 'grid_id')

# Rounded centroid coordinates are the join keys used when people are binned into cells.
grid_centroid['x'] = np.around(grid_centroid.geometry.x, decimals = 2)
grid_centroid['y'] = np.around(grid_centroid.geometry.y, decimals = 2)

# Poverty frame

# Coarser 4 x 4 grid over the same map.
xmin = 0
ymin = 0
xmax = pixel_x
ymax = pixel_y

length = 4
width = 4

cols = list(range(int(np.floor(xmin)), int(np.ceil(xmax)), width))
rows = list(range(int(np.floor(ymin)), int(np.ceil(ymax)), length))
rows.reverse()

polygons = [Polygon([(x, y), ((x+width), y), ((x+width), (y+length)), (x, (y+length))])
            for x in cols
            for y in rows]

poverty_map = gpd.GeoDataFrame(index=[], crs={'init': 'epsg:4326'}, geometry = polygons)

polygons = None

poverty_map.insert(loc=0, column='pov_id', value=list(range(len(poverty_map))))

# Lookup table from fine grid cell to poverty cell.
poverty_df = gpd.tools.sjoin(
    poverty_map[['pov_id', 'geometry']], grid_centroid[['grid_id', 'geometry']], how="left", op='intersects').reset_index().drop(
    columns = ['index', 'index_right', 'geometry']).astype({'grid_id' : 'uint32', 'pov_id' : 'uint16'})

print("--- %s seconds ---" % (time.time() - start_time))

In [None]:
# Sanity check: one row per fine grid cell, with grid_id, geometry, x and y columns.
grid_centroid.shape

In [None]:
# Sanity check: number of grid cells that were matched to an administrative area.
df.shape

In [None]:
# Visual check of the administrative tiling (small urban tiles, larger rural tiles).
base_map.plot(edgecolor = 'white')

# Prepare

In [None]:
# Independent copy of the grid-to-area lookup; it is passed to simulation_small as `df_full`.
df_full = df.copy()

# Start simulation

In [None]:
def simulation_small(df_full, pixel_x, pixel_y, park_x_low, park_y_low, park_x_high, park_y_high, 
                     pop_urban, pop_rural, r):
    """Run one replicate of the simulation and return its evaluation tables.

    The replicate draws a synthetic population, places BTS sites with k-means,
    derives the "true" coverage and home cell of every populated grid cell from the
    path-loss model, and then compares three mapping approaches against it:
    point-to-polygon (P2P), Voronoi tessellation and a simplified HATA model.

    Besides its arguments the function reads these module-level names:
    ``grid_centroid``, ``poverty_df``, ``base_map``, ``bts_count``, ``myround``,
    ``alpha_value`` and ``calculate_path_loss_hata_novar``.

    Parameters
    ----------
    df_full : DataFrame
        Lookup with columns ``grid_id``, ``area_id`` and ``area_type``.
    pixel_x, pixel_y : number
        Extent of the map along each axis.
    park_x_low, park_y_low, park_x_high, park_y_high : number
        Bounding box in which no rural town centre and no scattered inhabitant is placed.
    pop_urban, pop_rural : int
        Number of inhabitants drawn for the urban cluster and for the rural towns.
    r : int
        Replicate number; stored in the ``run`` column of both outputs.

    Returns
    -------
    tuple of (list, list)
        Rows of the coverage evaluation (one per BTS) and rows of the estimation
        evaluation (one per administrative area), each as a list of lists. The
        caller attaches the column names.
    """

    # Urban Population
    # Centre of the urban cluster (the y coordinate previously reused pixel_x).
    mean = [int(pixel_x/10), int(pixel_y/10)]
    cov = [[pixel_x/2, 0], [0, pixel_y/2]]

    x_gaussian, y_gaussian = np.random.RandomState().multivariate_normal(mean, cov, 10000000).T

    gaussian = pd.DataFrame({'x': x_gaussian, 'y': y_gaussian})
    # Keep only draws inside the map, then the first pop_urban of them.
    gaussian = gaussian[(gaussian['x'] >= 0) & (gaussian['y'] >= 0) & (gaussian['x'] < pixel_x) & (gaussian['y'] < pixel_y)]
    gaussian = gaussian[0:pop_urban]

    # Rural Population
    # Towns are collected in a list and concatenated once, instead of growing a
    # frame with DataFrame.append inside the loop.
    town_frames = []
    town_rows = 0

    while town_rows < pop_rural:

        mean = [np.random.RandomState().uniform(0, pixel_x, 1)[0], np.random.RandomState().uniform(0, pixel_y, 1)[0]]

        # No towns in national park
        if (mean[0] >= park_x_low) & (mean[1] >= park_y_low) & (mean[0] < park_x_high) & (mean[1] < park_y_high):
            continue

        else:

            # NOTE(review): negative draws are cast to uint16 as well; the result of that
            # cast is platform dependent -- confirm this is intended.
            cov_size = np.random.RandomState().normal(10000, 10000, 1).astype('uint16')[0]

            cov = [[abs(np.random.RandomState().normal(cov_size/5000, cov_size/50000, 1))[0], 0],
                   [0, abs(np.random.RandomState().normal(cov_size/5000, cov_size/50000, 1))[0]]]

            x_gaussian, y_gaussian = np.random.RandomState().multivariate_normal(mean, cov, 1000000).T

            town = pd.DataFrame({'x': x_gaussian, 'y': y_gaussian})
            town = town[(town['x'] >= 0) & (town['y'] >= 0) & (town['x'] < pixel_x) & (town['y'] < pixel_y)]
            town = town[0:cov_size]
            town_frames.append(town)
            town_rows += town.shape[0]

    towns = pd.concat(town_frames) if town_frames else pd.DataFrame()

    # 99.5 % of the rural population lives in towns ...
    towns = towns[0:int(0.995*pop_rural)]

    # ... and the remainder is scattered uniformly outside the national park.
    x_uni = np.random.RandomState().uniform(0, pixel_x, 100000)
    y_uni = np.random.RandomState().uniform(0, pixel_y, 100000)

    uni = pd.DataFrame({'x': x_uni, 'y': y_uni})

    uni = uni[~((uni.x >= park_x_low) & (uni.y >= park_y_low) & (uni.x < park_x_high) & (uni.y < park_y_high))]

    towns = pd.concat([towns, uni[0:(pop_rural - towns.shape[0])]])

    gaussian = pd.concat([gaussian, towns]).reset_index().drop(columns = ['index'])

    # Release the large intermediate arrays.
    x_gaussian = None
    y_gaussian = None
    x_uni = None
    y_uni = None
    towns = None
    uni = None
    town_frames = None

    gaussian.insert(loc=0, column='id', value=list(range(len(gaussian))))

    # BTS Locations
    sample_size = 0.01

    # Use a fresh generator like every other draw in this function: the global numpy
    # generator is copied into forked worker processes, which would make them pick
    # the same sample positions.
    bts_sample = gaussian[gaussian['id'].isin(np.random.RandomState().choice(gaussian['id'], int(sample_size*gaussian.shape[0]), replace=False))].copy()

    kmeans = KMeans(n_clusters=int(bts_count), random_state = np.random.RandomState()).fit(bts_sample[['x', 'y']])
    bts_sample['bts'] = kmeans.labels_

    kmeans_centroid = pd.DataFrame(data = {'x': [row[0] for row in kmeans.cluster_centers_], 'y': [row[1] for row in kmeans.cluster_centers_]})
    kmeans_centroid['bts'] = list(range(0, kmeans.labels_.max() + 1))

    # Population per Grid
    # Snap every person to a grid-cell centre so that the coordinates can be joined on.
    gaussian['x'] = np.around(gaussian['x'].apply(lambda x: myround(x)), decimals = 2)
    gaussian['y'] = np.around(gaussian['y'].apply(lambda x: myround(x)), decimals = 2)

    grid_count = gaussian.merge(
        grid_centroid[['grid_id', 'x', 'y']], on = ['x', 'y'], how = 'left').groupby(['grid_id', 'x', 'y'])['id'].count().reset_index().rename(columns={'id':'pop'})

    df = df_full.merge(grid_count, on = 'grid_id', how = 'left').fillna(0)

    # BTS Specifications
    nA = np.array(list(zip(grid_centroid.x, grid_centroid.y)) )
    nB = np.array(list(zip(kmeans_centroid.x, kmeans_centroid.y)) )
    btree = spatial.cKDTree(nB)

    # For every grid cell, record its 1st to 5th nearest BTS and the distance to it.
    link_frames = []

    for i in range(1,6):

        dist, idx = btree.query(nA, k = [i])

        temp = grid_centroid[['grid_id']].copy()

        temp['bts'] = kmeans_centroid.loc[idx.flatten(), kmeans_centroid.columns == 'bts'].reset_index().drop(columns = ['index'])
        temp['distance'] = dist.flatten()
        temp['k'] = i

        link_frames.append(temp)

    links_all = pd.concat(link_frames)

    nA = None
    nB = None
    btree = None
    temp = None
    link_frames = None

    links_all = links_all.merge(df, on = 'grid_id', how = 'left').astype({'grid_id' : 'uint32', 'bts' : 'uint16', 'k' : 'uint8', 'pop' : 'uint8'})

    # Number of grid cells for which each BTS is the nearest one.
    bts_specs = links_all[links_all.k == 1].groupby('bts')['grid_id'].count().reset_index().rename(columns = {'grid_id' : 'grid_count'})

    bts_specs = kmeans_centroid.merge(bts_specs, on = 'bts', how = 'left')

    # Urbanity code from the size of the nearest-cell footprint:
    # 0 = at most the median, 1 = above the 95 % quantile, 2 = in between.
    bts_specs['urbanity'] = np.where(bts_specs['grid_count'] <= np.quantile(bts_specs['grid_count'], 0.5), 0, 2)
    bts_specs.loc[bts_specs.urbanity != 0, 'urbanity'] = np.where(bts_specs.loc[bts_specs.urbanity != 0, 'grid_count'] > np.quantile(bts_specs['grid_count'], 0.95), 1, 2)

    # Frequency band: a Bernoulli draw per BTS, then 0 -> 900 and 1 -> 2100.
    bts_specs.loc[bts_specs.urbanity == 0, 'f'] = np.random.RandomState().binomial(1, 0.5, bts_specs.loc[bts_specs.urbanity == 0].shape[0]) #0.9
    bts_specs.loc[bts_specs.urbanity == 2, 'f'] = np.random.RandomState().binomial(1, 0.3, bts_specs.loc[bts_specs.urbanity == 2].shape[0]) #0.7
    bts_specs.loc[bts_specs.urbanity == 1, 'f'] = np.random.RandomState().binomial(1, 0, bts_specs.loc[bts_specs.urbanity == 1].shape[0])

    bts_specs.loc[bts_specs.f == 0, 'f'] = 900
    bts_specs.loc[bts_specs.f == 1, 'f'] = 2100

    # Transmitter height and power are drawn per frequency band.
    bts_specs.loc[bts_specs.f == 900, 'h_tx'] = np.random.RandomState().uniform(20, 50, bts_specs[bts_specs.f == 900].shape[0]).astype('int')
    bts_specs.loc[bts_specs.f == 2100, 'h_tx'] = np.random.RandomState().uniform(15, 60, bts_specs[bts_specs.f == 2100].shape[0]).astype('int')

    bts_specs.loc[bts_specs.f == 900, 'p_tx'] = np.random.RandomState().uniform(43, 47, bts_specs[bts_specs.f == 900].shape[0]).astype('int')
    bts_specs.loc[bts_specs.f == 2100, 'p_tx'] = np.random.RandomState().uniform(40, 47, bts_specs[bts_specs.f == 2100].shape[0]).astype('int')

    # Poverty Rate

    # Only populated grid cells take part from here on.
    df = df[df['pop'] != 0]

    df = df.merge(poverty_df, on = 'grid_id', how = 'left')

    poverty_probability = np.random.RandomState().uniform(0, 1, 100000)

    # Poverty cells with a smaller population get a larger factor.
    pov_factor = df.groupby(['pov_id'])['pop'].sum().reset_index()
    pov_factor['pop_factor'] = 1 - (pov_factor['pop'] / max(pov_factor['pop']))

    for i in df['pov_id'].astype('int').unique():

        # `df['pop']` is the population column; the attribute form `df.pop` resolves to
        # the DataFrame.pop method and never filtered anything.
        in_pov_cell = (df.pov_id == i) & (df['pop'] != 0)

        df.loc[in_pov_cell, 'poverty_rate'] = np.random.RandomState().normal(
            pov_factor.loc[pov_factor['pov_id'] == i, 'pop_factor']*poverty_probability[i], 0.5, len(df.loc[in_pov_cell]))

    # Clip to the unit interval.
    df.loc[df.poverty_rate < 0, 'poverty_rate'] = 0
    df.loc[df.poverty_rate > 1, 'poverty_rate'] = 1

    df['poverty_model'] = df['poverty_rate']

    df = df[df['pop'] != 0]

    links = links_all[links_all['grid_id'].isin(df['grid_id'])].copy()

    links = links.merge(bts_specs[['bts', 'urbanity', 'f', 'h_tx', 'p_tx']], on = 'bts', how = 'left')

    links_all = None

    # Simulation: True Coverage
    # For every BTS, step outwards in 0.1 increments until the received signal
    # strength drops below -110; that distance is its maximum range.
    for i in bts_specs.index:

        for j in np.arange(0.1, 10000, 0.1):

            # Bagged path loss estimation
            alpha = alpha_value(bts_specs.loc[i, 'f'], j, bts_specs.loc[i, 'h_tx'])
            path_loss_hata = calculate_path_loss_hata_novar(bts_specs.loc[i, 'f'], bts_specs.loc[i, 'urbanity'], j, bts_specs.loc[i, 'h_tx'], 1, alpha)

            if (bts_specs.loc[i, 'p_tx'] - path_loss_hata) < -110:
                bts_specs.loc[i, 'max_range'] = j
                break

    # Built once after the loop (it used to be rebuilt on every iteration).
    true_coverage = bts_specs[['bts', 'urbanity', 'max_range']].rename(columns = {'max_range' : 'distance'})

    true_coverage = true_coverage[true_coverage['distance'] != 0]
    true_coverage = true_coverage.merge(bts_specs[['bts', 'x', 'y']]).rename(columns = {'bts' : 'true_bts'})
    true_coverage = gpd.GeoDataFrame(true_coverage, crs={'init': 'epsg:4326'}, geometry=[Point(xy) for xy in zip(true_coverage.x, true_coverage.y)])
    # Coverage area = disc with the maximum range as radius.
    true_coverage['geometry'] = true_coverage.apply(lambda x : x.geometry.buffer(x.distance), axis = 1)
    true_coverage['true_bts_km2'] = true_coverage.area

    df_eval_cov = true_coverage[['true_bts', 'urbanity', 'true_bts_km2']]

    # Simulation: Home Location
    df_loop = links[['grid_id', 'bts', 'distance', 'urbanity', 'f', 'h_tx', 'p_tx']].copy()

    df_loop['alpha'] = np.vectorize(alpha_value)(df_loop['f'], df_loop['distance'], df_loop['h_tx'])
    df_loop['path_loss_hata'] = np.vectorize(calculate_path_loss_hata_novar)(df_loop['f'], df_loop['urbanity'], df_loop['distance'], df_loop['h_tx'], 1, df_loop['alpha'])

    df_loop['rss'] = df_loop['p_tx'] - df_loop['path_loss_hata']

    df_loop = df_loop.drop(columns = ['urbanity', 'f', 'h_tx', 'p_tx', 'path_loss_hata'])

    df_loop = df_loop[df_loop.rss > -110]

    # Flag, per grid cell, the BTS with the strongest signal.
    df_loop.set_index('grid_id', inplace = True)
    df_loop['w_best_ant'] = 0
    df_loop.loc[df_loop.groupby('grid_id')['rss'].transform(max) == df_loop.rss, 'w_best_ant'] = 1
    df_loop.reset_index(inplace=True)

    temp = df_loop.groupby('grid_id')['w_best_ant'].sum().reset_index()

    # Ties: split the weight equally between the tied BTS.
    for i in temp[temp.w_best_ant > 1].grid_id:

        if df_loop[df_loop.grid_id == i].w_best_ant.sum() > 1:

            df_loop.loc[df_loop.grid_id == i, 'w_best_ant'] = np.where(df_loop[df_loop.grid_id == i].w_best_ant == 1, 1/df_loop[df_loop.grid_id == i].w_best_ant.sum(), 0)

        else:
            continue

    df_loop = df_loop.loc[df_loop['w_best_ant'] != 0, ['grid_id', 'bts', 'distance']]

    # True home location
    true_home = df_loop[['grid_id', 'bts']].copy()

    df_loop = None

    # P2P
    # Each BTS is assigned to the administrative area that contains its location.
    bts_specs = gpd.GeoDataFrame(bts_specs, crs={'init': 'epsg:4326'}, geometry=[Point(xy) for xy in zip(bts_specs.x, bts_specs.y)])

    map_p2p = gpd.tools.sjoin(bts_specs[['bts', 'geometry']], base_map, how="left", op='intersects').drop(columns = ['index_right']).drop_duplicates('bts')

    # Voronoi
    bts_coords = np.array(
        list(
            [list(xy) for xy in zip(bts_specs.x, bts_specs.y)]))

    poly_shapes, pts, poly_to_pt_assignments = voronoi_regions_from_coords(bts_coords, base_map.unary_union)

    voronoi = gpd.GeoDataFrame(pd.DataFrame(poly_to_pt_assignments)[0].rename('voronoi_id'), crs = base_map.crs, geometry = poly_shapes)

    voronoi['voronoi_km2'] = voronoi.area

    voronoi = gpd.tools.sjoin(voronoi, bts_specs[['bts', 'geometry']], how="left", op='intersects').drop(columns = ['index_right'])

    # Intersections of administrative areas with Voronoi cells.
    map_voronoi = gpd.overlay(base_map, voronoi, how = 'intersection')
    map_voronoi.insert(loc=0, column='intersection_id', value=list(range(len(map_voronoi))))

    map_voronoi['intersection_km2'] = map_voronoi.area

    gdf = gpd.GeoDataFrame(df[['grid_id','pop']], crs={'init': 'epsg:4326'}, geometry=[Point(xy) for xy in zip(df.x, df.y)])

    # Number of populated grid cells inside every intersection.
    voronoi_count = gpd.tools.sjoin(
        map_voronoi, gdf[['pop', 'geometry']], how="left", op='intersects').drop(
        columns = ['index_right']).groupby('intersection_id')['pop'].count().reset_index()

    map_voronoi = map_voronoi.merge(voronoi_count, on = 'intersection_id', how = 'left').rename(columns = {'pop' : 'intersection_count'})

    voronoi_count = None

    map_voronoi.set_index('voronoi_id', inplace = True)
    map_voronoi['voronoi_count'] = map_voronoi.groupby('voronoi_id')['intersection_count'].sum()
    map_voronoi.reset_index(inplace=True)

    map_voronoi.set_index('area_id', inplace = True)
    map_voronoi['area_count'] = map_voronoi.groupby('area_id')['intersection_count'].sum()
    map_voronoi.reset_index(inplace=True)

    vor_s_overlap = gpd.tools.sjoin(gdf, voronoi, how="left", op='intersects').drop_duplicates(subset = 'grid_id')

    gdf = None

    # Area-based ("geo") and count-based ("knn") weights of every intersection,
    # relative to its Voronoi cell ("ant") and to its administrative area ("area").
    map_voronoi['w_geo_vor_ant'] = map_voronoi['intersection_km2'] / map_voronoi['voronoi_km2']

    map_voronoi['w_geo_vor_area'] = map_voronoi['intersection_km2'] / map_voronoi['area_km2']

    map_voronoi['w_knn_vor_ant'] = map_voronoi['intersection_count'] / map_voronoi['voronoi_count']

    map_voronoi['w_knn_vor_area'] = map_voronoi['intersection_count'] / map_voronoi['area_count']

    #Simple HATA
    # Same path-loss model, but with fixed transmitter settings per urbanity class
    # instead of the drawn per-BTS specifications.
    hata_simple = links[['grid_id', 'bts', 'area_id', 'distance', 'urbanity', 'pop']].copy()

    hata_simple.loc[hata_simple.urbanity == 0, 'f'] = 2100
    hata_simple.loc[hata_simple.urbanity == 2, 'f'] = 900
    hata_simple.loc[hata_simple.urbanity == 1, 'f'] = 900

    hata_simple['h_tx'] = 30

    hata_simple['p_tx'] = 45

    hata_simple = hata_simple.astype({'urbanity' : 'uint8', 'f': 'uint16', 'h_tx' : 'uint8', 'p_tx' : 'uint8'})

    hata_simple['alpha'] = np.vectorize(alpha_value)(hata_simple['f'], hata_simple['distance'], hata_simple['h_tx'])
    hata_simple['path_loss'] = np.vectorize(calculate_path_loss_hata_novar)(hata_simple['f'], hata_simple['urbanity'], hata_simple['distance'], hata_simple['h_tx'], 1, hata_simple['alpha'])

    hata_simple['rss'] = hata_simple['p_tx'] - hata_simple['path_loss']

    hata_simple_overlap = hata_simple[['bts', 'urbanity', 'f', 'h_tx', 'p_tx']].drop_duplicates()

    hata_simple = hata_simple.drop(columns = ['alpha', 'path_loss', 'urbanity', 'f', 'h_tx', 'p_tx'])

    hata_simple = hata_simple[hata_simple.rss > -110]

    # Best-server weight, built the same way as for the true home location.
    hata_simple.set_index('grid_id', inplace = True)
    hata_simple['w_best_ant'] = 0
    hata_simple.loc[hata_simple.groupby('grid_id')['rss'].transform(max) == hata_simple.rss, 'w_best_ant'] = 1
    hata_simple.reset_index(inplace=True)

    temp = hata_simple.groupby('grid_id')['w_best_ant'].sum().reset_index()

    for i in temp[temp.w_best_ant > 1].grid_id:
        if hata_simple[hata_simple.grid_id == i].w_best_ant.sum() > 1:
            hata_simple.loc[hata_simple.grid_id == i, 'w_best_ant'] = np.where(hata_simple[hata_simple.grid_id == i].w_best_ant == 1, 1/hata_simple[hata_simple.grid_id == i].w_best_ant.sum(), 0)

        else:
            continue

    hata_simple_best = hata_simple.loc[hata_simple['w_best_ant'] != 0, ['grid_id', 'bts', 'distance', 'w_best_ant', 'pop']]

    # Inverse-squared signal strength weights, normalised per grid cell, BTS and area.
    hata_simple['rss_inv'] = 1 / (hata_simple.rss)**2

    hata_simple.set_index('grid_id', inplace = True)
    hata_simple['rss_sum_inv'] = hata_simple.groupby('grid_id')['rss_inv'].sum()
    hata_simple['w_knn_ant'] = hata_simple.rss_inv / hata_simple.rss_sum_inv
    hata_simple.reset_index(inplace=True)

    hata_simple.set_index('bts', inplace = True)
    hata_simple['w_knn_ant_pop'] = hata_simple.w_knn_ant # * hata_simple['pop_avg']
    hata_simple['w_knn_ant_sum'] = hata_simple.groupby('bts')['w_knn_ant_pop'].sum()
    hata_simple['w_knn_site'] = hata_simple.w_knn_ant_pop / hata_simple.w_knn_ant_sum
    hata_simple.reset_index(inplace=True)

    hata_simple.set_index('area_id', inplace = True)
    hata_simple['w_knn_ant_sum'] = hata_simple.groupby('area_id')['w_knn_ant_pop'].sum()
    hata_simple['w_knn_area'] = hata_simple.w_knn_ant_pop / hata_simple.w_knn_ant_sum
    hata_simple.reset_index(inplace=True)

    # Overlaps with True Coverage
    # All overlaps below have the form intersection / (A + B - intersection).
    ### P2P
    p2p_a_overlap = true_coverage.merge(map_p2p[['bts', 'area_id', 'area_km2']].rename(columns = {'area_id' : 'true_bts_area_id'}), left_on = 'true_bts', right_on = 'bts', how ='left')

    p2p_a_overlap = gpd.overlay(p2p_a_overlap, base_map[['geometry', 'area_id']], how = 'intersection')

    p2p_a_overlap['intersection_km2'] = p2p_a_overlap.area

    p2p_a_overlap = (p2p_a_overlap[p2p_a_overlap.area_id == p2p_a_overlap.true_bts_area_id].groupby('true_bts')['intersection_km2'].sum() /
                     (p2p_a_overlap.groupby('true_bts')['true_bts_km2'].mean() + 
                      p2p_a_overlap[p2p_a_overlap.area_id == p2p_a_overlap.true_bts_area_id].groupby('true_bts')['area_km2'].mean() - 
                      p2p_a_overlap[p2p_a_overlap.area_id == p2p_a_overlap.true_bts_area_id].groupby('true_bts')['intersection_km2'].sum()
                     )
                    ).rename('p2p_a_overlap').reset_index()

    df_eval_cov = df_eval_cov.merge(p2p_a_overlap, on = 'true_bts', how = 'left')

    s_per_area = df[['area_id', 'pop']].groupby('area_id').count().reset_index().rename(columns = {'area_id' : 'area_id_id', 'pop' : 'pop_per_area'})

    p2p_s_overlap = df[['grid_id', 'area_id', 'pop']].merge(true_home.rename(columns = {'bts' : 'true_bts'}), on = 'grid_id', how = 'left')

    p2p_s_overlap = p2p_s_overlap.merge(map_p2p[['bts', 'area_id']].rename(columns = {'area_id' : 'true_bts_area_id'}), left_on = 'true_bts', right_on = 'bts', how ='left')

    p2p_s_overlap = p2p_s_overlap.merge(s_per_area, left_on = 'true_bts_area_id', right_on = 'area_id_id', how = 'left')

    p2p_s_overlap = (p2p_s_overlap[p2p_s_overlap.area_id == p2p_s_overlap.true_bts_area_id].groupby('true_bts')['pop'].count() / 
                     (p2p_s_overlap.groupby('true_bts')['pop'].count() + 
                      p2p_s_overlap[p2p_s_overlap.area_id == p2p_s_overlap.true_bts_area_id].groupby('true_bts')['pop_per_area'].mean() - 
                      p2p_s_overlap[p2p_s_overlap.area_id == p2p_s_overlap.true_bts_area_id].groupby('true_bts')['pop'].count()
                     )
                    ).rename('p2p_s_overlap').reset_index().fillna(0)

    df_eval_cov = df_eval_cov.merge(p2p_s_overlap, on = 'true_bts', how = 'left')

    ### Voronoi
    vor_overlap = gpd.overlay(true_coverage, voronoi, how = 'intersection')

    vor_overlap['intersection_km2'] = vor_overlap.area

    vor_a_overlap = (vor_overlap[vor_overlap.true_bts == vor_overlap.bts].groupby('true_bts')['intersection_km2'].sum() / 
                     (vor_overlap.groupby('true_bts')['true_bts_km2'].mean() +
                      vor_overlap[vor_overlap.true_bts == vor_overlap.bts].groupby('true_bts')['voronoi_km2'].mean() -
                      vor_overlap[vor_overlap.true_bts == vor_overlap.bts].groupby('true_bts')['intersection_km2'].sum()
                     )
                    ).rename('vor_a_overlap').reset_index()

    df_eval_cov = df_eval_cov.merge(vor_a_overlap, on = 'true_bts', how = 'left')

    vor_s_overlap = vor_s_overlap[['grid_id', 'bts', 'pop']].merge(true_home.rename(columns = {'bts' : 'true_bts'}), on = 'grid_id', how = 'left')

    vor_s_overlap = (vor_s_overlap[vor_s_overlap.bts == vor_s_overlap.true_bts].groupby('true_bts')['pop'].count() / 
                     (vor_s_overlap.groupby('true_bts')['pop'].count() +
                      vor_s_overlap.groupby('bts')['pop'].count() -
                      vor_s_overlap[vor_s_overlap.bts == vor_s_overlap.true_bts].groupby('true_bts')['pop'].count()
                     )
                    ).reset_index().fillna(0).rename(columns = {'index' : 'true_bts', 'pop' : 'vor_s_overlap'})

    df_eval_cov = df_eval_cov.merge(vor_s_overlap, on = 'true_bts', how = 'left')

    ### Simple HATA
    # Maximum range under the simplified settings.
    # NOTE(review): this search starts at distance 0 and stops at 100, whereas the true
    # coverage search runs from 0.1 to 10000 -- confirm the difference is intended.
    for i in hata_simple_overlap.index:

        for j in np.arange(0, 100, 0.1):

            alpha = alpha_value(hata_simple_overlap.loc[i, 'f'], j, hata_simple_overlap.loc[i, 'h_tx'])
            path_loss = calculate_path_loss_hata_novar(hata_simple_overlap.loc[i, 'f'], hata_simple_overlap.loc[i, 'urbanity'], j, hata_simple_overlap.loc[i, 'h_tx'], 1, alpha)

            if (hata_simple_overlap.loc[i, 'p_tx'] - path_loss) < -110:
                hata_simple_overlap.loc[i, 'max_range'] = j
                break

    hata_simple_overlap = hata_simple_overlap[['bts', 'max_range']].rename(columns = {'max_range' : 'distance'})

    hata_simple_overlap = hata_simple_overlap[hata_simple_overlap['distance'] != 0]

    hata_simple_overlap = hata_simple_overlap.merge(bts_specs[['bts', 'x', 'y']], on = 'bts', how = 'left')

    hata_simple_overlap = gpd.GeoDataFrame(hata_simple_overlap,
                                      crs = {'init': 'epsg:4326'},
                                      geometry=[Point(xy) for xy in zip(hata_simple_overlap.x, hata_simple_overlap.y)])

    hata_simple_overlap['geometry'] = hata_simple_overlap.apply(lambda x : x.geometry.buffer(x.distance), axis = 1)

    hata_simple_overlap['bts_km2'] = hata_simple_overlap.area

    hata_simple_a_overlap = gpd.overlay(true_coverage, hata_simple_overlap, how = 'intersection')

    hata_simple_a_overlap['intersection_km2'] = hata_simple_a_overlap.area

    hata_simple_a_overlap = (hata_simple_a_overlap[hata_simple_a_overlap.true_bts == hata_simple_a_overlap.bts].groupby('true_bts')['intersection_km2'].sum() / 
                     (hata_simple_a_overlap.groupby('true_bts')['true_bts_km2'].mean() +
                      hata_simple_a_overlap[hata_simple_a_overlap.true_bts == hata_simple_a_overlap.bts].groupby('true_bts')['bts_km2'].mean() -
                      hata_simple_a_overlap[hata_simple_a_overlap.true_bts == hata_simple_a_overlap.bts].groupby('true_bts')['intersection_km2'].sum()
                     )
                    ).rename('hata_simple_a_overlap').reset_index()

    df_eval_cov = df_eval_cov.merge(hata_simple_a_overlap, on = 'true_bts', how = 'left')

    hata_simple_s_overlap = hata_simple_best[['bts', 'grid_id', 'pop']].merge(true_home.rename(columns = {'bts' : 'true_bts'}), on = 'grid_id', how = 'left')

    # The value column of this ratio is called 'pop' (the old rename looked for 'id' and
    # left it unnamed as 'pop').
    # NOTE(review): unlike the P2P and Voronoi counterparts, missing values are not
    # replaced by 0 here -- confirm whether that is intended.
    hata_simple_s_overlap = (hata_simple_s_overlap[hata_simple_s_overlap.bts == hata_simple_s_overlap.true_bts].groupby('true_bts')['pop'].count() / 
                     (hata_simple_s_overlap.groupby('true_bts')['pop'].count() +
                      hata_simple_s_overlap.groupby('bts')['pop'].count() -
                      hata_simple_s_overlap[hata_simple_s_overlap.bts == hata_simple_s_overlap.true_bts].groupby('true_bts')['pop'].count()
                     )
                    ).reset_index().rename(columns = {'index' : 'true_bts', 'pop' : 'hata_simple_s_overlap'})

    df_eval_cov = df_eval_cov.merge(hata_simple_s_overlap, on = 'true_bts', how = 'left')

    df_eval_cov['run'] = r

    # Estimation Effects
    est_effects = true_home.rename(columns = {'bts' : 'true_bts'}).merge(df[['grid_id', 'area_id', 'pop', 'poverty_model']], on = 'grid_id', how = 'left')

    # Population-weighted poverty rate per BTS. Columns are selected with a list;
    # selecting several columns with a bare tuple is not supported by newer pandas.
    est_effects_bts = est_effects.groupby('true_bts')[['pop', 'poverty_model']].agg(
        lambda x: np.average(x, weights=est_effects.loc[x.index, "pop"])).reset_index().drop(columns = ['pop'])

    df_eval_est = pd.DataFrame(base_map['area_id'].unique(), columns = ['area_id'])

    ### True Poverty
    df_eval_est_true = df.groupby('area_id')[['pop', 'poverty_rate']].agg(
        lambda x: np.average(x, weights=df.loc[x.index, "pop"])).reset_index().drop(columns = ['pop'])

    df_eval_est = df_eval_est.merge(df_eval_est_true, on = 'area_id', how = 'left')

    # True Home (Benchmark)
    est_effects_true = est_effects.drop(columns = ['poverty_model']).merge(est_effects_bts, on = 'true_bts', how = 'left')

    est_effects_true = est_effects_true.dropna().groupby('area_id')['poverty_model'].mean().reset_index().rename(columns = {'poverty_model' : 'p_true'})

    df_eval_est = df_eval_est.merge(est_effects_true, on = 'area_id', how = 'left')

    ### P2P
    est_effects_p2p = est_effects_bts.merge(map_p2p[['bts', 'area_id']], left_on = 'true_bts', right_on = 'bts', how = 'left')

    est_effects_p2p = est_effects_p2p.dropna().groupby('area_id')['poverty_model'].mean().rename('p_p2p').reset_index()

    df_eval_est = df_eval_est.merge(est_effects_p2p, on = 'area_id', how = 'left')

    ### Voronoi
    est_effects_vor = map_voronoi[['bts', 'area_id', 'w_geo_vor_area']].merge(
        est_effects_bts, left_on = 'bts', right_on = 'true_bts', how = 'left')

    est_effects_vor = est_effects_vor.dropna().groupby('area_id')[['w_geo_vor_area', 'poverty_model']].agg(
        lambda x: np.average(x, weights=est_effects_vor.loc[x.index, "w_geo_vor_area"])).reset_index().drop(columns = ['w_geo_vor_area']).rename(columns = {'poverty_model' : 'p_vor'})

    df_eval_est = df_eval_est.merge(est_effects_vor, on = 'area_id', how = 'left')

    ### Augmented Voronoi
    est_effects_a_vor = map_voronoi[['bts', 'area_id', 'w_knn_vor_area']].merge(
        est_effects_bts, left_on = 'bts', right_on = 'true_bts', how = 'left')

    est_effects_a_vor = est_effects_a_vor[est_effects_a_vor.w_knn_vor_area != 0]

    est_effects_a_vor = est_effects_a_vor.dropna().groupby('area_id')[['w_knn_vor_area', 'poverty_model']].agg(
        lambda x: np.average(x, weights=est_effects_a_vor.loc[x.index, "w_knn_vor_area"])).reset_index().drop(columns = ['w_knn_vor_area']).rename(columns = {'poverty_model' : 'p_a_vor'})

    df_eval_est = df_eval_est.merge(est_effects_a_vor, on = 'area_id', how = 'left')

    ### Simple HATA BSA
    est_effects_bsa_simple = hata_simple[['grid_id', 'bts', 'area_id', 'w_best_ant']].merge(est_effects_bts, left_on = 'bts', right_on = 'true_bts', how = 'left')

    est_effects_bsa_simple = est_effects_bsa_simple[est_effects_bsa_simple.w_best_ant != 0]

    est_effects_bsa_simple = est_effects_bsa_simple.dropna().groupby('area_id')[['w_best_ant', 'poverty_model']].agg(
        lambda x: np.average(x, weights=est_effects_bsa_simple.loc[x.index, "w_best_ant"])).reset_index().drop(columns = ['w_best_ant']).rename(columns = {'poverty_model' : 'p_bsa_simple'})

    df_eval_est = df_eval_est.merge(est_effects_bsa_simple, on = 'area_id', how = 'left')

    ### Simple HATA IDW
    est_effects_idw_simple = hata_simple[['grid_id', 'bts', 'area_id', 'w_knn_area']].merge(est_effects_bts, left_on = 'bts', right_on = 'true_bts', how = 'left')

    est_effects_idw_simple = est_effects_idw_simple[est_effects_idw_simple.w_knn_area != 0]

    est_effects_idw_simple = est_effects_idw_simple.dropna().groupby('area_id')[['w_knn_area', 'poverty_model']].agg(
        lambda x: np.average(x, weights=est_effects_idw_simple.loc[x.index, "w_knn_area"])).reset_index().drop(columns = ['w_knn_area']).rename(columns = {'poverty_model' : 'p_idw_simple'})

    df_eval_est = df_eval_est.merge(est_effects_idw_simple, on = 'area_id', how = 'left')

    df_eval_est['run'] = r

    # Plain nested lists are returned so that the result can be sent back from a worker process.
    return(df_eval_cov.values.tolist(), df_eval_est.values.tolist())

In [None]:
def collect_results(result):
    """Callback for ``pool.apply_async``: store the rows returned by one finished task.

    Parameters
    ----------
    result : sequence
        ``result[0]`` is a list of rows that is appended to the module-level list
        ``output_eval_cov``; ``result[1]`` is a list of rows that is appended to
        the module-level list ``output_eval_est``.
    """
    for accumulator, position in ((output_eval_cov, 0), (output_eval_est, 1)):
        accumulator.extend(result[position])

In [None]:
# Accumulators filled by `collect_results`. They are reset here so that re-running
# this cell does not append to the rows of an earlier execution.
output_eval_cov = []
output_eval_est = []

if __name__ == "__main__":
    start_time = time.time()

    # Number of independent simulation repetitions.
    n_runs = 1000
    # Leave one core free, but never request fewer than one worker:
    # mp.cpu_count() - 1 is 0 on a single-core machine and mp.Pool rejects that.
    n_workers = max(1, mp.cpu_count() - 1)

    def report_failed_run(error):
        """Print the exception of a failed task; apply_async would otherwise drop it,
        because the AsyncResult objects are not kept and `.get()` is never called."""
        print("--- simulation run failed: %r ---" % (error,))

    # Runs the compute-intensive simulation `n_runs` times, spread over the worker pool.
    # Results arrive through `collect_results` in completion order, not in run order.
    pool = mp.Pool(processes=n_workers)
    for run_id in range(n_runs):
        pool.apply_async(
            simulation_small,
            args=(df_full, pixel_x, pixel_y, park_x_low, park_y_low, park_x_high, park_y_high,
                  pop_urban, pop_rural, run_id),
            callback=collect_results,
            error_callback=report_failed_run)
    pool.close()
    pool.join()

    # Converts list of lists to a data frame
    eval_cov = pd.DataFrame(output_eval_cov, columns = [
        'true_bts', 'urbanity', 'true_bts_km2', 'p2p_a_overlap', 'p2p_s_overlap', 'vor_a_overlap', 'vor_s_overlap', 'hata_simple_a_overlap', 'hata_simple_s_overlap', 'run'])
    eval_est = pd.DataFrame(output_eval_est, columns = [
        'area_id', 'poverty_rate', 'p_true', 'p_p2p', 'p_vor', 'p_a_vor', 'p_bsa_simple', 'p_idw_simple', 'run'])
    print("--- %s seconds ---" % (time.time() - start_time))

# Evaluation

### Overlap

In [None]:
# Column means of `eval_cov`, overall (T) and per `urbanity` code.
# NOTE(review): codes 1 / 2 / 0 are labelled R / S / U here, presumably
# rural / suburban / urban -- confirm against the code that assigns `urbanity`.
overlap_groups = {
    'Mean_T': eval_cov,
    'Mean_R': eval_cov[eval_cov.urbanity == 1],
    'Mean_S': eval_cov[eval_cov.urbanity == 2],
    'Mean_U': eval_cov[eval_cov.urbanity == 0],
}

# The matching standard deviations (sigma_T, sigma_R, sigma_S, sigma_U) are not
# reported; they would use `.std(numeric_only = True)` on the same subsets.
overlap_final = pd.DataFrame({
    label: np.round(subset.mean(numeric_only = True), decimals = 3)
    for label, subset in overlap_groups.items()
})

### Ranking - Prediction

In [None]:
# Work on a copy so the ranking cells below cannot modify `eval_est`.
df_predict = eval_est.copy()

In [None]:
# For every estimator column and every run, compute
#   bias : mean of (estimate - poverty_rate)
#   rmse : root of the mean squared difference
#   r2   : score of a linear regression of poverty_rate on the estimate
# and stack the per-model tables into `prediction_rank` (one row per run and model,
# indexed by the run id).
model_columns = df_predict.columns.to_series().drop(['area_id', 'poverty_rate', 'run', 'p_true']).to_list()

# Dropping incomplete rows and splitting by run does not depend on the model, so it
# is done once. sort=False keeps the runs in order of first appearance.
complete_runs = list(df_predict.dropna().groupby('run', sort = False))

rank_frames = []

for m in tqdm(model_columns):

    records = []

    for i, temp in complete_runs:

        error = temp[m] - temp['poverty_rate']
        x = np.array(temp[m]).reshape((-1, 1))
        y = np.array(temp['poverty_rate'])

        records.append({
            'run' : i,
            'bias' : error.mean(),
            'rmse' : np.sqrt((error**2).mean()),
            'model' : m,
            'r2' : LinearRegression().fit(x, y).score(x, y),
        })

    # Build each per-model table in one go instead of growing it cell by cell.
    rank_frames.append(pd.DataFrame(records, index = [record['run'] for record in records],
                                    columns = ['run', 'bias', 'rmse', 'model', 'r2']))

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
prediction_rank = pd.concat(rank_frames) if rank_frames else pd.DataFrame()

In [None]:
# Rank on the magnitude of the bias: the sign does not matter for "closest to zero".
prediction_rank['bias'] = prediction_rank['bias'].abs()
# `rmse` is deliberately left untouched: the square root is already taken where the
# column is computed, so rooting it again here would give the fourth root of the
# mean squared error (and a different value on every re-run of this cell).

R²

In [None]:
# Flag, within each run, the model(s) whose r2 equals the run's maximum (ties are all flagged).
is_best_r2 = prediction_rank.groupby('run')['r2'].transform('max') == prediction_rank.r2
prediction_rank['best_r2'] = 0
prediction_rank.loc[is_best_r2, 'best_r2'] = 1

In [None]:
# Mean of the `best_r2` flag per model (i.e. how often each model was flagged),
# sorted ascending and rendered as a LaTeX table string.
prediction_rank.groupby('model')['best_r2'].mean().reset_index().sort_values(by=['best_r2']).to_latex()

Bias

In [None]:
# Flag, within each run, the model(s) whose `bias` value equals the run's minimum (ties are all flagged).
is_best_bias = prediction_rank.groupby('run')['bias'].transform('min') == prediction_rank.bias
prediction_rank['best_bias'] = 0
prediction_rank.loc[is_best_bias, 'best_bias'] = 1

In [None]:
# Mean of the `best_bias` flag per model (i.e. how often each model was flagged),
# sorted ascending and rendered as a LaTeX table string.
prediction_rank.groupby('model')['best_bias'].mean().reset_index().sort_values(by=['best_bias']).to_latex()

RMSE

In [None]:
# Flag, within each run, the model(s) whose `rmse` value equals the run's minimum (ties are all flagged).
is_best_rmse = prediction_rank.groupby('run')['rmse'].transform('min') == prediction_rank.rmse
prediction_rank['best_rmse'] = 0
prediction_rank.loc[is_best_rmse, 'best_rmse'] = 1

In [None]:
# Mean of the `best_rmse` flag per model (i.e. how often each model was flagged),
# sorted ascending and rendered as a LaTeX table string.
prediction_rank.groupby('model')['best_rmse'].mean().reset_index().sort_values(by=['best_rmse']).to_latex()

### Ranking - Coverage

In [None]:
# Work on a copy so the coverage ranking below cannot modify `eval_cov`.
df_coverage = eval_cov.copy()

In [None]:
# Mean of each `*_a_overlap` column per run, stacked in long format
# (columns: coverage, run, model, type) and tagged as type 'geography'.
geo_models = df_coverage.columns.to_series().drop(['true_bts', 'urbanity', 'true_bts_km2', 'run', 'p2p_s_overlap', 'vor_s_overlap', 'hata_simple_s_overlap']).to_list()

geo_frames = []

for m in tqdm(geo_models):

    # One grouped mean replaces filtering the frame once per run;
    # sort=False keeps the runs in order of first appearance.
    run_means = df_coverage.groupby('run', sort = False)[m].mean()

    result = pd.DataFrame({'coverage' : run_means.values,
                           'run' : run_means.index.values,
                           'model' : m,
                           'type' : 'geography'},
                          index = run_means.index.values)

    geo_frames.append(result)

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
coverage_rank = pd.concat(geo_frames) if geo_frames else pd.DataFrame()

In [None]:
# Mean of each `*_s_overlap` column per run, tagged as type 'settlement' and added
# to the existing `coverage_rank` table.
# Note: this cell extends `coverage_rank`; running it twice adds the rows twice.
settlement_models = df_coverage.columns.to_series().drop(['true_bts', 'urbanity', 'true_bts_km2', 'run', 'p2p_a_overlap', 'vor_a_overlap', 'hata_simple_a_overlap']).to_list()

settlement_frames = []

for m in tqdm(settlement_models):

    # One grouped mean replaces filtering the frame once per run;
    # sort=False keeps the runs in order of first appearance.
    run_means = df_coverage.groupby('run', sort = False)[m].mean()

    result = pd.DataFrame({'coverage' : run_means.values,
                           'run' : run_means.index.values,
                           'model' : m,
                           'type' : 'settlement'},
                          index = run_means.index.values)

    settlement_frames.append(result)

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
coverage_rank = pd.concat([coverage_rank] + settlement_frames)

Geography

In [None]:
# Keep only the rows tagged 'geography'; copy so new columns can be added safely.
coverage_rank_geo = coverage_rank[coverage_rank['type'] == 'geography'].copy()

In [None]:
# Flag, within each run, the model(s) whose coverage equals the run's maximum (ties are all flagged).
is_best_geo = coverage_rank_geo.groupby('run')['coverage'].transform('max') == coverage_rank_geo.coverage
coverage_rank_geo['best_geo'] = 0
coverage_rank_geo.loc[is_best_geo, 'best_geo'] = 1

In [None]:
# Mean of the `best_geo` flag per model (i.e. how often each model was flagged), sorted ascending.
coverage_rank_geo.groupby('model')['best_geo'].mean().reset_index().sort_values(by=['best_geo'])

Settlements

In [None]:
# Keep only the rows tagged 'settlement'; copy so new columns can be added safely.
coverage_rank_stl = coverage_rank[coverage_rank['type'] == 'settlement'].copy()

In [None]:
# Flag, within each run, the model(s) whose coverage equals the run's maximum (ties are all flagged).
# The flag column keeps the name 'best_geo' although these are the settlement rows.
is_best_stl = coverage_rank_stl.groupby('run')['coverage'].transform('max') == coverage_rank_stl.coverage
coverage_rank_stl['best_geo'] = 0
coverage_rank_stl.loc[is_best_stl, 'best_geo'] = 1

In [None]:
# Mean of the `best_geo` flag per model for the settlement rows, sorted ascending.
coverage_rank_stl.groupby('model')['best_geo'].mean().reset_index().sort_values(by=['best_geo'])

### Correlation & Sample size

In [None]:
# Attach each area's `area_type` from `base_map` (left join on `area_id`) so the
# statistics in the next cell can be split by area type.
eval_est_final = eval_est.merge(base_map[['area_id', 'area_type']], on = 'area_id', how = 'left')

In [None]:
# Correlation of every column with `poverty_rate` and non-null counts, overall and
# split by `area_type` (1 is reported as rural, 0 as urban).
# Note: this overwrites `eval_est_final` with the summary table, so the merge cell
# above has to be re-run before this cell can be executed again.
est_all = eval_est_final
est_rural = est_all[est_all['area_type'] == 1]
est_urban = est_all[est_all['area_type'] == 0]

# NOTE(review): the counts are divided by 1000, presumably the number of simulation
# runs, to give an average count per run -- keep in sync with the run count.
# NOTE(review): `[2:]` drops the first two rows of the table, presumably the
# `area_id` and `poverty_rate` rows -- confirm the row order.
eval_est_final = pd.DataFrame({
    'Corr' : np.round(est_all.corr()['poverty_rate'], decimals = 3),
    'N_Total' : np.round(est_all.count()/1000).astype('int'),
    'Corr_Rural' : np.round(est_rural.corr()['poverty_rate'], decimals = 3),
    'N_Rural' : np.round(est_rural.count()/1000).astype('int'),
    'Corr_Urban' : np.round(est_urban.corr()['poverty_rate'], decimals = 3),
    'N_Urban' : np.round(est_urban.count()/1000).astype('int'),
})[2:]

### Predictive Performance

In [None]:
# Fresh copy of `eval_est` for the per-run performance metrics computed below.
df_predict = eval_est.copy()

In [None]:
# One row per run (indexed by the run id) with, for every estimator column <col>:
#   <col>_bias : mean of (estimate - poverty_rate)
#   <col>_rmse : root of the mean squared difference
#   <col>_r2   : score of a linear regression of poverty_rate on the estimate
estimate_columns = df_predict.columns.to_series().drop(['area_id', 'poverty_rate', 'run']).to_list()

run_records = []
run_ids = []

# Rows with any missing value are dropped before the metrics are computed.
# sort=False keeps the runs in order of first appearance.
for i, temp in tqdm(df_predict.dropna().groupby('run', sort = False)):

    record = {'run' : i}
    y = np.array(temp['poverty_rate'])

    for j in estimate_columns:

        error = temp[j] - temp['poverty_rate']
        x = np.array(temp[j]).reshape((-1, 1))

        record[j + '_bias'] = error.mean()
        record[j + '_rmse'] = np.sqrt((error**2).mean())
        record[j + '_r2'] = LinearRegression().fit(x, y).score(x, y)

    run_records.append(record)
    run_ids.append(i)

# Build the table once from the collected records instead of growing it cell by cell.
result = pd.DataFrame(run_records, index = run_ids)

R²

In [None]:
# Box plot of the per-run r2 values, one box per estimator.
# Maps result column -> tick label; the dict order is the plotting order.
r2_labels = {
    'p_true_r2' : 'Benchmark',
    'p_p2p_r2' : 'Point-to-Polygon',
    'p_vor_r2' : 'Voronoi',
    'p_a_vor_r2' : 'Augmented \n Voronoi',
    'p_bsa_simple_r2' : 'Simple HATA \n (BSA)',
    'p_idw_simple_r2' : 'Simple HATA \n (IDW)',
}

f, ax = plt.subplots(1, figsize=(15, 5))
sns.boxplot(data = result[list(r2_labels)].rename(columns = r2_labels), palette = 'rocket', ax = ax)

ax.tick_params(labelsize=14)

plt.show()

Bias

In [None]:
# Box plot of the per-run bias values, one box per estimator.
# Maps result column -> tick label; the dict order is the plotting order.
bias_labels = {
    'p_true_bias' : 'Benchmark',
    'p_p2p_bias' : 'Point-to-Polygon',
    'p_vor_bias' : 'Voronoi',
    'p_a_vor_bias' : 'Augmented \n Voronoi',
    'p_bsa_simple_bias' : 'Simple HATA \n (BSA)',
    'p_idw_simple_bias' : 'Simple HATA \n (IDW)',
}

f, ax = plt.subplots(1, figsize=(15, 5))
sns.boxplot(data = result[list(bias_labels)].rename(columns = bias_labels), palette = 'rocket', ax = ax)

ax.tick_params(labelsize=14)

plt.show()

RMSE

In [None]:
# Box plot of the per-run RMSE values, one box per estimator.
# Maps result column -> tick label; the dict order is the plotting order.
rmse_labels = {
    'p_true_rmse' : 'Benchmark',
    'p_p2p_rmse' : 'Point-to-Polygon',
    'p_vor_rmse' : 'Voronoi',
    'p_a_vor_rmse' : 'Augmented \n Voronoi',
    'p_bsa_simple_rmse' : 'Simple HATA \n (BSA)',
    'p_idw_simple_rmse' : 'Simple HATA \n (IDW)',
}

f, ax = plt.subplots(1, figsize=(15, 5))
sns.boxplot(data = result[list(rmse_labels)].rename(columns = rmse_labels), palette = 'rocket', ax = ax)

ax.tick_params(labelsize=14)

plt.show()

In [None]:
# Join the estimates of the single run with run id 1 onto the area geometries
# (left join: areas without a matching row keep NaN in the estimate columns).
results = base_map.merge(eval_est[eval_est['run'] == 1], on = 'area_id', how = 'left')

### Differences

In [None]:
# Signed difference per area: true poverty rate minus each estimate, stored as `delta_<estimate>`.
for estimate in ('p_true', 'p_p2p', 'p_vor', 'p_bsa_simple'):
    results['delta_' + estimate] = results['poverty_rate'] - results[estimate]

In [None]:
# Five stacked maps: the true poverty rate followed by the differences of four
# estimators, all drawn with the same 0-1 colour scale and one shared colour bar.
# NOTE(review): the delta columns are signed differences, so negative values fall
# below vmin = 0 on this scale -- confirm that this is intended.
map_panels = [
    ('poverty_rate', 'True Poverty Rate'),
    ('delta_p_true', 'Benchmark'),
    ('delta_p_p2p', 'Point-to-Polygon'),
    ('delta_p_vor', 'Voronoi'),
    ('delta_p_bsa_simple', 'Simple HATA'),
]
rocket_cmap = plt.get_cmap("rocket")

f, panel_axes = plt.subplots(len(map_panels), figsize=(10, 5))

for panel_ax, (column, title) in zip(panel_axes, map_panels):
    # Areas without a value for this column are left out of the panel.
    results[['geometry', column]].dropna().plot(
        ax = panel_ax, column = column, cmap = rocket_cmap,
        vmin = 0, vmax = 1, linewidth = 0.8, edgecolor = 'black')
    panel_ax.set_title(title)

# Done before the colour-bar axes is added, so only the map panels are affected.
for ax in f.get_axes():
    ax.label_outer()

# Reserve the right-hand 20 % of the figure for the shared colour bar.
f.subplots_adjust(right=0.8)
im = plt.cm.ScalarMappable(cmap = rocket_cmap, norm = plt.Normalize(vmin = 0, vmax = 1))
im._A = []
cbar_ax = f.add_axes([0.85, 0.15, 0.05, 0.7])
f.colorbar(im, cax=cbar_ax)

plt.show()