This notebook compares the features and the actual (or predicted) wash indicators across 2018, 2019, and 2020.

In [1]:
import pandas as pd

import sys
sys.path.insert(0, '../utils')
import modelutils
from settings import *



In [23]:
# Load the metro-area prediction tables for 2019 and 2020 in one pass.
preds_2019, preds_2020 = (
    pd.read_csv(data_dir + csv_name)
    for csv_name in ('metro_area_predictions.csv', 'metro_area_predictions_2020.csv')
)

In [2]:
# Load the 2019 and 2020 feature datasets.
feats_2019, feats_2020 = map(
    pd.read_csv,
    (data_dir + '20200902_dataset_2019.csv', data_dir + '20200908_dataset_2020.csv'),
)

In [17]:
# Load the base dataset (used below as the 2018 source), then keep only the
# first row encountered for each `id`.
raw = pd.read_csv(data_dir + '20200830_dataset.csv')
raw = raw.drop_duplicates(subset='id', keep='first')

In [15]:
# Lookup from `id` to `metro_id`, one row per `id` (first occurrence wins).
# NOTE(review): `id` presumably identifies a grid cell -- confirm against the dataset.
grid_in_metro = (
    feats_2019
    .loc[:, ['id', 'metro_id']]
    .drop_duplicates(subset='id', keep='first')
)

In [40]:
# Columns retained after each yearly join: the metro key, then every feature
# and indicator column (`features` / `indicators` come from `settings`).
keep_cols = ['metro_id', *features, *indicators]

In [45]:
# Metro-area lookup table; it is joined further down through its `OBJECTID` column.
metro_names = pd.read_csv(filepath_or_buffer=data_dir + 'metro_areas_id_name.csv')

In [59]:
# join
wash_grid_2018 = pd.merge(raw, grid_in_metro, on = 'id')[keep_cols]
wash_grid_2019 = pd.merge(feats_2019, preds_2019, on = 'metro_id')[keep_cols]
wash_grid_2020 = pd.merge(feats_2020, preds_2020, on = 'metro_id')[keep_cols]
# get median
wash_metro_2018 = wash_grid_2018.groupby('metro_id').median().reset_index()
wash_metro_2019 = wash_grid_2019.groupby('metro_id').median().reset_index()
wash_metro_2020 = wash_grid_2020.groupby('metro_id').median().reset_index()

In [62]:
# Assemble one wide row per metro area: the name table first, then the 2018
# medians (columns left unsuffixed), then the 2019 and 2020 medians whose
# overlapping columns receive the `_2019` / `_2020` suffixes.
wash_metro = (
    metro_names
    .merge(wash_metro_2018, left_on='OBJECTID', right_on='metro_id', suffixes=('', ''))
    .merge(wash_metro_2019, on='metro_id', suffixes=('', '_2019'))
    .merge(wash_metro_2020, on='metro_id', suffixes=('', '_2020'))
)

In [63]:
# Inspect the merged column layout: unsuffixed columns come from the 2018 table,
# `_2019` / `_2020` suffixed columns from the later years.
wash_metro.columns

Index(['OBJECTID', 'a_mtro', 'metro_id', 'vegetation', 'aridity_cgiarv2',
       'temperature', 'nighttime_lights', 'population', 'elevation',
       'urban_index', 'nearest_waterway', 'nearest_commercial',
       'nearest_restaurant', 'nearest_hospital', 'nearest_airport',
       'nearest_highway', 'perc_hh_no_water_supply', 'perc_hh_no_toilet',
       'perc_hh_no_sewage', 'vegetation_2019', 'aridity_cgiarv2_2019',
       'temperature_2019', 'nighttime_lights_2019', 'population_2019',
       'elevation_2019', 'urban_index_2019', 'nearest_waterway_2019',
       'nearest_commercial_2019', 'nearest_restaurant_2019',
       'nearest_hospital_2019', 'nearest_airport_2019', 'nearest_highway_2019',
       'perc_hh_no_water_supply_2019', 'perc_hh_no_toilet_2019',
       'perc_hh_no_sewage_2019', 'vegetation_2020', 'aridity_cgiarv2_2020',
       'temperature_2020', 'nighttime_lights_2020', 'population_2020',
       'elevation_2020', 'urban_index_2020', 'nearest_waterway_2020',
       'nearest_

In [66]:
# Persist the merged comparison table. `index` is left at its default, so the
# DataFrame index is written out as the first column.
wash_metro.to_csv(path_or_buf=data_dir + 'wash_metro.csv')

In [67]:
# Median of every numeric column across metro areas.
# `wash_metro` also holds the string column `a_mtro`; older pandas dropped it
# silently, but pandas >= 2.0 raises TypeError unless numeric columns are
# requested explicitly, so pass `numeric_only=True`.
wash_metro.median(numeric_only=True)

OBJECTID                           12.000000
metro_id                           12.000000
vegetation                       2901.000000
aridity_cgiarv2                 12444.000000
temperature                     15302.000000
nighttime_lights                   27.651249
population                         49.088737
elevation                         477.000000
urban_index                        33.000000
nearest_waterway                  470.218414
nearest_commercial               1355.286865
nearest_restaurant                846.053223
nearest_hospital                 1136.366699
nearest_airport                  8025.561523
nearest_highway                   677.732300
perc_hh_no_water_supply             0.000000
perc_hh_no_toilet                   0.009091
perc_hh_no_sewage                   0.005115
vegetation_2019                  2816.000000
aridity_cgiarv2_2019            12444.000000
temperature_2019                15297.000000
nighttime_lights_2019              27.811251
population

In [73]:
# Show the full comparison row for metro_id 7 as a Series.
is_metro_7 = wash_metro['metro_id'] == 7
wash_metro.loc[is_metro_7].reset_index(drop=True).iloc[0]

OBJECTID                                                   7
a_mtro                          AREA METROPOLITANA DE BOGOTÁ
metro_id                                                   7
vegetation                                              1730
aridity_cgiarv2                                        10247
temperature                                          15031.5
nighttime_lights                                     48.4137
population                                           180.225
elevation                                               2559
urban_index                                               34
nearest_waterway                                     486.428
nearest_commercial                                   803.471
nearest_restaurant                                   485.257
nearest_hospital                                     1004.39
nearest_airport                                      11773.3
nearest_highway                                      511.421
perc_hh_no_water_supply 

In [74]:
# Show the full comparison row for metro_id 5 as a Series.
is_metro_5 = wash_metro['metro_id'] == 5
wash_metro.loc[is_metro_5].reset_index(drop=True).iloc[0]

OBJECTID                                5
a_mtro                           Riohacha
metro_id                                5
vegetation                           1983
aridity_cgiarv2                      3776
temperature                       15370.8
nighttime_lights                  30.7463
population                        49.0887
elevation                              10
urban_index                            34
nearest_waterway                  2008.56
nearest_commercial                569.568
nearest_restaurant                415.529
nearest_hospital                   837.85
nearest_airport                   1891.63
nearest_highway                   724.117
perc_hh_no_water_supply         0.0423387
perc_hh_no_toilet               0.0231283
perc_hh_no_sewage               0.0768696
vegetation_2019                      2039
aridity_cgiarv2_2019                 3776
temperature_2019                    15387
nighttime_lights_2019               30.29
population_2019                   

In [75]:
# Show the full comparison row for metro_id 4 as a Series.
is_metro_4 = wash_metro['metro_id'] == 4
wash_metro.loc[is_metro_4].reset_index(drop=True).iloc[0]

OBJECTID                               4
a_mtro                            Quibdo
metro_id                               4
vegetation                          2775
aridity_cgiarv2                    29418
temperature                        15046
nighttime_lights                    7.45
population                        12.667
elevation                             46
urban_index                            5
nearest_waterway                 695.419
nearest_commercial                 40000
nearest_restaurant               1923.03
nearest_hospital                 1566.72
nearest_airport                  1702.34
nearest_highway                  1069.64
perc_hh_no_water_supply         0.660287
perc_hh_no_toilet               0.380282
perc_hh_no_sewage               0.731707
vegetation_2019                     1708
aridity_cgiarv2_2019               29418
temperature_2019                   15004
nighttime_lights_2019            4.58125
population_2019                  13.9429
elevation_2019  