In [1]:
import os
# Locate the repository through the REPO_DIR environment variable.
repo_dir = os.environ.get("REPO_DIR")
if repo_dir is None:
    # Without this check os.path.join(None, ...) fails with an opaque TypeError.
    raise RuntimeError(
        "The REPO_DIR environment variable is not set; "
        "point it at the root of the repository before running this notebook."
    )

# Both directories keep a trailing slash because later cells build paths
# with plain string concatenation (data_dir + "...").
code_dir = os.path.join(repo_dir, "code/")
data_dir = os.path.join(repo_dir, "data/")

# Work from the code directory.
# NOTE(review): presumably needed so that `nl_helpers` is importable -- confirm.
os.chdir(code_dir)

import matplotlib.pyplot as plt
import numpy as np
import pickle
import pandas as pd
import geopandas as gpd
import shapely
import fiona

from affine import Affine

import pandas as pd
import rasterio
import rasterio.mask
from rasterio import warp

import warnings

from nl_helpers import (apply_polygon_mask_and_return_flat_array, 
                        correct_nl_df_creation, bins,create_nl_binned_dataframe)

# For the NL downscaling experiment, we want to get mean luminosity values of various polygons

### We need these for ADM2, ADM1, and ADM0 shapes. 

##### Since it's easy, we're going to get the pop weighted and the area weighted values

In [2]:
# Population-density raster (file name says it was shifted to match DMSP).
pop_adj_to_nl_outpath = os.path.join(
    data_dir,
    "int/GPW_pop_density/",
    "gpw_v4_population_density_rev10_2015_30_sec_shifted_to_match_DMSP.tif",
)

# DMSP nightlights raster (file name says it was shifted to match the population raster).
dmsp_adj_to_pop_outpath = os.path.join(
    data_dir,
    "int/DMSP_NL/",
    "DMSP_F182013.v4c_web.stable_lights.avg_vis_shifted_to_match_pop_raster.tif",
)

# These handles are left open on purpose: later cells use them as the
# default rasters for the averaging functions.
nl_adj = rasterio.open(dmsp_adj_to_pop_outpath)
pop_adj = rasterio.open(pop_adj_to_nl_outpath)

In [3]:
def get_avg_nl_and_weighted_avg_nl(shp_file,raster_file=nl_adj, weight_raster = pop_adj):
    """
    Compute the plain mean and the weighted mean of raster values inside each polygon.

    Parameters
    ----------
    shp_file : object with a "geometry" column and an ``index`` (e.g. a GeoDataFrame)
        One output row is produced per geometry, in iteration order.
    raster_file : raster handle, default ``nl_adj``
        Passed to ``apply_polygon_mask_and_return_flat_array`` to obtain the
        values that are averaged.
    weight_raster : raster handle, default ``pop_adj``
        Passed to the same helper to obtain the weights of the weighted mean.
        NOTE(review): ``np.average`` needs values and weights of equal length,
        so both rasters are assumed to share one grid -- confirm.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``shp_file.index`` with float columns ``nl_avg`` and
        ``nl_weighted_avg``. ``nl_avg`` is NaN when the polygon yields no
        values; ``nl_weighted_avg`` is NaN when there are no values, no
        weights, or the weights sum to zero. ``correct_nl_Ys`` only patches
        rows whose ``nl_avg`` is NaN.
    """
    rows = []

    for polygon in shp_file["geometry"]:
        values = apply_polygon_mask_and_return_flat_array(polygon, plot=False, raster_file=raster_file)
        weights = apply_polygon_mask_and_return_flat_array(polygon, plot=False, raster_file=weight_raster)

        # Same NaN result np.mean gives for an empty array, minus the RuntimeWarning.
        avg = np.mean(values) if len(values) > 0 else np.nan

        if len(values) == 0 or len(weights) == 0:
            # NaN rather than None, so the column stays float instead of object dtype.
            weighted_avg = np.nan
        elif np.sum(weights) == 0:
            # np.average raises ZeroDivisionError when the weights sum to zero;
            # the weighted mean is undefined there, so record it as missing.
            weighted_avg = np.nan
        else:
            weighted_avg = np.average(values, weights=weights)

        rows.append((avg, weighted_avg))

    # Build the table once. The reshape keeps it 2-D for one polygon, which the
    # old vstack-in-a-loop did not, and also copes with zero polygons.
    stacked = np.array(rows, dtype=float).reshape(-1, 2)
    out = pd.DataFrame(stacked, index=shp_file.index, columns=["nl_avg", "nl_weighted_avg"])

    return out


In [4]:
def correct_nl_Ys(out, shp_file, raster_file, bins = bins, off_raster_val=np.nan):
    """
    Fill in rows of ``out`` whose first column is missing, using the raster
    value found at the polygon's centroid.

    Some of the ADM2 and other polygons are so small that no pixel counts as
    contained by them, so we take the nearest pixel instead. This is done by
    masking the raster with a small buffer around the polygon centroid.

    If even that buffer captures no pixel, we assume the polygon is off the
    raster and record ``off_raster_val`` (NaN by default).

    Parameters
    ----------
    out : pandas.DataFrame
        Result of ``get_avg_nl_and_weighted_avg_nl``; rows with a null first
        column are the ones replaced.
    shp_file : GeoDataFrame
        Geometries sharing ``out``'s index; needs a "geometry" column.
    raster_file : raster handle
        Passed to ``apply_polygon_mask_and_return_flat_array``.
    bins : unused
        Kept only so existing callers keep working.
    off_raster_val : scalar, default NaN
        Value stored when the centroid buffer captures no pixel.

    Returns
    -------
    pandas.DataFrame
        ``out`` unchanged when nothing is missing. Otherwise the corrected rows
        (float columns ``nl_avg`` and ``nl_weighted_avg``, both set to the same
        pixel value) followed by the untouched rows of ``out``; the original
        row order is therefore not preserved.

    Raises
    ------
    AssertionError
        If a centroid buffer captures more than one pixel.
    """
    null_idxs = out[out.iloc[:,0].isnull()].index

    num_missing = len(null_idxs)
    print("Num missing = ", num_missing)
    if num_missing == 0:
        return out

    # 0.00833333333333 is about 1/120 of a degree.
    # NOTE(review): presumably the 30 arc-second pixel width of the rasters,
    # so the buffer radius is half a pixel -- confirm.
    half_pixel = 0.00833333333333/2

    with warnings.catch_warnings():
        warnings.simplefilter("ignore") # suppress warnings for unprojected buffer
        # Buffer the centroid of each polygon; equivalent to a
        # centroid-to-centroid nearest-pixel lookup.
        buffers = shp_file.loc[null_idxs, "geometry"].centroid.buffer(half_pixel)

    rows = []
    for buffer in buffers:
        pixels = apply_polygon_mask_and_return_flat_array(buffer, raster_file = raster_file)
        assert len(pixels) <= 1, "centroid buffer captured more than one pixel"

        # Still no pixel means the polygon lies off the raster.
        value = pixels[0] if len(pixels) == 1 else off_raster_val

        # With a single pixel the plain and the weighted average coincide.
        rows.append((value, value))

    # Build the table once. The reshape keeps it 2-D when exactly one row is
    # missing; the old vstack-in-a-loop produced a 1-D array there and the
    # DataFrame constructor raised a shape error.
    stacked = np.array(rows, dtype=float).reshape(-1, 2)
    fixed_out = pd.DataFrame(stacked, index = null_idxs, columns=["nl_avg","nl_weighted_avg"])

    out_dropped = out.drop(null_idxs)

    return pd.concat([fixed_out,out_dropped])

### ADM2 

In [5]:
# Read the ADM2 boundaries, index them by shapeID, and expose the parent
# ADM1 identifier under the shorter column name "ADM1_shape".
file = os.path.join(data_dir, "raw/geoBoundaries/geoBoundariesCGAZ_ADM2.geojson")
adm2 = (
    gpd.read_file(file)
    .set_index("shapeID")
    .rename(columns={"ADM1_shapeID": "ADM1_shape"})
)

In [6]:
# ADM2: per-polygon averages, followed by the centroid-based correction for
# polygons whose average came back missing.
out_adm2 = correct_nl_Ys(
    get_avg_nl_and_weighted_avg_nl(adm2, raster_file=nl_adj, weight_raster=pop_adj),
    adm2,
    raster_file=nl_adj,
)

# Persist the ADM2 table for the downscaling experiment.
out_adm2.to_pickle(
    os.path.join(data_dir, "nl_downscaling/nl_Ys/dmsp_avg_nl_Ys_geoB_adm2.p")
)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Num missing =  7058


### ADM1 -- from ADM2 geoBoundaries shapefile dissolved

In [7]:
# Merge the ADM2 polygons that share an ADM1 identifier into ADM1 shapes.
adm1_geoboundaries = adm2.dissolve(by="ADM1_shape")

# ADM1: per-polygon averages, followed by the centroid-based correction for
# polygons whose average came back missing.
out_adm1 = correct_nl_Ys(
    get_avg_nl_and_weighted_avg_nl(
        adm1_geoboundaries, raster_file=nl_adj, weight_raster=pop_adj
    ),
    adm1_geoboundaries,
    raster_file=nl_adj,
)

# Persist the ADM1 table for the downscaling experiment.
out_adm1.to_pickle(
    os.path.join(data_dir, "nl_downscaling/nl_Ys/dmsp_avg_nl_Ys_geoB_adm1.p")
)

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Num missing =  7


### ADM0 -- from ADM2 geoBoundaries shapefile dissolved

In [8]:
# Merge the ADM2 polygons that share a shapeGroup value into ADM0 shapes.
adm0_geoboundaries = adm2.dissolve(by="shapeGroup")

# ADM0: per-polygon averages, followed by the centroid-based correction for
# polygons whose average came back missing.
out_adm0 = correct_nl_Ys(
    get_avg_nl_and_weighted_avg_nl(
        adm0_geoboundaries, raster_file=nl_adj, weight_raster=pop_adj
    ),
    adm0_geoboundaries,
    raster_file=nl_adj,
)

# Persist the ADM0 table for the downscaling experiment.
out_adm0.to_pickle(
    os.path.join(data_dir, "nl_downscaling/nl_Ys/dmsp_avg_nl_Ys_geoB_adm0.p")
)


Num missing =  0
