<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Prepare-the-dataset" data-toc-modified-id="Prepare-the-dataset-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Prepare the dataset</a></span></li><li><span><a href="#Mask-out-cloud,-snow,-and-cloud-shadow" data-toc-modified-id="Mask-out-cloud,-snow,-and-cloud-shadow-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Mask out cloud, snow, and cloud shadow</a></span></li><li><span><a href="#Multi-yr-composite" data-toc-modified-id="Multi-yr-composite-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Multi-yr composite</a></span></li><li><span><a href="#Add-nightlight" data-toc-modified-id="Add-nightlight-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Add nightlight</a></span></li><li><span><a href="#Add-topography" data-toc-modified-id="Add-topography-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Add topography</a></span></li><li><span><a href="#Export-TF-Records" data-toc-modified-id="Export-TF-Records-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Export TF Records</a></span></li></ul></div>

In [None]:
import os, sys
# NOTE(review): absolute, machine-specific directory appended to sys.path.
# The non-stdlib helper modules imported below (optical_datasource, imgtools,
# ee_tf_exports) are presumably resolved from it — confirm, and make the
# location configurable before running on another machine.
home = os.path.abspath('/Users/zztang/Documents/EE/')
sys.path.append(home)

import ee
import optical_datasource as optx
import imgtools
# Careful: `tf` is an alias for the ee_tf_exports module here, not TensorFlow.
import ee_tf_exports as tf

In [36]:
# Initialise the Earth Engine client; no explicit credentials are passed here.
ee.Initialize()

## Prepare the dataset

In [2]:
# Point collection read from a Fusion Table ('ft:' id); presumably the DHS
# survey cluster locations (file names further down use 'dhslocs').
# NOTE(review): Google Fusion Tables has been shut down, so this id presumably
# no longer resolves — confirm and migrate the table to an Earth Engine asset.
dhs = ee.FeatureCollection('ft:1lFCM5_0xgkHG2-dBKB2WCuQnFHCecO7z_4VOLrhA')

In [5]:
# Split the survey points into three groups on their 'year' property:
#   oldest : 2008 < year <= 2011
#   middle : 2011 < year <= 2014
#   recent : year > 2014
# Points with year <= 2008 fall into none of the groups.
# A list of filters handed to .filter() is applied as a logical AND.
dhs_oldest = dhs.filter([
    ee.Filter.gt('year', 2008),
    ee.Filter.lte('year', 2011),
])

dhs_middle = dhs.filter([
    ee.Filter.gt('year', 2011),
    ee.Filter.lte('year', 2014),
])

dhs_recent = dhs.filter(ee.Filter.gt('year', 2014))

In [6]:
# Number of survey points in the oldest group (2008 < year <= 2011).
print(dhs_oldest.size().getInfo())

7130


In [7]:
# Number of survey points in the middle group (2011 < year <= 2014).
print(dhs_middle.size().getInfo())

8500


In [8]:
# Number of survey points in the recent group (year > 2014).
print(dhs_recent.size().getInfo())

4041


In [12]:
# One feature per distinct 'country' value, then the 'country' values pulled
# client-side as a plain Python list.
countries = dhs.distinct('country').aggregate_array('country').getInfo()
# Bare expression so the notebook displays the list.
countries

[u'senegal',
 u'benin',
 u'malawi',
 u'ghana',
 u'mozambique',
 u'tanzania',
 u'ethiopia',
 u'zambia',
 u'nigeria',
 u'cote_d_ivoire',
 u'angola',
 u'burkina_faso',
 u'cameroon',
 u'uganda',
 u'zimbabwe',
 u'rwanda',
 u'drc',
 u'guinea',
 u'mali',
 u'togo',
 u'lesotho',
 u'sierra_leone',
 u'kenya']

In [17]:
# Preview the per-country file-name stem and point count for the oldest group.
countries = dhs_oldest.distinct('country').aggregate_array('country').getInfo()
for i in countries:
    df = dhs_oldest.filter(ee.Filter.eq('country', i))
    # Label with this group's own period. The stem used to be copy-pasted as
    # '2012-14', which disagreed with the 'lx_median_2009-11_' prefix the
    # export loop uses for dhs_oldest.
    fname = 'lx_median_2009-11_'+i+'_dhslocs'
    print(fname)
    # One server round trip per country to fetch the count.
    print(df.size().getInfo())

lx_median_2012-14_uganda_dhslocs
570
lx_median_2012-14_lesotho_dhslocs
395
lx_median_2012-14_mozambique_dhslocs
879
lx_median_2012-14_malawi_dhslocs
827
lx_median_2012-14_ethiopia_dhslocs
571
lx_median_2012-14_zimbabwe_dhslocs
393
lx_median_2012-14_senegal_dhslocs
385
lx_median_2012-14_tanzania_dhslocs
1031
lx_median_2012-14_burkina_faso_dhslocs
541
lx_median_2012-14_nigeria_dhslocs
239
lx_median_2012-14_rwanda_dhslocs
492
lx_median_2012-14_angola_dhslocs
230
lx_median_2012-14_cameroon_dhslocs
577


In [16]:
# Preview the per-country file-name stem and point count for the middle group.
countries = dhs_middle.distinct('country').aggregate_array('country').getInfo()
for country_name in countries:
    in_country = ee.Filter.eq('country', country_name)
    clusters = dhs_middle.filter(in_country)
    fname = 'lx_median_2012-14_' + country_name + '_dhslocs'
    print(fname)
    # One server round trip per country to fetch the count.
    print(clusters.size().getInfo())

lx_median_2012-14_senegal_dhslocs
200
lx_median_2012-14_benin_dhslocs
746
lx_median_2012-14_cote_d_ivoire_dhslocs
341
lx_median_2012-14_guinea_dhslocs
300
lx_median_2012-14_mali_dhslocs
413
lx_median_2012-14_malawi_dhslocs
280
lx_median_2012-14_zambia_dhslocs
719
lx_median_2012-14_nigeria_dhslocs
889
lx_median_2012-14_drc_dhslocs
492
lx_median_2012-14_togo_dhslocs
330
lx_median_2012-14_sierra_leone_dhslocs
435
lx_median_2012-14_ghana_dhslocs
423
lx_median_2012-14_burkina_faso_dhslocs
248
lx_median_2012-14_rwanda_dhslocs
492
lx_median_2012-14_kenya_dhslocs
1585
lx_median_2012-14_lesotho_dhslocs
399
lx_median_2012-14_uganda_dhslocs
208


In [15]:
# Preview the per-country file-name stem and point count for the recent group.
countries = dhs_recent.distinct('country').aggregate_array('country').getInfo()
for i in countries:
    df = dhs_recent.filter(ee.Filter.eq('country', i))
    # Label with this group's own period. The stem used to be copy-pasted as
    # '2012-14', which disagreed with the 'lx_median_2015-17_' prefix the
    # export loop uses for dhs_recent.
    fname = 'lx_median_2015-17_'+i+'_dhslocs'
    print(fname)
    # One server round trip per country to fetch the count.
    print(df.size().getInfo())

lx_median_2012-14_tanzania_dhslocs
608
lx_median_2012-14_nigeria_dhslocs
322
lx_median_2012-14_mali_dhslocs
177
lx_median_2012-14_malawi_dhslocs
850
lx_median_2012-14_kenya_dhslocs
245
lx_median_2012-14_angola_dhslocs
625
lx_median_2012-14_zimbabwe_dhslocs
400
lx_median_2012-14_ethiopia_dhslocs
622
lx_median_2012-14_ghana_dhslocs
192


## Mask out cloud, snow, and cloud shadow

In [20]:
def decode_qamask(scene):
    """Decode the ``pixel_qa`` band of a scene into five self-masked layers.

    pixel_qa bit flags:

        bit 0 (value 1)   fill          (not decoded here)
        bit 1 (value 2)   clear
        bit 2 (value 4)   water
        bit 3 (value 8)   cloud shadow
        bit 4 (value 16)  snow
        bit 5 (value 32)  cloud

    Every returned band is masked by itself, so it only holds the value 1 on
    the pixels it selects. Note that the two groups have opposite polarity:

        pxqa_clear, pxqa_water                  -> pixels where the bit IS set
        pxqa_cloudshadow, pxqa_snow, pxqa_cloud -> pixels where the bit is
                                                   NOT set (i.e. pixels to keep)

    Args:
        scene: image that contains a 'pixel_qa' band.

    Returns:
        A single image holding the five bands, in the order listed above.
    """
    qa = scene.select('pixel_qa')

    # (output band name, bit value, True when pixels WITH the bit set are selected)
    layout = (
        ('pxqa_clear', 2, True),
        ('pxqa_water', 4, True),
        ('pxqa_cloudshadow', 8, False),
        ('pxqa_snow', 16, False),
        ('pxqa_cloud', 32, False),
    )

    layers = []
    for band_name, bit_value, select_if_set in layout:
        flag = qa.bitwiseAnd(bit_value)
        if select_if_set:
            selected = flag.neq(0)
        else:
            selected = flag.eq(0)
        # Self-mask so that non-selected pixels become masked rather than 0.
        layers.append(selected.updateMask(selected).rename([band_name]))

    return ee.Image.cat(layers)

def mask_qaclear(img):
    """Mask cloud-shadow, snow and cloud pixels out of a scene.

    The scene's QA band is decoded with ``decode_qamask`` and the three
    "keep" layers it returns ('pxqa_cloudshadow', 'pxqa_snow', 'pxqa_cloud')
    are applied to ``img`` one after the other with ``updateMask``.

    Despite the function name, the 'pxqa_clear' layer is not applied: the
    earlier version selected it but never used it, and that behaviour is
    kept. NOTE(review): confirm that leaving the clear bit out is intended.

    Compared with the earlier version, the QA band is decoded once instead of
    four times and the snow layer is applied once instead of twice; applying
    the same mask a second time changed nothing.

    Args:
        img: image that contains a 'pixel_qa' band.

    Returns:
        ``img`` with the three masks applied.
    """
    # Decode once and reuse the result for every layer.
    keep_layers = decode_qamask(img)

    masked = img
    for band_name in ('pxqa_cloudshadow', 'pxqa_snow', 'pxqa_cloud'):
        masked = masked.updateMask(keep_layers.select(band_name))
    return masked



## Multi-yr composite

In [18]:
# Optical/thermal bands kept in every composite.
# NOTE(review): these names are presumably assigned by optx.LandsatSR — confirm.
selbands = ['BLUE', 'GREEN', 'RED', 'NIR', 'SWIR1', 'SWIR2', 'TEMP1'] 

In [21]:
# 2015-17 composite over the recent survey points.
roi_recent = dhs_recent.geometry()

# Scenes for the period with cloud-shadow, snow and cloud pixels masked.
# NOTE(review): `.merged` is presumably the cross-sensor merged collection
# exposed by optx.LandsatSR — confirm in optical_datasource.
srcoll_recent = (
    optx.LandsatSR(roi_recent, '2015-1-1', '2017-12-31')
    .merged
    .map(mask_qaclear)
)

# Per-band median, pinned to EPSG:3857 at 30 m, then passed through
# imgtools.add_latlon (presumably adds the 'LAT'/'LON' bands exported later).
srmedian_recent = imgtools.add_latlon(
    srcoll_recent.select(selbands).median().reproject('EPSG:3857', None, 30)
)

In [22]:
# 2012-14 composite over the middle survey points.
roi_middle = dhs_middle.geometry()

# Scenes for the period with cloud-shadow, snow and cloud pixels masked.
# NOTE(review): `.merged` is presumably the cross-sensor merged collection
# exposed by optx.LandsatSR — confirm in optical_datasource.
srcoll_middle = (
    optx.LandsatSR(roi_middle, '2012-1-1', '2014-12-31')
    .merged
    .map(mask_qaclear)
)

# Per-band median, pinned to EPSG:3857 at 30 m, then passed through
# imgtools.add_latlon (presumably adds the 'LAT'/'LON' bands exported later).
srmedian_middle = imgtools.add_latlon(
    srcoll_middle.select(selbands).median().reproject('EPSG:3857', None, 30)
)

In [23]:
# 2009-11 composite over the oldest survey points.
roi_oldest = dhs_oldest.geometry()

# Scenes for the period with cloud-shadow, snow and cloud pixels masked.
# NOTE(review): `.merged` is presumably the cross-sensor merged collection
# exposed by optx.LandsatSR — confirm in optical_datasource.
srcoll_oldest = (
    optx.LandsatSR(roi_oldest, '2009-1-1', '2011-12-31')
    .merged
    .map(mask_qaclear)
)

# Per-band median, pinned to EPSG:3857 at 30 m, then passed through
# imgtools.add_latlon (presumably adds the 'LAT'/'LON' bands exported later).
srmedian_oldest = imgtools.add_latlon(
    srcoll_oldest.select(selbands).median().reproject('EPSG:3857', None, 30)
)

## Add nightlight

In [24]:
# Night-time lights sources: VIIRS DNB monthly composites and DMSP-OLS
# calibrated lights (see the asset ids).
viirs = ee.ImageCollection("NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG")
dmsp = ee.ImageCollection("NOAA/DMSP-OLS/CALIBRATED_LIGHTS_V4")

In [25]:
nlband = ['NIGHTLIGHTS']
viirs_recent = viirs.filterDate('2015-1-1', '2017-12-31').median().select([0],nlband)
viirs_mid = viirs.filterDate('2012-1-1', '2014-12-31').median().select([0],nlband)
dmsp_oldest = dmsp.filterDate('2009-1-1', '2011-12-31').median().select([0],nlband)

In [26]:
# Append each period's night-lights median (pinned to EPSG:3857 at 30 m) to
# the matching composite.
# overwrite=True makes this cell safe to re-run: because each name is rebound
# to its own result, a second execution would otherwise append another,
# renamed copy of the NIGHTLIGHTS band. On a first run nothing is overwritten,
# provided the composite has no band of that name yet.
srmedian_recent = srmedian_recent.addBands(
    viirs_recent.reproject('EPSG:3857', None, 30), overwrite=True)
srmedian_middle = srmedian_middle.addBands(
    viirs_mid.reproject('EPSG:3857', None, 30), overwrite=True)
srmedian_oldest = srmedian_oldest.addBands(
    dmsp_oldest.reproject('EPSG:3857', None, 30), overwrite=True)

## Add topography

In [27]:
# Elevation source: the SRTM image with asset id 'USGS/SRTMGL1_003'.
dem = ee.Image("USGS/SRTMGL1_003")

In [28]:
# Short output names for the three topography bands.
tbands = ['ELEV','SLO', 'ASP']
# Run the terrain algorithm on the DEM, keep elevation/slope/aspect and rename
# them to the short names above (same order).
topogr = ee.Algorithms.Terrain(dem).select(['elevation', 'slope', 'aspect'], tbands)

In [29]:
# Append the topography bands (pinned to EPSG:3857 at 30 m) to every composite.
# overwrite=True makes this cell safe to re-run: because each name is rebound
# to its own result, a second execution would otherwise append renamed
# duplicates of ELEV/SLO/ASP. On a first run nothing is overwritten, provided
# the composites have no bands of those names yet.
srmedian_recent = srmedian_recent.addBands(
    topogr.reproject('EPSG:3857', None, 30), overwrite=True)
srmedian_middle = srmedian_middle.addBands(
    topogr.reproject('EPSG:3857', None, 30), overwrite=True)
srmedian_oldest = srmedian_oldest.addBands(
    topogr.reproject('EPSG:3857', None, 30), overwrite=True)

## Export TF Records

In [30]:
# Property names of the first feature of the recent group, as a Python list.
# The same list is reused by all three export loops below, which assumes the
# other groups carry the same properties (all are filtered from `dhs`).
dhsinfo = dhs_recent.first().propertyNames().getInfo()

In [34]:
# Names passed to the exporter: image bands plus survey properties. The list
# is the same for every country, so it is assembled once before the loop.
bands = selbands+dhsinfo+['LAT', 'LON']+tbands+nlband

# One export call per country present in the 2015-17 group.
countries = dhs_recent.distinct('country').aggregate_array('country').getInfo()
for country_name in countries:
    seldhs = dhs_recent.filter(ee.Filter.eq('country', country_name))
    fname = 'lx_median_2015-17_'+country_name+'_dhslocs_'
    print(fname)

    # NOTE(review): the positional arguments are forwarded unchanged; their
    # meaning (presumably scale, patch radius, flags, bucket and folder) is
    # defined by ee_tf_exports.get_array_patches — confirm there.
    test = tf.get_array_patches(
        srmedian_recent, 30, 127, seldhs,
        True, True, bands, None,
        'sustainlab-common',
        'Poverty_tfrecords/',
        fname,
    )

lx_median_2015-17_tanzania_dhslocs_
lx_median_2015-17_nigeria_dhslocs_
lx_median_2015-17_mali_dhslocs_
lx_median_2015-17_malawi_dhslocs_
lx_median_2015-17_kenya_dhslocs_
lx_median_2015-17_angola_dhslocs_
lx_median_2015-17_zimbabwe_dhslocs_
lx_median_2015-17_ethiopia_dhslocs_
lx_median_2015-17_ghana_dhslocs_


In [33]:
# Re-initialise Earth Engine with explicit service-account credentials.
# NOTE(review): the private-key path is absolute and machine-specific; keep the
# key file out of version control and make the path configurable.
service_account = 'datahandler1@earth-sys262cs325b.iam.gserviceaccount.com'
credentials = ee.ServiceAccountCredentials(service_account, '/Users/zztang/Documents/EE/earth-sys262cs325b-d0222b03e2af.json')
ee.Initialize(credentials)

In [35]:
# Names passed to the exporter: image bands plus survey properties. The list
# is the same for every country, so it is assembled once before the loop.
bands = selbands+dhsinfo+['LAT', 'LON']+tbands+nlband

# One export call per country present in the 2012-14 group.
countries = dhs_middle.distinct('country').aggregate_array('country').getInfo()
for country_name in countries:
    seldhs = dhs_middle.filter(ee.Filter.eq('country', country_name))
    fname = 'lx_median_2012-14_'+country_name+'_dhslocs_'
    print(fname)

    # NOTE(review): the positional arguments are forwarded unchanged; their
    # meaning (presumably scale, patch radius, flags, bucket and folder) is
    # defined by ee_tf_exports.get_array_patches — confirm there.
    test = tf.get_array_patches(
        srmedian_middle, 30, 127, seldhs,
        True, True, bands, None,
        'sustainlab-common',
        'Poverty_tfrecords/',
        fname,
    )

lx_median_2012-14_senegal_dhslocs_
lx_median_2012-14_benin_dhslocs_
lx_median_2012-14_cote_d_ivoire_dhslocs_
lx_median_2012-14_guinea_dhslocs_
lx_median_2012-14_mali_dhslocs_
lx_median_2012-14_malawi_dhslocs_
lx_median_2012-14_zambia_dhslocs_
lx_median_2012-14_nigeria_dhslocs_
lx_median_2012-14_drc_dhslocs_
lx_median_2012-14_togo_dhslocs_
lx_median_2012-14_sierra_leone_dhslocs_
lx_median_2012-14_ghana_dhslocs_
lx_median_2012-14_burkina_faso_dhslocs_
lx_median_2012-14_rwanda_dhslocs_
lx_median_2012-14_kenya_dhslocs_
lx_median_2012-14_lesotho_dhslocs_
lx_median_2012-14_uganda_dhslocs_


In [37]:
# Names passed to the exporter: image bands plus survey properties. The list
# is the same for every country, so it is assembled once before the loop.
bands = selbands+dhsinfo+['LAT', 'LON']+tbands+nlband

# One export call per country present in the 2009-11 group.
countries = dhs_oldest.distinct('country').aggregate_array('country').getInfo()
for country_name in countries:
    seldhs = dhs_oldest.filter(ee.Filter.eq('country', country_name))
    fname = 'lx_median_2009-11_'+country_name+'_dhslocs_'
    print(fname)

    # NOTE(review): the positional arguments are forwarded unchanged; their
    # meaning (presumably scale, patch radius, flags, bucket and folder) is
    # defined by ee_tf_exports.get_array_patches — confirm there.
    test = tf.get_array_patches(
        srmedian_oldest, 30, 127, seldhs,
        True, True, bands, None,
        'sustainlab-common',
        'Poverty_tfrecords/',
        fname,
    )

lx_median_2009-11_uganda_dhslocs_
lx_median_2009-11_lesotho_dhslocs_
lx_median_2009-11_mozambique_dhslocs_
lx_median_2009-11_malawi_dhslocs_
lx_median_2009-11_ethiopia_dhslocs_
lx_median_2009-11_zimbabwe_dhslocs_
lx_median_2009-11_senegal_dhslocs_
lx_median_2009-11_tanzania_dhslocs_
lx_median_2009-11_burkina_faso_dhslocs_
lx_median_2009-11_nigeria_dhslocs_
lx_median_2009-11_rwanda_dhslocs_
lx_median_2009-11_angola_dhslocs_
lx_median_2009-11_cameroon_dhslocs_
