<a href="https://colab.research.google.com/github/kylecshan/xplore_idm/blob/master/getLandsatClusters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import packages and authenticate

In [0]:
# Cloud authentication: authenticate the Colab user for this notebook session.
# NOTE(review): presumably needed so the export further down can write to the
# Cloud Storage bucket -- confirm.
from google.colab import auth
auth.authenticate_user()

In [0]:
# Earth Engine install to notebook VM.
# NOTE(review): the package version is not pinned; the recorded output of this
# cell shows earthengine-api 0.1.203 being installed.
!pip install earthengine-api

Collecting earthengine-api
[?25l  Downloading https://files.pythonhosted.org/packages/a8/ea/e2fd58451df14e9a85e25d330f43103055003ad613b4916b0ab40154774a/earthengine-api-0.1.203.tar.gz (146kB)
[K     |████████████████████████████████| 153kB 8.9MB/s 
Building wheels for collected packages: earthengine-api
  Building wheel for earthengine-api (setup.py) ... [?25l[?25hdone
  Created wheel for earthengine-api: filename=earthengine_api-0.1.203-cp36-none-any.whl size=174918 sha256=5b693fa66cc188d1cff32126ca1d9545f2574e7eae8aed467d433778da472ab6
  Stored in directory: /root/.cache/pip/wheels/26/69/ab/4ca47642ccc729cdc6e39b57b9e83164cddefc23b576aadac0
Successfully built earthengine-api
Installing collected packages: earthengine-api
Successfully installed earthengine-api-0.1.203


In [0]:
# Import the Earth Engine client library, run its interactive authentication
# flow (it prints a URL and prompts for a verification code), and then
# initialize the library. Authentication must complete before Initialize().
import ee
ee.Authenticate()
ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: [REDACTED]

Successfully saved authorization token.


## Configuration

In [0]:
# Names and locations of the outputs written to Cloud Storage.
BUCKET = 'xplore_idm_fall2019'
FOLDER = 'data'
TRAINING_BASE = 'clusters'

# Model inputs (Landsat bands) and the response variable.
opticalBands = ['B%d' % idx for idx in range(1, 8)]   # B1 .. B7
thermalBands = ['B%d' % idx for idx in (10, 11)]      # B10, B11
BANDS = [*opticalBands, *thermalBands]
RESPONSE = 'avg_rad'
FEATURES = [*BANDS, RESPONSE]

# Size and shape of the patches expected by the model.
KERNEL_SIZE = 166

# Sizes of the training and evaluation datasets.
TRAIN_SIZE = 2000

## Get images
### Nigeria country border

In [0]:
# Take the first feature of the LSIB 2017 simplified boundaries whose
# 'country_co' property equals 'NI', and keep only its geometry.
countryBorders = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017')
nigeriaFeature = countryBorders.filterMetadata('country_co', 'equals', 'NI').first()
nigeria = ee.Feature(nigeriaFeature).geometry()

### Landsat8 and VIIRS night lights

In [0]:
# Cloud masking function.
def maskL8sr(image):
  """Mask unwanted pixels of one Landsat image and rescale its bands.

  A pixel is kept only when all of the following hold:
    * bits 3 (cloud shadow) and 5 (cloud) of the 'pixel_qa' band are unset,
    * the pixel is unmasked in every band of the input image,
    * every optical band value lies strictly between 0 and 10000.

  Args:
    image: ee.Image providing 'pixel_qa' plus the bands named in the
      module-level `opticalBands` and `thermalBands` lists.

  Returns:
    ee.Image holding the optical bands divided by 10000 followed by the
    thermal bands (divided by 10, clamped to [273.15, 373.15], shifted by
    -273.15 and divided by 100), with the combined mask applied.
  """
  shadowBit = 1 << 3
  cloudBit = 1 << 5

  pixelQa = image.select('pixel_qa')
  noShadow = pixelQa.bitwiseAnd(shadowBit).eq(0)
  noCloud = pixelQa.bitwiseAnd(cloudBit).eq(0)
  qaMask = noShadow.And(noCloud)

  # Minimum over the per-band masks: valid only where every band is valid.
  validEverywhere = image.mask().reduce('min')

  optical = image.select(opticalBands)
  inRange = optical.gt(0).And(optical.lt(10000)).reduce('min')

  keep = qaMask.And(validEverywhere).And(inRange)

  opticalScaled = optical.divide(10000)
  # Presumably Kelvin*10 -> Kelvin -> degrees Celsius in [0, 100] -> [0, 1];
  # TODO confirm the thermal band units.
  thermalScaled = (image.select(thermalBands)
                   .divide(10)
                   .clamp(273.15, 373.15)
                   .subtract(273.15)
                   .divide(100))
  return opticalScaled.addBands(thermalScaled).updateMask(keep)

In [0]:
# Per-pixel median of the cloud-masked Landsat 8 surface-reflectance images
# that intersect Nigeria within the date filter.
landsatCollection = (
    ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
    .filterDate('2013-01-01', '2015-12-31')
    .filterBounds(nigeria)
    .map(maskL8sr)
)
landsatComposite = landsatCollection.median()

# Per-pixel median of the VIIRS monthly 'avg_rad' band over the date filter.
viirsCollection = (
    ee.ImageCollection("NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG")
    .filter(ee.Filter.date('2014-01-01', '2014-12-31'))
    .filterBounds(nigeria)
    .select('avg_rad')
)
lightComposite = viirsCollection.median()

### Upscale night lights to match landsat, and sample data

In [0]:
# Stack the Landsat predictor bands with the night-lights response band and
# cast everything to float.
# Selecting BANDS first keeps this cell idempotent: without it, re-running the
# cell would stack another copy of the night-lights band onto the already
# combined image, because `landsatComposite` is reassigned here.
landsatComposite = ee.Image.cat(
    [landsatComposite.select(BANDS), lightComposite]).float()

# Turn each pixel's square neighborhood into an array-valued pixel.
# KERNEL_SIZE (166, from the configuration cell) replaces the previously
# hard-coded literal. It is passed as the kernel radius in pixels.
# NOTE(review): a radius of 166 presumably yields 333 x 333 patches -- confirm
# this matches the patch size the model expects.
arrays = landsatComposite.neighborhoodToArray(
    ee.Kernel.square(KERNEL_SIZE, 'pixels'))

In [0]:
# Prompt for a file upload from the local machine. A later cell indexes
# `uploaded` by file name ('dhs_gps.csv') to obtain that file's bytes.
from google.colab import files
uploaded = files.upload()

Saving dhs_gps.csv to dhs_gps (1).csv


In [0]:
import io
import pandas as pd

# Locate the uploaded cluster CSV. The recorded upload output shows Colab
# saving a re-uploaded file under a new name ('dhs_gps (1).csv'), so the
# bytes may not be keyed under the expected name. Fall back to the only
# uploaded file when there is exactly one.
CLUSTER_CSV = 'dhs_gps.csv'
if CLUSTER_CSV in uploaded:
  csvBytes = uploaded[CLUSTER_CSV]
elif len(uploaded) == 1:
  csvBytes = next(iter(uploaded.values()))
else:
  raise KeyError(
      "Expected an uploaded file named %r, got: %s"
      % (CLUSTER_CSV, sorted(uploaded)))

clustersTable = pd.read_csv(io.BytesIO(csvBytes))

# Keep only the last two columns as an (n_clusters, 2) NumPy array.
# NOTE(review): presumably these are the GPS coordinates -- confirm the column
# order (latitude/longitude) against the CSV header.
clusters = clustersTable.iloc[:, -2:].to_numpy()
clusters

array([[6.90227 , 8.097115],
       [7.854454, 7.876807],
       [7.781454, 7.940457],
       ...,
       [8.765458, 3.603125],
       [8.660406, 3.52278 ],
       [8.687992, 3.412814]])

In [0]:
# Sample one neighborhood-array feature at every cluster location and export
# all of them as a single TFRecord table.
#
# Bug fix: the accumulator used to be re-created inside the loop, so every
# iteration discarded the previous samples and only the last cluster was
# exported. The per-cluster samples are now collected in a list and combined
# once with flatten(), which also avoids a deeply nested chain of merge() calls.
n = clusters.shape[0]
samples = []
for i in range(n):
  # NOTE(review): ee.Geometry.Point expects (longitude, latitude). The last
  # rows printed for `clusters` (e.g. [8.687992, 3.412814]) look like
  # (latitude, longitude) for Nigeria -- confirm the column order and swap the
  # two indices if so.
  point = ee.Geometry.Point(ee.List([clusters[i,0], clusters[i,1]]))
  sample = arrays.sample(
    region = point,
    scale = 30,
    numPixels = 1,
    seed = i,
    tileScale = 8
  )
  samples.append(sample)

# Collection of per-cluster collections -> one flat FeatureCollection.
geomSample = ee.FeatureCollection(samples).flatten()

desc = TRAINING_BASE
task = ee.batch.Export.table.toCloudStorage(
  collection = geomSample,
  description = desc,
  bucket = BUCKET,
  fileNamePrefix = FOLDER + '/' + desc,
  fileFormat = 'TFRecord',
  selectors = BANDS + [RESPONSE]
)
task.start()

### Check task status

In [0]:
# Pretty-print the first two entries of the Earth Engine task list.
import pprint
pp = pprint.PrettyPrinter()
firstTasks = ee.batch.Task.list()[:2]
pp.pprint(firstTasks)

[<Task EXPORT_FEATURES: clusters (COMPLETED)>,
 <Task EXPORT_FEATURES: patches_g9 (COMPLETED)>]
