<a href="https://colab.research.google.com/github/kylecshan/xplore_idm/blob/master/getLandsatClusters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import packages and authenticate

In [0]:
# Cloud authentication.
# Authenticates the Colab session's Google user; this is an interactive step.
# NOTE(review): presumably required so later cells can reach the Cloud Storage
# bucket used for exports — confirm.
from google.colab import auth
auth.authenticate_user()

In [0]:
# Earth Engine install to notebook VM.
!pip install earthengine-api



In [0]:
# Import, authenticate and initialize the Earth Engine library.
import ee
# Interactive: prints an authorization URL and prompts for a verification code.
ee.Authenticate()
# Must run after authentication and before any other ee.* call in this notebook.
ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://accounts.google.com/o/oauth2/auth?client_id=517222506229-vsmmajv00ul0bs7p89v5m89qs8eb9359.apps.googleusercontent.com&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fearthengine+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdevstorage.full_control&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code

The authorization workflow will generate a code, which you should paste in the box below. 
Enter verification code: <redacted>

Successfully saved authorization token.


## Configuration

In [0]:
# Names and locations for outputs in Cloud Storage.
BUCKET = 'xplore_idm_fall2019'
FOLDER = 'data'
TRAINING_BASE = 'cluster'

# Model inputs (Landsat bands) and the response variable.
opticalBands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
thermalBands = ['B10', 'B11']
BANDS = [*opticalBands, *thermalBands]
RESPONSE = 'avg_vis'
FEATURES = [*BANDS, RESPONSE]

# Size of the training dataset.
# NOTE(review): not referenced by any cell visible in this section.
TRAIN_SIZE = 2000

## Get images
### Nigeria country border

In [0]:
# Country polygon used to spatially filter the image collections below.
# The border is the first LSIB feature whose 'country_co' property equals 'NI'.
countryBorders = ee.FeatureCollection('USDOS/LSIB_SIMPLE/2017')
nigeriaFeature = countryBorders.filterMetadata('country_co', 'equals', 'NI').first()
nigeria = ee.Feature(nigeriaFeature).geometry()

### Landsat8 and DMSP OLS night lights

In [0]:
# Cloud masking function.
def maskL8sr(image):
  """Mask and rescale one Landsat 8 surface-reflectance image.

  A pixel is kept only when all of the following hold:
    * bits 3 (cloud shadow) and 5 (cloud) of 'pixel_qa' are both clear,
    * every band of the input image is unmasked,
    * every optical band value lies strictly between 0 and 10000.

  Args:
    image: ee.Image providing 'pixel_qa' plus the bands listed in the
      module-level `opticalBands` and `thermalBands`.

  Returns:
    ee.Image with the optical bands divided by 10000, followed by the thermal
    bands divided by 10, clamped to [273.15, 373.15], shifted by -273.15 and
    divided by 100 (presumably Kelvin mapped onto 0-1 over 0-100 degrees C),
    with the combined mask applied.
  """
  optical = image.select(opticalBands)
  thermal = image.select(thermalBands)
  qa = image.select('pixel_qa')

  # QA bit masks: 2**3 flags cloud shadow, 2**5 flags cloud.
  shadowBit = ee.Number(2).pow(3).int()
  cloudBit = ee.Number(2).pow(5).int()
  shadowFree = qa.bitwiseAnd(shadowBit).eq(0)
  cloudFree = qa.bitwiseAnd(cloudBit).eq(0)

  # 'min' across bands: a pixel survives only if it is valid in every band.
  unmaskedEverywhere = image.mask().reduce('min')
  opticalInRange = optical.gt(0).And(optical.lt(10000)).reduce('min')

  keep = shadowFree.And(cloudFree).And(unmaskedEverywhere).And(opticalInRange)

  reflectance = optical.divide(10000)
  temperature = (thermal.divide(10).clamp(273.15, 373.15)
                 .subtract(273.15).divide(100))
  return reflectance.addBands(temperature).updateMask(keep)

In [0]:
# Landsat 8 surface reflectance: per-image cloud masking, then a per-pixel
# median over the date range, rescaled by 256 and stored as 8-bit.
landsatCollection = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')
landsatComposite = (
  landsatCollection
  .filterDate('2013-01-01', '2015-12-31')
  .filterBounds(nigeria)
  .map(maskL8sr)
  .median()
  .multiply(256)
  .uint8()
)

# DMSP-OLS night lights: median of the 'avg_vis' band, stored as 8-bit.
nightLights = ee.ImageCollection("NOAA/DMSP-OLS/NIGHTTIME_LIGHTS")
lightComposite = (
  nightLights
  .filter(ee.Filter.date('2013-01-01', '2013-12-31'))
  .filterBounds(nigeria)
  .select('avg_vis')
  .median()
  .uint8()
)

### Upscale night lights to match landsat, and sample data

In [0]:
# Stack the night-lights band behind the Landsat bands and cast to float.
# select(BANDS) keeps this cell idempotent: landsatComposite is reassigned from
# itself, so without the select a second run would stack another copy of the
# night-lights band onto the already-stacked image.
landsatComposite = ee.Image.cat([landsatComposite.select(BANDS), lightComposite]).float()
# Per-pixel neighbourhood arrays from a square kernel of radius 417 pixels.
# NOTE(review): `arrays` is not referenced by any active cell visible here.
arrays = landsatComposite.neighborhoodToArray(ee.Kernel.square(417, 'pixels'))

In [0]:
# Interactive file picker. A later cell reads the 'dhs_gps.csv' entry of
# `uploaded`, so a file with exactly that name must be uploaded here.
from google.colab import files
uploaded = files.upload()

Saving dhs_gps.csv to dhs_gps.csv


In [0]:
import io
import pandas as pd
# Parse the uploaded DHS cluster table.
clusterTable = pd.read_csv(io.BytesIO(uploaded['dhs_gps.csv']))
print(clusterTable)
# Columns 1..11 are the indicator columns that follow cluster_id.
vaccines = clusterTable.columns[1:12]
print(vaccines)
# Later cells index rows and columns positionally, so keep a plain array.
clusters = clusterTable.to_numpy()
n = len(clusters)

     cluster_id       bcg   measles  ...  any_vacc  latitude  longitude
0             1  0.880000  0.440000  ...  0.960000  6.902270   8.097115
1             2  0.892857  0.464286  ...  1.000000  7.854454   7.876807
2             3  0.681818  0.500000  ...  0.777778  7.781454   7.940457
3             4  0.535714  0.321429  ...  0.769231  7.176902   9.197081
4             5  0.592593  0.500000  ...  0.764706  7.148612   9.164210
..          ...       ...       ...  ...       ...       ...        ...
884         900  0.947368  0.684211  ...  0.916667  7.368560   3.937503
885         901  0.080645  0.000000  ...  0.370968  8.570109   3.547449
886         902  0.655172  0.310345  ...  0.562500  8.765458   3.603125
887         903  0.477273  0.318182  ...  0.400000  8.660406   3.522780
888         904  0.735294  0.470588  ...  0.590909  8.687992   3.412814

[889 rows x 14 columns]
Index(['bcg', 'measles', 'dpt1', 'dpt2', 'dpt3', 'polio0', 'polio1', 'polio2',
       'polio3', 'health_card', 

In [0]:
# bounds = ee.Geometry.Point(9,7).buffer(12500).bounds()
# bounds.getInfo()

{'coordinates': [[[8.887595352004267, 6.887557392664181],
   [9.112767634673872, 6.887557392664181],
   [9.112767634673872, 7.112485824393552],
   [8.887595352004267, 7.112485824393552],
   [8.887595352004267, 6.887557392664181]]],
 'geodesic': False,
 'type': 'Polygon'}

In [0]:
# task = ee.batch.Export.image.toCloudStorage(
#   image = landsatComposite,
#   region = bounds.getInfo()['coordinates'],
#   description = 'test',
#   bucket = BUCKET,
#   fileNamePrefix = FOLDER + '/' + desc,
#   scale = 30,
#   fileFormat = 'GeoTIFF',
#   fileDimensions = 1024
# )
# task.start()

In [0]:
def clipExport(lat, lon, filename, radius=12500, scale=30):
  """Start an Earth Engine export of a square tile of `landsatComposite`.

  The tile is the bounding box of a buffer of `radius` around the point
  (lat, lon). It is exported as GeoTIFF to the Cloud Storage bucket `BUCKET`
  under the object prefix `FOLDER + '/' + filename`.

  Args:
    lat: Latitude of the tile centre.
    lon: Longitude of the tile centre.
    filename: Used both as the task description and as the output file name.
    radius: Buffer distance around the point, in metres. Defaults to 12500,
      the value previously hard-coded.
    scale: Export resolution in metres per pixel. Defaults to 30, the value
      previously hard-coded.

  Returns:
    The started ee.batch.Task, so callers can poll its status. Earlier
    versions returned None; existing callers that ignore the result are
    unaffected.
  """
  # ee.Geometry.Point takes (lon, lat), the reverse of this function's arguments.
  bounds = ee.Geometry.Point(lon, lat).buffer(radius).bounds()
  task = ee.batch.Export.image.toCloudStorage(
    image = landsatComposite,
    # getInfo() is a blocking round-trip that fetches the box coordinates.
    region = bounds.getInfo()['coordinates'],
    description = filename,
    bucket = BUCKET,
    fileNamePrefix = FOLDER + '/' + filename,
    scale = scale,
    fileFormat = 'GeoTIFF',
    # With the defaults a tile is about 25000 / 30 = 834 pixels wide, which is
    # below this 1024-pixel per-file limit.
    fileDimensions = 1024
  )
  task.start()
  return task

In [0]:
# Submit one export task per cluster row.
# NOTE(review): the loop starts at row 400 — presumably rows 0-399 were
# submitted in an earlier run of this cell. Set FIRST_ROW = 0 to export every
# cluster from a fresh kernel.
FIRST_ROW = 400
# Use the actual row count `n` instead of a hard-coded 889 so the loop stays
# correct if the uploaded CSV changes size.
for k in range(FIRST_ROW, n):
  # Column 0 is cluster_id; columns 12 and 13 are latitude and longitude.
  description = 'image_' + str(int(clusters[k, 0])).rjust(3,'0')
  clipExport(clusters[k, 12], clusters[k, 13], description)


In [0]:
# chunkSize = 10
# for k in range(1): #(n+199)//200):
#   a = k*chunkSize
#   b = min(a+chunkSize, n)
#   tempCollection = [ee.Feature(ee.Geometry.Point(x[1], x[0])) for x in clusters[a:b]]
#   tempCollection = ee.FeatureCollection(ee.List(tempCollection))
#   geomSample = tempCollection.map(sampleArray)

#   desc = TRAINING_BASE + '_g' + str(k)
#   task = ee.batch.Export.table.toCloudStorage(
#     collection = geomSample,
#     description = desc, 
#     bucket = BUCKET, 
#     fileNamePrefix = FOLDER + '/' + desc,
#     fileFormat = 'TFRecord',
#     selectors = BANDS + [RESPONSE]
#   )
#   task.start()

### Check task status

In [0]:
# Pretty-print the first 150 entries of the Earth Engine task list.
import pprint
recentTasks = ee.batch.Task.list()[:150]
pp = pprint.PrettyPrinter()
pp.pprint(recentTasks)

[<Task EXPORT_IMAGE: image_904 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_903 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_902 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_901 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_900 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_899 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_898 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_897 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_896 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_895 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_894 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_893 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_892 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_891 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_890 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_889 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_888 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_887 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_886 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_885 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_884 (COMPLETED)>,
 <Task EXPORT_IMAGE: image_883 (COMPLETED)>,
 <Task EXP

'008'