## Analyzing NDWI through Time with Landsat 8

In this section, we'll take a look at how the
Normalized Difference Water Index (NDWI) changes throughout 2016
for Mineral County, Nevada.

In [None]:
import geopyspark as gps
from pyspark import SparkContext
import numpy as np
from datetime import datetime
from shapely.geometry import mapping, shape
import pyproj
from shapely.ops import transform
from functools import partial
import urllib.request, json
from geonotebook.wrappers import TMSRasterData
from PIL import Image

In [None]:
# Grab data for Nevada
state_name, county_name = "NV", "Mineral"


def get_state_shapes(state, county):
    """Fetch the boundary of a US state and of one of its counties.

    Both boundaries are downloaded as GeoJSON from the
    johan/world.geo.json repository on GitHub; the geometry of the first
    feature in each file is used.

    Args:
        state: State identifier as it appears in the repository's file
            names (e.g. "NV").
        county: County name as it appears in the repository's file names.

    Returns:
        A tuple ``(state_ll, state_wm, county_ll, county_wm)`` of shapely
        geometries: the state and the county in EPSG:4326, each followed
        by the same geometry reprojected to EPSG:3857.

    Raises:
        Errors raised by ``urllib`` (network failure, missing file) and by
        JSON decoding propagate to the caller unchanged.
    """
    project = partial(
        pyproj.transform,
        pyproj.Proj(init='epsg:4326'),
        pyproj.Proj(init='epsg:3857'))

    def read_json(url):
        # Use the response as a context manager so the connection is
        # closed as soon as the payload has been read.
        with urllib.request.urlopen(url) as response:
            return json.loads(response.read().decode("utf-8"))

    state_url = "https://raw.githubusercontent.com/johan/world.geo.json/master/countries/USA/{}.geo.json".format(state)
    county_url = "https://raw.githubusercontent.com/johan/world.geo.json/master/countries/USA/{}/{}.geo.json".format(state,county)

    state_ll = shape(read_json(state_url)['features'][0]['geometry'])
    state_wm = transform(project, state_ll)
    county_ll = shape(read_json(county_url)['features'][0]['geometry'])
    county_wm = transform(project, county_ll)
    return (state_ll, state_wm, county_ll, county_wm)


(state_ll, state_wm, county_ll, county_wm) = get_state_shapes(state_name, county_name)

In [None]:
# Create the SparkContext used by every GeoPySpark call below:
# local master, Spark UI enabled, 4G of driver memory.
conf = (
    gps.geopyspark_conf(appName="Landsat")
    .setMaster("local[*]")
    .set("spark.ui.enabled", "true")
    .set("spark.driver.memory", "4G")
    .set("spark.hadoop.yarn.timeline-service.enabled", False)
)
sc = SparkContext(conf=conf)

## Initial Exploration: Seeing NDWI on the map



In [None]:
# Map each band name to its index within a tile's cell array.
bands = {
    name: index
    for index, name in enumerate(("Blue", "Green", "Red", "NIR", "QA"))
}

### Loading up an RDD of Landsat data

We first grab an RDD of relevant Landsat data. We will be grabbing some pre-ingested Landsat 8 data from a GeoTrellis layer in the Azavea DataHub. We'll query our specific state to only grab tiles from scenes which intersect our state boundary.

In [None]:
# Query the zoom-13 Landsat 8 layer for tiles that intersect the county
# (Web Mercator geometry) between 2016-04-01 and 2016-04-03 at 18:26:47,
# then cache the result because it is reused by several cells below.
layer = gps.query(
    "s3://datahub-catalogs-us-east-1",
    "landsat-8-continental-us-2016",
    layer_zoom=13,
    query_geom=county_wm,
    time_intervals=[datetime(2016, 4, day, 18, 26, 47) for day in (1, 3)],
    num_partitions=100,
).cache()

## Cloud masking

In [None]:
def mask_clouds(tile):
    """Return a copy of ``tile`` with cloudy pixels zeroed and QA dropped.

    A pixel is treated as cloud when the two bits left after shifting its
    QA value right by 14 are both set (value 3). Presumably bits 14-15
    are the QA band's cloud-confidence field -- confirm against the
    Landsat QA specification.

    Args:
        tile: A tile whose ``cells`` array holds the bands described by
            ``bands``. The QA band is read from ``bands["QA"]`` and is
            assumed to be the last band, since every band except the last
            one is kept.

    Returns:
        A new ``gps.Tile`` containing all bands except the last, with
        cloud pixels set to 0 and 0 declared as the no-data value. The
        input tile is left untouched.
    """
    qa = tile.cells[bands["QA"]]
    # Mask down to two bits after the shift: on a signed dtype the shift
    # sign-extends, so a value with both top bits set would come out as
    # -1 rather than 3 and never match.
    is_cloud = (np.right_shift(qa, 14) & 0b11) == 3

    # Copy so that the caller's tile is not modified in place.
    masked = tile.cells[:-1].copy()
    masked[:, is_cloud] = 0
    return gps.Tile.from_numpy_array(masked, no_data_value=0)


cloud_masked = layer.to_numpy_rdd().mapValues(mask_clouds)

## Mosaicing layers

In [None]:
np_layer = layer.to_numpy_rdd()


def mosaic(tiles):
    """Merge the tiles of one location, preferring the most recent data.

    Args:
        tiles: Iterable of ``(instant, tile)`` pairs. Each tile exposes a
            ``cells`` array indexed by band first and a ``no_data_value``.

    Returns:
        A ``gps.Tile`` that starts as a copy of the most recent tile. Each
        older tile, taken from newest to oldest, fills in the pixels that
        are still empty. The result keeps the most recent tile's
        no-data value.
    """
    newest_first = sorted(list(tiles), key=lambda pair: pair[0], reverse=True)
    newest_tile = newest_first[0][1]
    merged = newest_tile.cells.copy()
    no_data_value = newest_tile.no_data_value

    band_count = merged.shape[0]
    # The emptiness tests look at every band except the last one, but
    # always at least at band 0.
    checked = max(band_count - 1, 1)

    for _, older_tile in newest_first[1:]:
        incoming = older_tile.cells
        # A pixel is still unset when all checked bands hold no-data ...
        unset = np.all(merged[:checked] == no_data_value, axis=0)
        # ... and the older tile has something to offer when any of its
        # checked bands holds real data.
        has_data = np.any(
            incoming[:checked] != older_tile.no_data_value, axis=0)
        # Copy every band (including the unchecked last one) at once; the
        # 2-D mask broadcasts across the band axis.
        np.copyto(merged, incoming[:band_count], where=unset & has_data)

    return gps.Tile.from_numpy_array(merged, no_data_value=no_data_value)

# Re-key every tile by its spatial position only, keeping the instant next
# to the tile, so that all acquisitions of one location are grouped and
# merged by mosaic().
mosaiced = (
    np_layer
    .map(lambda kv: (gps.SpatialKey(kv[0].col, kv[0].row),
                     (kv[0].instant, kv[1])))
    .groupByKey()
    .mapValues(mosaic)
)

# Wrap the merged tiles in a spatial layer that reuses the source layer's
# metadata and zoom level.
mosaiced_layer = gps.TiledRasterLayer.from_numpy_rdd(
    layer_type=gps.LayerType.SPATIAL,
    numpy_rdd=mosaiced,
    metadata=layer.layer_metadata,
    zoom_level=layer.zoom_level,
)
mosaiced_layer.count()


## Viewing color corrected Landsat

In [None]:
def render_image(tile):
    """Render a tile as an RGBA image for display on the map.

    Non-zero cell values are clamped to a fixed range and scaled from it
    onto 0-255. Bands 2, 1 and 0 become the red, green and blue channels.
    A pixel is fully transparent when bands 0, 1 and 2 all equal the
    tile's no-data value, and fully opaque otherwise.

    Args:
        tile: A tile exposing ``cells`` (band-first array) and
            ``no_data_value``.

    Returns:
        A ``PIL.Image`` in ``RGBA`` mode.
    """
    # Hand-picked stretch limits used for colour correction.
    lower, upper = 4000, 15176
    span = upper - lower

    raw = tile.cells.astype('int32')
    # Clamp only non-zero cells; zeros pass through unchanged.
    values = np.where(raw != 0, np.clip(raw, lower, upper), raw)
    scaled = ((values - lower) * 255) / span

    red, green, blue = scaled[2], scaled[1], scaled[0]
    transparent = np.all(values[:3] == tile.no_data_value, axis=0)
    alpha = np.where(transparent, 0, 255)

    rgba = np.dstack([red, green, blue, alpha]).astype('uint8')
    return Image.fromarray(rgba, mode='RGBA')

# To preview a single tile instead of serving the whole layer:
#     render_image(mosaiced_layer.to_numpy_rdd().first()[1])

# Build a bilinearly resampled pyramid of the mosaic, serve it as TMS tiles
# rendered through render_image, and add it to the map as "mosaic".
mosaic_pyramid = mosaiced_layer.pyramid(
    resample_method=gps.ResampleMethod.BILINEAR,
)
tms_server = gps.TMS.build(mosaic_pyramid, display=render_image)
M.add_layer(TMSRasterData(tms_server), name="mosaic")

In [None]:
# Remove the layer at index 0 of the map's layer list.
# NOTE(review): `M` is not defined in this notebook; presumably it is the map
# object provided by the geonotebook environment -- confirm.
M.remove_layer(M.layers[0])

## Viewing NDWI

In [None]:
# Collect the instant of every key in the layer; the same instant can
# appear many times, once per tile.
# Alternative kept for reference (instants as unix time):
#     [gps.geotrellis._convert_to_unix_time(k.instant) for k in layer.collect_keys()]
date = [key.instant for key in layer.collect_keys()]

In [None]:
#date = datetime.strptime('2016-03-30 15:32:57', '%Y-%m-%d %H:%M:%S')
# De-duplicate the instants and sort them: iterating a set has no defined
# order, so without sorting `dates[0]` could be a different instant from
# one run to the next. After sorting it is always the earliest one.
dates = sorted(set(date))
dates

In [None]:
# Extract the green and near-infrared bands as cached FLOAT64 layers.
g, nir = (
    layer.bands(bands[name]).convert_data_type(gps.CellType.FLOAT64).cache()
    for name in ("Green", "NIR")
)

# NDWI = (Green - NIR) / (Green + NIR)
ndwi = (g - nir) / (g + nir)

In [None]:
#nir.to_numpy_rdd().first()[1].cells.min()
# Inspect the value range that get_min_max() reports for the NDWI layer.
ndwi.get_min_max()

In [None]:
# Reduce the NDWI layer to the single instant dates[0], mask it to the
# county polygon (Web Mercator), and inspect the resulting value range.
ndwi_spatial = (
    ndwi
    .to_spatial_layer(target_time=dates[0])
    .mask(county_wm)
)
ndwi_spatial.get_min_max()

In [None]:
# Build a pyramid from the masked NDWI layer using bilinear resampling;
# it is served as map tiles in a later cell.
pyramid = ndwi_spatial.pyramid(resample_method=gps.ResampleMethod.BILINEAR)

In [None]:
# Colour ramp for NDWI: each break value maps to a colour given as a
# 32-bit hex integer, classified with LESS_THAN_OR_EQUAL_TO.
ndwi_color_map = gps.ColorMap.build(
    breaks={
        -0.1: 0xaacdffaa,
        0.0: 0x70abffff,
        0.05: 0x3086ffff,
        0.1: 0x1269e2ff,
        0.15: 0x094aa5ff,
        0.2: 0x012c69ff,
        0.25: 0x012cbcff,
    },
    classification_strategy=gps.ClassificationStrategy.LESS_THAN_OR_EQUAL_TO,
)

In [None]:
# Serve the NDWI pyramid as TMS tiles coloured with ndwi_color_map, then add
# it to the map under the name "landsat". This rebinds `tms_server`, which an
# earlier cell used for the mosaic layer.
tms_server = gps.TMS.build(pyramid, display=ndwi_color_map)
M.add_layer(TMSRasterData(tms_server), name="landsat")

In [None]:
# Remove the layer at index 0 of the map's layer list.
# NOTE(review): `M` is not defined in this notebook; presumably it is the map
# object provided by the geonotebook environment -- confirm.
M.remove_layer(M.layers[0])

In [None]:
# Call mean_series on the space-time NDWI layer with the county polygon
# (Web Mercator). NOTE(review): presumably this yields one mean value per
# instant in the layer -- confirm against the GeoPySpark docs.
ndwi.mean_series(county_wm)