In [1]:
import logging
logging.getLogger().setLevel(logging.INFO)
logging.captureWarnings(True)

import json
import pkg_resources

import numpy as np
import matplotlib.pyplot as plt
import descarteslabs as dl
from descarteslabs import workflows as wf
from tqdm.notebook import tqdm
import pandas as pd
from shapely import geometry as sgeom

import will_utils as utils

### NDVI

In this section we are going to calculate the NDVI for an area of Brazil using workflows. We will employ workflows functionality that allows us to mask for clouds, create a daily composite of images, apply a moving window average of the images, and lastly deploy the NDVI workflow across a large AOI using workflows jobs.

In [2]:
product_id = 'sentinel-2:L1C'
start_datetime='2018-01-01'
end_datetime='2021-01-01'

In [3]:
#import yolo country
tomato_aoi = json.load(open('yolo.geojson'))

# #Get UTM EPSG code
shapely_aoi = sgeom.shape(tomato_aoi['features'][0]['geometry'])
lat, lon = shapely_aoi.centroid.y, shapely_aoi.centroid.x
utm_epsg = utils.wgs_to_epsg(lat, lon)

#Create workflows context
wf_ctx = wf.GeoContext(
                geometry=tomato_aoi['features'][0]['geometry'],
                resolution=10.0,
                crs=f'EPSG:{utm_epsg}',
                bounds_crs='EPSG:4326')

# #Convert to a tile 
wf_ctx_data = wf_ctx.compute(progress_bar=True)
reflat, reflon = wf_ctx_data['bounds'][1], wf_ctx_data['bounds'][0]
tilesize= max(wf_ctx_data['arr_shape'])
tile = dl.scenes.DLTile.from_latlon(reflat, reflon, wf_ctx_data['resolution'], tilesize, 0)



Job ID: 075785a12330c1399615eb88b00fa6069a083c4870a89c9d
[      ] | Steps: 0/0 | Stage: SUCCEEDED                                      

In [4]:
# Get shape of Yolo county in California
places_client = dl.Places()
state = places_client.shape("north-america_united-states_california_sacramento-valley_yolo")

# Define Workflows GeoContext
geoctx = dl.scenes.AOI(sg.shape(state.geometry).simplify(0.01),
                       crs="EPSG:4326", resolution=0.01)

# Display on a map:
dsp.GeoJSON(geojson.Feature(geometry=geoctx.geometry))

NameError: name 'sg' is not defined

Here we start to define the workflow. We will use Sentinel-2 imagery which has a resolution of 10 meters. In this next block of code, comments have been added before each step explaining what is happening in more detail.

In [None]:
#Create a basic workflow defining the product, date range, and finally picking which bands we want
sent2 = wf.ImageCollection.from_id(product_id=product_id,
                                   start_datetime=start_datetime,
                                   end_datetime=end_datetime
                                   ).pick_bands('derived:ndvi cloud-mask')



# Also import the 2020 CDL 
cdl = wf.ImageCollection.from_id("usda:cdl:v1", start_datetime="2020-12-31", end_datetime="2021-01-01")

s2 = wf.ImageCollection.from_id(product_id=product_id,
                                   start_datetime=start_datetime,
                                   end_datetime=end_datetime
                                   ).pick_bands('red green blue').mask(cdl.mosaic() != 54) 

#Here we mask all pixels in each image where the cloud mask is equal to 1.
#We then just return the ndvi band using pick_bands. 
ndvi_masked = sent2.map(lambda img: img.mask(img.pick_bands('cloud-mask') == 1)).pick_bands('derived:ndvi')

# And mask by the CDL = 54, which limits the calculation to only tomatoers
ndvi_masked = ndvi_masked.mask(cdl.mosaic() != 54) 


#This step groups the images by day. If there is more than one image on a day in the AOI
#Then they are grouped into an Image collection together. This step also easily exposes
#The date information for all the images in our time window
ndvi_grouped = ndvi_masked.groupby(dates=('year', 'month', 'day'))

#Next, we create a composite image for each day by taking the mean across images
ndvi_mean = ndvi_grouped.mean('images')

#Next we apply a moving window to the grouped images. In this case, we are combining the images
#in the window with a mean operator and making sure the group information is included in the 
#properties of the new image. We are including the current image, plus the two images directly
#before and after
ndvi_windowed = ndvi_mean.map_window(lambda back, img, fwd: 
                                     wf.concat(back, img, fwd)
                                     .mean('images')
                                     .with_properties(group=img.properties['group']), back=2, fwd=2)

#Now since we want to summarize the NDVI over the entire AOI using a mean, we will return the sum
#and count of the unmasked pixels in each group. Additionally, we will return the group which is the date
#of the central image in the window. We use workflows containers to organize the results. 
ndvi_stats = ndvi_windowed.map(lambda img: wf.Dict[wf.Str, wf.Any]({
                                      'sum': img.sum('pixels')['derived:ndvi'],
                                      'count': img.count('pixels')['derived:ndvi'],
                                      'group': img.properties['group']
}))

In [None]:
#Get the groups and split the AOI into DL Tiles
groups = ndvi_grouped.groups.keys().compute(wf_ctx, progress_bar=True)
tiles = dl.scenes.DLTile.from_shape(tomato_aoi, 10, 1024, 0)

In [None]:
#Submit the workflows jobs using compute and setting block=False
jobs = list(map(lambda ctx: ndvi_stats.compute(ctx, block=False), tiles))

In the step below, we are going to combine the output of the jobs by group. We use a function located in utils.py 
called as_completed which loops over our job list and yeilds jobs as they complete.

In [None]:
total_sum = {key: 0 for key in groups}
total_count = {key: 0 for key in groups}
for job in tqdm(utils.as_completed(jobs, interval_sec=1), total=len(jobs)):
    data = job.result(progress_bar=True)
    for entry in data:
        if not np.ma.is_masked(entry['sum']):
            total_sum[entry['group']] += entry['sum']
            total_count[entry['group']] += entry['count']

All that is left to do is combine the information into a couple lists for plotting or saving later

In [None]:
ndvi_vals = []
group_dates = []
for key in groups:
    summed = total_sum[key]
    counted = total_count[key]
    if summed > 0:
        ndvi_vals.append(summed/counted)
    else:
        ndvi_vals.append(np.nan)
    
    group_dates.append(pd.to_datetime(str(key), format='(%Y, %m, %d)'))


In [None]:
# Save these lists to text files for further processing
with open('group_dates.txt', 'w') as f:
    for item in group_dates:
        f.write("%s\n" % item)
textfile.close()

with open('ndvi_vals.txt', 'w') as f:
    for item in ndvi_vals:
        f.write("%s\n" % item)
textfile.close()

### Weather

In this section we are calculating the Growing Degree Days, Precipitation, and Soil Moisture content in a daily intervals for the entire year. We define a basic workflow object below and then expand on it for each of the layers we are going to compute.

In [None]:
ncepflow = wf.ImageCollection.from_id('ncep:cfsr-v2:daily:v1', 
                                       start_datetime=start_datetime, 
                                       end_datetime=end_datetime)

#### Growing Degree Days

In [None]:
# https://en.wikipedia.org/wiki/Growing_degree-day#GDD_calculation
# NCEP data is in 0.01 K
tmin, tmax =  ncepflow.unpack_bands('tmin tmax')

gdd_base = 283.
gdd = (((tmax + tmin) / (2*100)) - gdd_base).clip_values(min=0.)
gdd_ts = gdd.map(lambda img: wf.Dict[wf.Datetime, wf.Float].from_pairs([(img.properties['date'], img.median('pixels')['tmax_add_tmin'])]))

In [None]:
gdd_res = gdd_ts.compute(tile_weather)

In [None]:
gdd_dates = []
gdd_vals = []
for res in gdd_res:
    key = list(res.keys())[0]
    gdd_dates.append(pd.to_datetime(key.split('T')[0], format='%Y-%m-%d'))
    gdd_vals.append(res[key])

In [None]:
# Save these lists to text files for further processing
with open('gdd_vals.txt', 'w') as f:
    for item in gdd_vals:
        f.write("%s\n" % item)
textfile.close()

#### Precipitation

In [None]:
precip = ncepflow.pick_bands('prec')
precip_ts = precip.map(lambda img: wf.Dict[wf.Datetime, wf.Float].from_pairs([(img.properties['date'], img.max('pixels')['prec'])]))

In [None]:
precip_res = precip_ts.compute(tile)

In [None]:
precip_dates = []
precip_vals = []
for res in precip_res:
    key = list(res.keys())[0]
    precip_dates.append(pd.to_datetime(key.split('T')[0], format='%Y-%m-%d'))
    precip_vals.append(res[key] * 0.1)

#### Soil Moisture

In [None]:
soil_moisture = ncepflow.pick_bands('soilmoist2')
soil_moisture_ts = soil_moisture.map(lambda img: wf.Dict[wf.Datetime, wf.Float].from_pairs([(img.properties['date'], img.median('pixels')['soilmoist2'])]))

In [None]:
soil_res = soil_moisture_ts.compute(tile)

In [None]:
soil_dates = []
soil_vals = []
for res in soil_res:
    key = list(res.keys())[0]
    soil_dates.append(pd.to_datetime(key.split('T')[0], format='%Y-%m-%d'))
    soil_vals.append(res[key] * 0.01)

Now lets plot the results

In [None]:
ylabs = ['NDVI', 'GDDs', 'Precipitation (mm)', 'Soil Moisture (%)']

fig, axs = plt.subplots(4, 1, figsize=(14, 10), sharex=True)
axs[0].plot(group_dates, ndvi_vals)
# axs[1].plot(gdd_dates, gdd_vals)
# axs[2].plot(precip_dates, precip_vals)
# axs[3].plot(soil_dates, soil_vals)

for (ax, lab) in zip(axs, ylabs):
    ax.set_ylabel(lab, size=13)
    ax.tick_params('both', labelsize=12)

axs[-1].set_xlabel('Date', size=13)