In [None]:
from datacube import Datacube
cdc = Datacube(config='/g/data/u46/users/ext547/ewater/cambodia_cube/cambodia.conf')
from datacube_stats.statistics import GeoMedian
from datacube.storage import masking
from datacube.storage.masking import mask_to_dict
from datacube.storage.storage import write_dataset_to_netcdf

import numpy as np
import xarray as xr
import dask
import pickle

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

sys.path.append('/g/data/u46/users/sc0554/datacube-hyptest/')
import 

#Import external dea-notebooks functions using relative link to Scripts directory
import sys
sys.path.append('/g/data/u46/users/sc0554/dea-notebooks/Scripts')
import DEAPlotting
import BandIndices

In [None]:
def LoadAreaOfInterest(study_area):
    '''
    Load cloud-masked data for a study area, using a pickle in the output folder as a cache.

    Firstly, LoadAreaOfInterest checks whether a pickle, that contains an xarray of nbar data, 
    is saved in the output folder. If there is no pickle, the function calls the 
    "AreaOfInterest" function to gain lat/lon information for that particular study_area. If 
    there are no lat/lon coordinates in the AreaOfInterest function, a message is printed and 
    None is returned. If the coordinates are found, data is loaded and masked for every sensor 
    in `sensors`. Finally data from all sensors are concatenated together into one xarray, 
    sorted by time, and saved as a pickle for the next call.

    Last modified: March 2018
    
    Author: Erin Telfer
    
    Inputs: 
    study_area - the name of the study area (case and spaces are ignored)

    Notebook-level variables read:
    output_folder, start_of_epoch, end_of_epoch, sensors, cloud_free_threshold, cdc

    Returns:
    the concatenated, time-sorted xarray object with 'crs' and 'affine' stored in its attrs, 
    or None when AreaOfInterest has no coordinates for study_area.
    '''

    study_area = study_area.lower().replace(" ", "")
    pickle_location = '{0}{1}.pkl'.format(output_folder, study_area)

    # Fast path: reuse a previously saved pickle.
    # NOTE(security): pickle.load can execute arbitrary code, so output_folder must
    # only contain files written by a trusted run of this notebook.
    try:
        with open(pickle_location, "rb") as pickle_file:
            nbar_clean = pickle.load(pickle_file)
    except FileNotFoundError:
        print("No {0}.pkl file found on file".format(study_area))
    else:
        print("Nbar pickle has been found on file")
        print("Nbar pickle has been loaded")
        return nbar_clean

    # Only the coordinate lookup is guarded, so that errors raised while loading
    # or masking data are not mis-reported as missing coordinates. An unknown
    # study area can surface as a failed tuple unpacking (TypeError), as unbound
    # coordinates (UnboundLocalError) or as an explicit ValueError.
    try:
        lat_min, lat_max, lon_min, lon_max = AreaOfInterest(study_area)
    except (TypeError, ValueError, UnboundLocalError):
        print("please add lat/lon details to AreaOfInterest function")
        return None
    print("Location information from the AreaOfInterest function has been read")

    print("Loading Cambodia Cube data")

    #define wavelengths/bands of interest
    bands_of_interest = ['green',
                         'red',
                         'nir',
                         'pixel_qa',
                         #'blue',
                         #'swir2',
                         #'swir1'
                         ]

    #query is created
    query = {'time': (start_of_epoch, end_of_epoch),
             'x': (lon_min, lon_max),
             'y': (lat_max, lat_min),
             'crs': 'EPSG:4326'}

    #flags that a pixel must satisfy to be treated as clear
    mask_components = {'cloud_shadow': 'no_cloud_shadow',
                       'cloud': 'no_cloud'}

    sensor_clean = {}
    for sensor in sensors: #loop through specified sensors
        sensor_nbar = cdc.load(product=sensor + '_usgs_sr_scene',
                               measurements=bands_of_interest,
                               group_by='solar_day',
                               **query) #load nbar
        #retrieve the projection information before masking/sorting
        crs = sensor_nbar.crs
        affine = sensor_nbar.affine
        #assign pq data variable
        sensor_pq = sensor_nbar.pixel_qa
        #create and use quality and cloud masks
        quality_mask = masking.make_mask(sensor_pq, **mask_components)
        good_data = quality_mask.loc[start_of_epoch:end_of_epoch]
        sensor_nbar2 = sensor_nbar.where(good_data)
        del sensor_nbar

        #calculate the fraction of cloud free pixels for each scene; the same
        #mask is reused rather than being computed a second time
        mostly_cloud_free = quality_mask.mean(dim=('x', 'y')) >= cloud_free_threshold
        del quality_mask

        #discard data that does not meet the cloud_free_threshold
        mostly_good = sensor_nbar2.where(mostly_cloud_free).dropna(dim='time',
                                                                   how='all')
        nodata_mask = mostly_good.mean(dim=('x', 'y')) >= -9998
        mostly_good = mostly_good.drop('pixel_qa')
        mostly_good = mostly_good.where(nodata_mask).dropna(dim='time',
                                                            how='all')
        del sensor_nbar2
        #assign masked data to array
        sensor_clean[sensor] = mostly_good

        print('loaded %s' % sensor)
    print('ls load complete')

    #data from different sensors are joined together and sorted so that
    #observations are sorted by time rather than sensor
    nbar_clean = xr.concat(list(sensor_clean.values()), 'time')
    nbar_clean = nbar_clean.sortby('time')
    #projection information comes from the last sensor loaded
    nbar_clean.attrs['crs'] = crs
    nbar_clean.attrs['affine'] = affine

    print("saving nbar data as {0}.pkl".format(study_area))

    with open(pickle_location, "wb") as pickle_file:
        pickle.dump(nbar_clean, pickle_file) #save nbar as pickle
    return nbar_clean

In [None]:
# Bounding box of each named study area, stored as
# (lat_min, lat_max, lon_min, lon_max), i.e. (down, up, left, right).
_STUDY_AREA_BOUNDS = {
    'phumsrahkaev':    (13.000, 13.100, 103.300, 103.400),
    'outapaong':       (12.600, 12.800, 103.600, 103.800),
    'mondulkiri':      (12.863, 13.663, 106.350, 107.236),
    'krongstungtreng': (13.181, 13.681, 105.781, 106.381),
    'kaohnheaek':      (13.000, 13.100, 107.000, 107.100),
    'neakleoang':      (11.246, 11.532, 105.141, 105.380),
    'tonlesaplake':    (13.020, 13.120, 103.740, 103.840),
    'maximum_extent':  (9.25, 15.25, 101.75, 108.25),
}


def AreaOfInterest(study_area):
    '''
    Return the bounding box coordinates of a named study area.

    Inputs:
    study_area - the name of the study area, spelled exactly as one of the keys
                 of _STUDY_AREA_BOUNDS (lower case, no spaces)

    Returns:
    a tuple (lat_min, lat_max, lon_min, lon_max)

    Raises:
    ValueError - if study_area is not a known name. Add its coordinates to
                 _STUDY_AREA_BOUNDS to support a new study area.
    '''
    try:
        return _STUDY_AREA_BOUNDS[study_area]
    except KeyError:
        known_areas = ', '.join(sorted(_STUDY_AREA_BOUNDS))
        raise ValueError(
            "Unknown study area {0!r}; known study areas are: {1}".format(
                study_area, known_areas)) from None



In [None]:
def one_band_image_subplots(ds, num_cols, figsize = [10,40], left  = 0.125, 
                              right = 0.9, bottom = 0.1, top = 0.9, 
                              wspace = 0.2, hspace = 0.4):
    '''
    one_band_image_subplots takes a dataset with one band and multiple time steps, 
    and plots each time step as an image in a grid of subplots. Each subplot is 
    titled with the first 10 characters of its time value.
    Last modified: March 2018
    Author: Mike Barnes
    Modified by: Claire Krause and Erin Telfer
    
    Inputs: 
    ds -   Dataset containing the band to be plotted; it must have a `time` 
           coordinate and each time step is passed directly to imshow
    num_cols - number of columns for the subplot
    
    Optional:
    figsize - dimensions for the output figure
    left  - the space on the left side of the subplots of the figure
    right - the space on the right side of the subplots of the figure
    bottom - the space on the bottom of the subplots of the figure
    top - the space on the top of the subplots of the figure
    wspace - the amount of width reserved for blank space between subplots
    hspace - the amount of height reserved for white space between subplots

    Raises:
    ValueError - if ds has no time steps
    '''
    # Find the number of rows we need, based on the number of time steps in ds
    timesteps = ds.time.size
    if timesteps == 0:
        raise ValueError('ds contains no time steps to plot')
    num_rows = int(np.ceil(timesteps / num_cols))

    # plt.subplots creates the figure itself, so no separate plt.figure call is
    # made (that would leave an extra, empty figure behind)
    fig, axes = plt.subplots(num_rows, num_cols, figsize = figsize)
    fig.subplots_adjust(left  = left, right = right, bottom = bottom, top = top, 
                        wspace = wspace, hspace = hspace)

    # Iterate over a snapshot of the axes because surplus ones are removed below
    for i, ax in enumerate(list(fig.axes)):
        if i < timesteps:
            image_ds = ds.isel(time = i)
            ax.set_title(str(image_ds.time.values)[0:10])
            ax.imshow(image_ds, interpolation = 'nearest') #plot image as subplot
        else:
            # The grid has more panels than time steps: remove every unused panel
            fig.delaxes(ax)
    plt.draw()

In [None]:
# --- Study area ---------------------------------------------------------------
# Name of the study area, then normalised to lower case without spaces
study_area = 'phumsrahkaev'
study_area = study_area.replace(" ", "").lower()

# --- Temporal range (start date, end date) --------------------------------------
start_of_epoch, end_of_epoch = '2003-01-01', '2017-12-01'

# --- Landsat sensors of interest ------------------------------------------------
sensors = ['ls5', 'ls8']

# --- Cloud filtering ------------------------------------------------------------
# Minimum fraction of pixels per scene that must pass the cloud/cloud-shadow mask
cloud_free_threshold = 0.10

# --- Output ---------------------------------------------------------------------
# Folder holding the cached pickles; the trailing "/" is needed because the file
# name is appended directly to this string
output_folder = '/g/data/u46/users/sc0554/NDVI_NDWI_hyptest/'

In [None]:
# Load the masked data for the chosen study area (from the pickle if one exists)
nbar_clean = LoadAreaOfInterest(study_area=study_area)

In [None]:
# Compute the 'NDVI' and 'NDWI' indices from the loaded data, in that order
NDVI, NDWI = (
    BandIndices.calculate_indices(nbar_clean, index_name)
    for index_name in ('NDVI', 'NDWI')
)

In [None]:
# Plot NDVI (left column) next to NDWI (right column), one row per time step.
# At most max_scenes rows are drawn; fewer when fewer time steps are available,
# so that indexing past the end of the time axis cannot occur.
max_scenes = 100
n_scenes = min(max_scenes, NDVI.time.size, NDWI.time.size)

if n_scenes == 0:
    print('No scenes available to plot')
else:
    # squeeze=False keeps `axes` two-dimensional even when only one row is drawn;
    # the figure height scales at 5 per row (500 for the full 100 rows)
    fig, axes = plt.subplots(n_scenes, 2, sharex='col', sharey='row',
                             figsize=(12, 5 * n_scenes), squeeze=False)
    for i in range(n_scenes):
        ndvi_scene = NDVI.isel(time=i)
        ndwi_scene = NDWI.isel(time=i)
        NDVIplot = axes[i][0].imshow(ndvi_scene.values)
        axes[i][0].set_title('NDVI ' + str(ndvi_scene.time.values)[0:10])
        axes[i][1].imshow(ndwi_scene.values)
        axes[i][1].set_title('NDWI ' + str(ndwi_scene.time.values)[0:10])
    plt.tight_layout()
    plt.show()