## Inverse Distance Weighting (IDW) for spatial data gap filling due to cloud coverage

We start by defining a specific geometry that acts as a synthetic cloud inside a Landsat scene, so that the IDW algorithm can be tested. The algorithm is used to fill spatial data gaps caused by cloud coverage. The data being interpolated is evapotranspiration (ET) calculated by the eeMETRIC model and extracted from the Google Earth Engine data repository.

In [1]:
# Import the Earth Engine module, which provides the GEE Python API interface
import ee
ee.Initialize(project='enter your GEE cloud project name')  # Replace the placeholder string 'enter your GEE cloud project name' with the name of your own GEE Cloud project

In [2]:
# Daily ET image collection calculated by the eeMETRIC model
# (all available dates in the GEE repository)
ET = ee.ImageCollection("projects/openet/assets/eemetric/conus/gridmet/landsat/c02")

# Note: a polygon for any location of interest can be defined here;
# this notebook was tested on one location in Nebraska.
# Outline of the synthetic cloud
artificial_cloud = ee.Geometry.Polygon(
    [[
        [-96.3384851427627, 41.98461447066241],
        [-96.3384851427627, 41.89727618376869],
        [-96.19978275018458, 41.89727618376869],
        [-96.19978275018458, 41.98461447066241],
    ]],
    None,
    False,
)

# Test location covering a small field
Field1 = ee.Geometry.Polygon(
    [[
        [-96.22126709256656, 41.97858925760811],
        [-96.20822082791813, 41.97858925760811],
        [-96.20822082791813, 41.9830555272184],
        [-96.22126709256656, 41.9830555272184],
        [-96.22126709256656, 41.97858925760811],
    ]],
    None,
    False,
)

# Geometry that is masked out of the images and then gap-filled by IDW
area2mask = Field1


In [3]:
# Apply the artificial (synthetic) cloud mask to one image
def artificial_cloud_image(image):
    """Return `image` masked with the module-level `masking` image.

    The properties of `image` are copied onto the masked result, and
    'system:time_start' is then copied explicitly as well.
    """
    clouded = image.updateMask(masking)
    with_properties = clouded.copyProperties(image)
    return with_properties.copyProperties(image, ['system:time_start'])

# Add the image time as an extra band with the short name 'timestamp'
def addTimeBand(image):
    """Return `image` with an additional 'timestamp' band.

    The band is built from the image's 'system:time_start' metadata and is
    masked with the mask of the image's first band (index 0).
    """
    first_band_mask = image.mask().select(0)
    timestamp_band = (
        image.metadata('system:time_start')
        .rename('timestamp')
        .updateMask(first_band_mask)
    )
    return image.addBands(timestamp_band)

# IDW interpolation used to fill the data gap inside `area2mask`
def interpolation(image):
    """Fill the masked pixels of `image` by inverse distance weighting (IDW).

    Valid 'et' pixels are sampled from a thin ring surrounding the
    module-level `area2mask` geometry. Their mean and standard deviation
    parameterise `FeatureCollection.inverseDistance`, and the resulting
    surface replaces the masked pixels of `image`.

    Args:
        image: An image with an 'et' band (anything accepted by `ee.Image`).

    Returns:
        ee.Image: `image` with its masked pixels filled by the IDW surface.
        If the ring yields no samples, the masked pixels are filled with the
        constant -20 instead. In both cases 'system:time_start' is copied
        from the input image.
    """
    image = ee.Image(image)

    # Ring just outside the masked area: buffer by 30 (the Landsat pixel
    # size) and cut the original geometry back out.
    ring = area2mask.buffer(ee.Number(30)).difference(area2mask, 0.001)

    # Fallback result, used only when the ring holds no valid samples.
    fallback = image.unmask(-20)

    # Known pixel values around the gap, kept as point features so that
    # inverseDistance has geometries to interpolate from.
    samples = ee.FeatureCollection(image.sample(region=ring, geometries=True))
    has_samples = samples.size().gt(0)

    def fill_with_idw(sample_stats):
        """Build the IDW surface from the sample statistics and fill `image`."""
        sample_stats = ee.Dictionary(sample_stats)
        idw_surface = samples.inverseDistance(
            range=1e4,
            propertyName='et',
            mean=sample_stats.get('mean'),
            stdDev=sample_stats.get('stdDev'),
            gamma=0.3,
        )
        return image.unmask(idw_surface).copyProperties(image, ['system:time_start'])

    # Mean and standard deviation of the sampled 'et' values, merged into a
    # single dictionary with the keys 'mean' and 'stdDev'.
    mean_stats = samples.reduceColumns(reducer=ee.Reducer.mean(), selectors=['et'])
    std_stats = samples.reduceColumns(reducer=ee.Reducer.stdDev(), selectors=['et'])

    interpolated = ee.Algorithms.If(
        has_samples,
        fill_with_idw(mean_stats.combine(std_stats, False)),
        fallback.copyProperties(image, ['system:time_start']),
    )

    return ee.Image(interpolated)

# Store a feature's coordinates as 'long'/'lat' properties so that they can be
# exported as columns of the CSV file
def addGeom2Property(feature):
    """Return `feature` with 'long' and 'lat' properties set from its geometry.

    'long' is element 0 and 'lat' is element 1 of the geometry's coordinates.
    """
    xy = ee.Feature(feature).geometry().coordinates()
    return feature.set({'long': xy.get(0), 'lat': xy.get(1)})

In [4]:
# Keep only the "et" band, which holds the ET data of the image collection
et = ET.select('et')

# Restrict the ET data to one specific path and row
filtered = et.filter(
    ee.Filter.And(
        ee.Filter.eq('wrs2_path', 28),
        ee.Filter.eq('wrs2_row', 31),
    )
)

# Control images: cloud cover less than or equal to 10%
filtered_control = filtered.filter(ee.Filter.lte('CLOUD_COVER', 10))

# Remaining images: cloud cover greater than 10%
filtered = filtered.filter(ee.Filter.gt('CLOUD_COVER', 10))

# Constant-1 image clipped to the synthetic cloud; inverting its mask gives
# the mask that hides the synthetic cloud area
masking = ee.Image.constant(1).clip(area2mask).mask().Not()

# Apply the synthetic cloud to every control image to build the collection
# of images that contain the synthetic cloud
filtered_control1 = filtered_control.map(artificial_cloud_image)


In [5]:
# Run the interpolation over every image so that the synthetic cloud is filled
# from the surrounding pixels with known values
interpolatedCol = ee.ImageCollection(
    filtered_control1.map(interpolation)
)
# Number of images in the interpolated collection
print(interpolatedCol.size().getInfo())

122


In [6]:
# Build the lists of interpolated (filled) images and original (control)
# images in preparation for exporting them from GEE
size = interpolatedCol.size().getInfo()
filledList = interpolatedCol.toList(size)
controlList = filtered_control.toList(size)
print(size)

122


Exporting the filled/interpolated images in CSV format, with the x, y coordinates and the ET value at each coordinate

In [7]:
# Start one Drive export task per filled image
for i in range(size):
    # Marker printed when the last image is reached
    if i == size - 1:
        print('kk')
    img = ee.Image(filledList.get(i))
    outName = img.id().getInfo() + '_filled'
    # Multiply the stored values by 0.0001 before sampling
    filled = img.multiply(0.0001)
    # One point feature per pixel inside the masked area
    sample = filled.sample(region=area2mask, scale=30, geometries=True)
    # Add 'long'/'lat' properties so they can be selected as CSV columns
    sample = sample.map(addGeom2Property)

    task_config = {
        'fileNamePrefix': outName,
        'fileFormat': 'CSV',
        'selectors': ['lat', 'long', 'et'],
        'folder': 'Output_IDW',
    }
    task = ee.batch.Export.table.toDrive(sample, outName, **task_config)
    task.start()

kk


Exporting the control/original images in CSV format, with the x, y coordinates and the ET value at each coordinate

In [9]:
# Start one Drive export task per control (original) image
for i in range(size):
    img = ee.Image(controlList.get(i))
    outName = img.id().getInfo() + '_control'
    # Multiply the stored values by 0.0001 before sampling
    control = img.multiply(0.0001)
    # One point feature per pixel inside the masked area
    sample = control.sample(region=area2mask, scale=30, geometries=True)
    # Add 'long'/'lat' properties so they can be selected as CSV columns
    sample = sample.map(addGeom2Property)

    task_config = {
        'fileNamePrefix': outName,
        'fileFormat': 'CSV',
        'selectors': ['lat', 'long', 'et'],
        'folder': 'Output_IDW',
    }
    task = ee.batch.Export.table.toDrive(sample, outName, **task_config)
    task.start()