In [1]:
import sys

import ee
import requests
import os
from osgeo import gdal, ogr, osr
import shutil
from time import sleep
from skimage.restoration import denoise_tv_bregman
from math import ceil
from googleapiclient.discovery import build
import pickle
import matplotlib.pyplot as plt
import numpy as np

ee.Initialize()

# Constants

In [2]:
# Approximate size of one meter expressed in decimal degrees: one arc-second
# (1/3600 of a degree) is roughly 30 meters on the ground near the equator.
# Used to convert pixel sizes given in meters to EPSG:4326 degrees.
METERS_TO_DECIMAL_DEGREES_CONST = 1/30/3600

#the value we use to signify no data at a pixel
NO_DATA_VALUE = 65535

#bands to despeckle
BANDS_TO_DESPECKLE = ['HH', 'HV']

#store the training data here
DATA_FOLDER = 'training_data'
PREDICTION_FOLDER = 'prediction_data'

#create training data folder if not exists
os.makedirs(DATA_FOLDER, exist_ok=True)

#create prediction data folder if not exists
os.makedirs(PREDICTION_FOLDER, exist_ok=True)

# User Input Area

In [3]:
#the shapefile storing training polygons; each polygon is expected to carry
#a 'class_id' and a 'confidence' field (both are read by the driver code below)
#NOTE: absolute, machine-specific path - change it to the location on your machine
INPUT_POLYGONS_FILE = 'C:/Users/ritvik/Desktop/JPLProject/mapping-colombia-wetlands/input_polygons/input_polygons.shp'

In [4]:
#the shapefile storing prediction polygons (the regions to classify with the trained model)
#NOTE: absolute, machine-specific path - change it to the location on your machine
PREDICTION_AREA_FILE = 'C:/Users/ritvik/Desktop/JPLProject/mapping-colombia-wetlands/prediction_polygons/prediction_region.shp'

In [5]:
#the list of bands to use for training. Choose from:
#Sentinel-2: ['B2', 'B3', 'B4', 'B8', 'NDVI', 'NDWI']
#ALOS-2: ['HH', 'HV']
#NDVI is only computed when B4 and B8 are requested from Sentinel-2, NDWI only when B3 and B8 are.
#get_training_data exports the bands in sorted (alphabetical) order, so that is the
#column order of the feature arrays, not the order written here.
SELECTED_BANDS = ['B2', 'NDWI', 'NDVI', 'HH', 'HV']
#number of feature columns per pixel
NUM_FEATURES = len(SELECTED_BANDS)

In [6]:
#any pixel above this elevation (in meters) is overwritten with NO_DATA_VALUE
#before export, so it is disregarded from training and prediction
MAX_CONSIDERED_ELEVATION = 100

In [7]:
#the folder id in Google Drive where to temporarily store the GEE data before locally downloading
#NOTE: files found in this folder are downloaded and then deleted from Drive by
#execute_tasks_in_batches, so use a folder dedicated to these exports
GOOGLE_EARTH_ENGINE_GDRIVE_FOLDER_ID = '1KvlrUHs_rN7xPlw53qtd9pweeLwmrJSP'

# Functions to Manipulate data from Google Drive

In [8]:
def download_file_from_google_drive(file_id, destination):
    """
    Download a file from Google Drive and save it to a local path.

    Inputs:
        file_id: the Google Drive id of the file to download
        destination: the local path the file content is written to

    Raises:
        requests.HTTPError: if Google Drive answers with an HTTP error status
    """
    URL = "https://docs.google.com/uc?export=download"

    #the context manager releases the session's connections once we are done
    with requests.Session() as session:

        response = session.get(URL, params = { 'id' : file_id }, stream = True)
        token = get_confirm_token(response)

        #if Drive answered with a download warning, repeat the request with the confirmation token
        if token:
            response.close()
            params = { 'id' : file_id, 'confirm' : token }
            response = session.get(URL, params = params, stream = True)

        #fail loudly instead of silently saving an error page as if it were the data
        response.raise_for_status()

        save_response_content(response, destination)

In [9]:
def get_confirm_token(response):
    """
    Return the value of the first cookie whose name starts with
    'download_warning', or None when the response has no such cookie.
    """
    warning_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(warning_values, None)

In [10]:
def save_response_content(response, destination):
    """
    Stream the body of `response` into the file `destination`,
    reading it in chunks of 32768 bytes.
    """
    chunk_bytes = 32768

    with open(destination, "wb") as out_file:
        #empty chunks (keep-alive) are dropped before writing
        for piece in filter(None, response.iter_content(chunk_bytes)):
            out_file.write(piece)

In [11]:
def get_file_ids_from_google_drive():
    """
    List the export files in the Google Drive folder GOOGLE_EARTH_ENGINE_GDRIVE_FOLDER_ID.

    Only files whose name has exactly one '-' are considered; the part before
    the '-' is used as the key.

    Outputs:
        a dictionary mapping that name prefix to the Google Drive file id
    """
    creds = None
    if os.path.exists('token.pickle'):
        #NOTE(review): unpickling can execute arbitrary code - only use a token file you created yourself
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)

    service = build('drive', 'v3', credentials=creds)

    file_name_to_file_id = {}

    #the listing is paginated: keep requesting pages until there is no next page,
    #otherwise files beyond the first page would be silently ignored
    request = service.files().list(q="parents in '%s'"%GOOGLE_EARTH_ENGINE_GDRIVE_FOLDER_ID)
    while request is not None:
        result = request.execute()

        for info in result.get('files', []):
            name_parts = info['name'].split('-')
            if len(name_parts) == 2:
                file_name_to_file_id[name_parts[0]] = info['id']

        request = service.files().list_next(request, result)

    return file_name_to_file_id

In [12]:
def delete_file_from_google_drive_by_file_id(fid):
    """
    Delete the Google Drive file with id `fid`, using the credentials
    stored in 'token.pickle' when that file exists.
    """
    token_path = 'token.pickle'

    creds = None
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token_file:
            creds = pickle.load(token_file)

    drive_service = build('drive', 'v3', credentials=creds)

    delete_request = drive_service.files().delete(fileId=fid)
    delete_request.execute()

# Functions to Download Data From Google Earth Engine

In [13]:
def get_training_data(polygon_features, features, gdrive_folder, date_range, primary_dataset, selected_bands):
    """
    Define (but do not start) one Google Earth Engine export task per polygon. Each task
    exports the selected bands for that polygon to Google Drive.

    For every polygon, each data source in `features` is loaded (image collections are
    reduced to the median of the images found in `date_range`), all sources are stacked
    into one image, invalid pixels are overwritten with NO_DATA_VALUE and the export of
    `selected_bands` is defined.

    Inputs:
        polygon_features: a list of polygon features (as read from a shapefile with ogr), one per region
        features: a dictionary mapping (data_type, data_source) to the list of bands to load from
                  that source, where data_type is 'collection' or 'image'
        gdrive_folder: the name of the Google Drive folder the exports are written to
        date_range: [start, end] dates used to filter the image collections
        primary_dataset: the data source whose resolution defines the export pixel size;
                         it has to be one of the 'collection' sources in `features`
        selected_bands: the bands to keep in the exported image (they are exported in sorted order)

    Outputs:
        a list of (file name, task) tuples. If some collection has no image for a polygon,
        the tasks defined up to that point are returned.

    Raises:
        ValueError: if a data type is neither 'collection' nor 'image', or if the
                    resolution of `primary_dataset` could not be determined
    """

    #this will store all defined tasks, keyed by file name
    tasks = {}

    #the exported bands are sorted; this does not depend on the polygon
    export_bands = sorted(selected_bands)

    #bands requested from the sources that provide quality masks (empty if a source is not requested)
    alos_bands = features.get(('collection','JAXA/ALOS/PALSAR/YEARLY/SAR'), [])
    s2_bands = features.get(('collection','COPERNICUS/S2_SR'), [])
    srtm_bands = features.get(('image','CGIAR/SRTM90_V4'), [])

    #work through each sub-region
    for idx,polygon_feature in enumerate(polygon_features):

        filtered_imgs = []

        #resolution of the primary dataset, filled in while iterating over the data sources
        eventual_res = None

        geometry = polygon_feature.GetGeometryRef()

        #store the reference coordinates (entries 0 and 2 of the bounding envelope)
        envelope = geometry.GetEnvelope()
        ref_coords = (envelope[0], envelope[2])

        #get polygon area coordinates
        area_coords = [list(pair) for pair in geometry.GetBoundary().GetPoints()]

        #create an area of interest from Earth Engine Geometry
        area_of_interest = ee.Geometry.Polygon(coords=area_coords)

        #iterate over each data source
        for data_type_source, bands in features.items():
            data_type = data_type_source[0]
            data_source = data_type_source[1]

            print('Working on data source: %s...'%data_source)

            if data_type == 'collection':
                #access the Earth Engine image collection with the specified bands
                data = ee.ImageCollection(data_source).select(bands)

                #filter on area of interest and date range
                data_filtered = data.filterBounds(area_of_interest).filterDate(date_range[0], date_range[1])

                #ensure there is at least 1 image
                num_items = data_filtered.size().getInfo()
                if num_items == 0:
                    print('no items found, returning started tasks.')
                    #same return type as the normal exit, so callers can always slice the result
                    return list(tasks.items())

                band_info = data_filtered.first().getInfo()['bands'][0]

                #if crs is already EPSG 4326, get resolution directly, otherwise need to transform from meters
                if band_info['crs'] == 'EPSG:4326':
                    res = band_info['crs_transform'][0]
                else:
                    res = band_info['crs_transform'][0] * METERS_TO_DECIMAL_DEGREES_CONST

                #if this is the eventual primary dataset, store its resolution
                if data_source == primary_dataset:
                    eventual_res = res

                #get a mosaic as median of all returned images
                mosaic = ee.Image(data_filtered.median())

                if data_source == 'COPERNICUS/S2_SR':

                    #calculate NDVI
                    if 'B4' in bands and 'B8' in bands:
                        ndvi = mosaic.normalizedDifference(['B8', 'B4']).rename('NDVI')
                        mosaic = ee.Image.addBands(mosaic, ndvi)

                    #calculate NDWI
                    #NOTE(review): this is (B8 - B3) / (B8 + B3); NDWI is often defined with the
                    #opposite sign, (green - NIR) / (green + NIR) - confirm this is intended
                    if 'B3' in bands and 'B8' in bands:
                        ndwi = mosaic.normalizedDifference(['B8', 'B3']).rename('NDWI')
                        mosaic = ee.Image.addBands(mosaic, ndwi)

            elif data_type == 'image':
                mosaic = ee.Image(data_source).select(bands)

            else:
                #without this, the mosaic of the previous data source would be added a second time
                raise ValueError('Unknown data type %s for data source %s'%(data_type, data_source))

            #add this mosaic to the list
            filtered_imgs.append(mosaic)

        #the export grid is defined by the resolution of the primary dataset
        if eventual_res is None:
            raise ValueError('Primary dataset %s is not one of the collection data sources'%primary_dataset)

        #generate file name
        features_str = '_'.join([item[1] for item in features.keys()]).replace('/','_')
        fname = '%s-%s'%(idx, features_str)
        print(fname)

        #add the various layers on top of each other to create a data cube with all features
        final_img = ee.Image()

        for img in filtered_imgs:
            final_img = ee.Image.addBands(final_img,img)

        #use the ALOS qa band to filter out invalid pixels
        if 'qa' in alos_bands:
            qa_band = final_img.select('qa')
            qa_mask = qa_band.eq(0)
            final_img = final_img.where(qa_mask, NO_DATA_VALUE)

        #use the Sentinel-2 SCL band to filter out invalid pixels
        if 'SCL' in s2_bands:
            scl_band = final_img.select('SCL')
            scl_nodata_vals = [0,3,8,9,10]
            scl_mask = scl_band.eq(scl_nodata_vals[0])
            for v in scl_nodata_vals[1:]:
                scl_mask = scl_mask.Or(scl_band.eq(v))
            final_img = final_img.where(scl_mask, NO_DATA_VALUE)

        #use the SRTM elevation band to filter out invalid pixels
        if 'elevation' in srtm_bands:
            elevation_band = final_img.select('elevation')
            elevation_mask = elevation_band.gt(MAX_CONSIDERED_ELEVATION)
            final_img = final_img.where(elevation_mask, NO_DATA_VALUE)

        #if any of the selected bands has NO_DATA_VALUE, mark that whole pixel as NO_DATA_VALUE
        for b in export_bands:
            b_values = final_img.select(b)
            b_mask = b_values.eq(NO_DATA_VALUE)
            final_img = final_img.where(b_mask, NO_DATA_VALUE)

        #store the result with just the needed bands
        result = final_img.select(*export_bands).float()

        #define the task to gather the data
        task = ee.batch.Export.image.toDrive(image=result,
                                             region=area_of_interest.getInfo()['coordinates'],
                                             description=str(idx),
                                             folder=gdrive_folder,
                                             fileNamePrefix=fname,
                                             crs_transform=[eventual_res, 0.0, ref_coords[0], 0.0, -eventual_res, ref_coords[1]],
                                             crs='EPSG:4326')

        #store the task
        tasks[fname] = task

        print('==================================')

    return list(tasks.items())

In [14]:
def execute_tasks_in_batches(tasks, batch_size, FOLDER):
    """
    Run Earth Engine export tasks in batches and download the exported files.

    For each batch, all tasks are started and polled every 30 seconds until none is
    pending any more. Then every export file found in the Google Drive folder is
    downloaded to '<FOLDER>/features_<name>.tiff' and deleted from Drive.

    Inputs:
        tasks: a list of (file name, task) tuples, as returned by get_training_data
        batch_size: the number of tasks to run at the same time
        FOLDER: the local folder the exported files are downloaded to

    Raises:
        RuntimeError: if a task of the batch ends in a state other than COMPLETED
    """

    #states in which a task is finished; waiting only for COMPLETED would never
    #end if a task failed or was cancelled
    terminal_states = {'COMPLETED', 'FAILED', 'CANCELLED'}

    #process the tasks in small batches to avoid memory running out
    for batch_idx in range(ceil(len(tasks) / batch_size)):

        #get the current batch of tasks
        curr_tasks = tasks[batch_size*batch_idx:batch_size*(batch_idx+1)]
        print('Processing Batch %s'%(batch_idx+1))

        #start all tasks in that batch
        for name,task in curr_tasks:
            task.start()

        print('Started all tasks in batch')

        #wait until all tasks in that batch are finished
        curr_statuses = [task.status() for name,task in curr_tasks]
        curr_states = [status['state'] for status in curr_statuses]
        while not set(curr_states) <= terminal_states:
            print('Current states: %s'%curr_states)
            sleep(30)
            curr_statuses = [task.status() for name,task in curr_tasks]
            curr_states = [status['state'] for status in curr_statuses]

        #stop if any task did not complete successfully
        unsuccessful = ['%s: %s (%s)'%(name, status['state'], status.get('error_message'))
                        for (name,task), status in zip(curr_tasks, curr_statuses)
                        if status['state'] != 'COMPLETED']
        if unsuccessful:
            raise RuntimeError('Export tasks did not complete: %s'%'; '.join(unsuccessful))

        #once all tasks done, get their file ids on google drive
        file_name_to_file_id = get_file_ids_from_google_drive()

        #for each file...
        for fname, fid in file_name_to_file_id.items():

            #get feature file name
            features_file_name = '%s/features_%s.tiff'%(FOLDER, fname)

            #download the file (an existing local file of the same name is overwritten)
            print('Downloading %s from Drive'%fname)
            download_file_from_google_drive(fid, features_file_name)

            print('Deleting %s from Drive'%fname)
            delete_file_from_google_drive_by_file_id(fid)

        print('================================')

# Despeckling Functions

In [15]:
def img_to_db(img):
    """Convert values from linear scale to decibels (10 * log10)."""
    log_values = np.log10(img)
    return 10 * log_values

def db_to_img(img):
    """Convert values from decibels back to linear scale (10 ** (dB / 10))."""
    exponent = img / 10
    return 10**exponent

def tv_denoise(arr, idxs_to_despeckle, weight):
    """
    Return a copy of `arr` in which the layers arr[:,:,idx], for idx in
    `idxs_to_despeckle`, are denoised with denoise_tv_bregman.

    The denoising is applied in decibel scale and the result is converted
    back to linear scale. The input array is not modified.
    """
    denoised = arr.copy()

    for band_idx in idxs_to_despeckle:
        #denoise the layer in decibel scale
        band_db = img_to_db(denoised[:,:,band_idx])
        smoothed_db = denoise_tv_bregman(band_db, weight)

        #write the denoised layer, back in linear scale, into the copy
        denoised[:,:,band_idx] = db_to_img(smoothed_db)

    return denoised

# Functions to Process Feature Files

In [16]:
def process_feature_files(feat_file_names, confidence_levels=None, combine=True):
    """
    Read the downloaded feature files, despeckle the bands listed in BANDS_TO_DESPECKLE
    and keep only the pixels where every band has data.

    Inputs:
        feat_file_names: a list of names of the downloaded training data files
        confidence_levels: a list of confidence levels associated with each file in feat_file_names.
                           Only used when combine is True; if None, every file gets confidence 1
        combine: True if we wish to return a single numpy matrix. False if we wish to return a list of numpy matrices

    Outputs:
        the processed training data and auxiliary data like geotransforms. The auxiliary data maps
        each file name to its 'chosen_indices' (row and column indices of the kept pixels),
        'shape' (rows, columns, bands) and 'gt' (geotransform)

    Raises:
        RuntimeError: if a feature file cannot be opened
    """

    #this will store the numpy array of training data for each file
    data = []

    #this will store auxiliary data for each file
    feat_file_data = {}

    #iterate over each file
    for feat_file_name in feat_file_names:

        #open file and get geotransform
        ds = gdal.Open(feat_file_name, gdal.GA_ReadOnly)
        if ds is None:
            raise RuntimeError('Could not open feature file: %s'%feat_file_name)
        gt = ds.GetGeoTransform()

        #despeckle any bands which need to be despeckled
        idx_to_despeckle = [idx for idx in range(ds.RasterCount) if ds.GetRasterBand(idx+1).GetDescription() in BANDS_TO_DESPECKLE]
        arr = ds.ReadAsArray()

        #a single band file is read as a 2D array; give it a band axis as well
        if arr.ndim == 2:
            arr = arr[np.newaxis]

        #move the band axis to the end: (bands, rows, columns) -> (rows, columns, bands)
        arr = np.stack(list(arr), axis=-1)

        #NaN can only be stored in a floating point array
        if not np.issubdtype(arr.dtype, np.floating):
            arr = arr.astype(float)

        #denoise with NO_DATA_VALUE in place of the missing values (presumably so that
        #NaN does not spread to neighbouring pixels), then mark them as NaN
        data_mask = (arr == NO_DATA_VALUE) | np.isnan(arr)
        arr[data_mask] = NO_DATA_VALUE

        arr = tv_denoise(arr, idx_to_despeckle, 1)
        arr[data_mask] = np.nan

        ds = None

        #only pick indices where no bands are NaN
        chosen_indices = np.where(np.all(~np.isnan(arr), axis=-1))

        #add this training data to the list
        data.append(arr[chosen_indices])

        #add auxiliary information
        feat_file_data[feat_file_name] = {'chosen_indices': chosen_indices, 'shape': arr.shape, 'gt': gt}

    #if combining, resample by confidence score and then concatenate training data
    if combine:
        #without confidence levels, every file is sampled with the same weight
        if confidence_levels is None:
            confidence_levels = [1]*len(data)

        num_pixels_to_sample = np.median([item.shape[0] for item in data])

        sampled = []
        for idx,d in enumerate(data):
            num_samples = int(num_pixels_to_sample*confidence_levels[idx])
            if d.shape[0] == 0:
                #a file without any valid pixel has nothing to sample from
                sampled.append(d)
            else:
                #sample pixels with replacement
                sampled.append(d[np.random.choice(d.shape[0], num_samples)])

        return np.concatenate(sampled, axis=0), feat_file_data
    else:
        return data, feat_file_data

In [17]:
def get_classes(test_set, training_histograms, histogram_ranges, class_ids):
    """
    Classify each pixel as the class whose training histogram has the highest
    probability density at the position of the pixel.

    Inputs:
        test_set: the test set of features which we would like to classify, one row per pixel
        training_histograms: a dictionary of histograms, one for each class
        histogram_ranges: the bin cutoffs for the histograms, one row of cutoffs per feature
        class_ids: the class ids to choose from (at least two)

    Output:
        an array of predicted class ids and an array of corresponding scores. Both are NaN for
        pixels where even the most likely class has a density below the minimum allowable density.

    Raises:
        ValueError: if fewer than two class ids are given
    """

    #fix the order of the classes, so that a position in the probability matrix maps back to a class id
    class_ids = list(class_ids)

    #the score is based on the two most likely classes
    if len(class_ids) < 2:
        raise ValueError('At least two classes are needed to compute scores')

    #any probability density below this is considered as 0
    min_allowable_density = min([0.25*np.max(training_histograms[cid]) for cid in class_ids])

    #the position of each test pixel in the histograms is the same for every class, since
    #all histograms share the same bin cutoffs: compute it once.
    #above[b, n, f] is True if cutoff b of feature f is greater than feature f of pixel n
    cutoffs = np.transpose(histogram_ranges)[:, np.newaxis, :]
    above = cutoffs > test_set

    #the bin is the one before the first cutoff greater than the value;
    #values below the first cutoff go to the first bin
    indices = np.argmax(above, axis=0) - 1
    indices[indices < 0] = 0

    #values which no cutoff is greater than go to the last bin
    indices[~np.any(above, axis=0)] = -1
    indices = tuple(np.transpose(indices))

    #create matrix of probabilities for each class: the density of each class at each position
    prob_mtx = np.stack([training_histograms[cid][indices] for cid in class_ids], axis=-1)

    #sort matrix of probs
    sorted_probs = np.sort(prob_mtx, axis=1)

    #any pixel where the most likely class has (almost) 0 probability is NaN
    nan_indices = np.where(sorted_probs[:,-1] < min_allowable_density)[0]

    #compute scores based on ratio of most likely class to second most likely class
    #(a second most likely density of 0 gives an infinite ratio, which is a score of 1)
    with np.errstate(divide='ignore', invalid='ignore'):
        scores = 1/(1+np.exp(-(sorted_probs[:,-1] / sorted_probs[:,-2])))

    #apply NaN pixels
    scores[nan_indices] = np.nan

    #get the predicted class: map the position of the most likely class back to its class id
    pred_class = np.asarray(class_ids)[np.argmax(prob_mtx, axis=1)].astype(float)

    #apply NaN pixels
    pred_class[nan_indices] = np.nan

    return pred_class, scores

# Numpy Array to TIFF Functions

In [18]:
def create_raster(output_path, columns, rows, nband=1, gdal_data_type=gdal.GDT_Int32, driver=r'GTiff'):
    '''
    Create a raster with the given size, number of bands and data type at
    output_path, using the gdal driver with the given name.

    returns the gdal data source raster object
    '''

    # look up the driver by name and let it create the raster
    gdal_driver = gdal.GetDriverByName(driver)

    return gdal_driver.Create(output_path, columns, rows, nband, eType = gdal_data_type)

def np_array_to_raster(output_path, arr, geotransform, no_data=None, nband=1, gdal_data_type=gdal.GDT_Int32, spatial_reference_system_wkid=4326, driver=r'GTiff'):
    '''
    writes a numpy array to a raster file and returns the gdal raster data source

    keyword arguments:

    output_path -- full path to the raster to be written to disk
    arr -- numpy array containing data to write to raster, of shape (rows, columns)
           if nband is 1 and (rows, columns, nband) otherwise
    geotransform -- the geotransform to set on the output raster
    no_data -- value in numpy array that should be treated as no data
    nband -- the number of bands of the output raster
    gdal_data_type -- gdal data type of raster (see gdal documentation for list of values)
    spatial_reference_system_wkid -- well known id (wkid) of the spatial reference of the data
    driver -- string value of the gdal driver to use

    raises an Exception if the raster could not be created
    '''

    rows, columns = arr.shape[0], arr.shape[1]

    # create output raster with the requested driver
    output_raster = create_raster(output_path, columns, rows, nband, gdal_data_type, driver)

    # the raster is None when it could not be created
    if output_raster is None:
        raise Exception('Failed to create raster: %s' % output_path)

    spatial_reference = osr.SpatialReference()
    spatial_reference.ImportFromEPSG(spatial_reference_system_wkid)
    output_raster.SetProjection(spatial_reference.ExportToWkt())
    output_raster.SetGeoTransform(geotransform)

    # bands of a raster are numbered starting from 1
    for band_idx in range(1,nband+1):
        output_band = output_raster.GetRasterBand(band_idx)
        if no_data is not None:
            output_band.SetNoDataValue(no_data)
        if nband > 1:
            output_band.WriteArray(arr[:,:,band_idx-1])
        else:
            output_band.WriteArray(arr)
        output_band.FlushCache()
        output_band.ComputeStatistics(False)

    if not os.path.exists(output_path):
        raise Exception('Failed to create raster: %s' % output_path)

    return output_raster

# Driver Code : Download Training Data

In [19]:
#open the shapefile with the training polygons
driver = ogr.GetDriverByName('ESRI Shapefile')
dataSource = driver.Open(INPUT_POLYGONS_FILE, gdal.GA_ReadOnly)
layer = dataSource.GetLayer()

#read all polygon features of the layer, in the order they are stored
polygon_features = [layer.GetNextFeature() for _ in range(layer.GetFeatureCount())]

#the distinct class ids found in the training polygons
class_ids = {f.GetField('class_id') for f in polygon_features}

In [20]:
#for each (data type, Earth Engine data source), the bands to load from it
features = {
    ('collection', 'JAXA/ALOS/PALSAR/YEARLY/SAR'): ['HH', 'HV', 'qa'],
    ('collection', 'COPERNICUS/S2_SR'): ['B2', 'B3', 'B4', 'B8', 'SCL'],
    ('image', 'CGIAR/SRTM90_V4'): ['elevation'],
}

#[start, end] dates used to filter the image collections
date_range = ['2017-01-01', '2019-01-01']

#name of the Google Drive folder the exports are written to
gdrive_folder = 'GoogleEarthEngine'

In [21]:
#define one export task per training polygon; the Sentinel-2 collection sets the export resolution
tasks = get_training_data(polygon_features, features, gdrive_folder, date_range, 'COPERNICUS/S2_SR', SELECTED_BANDS)

Working on data source: JAXA/ALOS/PALSAR/YEARLY/SAR...
Working on data source: COPERNICUS/S2_SR...
Working on data source: CGIAR/SRTM90_V4...
0-JAXA_ALOS_PALSAR_YEARLY_SAR_COPERNICUS_S2_SR_CGIAR_SRTM90_V4
Working on data source: JAXA/ALOS/PALSAR/YEARLY/SAR...
Working on data source: COPERNICUS/S2_SR...
Working on data source: CGIAR/SRTM90_V4...
1-JAXA_ALOS_PALSAR_YEARLY_SAR_COPERNICUS_S2_SR_CGIAR_SRTM90_V4
Working on data source: JAXA/ALOS/PALSAR/YEARLY/SAR...
Working on data source: COPERNICUS/S2_SR...
Working on data source: CGIAR/SRTM90_V4...
2-JAXA_ALOS_PALSAR_YEARLY_SAR_COPERNICUS_S2_SR_CGIAR_SRTM90_V4
Working on data source: JAXA/ALOS/PALSAR/YEARLY/SAR...
Working on data source: COPERNICUS/S2_SR...
Working on data source: CGIAR/SRTM90_V4...
3-JAXA_ALOS_PALSAR_YEARLY_SAR_COPERNICUS_S2_SR_CGIAR_SRTM90_V4
Working on data source: JAXA/ALOS/PALSAR/YEARLY/SAR...
Working on data source: COPERNICUS/S2_SR...
Working on data source: CGIAR/SRTM90_V4...
4-JAXA_ALOS_PALSAR_YEARLY_SAR_COPERNIC

In [22]:
#run the exports 10 at a time and download the results into the training data folder
execute_tasks_in_batches(tasks, 10, DATA_FOLDER)

Processing Batch 1
Started all tasks in batch
Current states: ['READY', 'READY', 'READY', 'READY', 'READY', 'READY', 'READY', 'READY', 'READY', 'READY']
Current states: ['RUNNING', 'COMPLETED', 'RUNNING', 'READY', 'READY', 'READY', 'READY', 'READY', 'READY', 'READY']
Current states: ['COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'READY', 'READY', 'READY', 'READY', 'READY', 'READY']
Current states: ['COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'READY', 'READY', 'READY', 'READY']
Current states: ['COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'RUNNING', 'RUNNING', 'READY']
Current states: ['COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'RUNNING', 'COMPLETED']
Current states: ['COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'COMPLETED', 'RUNNING', 'COMPLETED']
Downloading 8 from Drive
Deleting 8 from Drive
Do

# Driver Code : Get Training and Testing Sets

In [23]:
#for each class id, the training rows and the held-out testing rows
training_datasets = {}
testing_datasets = {}

for class_id in class_ids:
    #the downloaded file and the confidence of every polygon labelled with this class
    class_polygons = [(idx, f) for idx,f in enumerate(polygon_features) if f.GetField('class_id') == class_id]
    feat_file_names = ['%s/features_%s.tiff'%(DATA_FOLDER, idx) for idx,_ in class_polygons]
    confidence_levels = [f.GetField('confidence') for _,f in class_polygons]

    data, _ = process_feature_files(feat_file_names, confidence_levels)

    #keep only the pixels where every feature is strictly within 2 standard deviations of its mean
    center = data.mean(axis=0)
    spread = 2*data.std(axis=0)
    lower_lims = center - spread
    upper_lims = center + spread

    valid_indices = np.all((data < upper_lims) & (data > lower_lims), axis=1)
    data = data[valid_indices]

    #shuffle the pixels, then use the first 75% for training and the rest for testing
    np.random.shuffle(data)
    train_len = int(len(data)*0.75)

    training_datasets[class_id], testing_datasets[class_id] = data[:train_len], data[train_len:]

In [24]:
#for each feature, the smallest and largest value over the training data of all classes
#(the numpy reductions avoid iterating over every pixel in Python)
min_feature_values = [min([dataset[:,idx].min() for dataset in training_datasets.values()]) for idx in range(NUM_FEATURES)]
max_feature_values = [max([dataset[:,idx].max() for dataset in training_datasets.values()]) for idx in range(NUM_FEATURES)]

In [25]:
histogram_ranges = []
training_histograms = {}

#number of histogram bins per feature
num_bins = 3

#for each feature, num_bins equally wide bins between the smallest and largest training value.
#linspace makes the first and last cutoff exactly the minimum and maximum, so that no
#training value falls outside the bins because of floating point rounding
for idx in range(NUM_FEATURES):
    histogram_ranges.append(np.linspace(min_feature_values[idx], max_feature_values[idx], num_bins+1))
histogram_ranges = np.array(histogram_ranges)

#for each class, the multi-dimensional probability density of its training data over those bins
for class_id in class_ids:
    training_histograms[class_id] = np.histogramdd(training_datasets[class_id], bins=histogram_ranges, density=True)[0]

In [26]:
#classify the held-out pixels of each class and report how many got the label of their class
ordered_class_ids = sorted(class_ids)

for cid in class_ids:
    pred_class, scores = get_classes(testing_datasets[cid], training_histograms, histogram_ranges, ordered_class_ids)

    accuracy = np.mean(pred_class == cid)
    print('Accuracy for Class %s : %s'%(cid, accuracy))

    #the labels which were predicted, ignoring the pixels without prediction
    labelled = pred_class[~np.isnan(pred_class)]
    print('Unique Labels: %s'%np.unique(labelled))

Accuracy for Class 1 : 0.9611449855856316
Unique Labels: [1.]




# Driver Code : Use Model to Predict Region

In [27]:
#open the shapefile with the prediction polygons
driver = ogr.GetDriverByName('ESRI Shapefile')
dataSource = driver.Open(PREDICTION_AREA_FILE, gdal.GA_ReadOnly)
layer = dataSource.GetLayer()
#NOTE: from here on polygon_features holds the prediction polygons instead of the training polygons
polygon_features = [layer.GetNextFeature() for _ in range(layer.GetFeatureCount())]

In [28]:
#define one export task per prediction polygon, with the same data sources and bands as for training
tasks = get_training_data(polygon_features, features, gdrive_folder, date_range, 'COPERNICUS/S2_SR', SELECTED_BANDS)

Working on data source: JAXA/ALOS/PALSAR/YEARLY/SAR...
Working on data source: COPERNICUS/S2_SR...
Working on data source: CGIAR/SRTM90_V4...
0-JAXA_ALOS_PALSAR_YEARLY_SAR_COPERNICUS_S2_SR_CGIAR_SRTM90_V4


In [29]:
#run the exports 10 at a time and download the results into the prediction data folder
execute_tasks_in_batches(tasks, 10, PREDICTION_FOLDER)

Processing Batch 1
Started all tasks in batch
Current states: ['READY']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Current states: ['RUNNING']
Downloading 0 from Drive
Deleting 0 from Drive


In [None]:
#one downloaded feature file per prediction polygon
feat_file_names = ['%s/features_%s.tiff'%(PREDICTION_FOLDER, idx) for idx in range(len(polygon_features))]

#keep the regions separate, each one is written to its own raster
data, feat_file_data = process_feature_files(feat_file_names, combine=False)
print('Processed Prediction Regions')

#predicted classes and scores of the valid pixels of each region
ordered_class_ids = sorted(class_ids)
pred_class_scores = [get_classes(d, training_histograms, histogram_ranges, ordered_class_ids) for d in data]
print('Got Predicted Classes and Scores')

Processed Prediction Regions


In [None]:
#the value written to the raster wherever there is no prediction
PREDICTION_NO_DATA = -1

for idx, feat_file_name in enumerate(feat_file_names):
    file_info = feat_file_data[feat_file_name]

    #band 0 holds the predicted classes and band 1 the scores; NaN until filled in
    result = np.full(file_info['shape'][:2] + (2,), np.nan)

    #the predictions only exist for the valid pixels: put them back at their position
    rows, cols = file_info['chosen_indices']
    region_classes, region_scores = pred_class_scores[idx]
    result[rows, cols, 0] = region_classes
    result[rows, cols, 1] = region_scores

    #the raster declares PREDICTION_NO_DATA as its no data value, so the pixels without
    #prediction have to hold that value instead of NaN
    result[np.isnan(result)] = PREDICTION_NO_DATA

    ds = np_array_to_raster('%s/predicted_%s.tiff'%(PREDICTION_FOLDER, idx), result, file_info['gt'], no_data=PREDICTION_NO_DATA, nband=2, gdal_data_type=gdal.GDT_Float64)

    #dereference the raster so that it is closed and written to disk
    ds = None