In [11]:
import sys
sys.path.append('C:/Users/ritvik/Desktop/JPLProject/mapping-colombia-wetlands/')

import requests
import os
import numpy as np
from osgeo import gdal
import pickle
from common_functions import *

import matplotlib as mpl
mpl.rc('image', cmap='gray')

In [12]:
#this is the base folder where all data will be stored (one sub-folder per ROI)
#it defaults to the original local path; set the WETLANDS_ROI_FOLDER environment
#variable to run the notebook against data stored somewhere else
BASE_ROI_FOLDER = os.environ.get(
    'WETLANDS_ROI_FOLDER',
    'C:/Users/ritvik/Desktop/JPLProject/data/GoogleEarthEngineData/Sentinel2/roi',
)

# Functions to Download File from Google Drive

In [13]:
def download_file_from_google_drive(file_id, destination):
    """
    Download a file from Google Drive and write it to a local path.

    file_id: the Google Drive ID of the file to download
    destination: the local path the downloaded bytes are written to

    The first request may come back with a 'download_warning' cookie instead of
    the file (see get_confirm_token); when such a token is found the request is
    repeated with it before the response body is streamed to disk.

    Raises requests.HTTPError when the final response has an error status, so
    that an error page is never saved in place of the requested file.
    """
    URL = "https://docs.google.com/uc?export=download"

    #the context manager releases the session's connections even if the download fails
    with requests.Session() as session:
        response = session.get(URL, params = { 'id' : file_id }, stream = True)
        token = get_confirm_token(response)

        if token:
            params = { 'id' : file_id, 'confirm' : token }
            response = session.get(URL, params = params, stream = True)

        response.raise_for_status()

        #the body is streamed, so it has to be consumed while the session is still open
        save_response_content(response, destination)

In [14]:
def get_confirm_token(response):
    """
    Look for a download-confirmation token among the cookies of a response.

    response: an object whose cookies.items() yields (name, value) pairs
    returns: the value of the first cookie whose name starts with
             'download_warning', or None when there is no such cookie
    """
    matching_values = (
        cookie_value
        for cookie_name, cookie_value in response.cookies.items()
        if cookie_name.startswith('download_warning')
    )
    return next(matching_values, None)

In [15]:
def save_response_content(response, destination):
    """
    Stream the body of a response into a file.

    response: an object exposing iter_content(chunk_size), e.g. a streamed requests response
    destination: path of the file to create or overwrite (opened in binary mode)
    """
    CHUNK_SIZE = 32768

    with open(destination, "wb") as out_file:
        #empty chunks (keep-alive) carry no data, so they are dropped before writing
        non_empty_chunks = filter(None, response.iter_content(CHUNK_SIZE))
        for block in non_empty_chunks:
            out_file.write(block)

# Functions to Cache Training Data

In [16]:
def _sample_region_values(region_mask, vicinity_score, split_arr_features_train, no_data_value, thresh_learning, data_limit):
    """
    Collect the feature vectors for one class of pixels (helper for get_interior_exterior_edge_values).

    region_mask: boolean array selecting the candidate pixels of the class
    vicinity_score: the per-pixel vicinity scores computed from the label image
    split_arr_features_train: the features to use for training, bands on the last axis
    no_data_value: the value used to signify no data at a pixel
    thresh_learning: the threshold forwarded to sample_by_vicinity_scores
    data_limit: the max number of feature vectors to return

    returns: an array of shape (n, num_bands) with n <= data_limit, or None when
             region_mask selects no pixel at all
    """
    locations = np.where(region_mask)
    if len(locations[0]) == 0:
        return None

    num_bands = split_arr_features_train.shape[-1]

    #choose among the candidates based on their vicinity scores (the selection rule lives in sample_by_vicinity_scores)
    chosen = sample_by_vicinity_scores(locations, vicinity_score[locations], thresh_learning)
    vals = split_arr_features_train[chosen].reshape(-1, num_bands)

    #discard every pixel that holds the no-data value in at least one band
    vals = vals[(vals != no_data_value).all(axis=1)]

    #cap the amount of data by drawing rows at random without replacement
    sampled = np.random.choice(np.arange(vals.shape[0]), min(data_limit, vals.shape[0]), replace=False)
    return vals[sampled]


def get_interior_exterior_edge_values(mode, arr_train_labels, split_arr_features_train, no_data_value, kernel_size=3, thresh_learning=0.1, data_limit=100000):
    """
    Extract training data for interior (wetland) and exterior (non-wetland) pixels, and locate edge pixels.

    mode: a list with any of "interior", "exterior", "edge". Designates which data to return.
    arr_train_labels: the array of training data wetland labels (1 is treated as wetland, 0 as non-wetland)
    split_arr_features_train: the features to use for training, with the bands on the last axis
    no_data_value: the value used to signify no data at a pixel
    kernel_size: the kernel size used in the process of identifying interior and exterior points
    thresh_learning: between 0 and 1, this threshold is used to distinguish interior/exterior pixels
                     (vicinity score below it) from edge pixels (vicinity score at or above it)
    data_limit: the max number of interior and of exterior feature vectors to return (edges are not limited)

    returns: a tuple (interior_vals, exterior_vals, edges). interior_vals and exterior_vals are
             arrays of shape (n, num_bands); edges is the np.where index tuple of the edge pixels.
             An entry is None when its category was not requested in mode.
             If a requested category contains no pixels, a message is printed and the function
             returns a single None instead of a tuple, so callers should check before unpacking.
    """

    #apply a filter to the baseline wetlands image to find edge regions:
    #the kernel compares each pixel (weight 1) with the mean of its neighbours (weights summing to -1)
    kernel = -np.ones((kernel_size,kernel_size)) / (kernel_size**2 - 1)
    kernel[kernel_size//2,kernel_size//2] = 1

    vicinity_score = abs(apply_convolution(arr_train_labels, kernel))

    #every output starts as None so that a category left out of mode can still be returned
    interior_vals, exterior_vals, edges = None, None, None

    if 'interior' in mode:
        #interior points are wetlands confidently below the threshold
        interior_condition = (arr_train_labels==1)&(vicinity_score < thresh_learning)
        interior_vals = _sample_region_values(interior_condition, vicinity_score, split_arr_features_train, no_data_value, thresh_learning, data_limit)

        if interior_vals is None:
            print('empty interior')
            return None

    if 'exterior' in mode:
        #exterior points are non-wetlands confidently below the threshold
        exterior_condition = (arr_train_labels==0)&(vicinity_score < thresh_learning)
        exterior_vals = _sample_region_values(exterior_condition, vicinity_score, split_arr_features_train, no_data_value, thresh_learning, data_limit)

        if exterior_vals is None:
            print('empty exterior')
            return None

    if 'edge' in mode:
        #edge points are at or above the threshold
        edges = np.where(vicinity_score >= thresh_learning)

        if len(edges[0]) == 0:
            print('empty edge')
            return None

    return interior_vals, exterior_vals, edges

# Dictionary of Folder Name to Google Drive File ID

In [17]:
#maps each ROI folder name ('inland_wetland_<group>_<tile>', both indices in 0..4)
#to the Google Drive file ID of its features file; '' means no ID has been recorded yet
file_ids = {
    'inland_wetland_%d_%d' % (group, tile): drive_id
    for group, group_ids in enumerate([
        ['1iA5XwNA7S8exKZXlbWFnxdLUWB8xGsGP',
         '1XIUJJrW0i0pSrWcQ1uPwpR55OEF9FVG4',
         '1Fo-iZ45nCewahN75C9Eg50Sp1HeIi5BO',
         '1Bih2hBv_zZpDxnWSPg_n3y59VftCcWTK',
         '156G5gd8LKh6kUqZVWhL8oVwr_1hZ-3-B'],
        [''] * 5,
        [''] * 5,
        [''] * 5,
        [''] * 5,
    ])
    for tile, drive_id in enumerate(group_ids)
}

# Driver Code

In [18]:
#in-memory cache of extracted training data, keyed by ROI folder name
stored_training_data = dict()

In [19]:
#for every ROI with a known Drive file ID: make sure its features raster is on disk,
#extract interior/exterior training samples and store them in stored_training_data
for roi_folder, fid in file_ids.items():
    #ROIs without a file ID cannot be downloaded, so they are skipped
    if fid == '':
        continue
    print('Processing %s...'%roi_folder)
    roi_dir = '%s/%s'%(BASE_ROI_FOLDER, roi_folder)
    features_file_name = '%s/features_%s.tiff'%(roi_dir, roi_folder)
    baseline_file_name = '%s/baseline_%s.tiff'%(roi_dir, roi_folder)

    #check if data already downloaded
    if not os.path.isfile(features_file_name):
        print('Downloading training data from drive...')
        #the ROI folder may not exist yet (e.g. on a fresh machine)
        os.makedirs(roi_dir, exist_ok=True)
        download_file_from_google_drive(fid, features_file_name)

    #check if training data already cached
    if roi_folder not in stored_training_data:

        ds_features = gdal.Open(features_file_name, gdal.GA_ReadOnly)
        ds_labels = gdal.Open(baseline_file_name, gdal.GA_ReadOnly)

        #gdal.Open returns None rather than raising when GDAL exceptions are not enabled;
        #note that only the features file is downloaded above, the baseline must already be on disk
        if ds_features is None or ds_labels is None:
            raise IOError('Could not open %s and/or %s'%(features_file_name, baseline_file_name))

        print('Getting formatted features and labels...')
        arr_labels, split_arr_features, gt = preprocess_data_set_pair(ds_features, ds_labels, 'Marsh')

        print('Getting Interior/Exterior/Edge info...')
        sampled_values = get_interior_exterior_edge_values(['interior', 'exterior'], arr_labels, split_arr_features, NO_DATA_VALUE)

        #None (instead of a tuple) is returned when a requested pixel class is empty
        if sampled_values is None:
            print('Skipping %s: no usable training pixels'%roi_folder)
            print('=======================================')
            continue
        interior_vals, exterior_vals, edges = sampled_values

        print('Storing in dictionary...')
        stored_training_data[roi_folder] = {}
        stored_training_data[roi_folder]['interior'] = interior_vals
        stored_training_data[roi_folder]['exterior'] = exterior_vals

        #the pickle is rewritten after every ROI so an interrupted run keeps its progress;
        #the with-block makes sure the file is flushed and closed each time
        with open("stored_training_data.p", "wb") as cache_file:
            pickle.dump(stored_training_data, cache_file)

        print('=======================================')

Processing inland_wetland_0_0...
Getting formatted features and labels...
Getting Interior/Exterior/Edge info...
Storing in dictionary...
Processing inland_wetland_0_1...
Downloading training data from drive...
Getting formatted features and labels...
Getting Interior/Exterior/Edge info...
Storing in dictionary...
Processing inland_wetland_0_2...
Downloading training data from drive...
Getting formatted features and labels...
Getting Interior/Exterior/Edge info...
Storing in dictionary...
Processing inland_wetland_0_3...
Downloading training data from drive...
Getting formatted features and labels...
Getting Interior/Exterior/Edge info...
Storing in dictionary...
Processing inland_wetland_0_4...
Downloading training data from drive...
Getting formatted features and labels...
Getting Interior/Exterior/Edge info...
Storing in dictionary...
