# Notebook for downloading and processing Sentinel-1 coherence data
The first 2 cells initialise the notebook and import the required modules, and then let you select the case study area. The first cell only needs to be run once to set up the modules and paths.

In [None]:
#Reload modules without shutting notebook
%load_ext autoreload
%autoreload 2

import os
import shutil
from datetime import date
import subprocess
from IPython.display import clear_output
from ipywidgets import Dropdown, interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import sys
sys.path.append('/usr/bin')
import gdal_merge as gm
import gdal

# Do not show warnings
import warnings
warnings.filterwarnings('ignore')

# Folder layout shared by the cells below; every path hangs off the user's home directory
home = os.path.expanduser("~")
notebfolder, basefolder = (os.path.join(home, name) for name in ("notebooks", "my_shared_data_folder"))
# Both data folders live inside the shared data folder
s1slcfolder, datasets = (os.path.join(basefolder, name) for name in ("s1slc", "datasets"))

# Upland burn modules
sys.path.append(os.path.join(notebfolder, "utils"))
import extract_aoi as extract
import portal_credentials as portalc
import call_cophub as callc
import onda_archive as onda
import functions as func

# Dropdown used to pick which case study area the rest of the notebook works on
list_of_cstudy = ["skye", "cairngorms", "pdistrict"]
cstudy_widget = Dropdown(description = "Area:", options = list_of_cstudy)


def change_cstudy(*args):
    """Echo the currently selected case study area; registered as the dropdown's change callback."""
    print("Set to %s" % (cstudy_widget.value,))


# Call change_cstudy every time the dropdown's value changes
cstudy_widget.observe(change_cstudy, 'value')
print("Choose the case study area")
display(cstudy_widget)

In [None]:
cstudy = cstudy_widget.value

# Extent file for each case study area; any area not listed here uses the PDistrict extent.
# (The original, smaller Cairngorms area was "Cairngorms_extent_OSGB36.geojson".)
aoi_files = {
    "skye": "Skye_extent_OSGB36.geojson",
    "cairngorms": "Cairngorms_extent_OSGB36-extended.geojson",
}
aoi = os.path.join(datasets, aoi_files.get(cstudy, "PDistrict_extent_OSGB36.geojson"))
# First part of the extent file name, e.g. "Skye"
fstart = os.path.basename(aoi).split("_")[0]

# Load Copernicus hub credentials
pfile = "cophub.txt"
home = os.path.expanduser("~")

## start from an empty temp folder every time this cell is run
tmpfolder = os.path.join(home, "temp")
if os.path.exists(tmpfolder):
    shutil.rmtree(tmpfolder)
os.mkdir(tmpfolder)

# Save the credentials first if no credentials file is found in the home folder
credentials_path = os.path.join(home, pfile)
if not os.path.exists(credentials_path):
    portalc.save_credentials(pfile)
cop_credentials = portalc.read_credentials(pfile)

# Folder for this case study's coherence tiffs
final_folder = os.path.join(basefolder, "%s/coherence_tiffs"%(cstudy))

## Define date range and search Copernicus Hub
Once the case study area is chosen, the start and end dates to process for that area are set (although they can be overridden at the end of the first cell below). These dates and the area of interest are then passed to the API, and all the potential Sentinel-1 products are returned.


In [None]:
# Date ranges per case study area - the ones below are already processed.
# Any area without its own entry uses the default range given to .get() below.
preset_ranges = {
    "skye": (date(2018, 2, 1), date(2018, 4, 30)),
    "cairngorms": (date(2019, 3, 1), date(2019, 5, 31)),
}
start_date, end_date = preset_ranges.get(cstudy, (date(2018, 4, 1), date(2018, 8, 31)))

# Alternatively unhash the 2 lines below and set your own range
#start_date = date(2018, 4, 1)
#end_date = date(2018, 8, 31)

In [None]:
# Ask Copernicus hub for the products matching the area of interest and date range
products, wkt2 = callc.call_cophub(cop_credentials, start_date, end_date, aoi, SLC = True)
n_products = len(products)
print("Found {} products".format(n_products))

# Work out pairings for analysis and filter any already processed files out
Running the cell below will allow you to choose a specific orbit that you want to process. Note, at this point it is worth checking how well the orbits overlap your area of interest, as the list of products will include any orbits that just clip the edge of your area.

In [None]:
# get a list of the available orbits (one entry per product, so orbits repeat)
list_of_orbits = products['relativeorbitnumber'].to_list()

# Create a widget that can be used to choose the orbit of interest.
# The unique orbits are sorted so that the dropdown order - and therefore the initially selected
# orbit - is the same on every run instead of depending on set iteration order.
orb_widget = Dropdown(options = sorted(set(list_of_orbits)), description = "Orbit:")


def change_orbit(*args):
    """Print the orbit currently selected in the dropdown; registered as its change callback."""
    print("Set to {}".format(orb_widget.value))


orb_widget.observe(change_orbit, 'value')
print("Choose orbit you wish to analyse. Its probably worth checking it covers an appropriate amount of your area of interest")
display(orb_widget)

The next cell creates lists and dictionaries based on your chosen data that determine which, if any, of the products for your chosen area and orbit need downloading and/or processing. It does this by working backwards from the final products to the initial and intermediate products they require, so if a final product already exists the products behind it are simply skipped.

In [None]:
list_of_potential_pairs = []   # [first.zip, second.zip] for every candidate pairing
pairing_coherence_dict = {}    # coherence file name -> [first title, second title]
file_merge_dict = {}           # merge id tuple -> coherence file names that make up that merged product
processed_folder = os.getcwd()# os.path.join(home, 'my_shared_data_folder/s1slc/Processed_images/')
times_needed = {}              # product title -> number of coherence files it contributes to

# Only the orbit chosen in the dropdown is analysed. `orb` is bound to it explicitly: previously it
# was the variable of a loop over all orbits, so after that loop it held whichever orbit the loop
# happened to finish on, and the final-folder filtering below could be applied to the wrong orbit.
orb = orb_widget.value
orb_products = products[products["relativeorbitnumber"] == orb] # get only the products matching the chosen orbit
for id1, p1 in orb_products.iterrows():
    filtered_data = func.filter_dataframe_by_time(orb_products, p1, max_time=8) # Lets filter by time
    filtered_data = func.filter_dataframe_by_overlap(filtered_data, id1, min_overlap=20) # Lets filter by overlap
    if len(filtered_data) == 0:
        continue
    for id2, data in filtered_data.iterrows():
        if p1['beginposition'] > data['beginposition']: # we will only use the files that are at a later date as not to duplicate pairings
            continue
        pairs = [p1['title'] + ".zip", data['title'] + ".zip"]
        list_of_potential_pairs.append(pairs)

        # we need to create the merge dictionary for later
        # Take the orbit direction straight from the row being paired. The previous lookup by title
        # used str.strip(".zip") (which strips characters, not a suffix) and a label-based [0]
        # that depends on how the product table is indexed.
        orbit_direction = p1['orbitdirection']
        # fifth field from the end of the "_"-separated title; its first 8 characters are used as the date
        file1 = os.path.basename(p1['title']).split("_")[-5]
        file2 = os.path.basename(data['title']).split("_")[-5]

        for pol in ["VV", "VH"]:
            # lets note down how many times original files are needed
            for t in [p1['title'], data['title']]:
                times_needed[t] = times_needed.get(t, 0) + 1
            # The merge id will be unique and will be the key to access files that were aquired with the same
            # relative orbit, sensor, orbit direction, polarisation that were acquired on the same day

            # create the final merged filename
            outfile = file1 + "_" + file2 + "_%s.tif"%(pol)
            # third character of each title (e.g. the "A" of "S1A")
            sensor = os.path.basename(p1['title'])[2] + os.path.basename(data['title'])[2]
            merge_id = (str(orb), sensor, file1[:8], file2[:8], orbit_direction, pol, cstudy)
            file_merge_dict.setdefault(merge_id, []).append(outfile)
            # use this dictionary to back calculate from final images so we know what we need to download
            pairing_coherence_dict[outfile] = [p1['title'], data['title']]

already_processed = []
combos_not_needed = []
# loop through the final folder; if it does not exist yet nothing has been processed
final_files = os.listdir(final_folder) if os.path.isdir(final_folder) else []
for file in final_files:
    stem, extension = os.path.splitext(file)
    if extension != ".tif":
        continue
    identifier = tuple(stem.split("_")) # get the identifier for the file
    # Every key of file_merge_dict starts with str(orb), so a successful lookup also implies the
    # file belongs to the chosen orbit. Files from other orbits or date ranges, and leftover
    # temporary files, simply do not match and are skipped instead of raising.
    processed = file_merge_dict.get(identifier) # get the list of files that were used for the merge
    if processed is None:
        continue
    combos_not_needed.append(processed)
    for proc in processed:
        # for each file used in the merge get the original paired images
        already_processed.extend(pairing_coherence_dict[proc])

counts = dict()
for i in already_processed: # create a dictionary telling us how many times each original file has been used
    counts[i] = counts.get(i, 0) + 1

# If the number of times used matches the number of times needed - we don't need to download it!
orig_files_not_needed = [title for title, num_used in counts.items() if num_used == times_needed.get(title)]

# Download Copernicus Sentinel-1 SLC data
This cell filters the products based on the chosen information and downloads any original Sentinel-1 SLC files you need. 

In [None]:
# Download the SLC files that are still needed for the chosen orbit

print("Downloading SLC files")
print("Found %s products"%(len(products)))
# Read the orbit straight from the dropdown rather than relying on a loop variable left over from
# an earlier cell, which is not guaranteed to hold the selected orbit.
selected_orbit = orb_widget.value
orbit_products = products[products["relativeorbitnumber"] == selected_orbit]
# Drop the products whose outputs all exist already, then those already present in the download folder
products_to_download = func.filter_dataframe_by_file_list(orbit_products, orig_files_not_needed)
products_to_download = onda.check_for_existing_downloads(products_to_download, s1slcfolder)
# Count against the products of this orbit only, so products of other orbits are not reported as "already have"
print("We already have or have processed %s files, so only need to download %s files. Starting download..."%(len(orbit_products) - len(products_to_download) , len(products_to_download)))
onda.batch_download_from_archive(cop_credentials, products_to_download, s1slcfolder)


# Processing SLC data
The next cells initiate the functions required for performing the coherence analysis and then determine the baselines for the pairings.

**Note:** ESA's SNAP tool (http://step.esa.int/main/download/snap-download/) needs to be installed and the path to the binary folder specified.

In [None]:
def run_gpt(xml_filename, propfile, gpt='/opt/snap/bin/gpt'):
    """
    Takes an xml file and a properties file and processes it using SNAP's GPT tool

    The command run is ``<gpt> <xml_filename> -p <propfile>``; everything it writes to stdout or
    stderr is echoed to the notebook as it arrives.

    Parameters
    ------
    xml_filename : path
        Path to the xml file
    propfile : path
        Path to the properties file
    gpt : path, opt
        Path to the SNAP gpt executable (default : '/opt/snap/bin/gpt')

    Raises
    ------
    AssertionError
        If the gpt executable cannot be found
    RuntimeError
        If gpt finishes with a non-zero exit code
    """
    assert os.path.exists(gpt), "Unable to find 'gpt' executable."
    args = [
        gpt,
        xml_filename,
        "-p",
        propfile
    ]

    # create the subprocess (text mode, line buffered, stderr folded into stdout)
    p = subprocess.Popen(args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         bufsize=1,
                         universal_newlines=True)

    # forward messages from stdout and stderr onto the console; iterating the text stream ends at
    # EOF, so no sentinel is needed (the previous b"" sentinel could never match a str line)
    with p.stdout as stdout:
        for line in stdout:
            print(line.rstrip())

    # wait to exit and retrieve the exit code
    exit_code = p.wait()

    # raise an exception if 'gpt' returns an unexpected exit code
    if exit_code != 0:
        raise RuntimeError("Non-zero return code from GPT (exit code {}).".format(exit_code))
        
def run_coherence(file1, file2, wkt, polarisation = "VV"):
    """
    Creates a coherence image based on 2 overlapping files for an area of interest

    Writes one properties file per subswath (IW1-IW3) plus the merge and subset properties files
    into the XML folder, then runs the ProcessSubswath, Mergeswaths and subset graphs through
    run_gpt. The pairing is skipped if either input is missing or the output tif already exists.
    A failure while processing is reported and the function returns normally so that a calling
    loop can carry on with the next pairing; the merged .dim/.data intermediates are removed
    whether or not processing succeeded.

    Parameters
    ------
    file1 : path
        Path to the first file
    file2 : path
        Path to the second file
    wkt : string
        Polygon for the area of interest
    polarisation : str, opt
        The polarisation band to use. Either "VV" or "VH" (default : "VV")
    """
    clear_output(wait=True)
    # First lets set the paths for intermediate data and final folders
    xml_folder = os.path.join(home, "notebooks/SLC_Data_processing/XML_files/")
    intermediate_path = os.path.join(home, "notebooks/SLC_Data_processing/Intermediate_files/")
    processed_folder =  os.path.join(home, 'my_shared_data_folder/s1slc/Processed_images/')

    # Create the final output name from the sixth "_"-separated field of each input file name.
    # The extension-less base is built once and the extensions appended to it, instead of
    # removing them again with str.strip(), which strips a set of characters rather than a suffix.
    outputbase = "%s_%s_%s"%(os.path.basename(file1).split("_")[5], os.path.basename(file2).split("_")[5], polarisation)
    outputbase = os.path.join(processed_folder, outputbase)
    outputname = outputbase + ".tif"
    dimfile = outputbase + ".dim"       # merged product, input to the subset step
    datafolder = outputbase + ".data"   # folder removed together with the .dim file

    assert os.path.isdir(processed_folder), "Unable to locate '{}' please ensure this path exists.".format(processed_folder)
    assert os.path.isdir(intermediate_path), "Unable to locate '{}' please ensure this path exists.".format(intermediate_path)

    for slc_file in (file1, file2):
        if not os.path.exists(slc_file):
            print("Unable to locate '%s'."%(slc_file))
            return

    if os.path.isfile(outputname):
        print("Already analysed pairing %s and %s"%(file1, file2))
        return

    succeeded = False
    try: # lets try and run it!
        print("Analysing pair: %s and %s"%(file1, file2))
        if os.path.exists(intermediate_path): # clear the intermediate folder in case any old files remain
            shutil.rmtree(intermediate_path)
        os.mkdir(intermediate_path)

        # First we need to update/create the properties files for each swath
        polar = "polarisation=%s"%(polarisation)
        subswath_propfiles = []
        merge_inputs = []
        for swathnum in range(1, 4):
            propfile = os.path.join(xml_folder, "subswath%s.properties"%(swathnum))
            with open(propfile, "w") as f:
                f.write("input1=" + file1 + "\n")
                f.write("input2=" + file2 + "\n")
                f.write("output=" + intermediate_path + "IW%s"%(swathnum) + "\n")
                f.write("swath=IW%s"%(swathnum) + "\n")
                f.write(polar + "\n")
            subswath_propfiles.append(propfile)
            merge_inputs.append("input%s="%(swathnum) + intermediate_path + "IW%s.dim"%(swathnum))

        # now lets write the Mergeswaths.properties file
        with open(os.path.join(xml_folder, "Mergeswaths.properties"), "w") as f:
            for merge_input in merge_inputs:
                f.write(merge_input + "\n")
            f.write(polar + "\n")
            f.write("output=" + outputbase + "\n")
            f.write("polygon=" + wkt)

        # finally lets write the subset.properties file
        with open(os.path.join(xml_folder, "subset.properties"), "w") as f:
            f.write("input1=" + dimfile + "\n")
            f.write("output=" + outputbase + "\n")
            f.write("polygon=" + wkt)

        xml_filename = os.path.join(xml_folder, 'ProcessSubswath.xml')
        assert os.path.isfile(xml_filename), "Unable to locate '{}'.".format(xml_filename)

        # Lets loop through and process the individual swaths, using exactly the properties files
        # written above (in IW1..IW3 order) rather than whatever a directory listing returns
        for propfile in subswath_propfiles:
            print("Running on %s for %s"%(propfile, outputname))
            run_gpt(xml_filename, propfile)

        # Merge subswaths (merging and subsetting in one graph didnt always work, so the subset is a separate step)
        run_gpt(os.path.join(xml_folder, 'Mergeswaths.xml'), os.path.join(xml_folder, 'Mergeswaths.properties'))

        # Subset to the area of interest
        print("Subsetting subswaths and saving to %s"%(outputname))
        run_gpt(os.path.join(xml_folder, 'subset.xml'), os.path.join(xml_folder, 'subset.properties'))
        succeeded = True
    except Exception as e:
        # Best effort: report the problem and return normally. Catching Exception (not a bare
        # except) means interrupting the kernel still stops the processing.
        print("Failed to analyse pair %s and %s: %s: %s"%(file1, file2, e.__class__.__name__, e))
    finally:
        # Lets clear up the intermediate files, whether or not the processing worked
        if os.path.exists(dimfile):
            os.remove(dimfile)
        if os.path.exists(datafolder):
            shutil.rmtree(datafolder)

    if succeeded:
        # only clear on success, so a failure message is not cleared by this call
        clear_output(wait=True)

In [None]:
# Run the baseline notebook, passing it the candidate pairs, the case study name and the products.
# Any exception is reported below instead of stopping this notebook.
# NOTE(review): IPython expands each `$name` to the string form of that variable - confirm the
# baseline notebook copes with that for a list and for the products table.
try:
    %run ./Generate-Sentinel1SLC-Baseline.ipynb $list_of_potential_pairs $cstudy $products
except Exception as e:
    # Replace whatever the baseline notebook printed with a short description of the failure
    clear_output(wait=True)
    print("There was a problem determining the baselines: ", e.__class__, "occurred. Full error below:")
    print(e)

In [None]:
# Generate the coherence image for every candidate pairing, once per polarisation
for p in list_of_potential_pairs:
    # the two zipped SLC products of this pairing live in the SLC download folder
    file1, file2 = (os.path.join(s1slcfolder, zip_name) for zip_name in p[:2])
    for pol in ("VV", "VH"):
        run_coherence(file1, file2, wkt2, polarisation = pol)

clear_output(wait=True)
print("Finished analysing the available pairings")

In [None]:
# Folder holding the per-pairing coherence images
s1slc_analysed_folder = os.path.join(s1slcfolder, "Processed_images")
# Folder for this case study's final tiffs; it has to exist already
final_folder = os.path.join(basefolder, "%s/coherence_tiffs"%(cstudy))
final_folder_found = os.path.exists(final_folder)
assert final_folder_found, "Final folder %s doesn't exist"%(final_folder)

# Check for same-day files and merge if they exist
Finally, the following cells run the coherence processing on each of the pairs, before merging together any files that were acquired on the same day by consecutive slices and extracting the final area of interest.

In [None]:
def run_sub_process(args):
    """
    Takes a list of arguments and runs a subprocess in terminal

    Everything the command writes to stdout or stderr is echoed to the notebook as it arrives.

    Parameters
    ------
    args : list
        list of arguments (the command followed by its options)

    Raises
    ------
    RuntimeError
        If the command finishes with a non-zero exit code
    """
    # text mode, line buffered, stderr folded into stdout
    p = subprocess.Popen(args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT,
                         bufsize=1,
                         universal_newlines=True)

    # forward messages from stdout and stderr onto the console; iterating the text stream ends at
    # EOF, so no sentinel is needed (the previous b"" sentinel could never match a str line)
    with p.stdout as stdout:
        for line in stdout:
            print(line.rstrip())

    # wait to exit and retrieve the exit code
    exit_code = p.wait()

    # raise an exception naming the command that failed (this helper runs arbitrary commands,
    # so the message no longer blames GPT)
    if exit_code != 0:
        command = " ".join(str(arg) for arg in args)
        raise RuntimeError("Non-zero return code (%s) from command: %s"%(exit_code, command))

ofiles = []      # final tiffs that are available (newly created or found already merged)
failed = 0       # number of merges that raised an error
failures = []    # (output name, reason) per failed merge, reported after the output is cleared
new_res = 10     # value passed to gdal.Warp as both xRes and yRes

for id1, files in file_merge_dict.items():
    # Create the file names for the intermediate and final products
    outfile = "_".join(str(part) for part in id1)
    outfile_presub = os.path.join(final_folder, '%s_presub.tif'%(outfile))       # merged, before the AOI cut
    outfile_tif = os.path.join(final_folder, '%s.tif'%(outfile))                 # final product
    before_projection = os.path.join(final_folder, '%s_nonproj.tif'%(outfile))   # AOI cut, before reprojection
    vrt_file = '%s.vrt'%(outfile)                                                # temporary mosaic, relative to the working directory

    if len(files) == 0:
        # Nothing to merge. This branch used to index files[0] on the empty list, which could
        # only raise; a merge id with a single file goes through the normal path below.
        print("No files listed for %s, skipping."%(outfile))
        continue
    if os.path.exists(outfile_tif):
        print("Already merged %s"%(outfile_tif))
        ofiles.append(outfile_tif)
        continue
    # lets check we have all the files we need for the correct merge
    source_files = [os.path.join(s1slc_analysed_folder, f) for f in files]
    if not all(os.path.exists(source) for source in source_files):
        print("Not all files available for this merge, skipping.")
        continue
    try:
        print("Merging {} to create {}".format(files, outfile_presub))
        # build the vrt
        run_sub_process(['gdalbuildvrt', '-srcnodata', '0', vrt_file] + source_files)

        # translate files
        run_sub_process(['gdal_translate', '-of', 'GTiff', vrt_file, outfile_presub])

        #delete the vrt
        os.remove(vrt_file)
        print("Extracting area of interest...")
        extract.cut_by_geojson(outfile_presub, before_projection, aoi, verb = True, slc=True)
        os.remove(outfile_presub)

        print("Reprojecting %s to %s"%(before_projection, outfile_tif))
        warped = gdal.Warp(outfile_tif, before_projection, dstSRS="EPSG:27700", dstNodata=0, xRes=new_res, yRes=new_res, format='Gtiff')
        if warped is None:
            # gdal.Warp reports failure by returning None when GDAL exceptions are not enabled
            raise RuntimeError("gdal.Warp failed for %s"%(before_projection))
        warped = None # drop the reference so GDAL flushes and closes the output file
        os.remove(before_projection)

        ofiles.append(outfile_tif)

    except Exception as e: # clean up temp / corrupt files, but keep going with the next merge
        print("Failed to generate merged file {}".format(outfile))
        failed +=1
        failures.append((outfile, "%s: %s"%(e.__class__.__name__, e)))
        for leftover in (outfile_tif, outfile_presub, vrt_file, before_projection):
            if os.path.exists(leftover):
                os.remove(leftover)

clear_output(wait=True)
print("Finished processing, {} output subsets available".format(len(ofiles)))
print("Failed on %s"%(failed))
# Printed after clear_output so the reasons are still visible once the cell has finished
for failed_name, reason in failures:
    print("  %s - %s"%(failed_name, reason))
# The previous "%1"/"%2" placeholders are not valid format specifiers and raised ValueError here.
# NOTE(review): both placeholders are filled with final_folder, as in the original arguments - confirm
# whether a second folder name was intended.
print("Workbooks currently recognise folders called %s and %s, so rename as needed or update utils/get_configuration.py"%(final_folder, final_folder))