In [1]:
import os
import time
import getpass
import glob
import ntpath
import pandas as pd
import subprocess
import sys

import ipywidgets as widgets
import scripts.filter_closing_smm as cfilter

from osgeo import gdal
from ipywidgets import VBox, HBox, Label, interactive, Box, HTML, fixed, interact, TwoByTwoLayout
from tqdm.auto import trange, tqdm

from scripts.stackcomposed.stack_composed import parse as ps

from scripts.itables import interactive_table
from scripts.itables import options as opt

<IPython.core.display.Javascript object>

In [2]:
# Notebook-wide display configuration.
# NOTE(review): `lengthMenu` is presumably the rows-per-page menu of the
# interactive tables provided by scripts.itables - confirm against that module.
opt.lengthMenu = [5, 10, 15, 20]

# Style dictionary and layout applied to every action button built below.
BUTTON_STYLE = dict(button_color='rgba(28,28,28,.99)')
button_layout = widgets.Layout(width='175px')

In [3]:
def formatter(start, end, step):
    """Render an inclusive run of consecutive values as 'start-end'.

    Args:
        start: first value of the run.
        end: last value of the run.
        step: accepted for backward compatibility; it is not rendered because
            re_range only collapses runs whose step is 1.

    Returns:
        str: the run formatted as '<start>-<end>'.
    """
    return '{}-{}'.format(start, end)


def re_range(lst):
    """Compress a sequence of integers into a compact, comma-separated label.

    Runs of three or more consecutive values (step of exactly 1) are collapsed
    to 'first-last'; every other value is listed individually, e.g.
    [1, 2, 3, 5] -> '1-3,5' and [1, 3, 5] -> '1,3,5'.

    Only step-1 runs are collapsed so that two different selections can never
    produce the same label (a label such as '1-5' always means 1,2,3,4,5).

    Args:
        lst: indexable sequence of integers (list or tuple).

    Returns:
        str: the compact label; an empty string for an empty sequence.
    """
    n = len(lst)
    result = []
    scan = 0
    while n - scan > 2:
        step = lst[scan + 1] - lst[scan]
        # Not the start of a consecutive run: emit the value on its own.
        if step != 1 or lst[scan + 2] - lst[scan + 1] != step:
            result.append(str(lst[scan]))
            scan += 1
            continue

        # Extend the run until the step changes or the sequence ends.
        for j in range(scan + 2, n - 1):
            if lst[j + 1] - lst[j] != step:
                result.append(formatter(lst[scan], lst[j], step))
                scan = j + 1
                break
        else:
            # The run reaches the end of the sequence.
            result.append(formatter(lst[scan], lst[-1], step))
            return ','.join(result)

    # One or two trailing values are too short to form a run.
    if n - scan == 1:
        result.append(str(lst[scan]))
    elif n - scan == 2:
        result.append(','.join(map(str, lst[scan:])))

    return ','.join(result)

In [4]:
# Account running the notebook; its home folder hosts the download tree.
user = getpass.getuser()

# Folder the raw images are read from (see filter_process).
RAW_PATH = f'/home/{user}/pysmm_downloads/0_raw/'

# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(RAW_PATH, exist_ok=True)

# Sibling folder of RAW_PATH where the processed images and statistics go.
PROCESSED_PATH = RAW_PATH.replace('0_raw','1_processed')

os.makedirs(PROCESSED_PATH, exist_ok=True)

# os.cpu_count() may return None when the count cannot be determined; fall
# back to a single core so the value is always a usable integer.
cores = os.cpu_count() or 1

In [5]:
# HTML paragraph displayed by filter_process once the closing filter has
# finished successfully, just before the statistics tabs are shown.
desc_postprocess =  """<p style="line-height: 20px">After the data is filtered, a time series analysis of the soil moisture maps can be performed. Several statistics can be applied whether to the entire time series or to a specified range, statistics as median, mean, standard deviation or linear trend (slope of the line) are available to process the selected data.  </br>The slope of the linear trend, indicates if the trend in soil moisture is negative or positive. These trends might be related to peatland management practices. 
After the processing completes, download the outputs and check them in a GIS environment such as QGIS or ArcGIS.</p>"""

# HTML paragraph displayed by run() when the folder selector has no value.
no_images_to_process = """<p style="line-height: 20px">There are no images to process.</p>"""

In [6]:
def images_summary(tifs):
    """Build a date-indexed table listing the given images.

    Args:
        tifs: iterable of image paths. Each path is handed to
            ps.parse_other_files and element 4 of its result is converted to
            a pandas Timestamp (presumably the acquisition date - confirm
            against the stackcomposed parser).

    Returns:
        pandas.DataFrame: one 'Image name' column, indexed by 'date' and
        sorted chronologically.
    """
    records = []
    for image in tifs:
        image_date = pd.Timestamp(ps.parse_other_files(image)[4])
        records.append((image_date, image))

    summary = (
        pd.DataFrame(records, columns=['date', 'Image name'])
        .sort_values(['date'])
        .reset_index(drop=True)
        .set_index('date')
    )
    return summary

In [7]:
def return_paths(raw_path):
    """List the usable sub-folders of a directory.

    Args:
        raw_path: directory to inspect.

    Returns:
        list[str]: alphabetically sorted names of the direct sub-folders that
        do not start with '.' and contain at least one entry.
    """
    kept = []
    for entry in os.listdir(raw_path):
        full_path = os.path.join(raw_path, entry)
        if not os.path.isdir(full_path):
            continue
        if entry.startswith('.'):
            continue
        if not os.listdir(full_path):
            continue
        kept.append(entry)
    return sorted(kept)

def get_select2(*args):
    """Refresh the second selector with the sub-folders of the first one.

    Registered as an observer on select_1; the change payload passed by the
    widget framework is accepted through *args and ignored.
    """
    selected_folder = os.path.join(RAW_PATH, select_1.value)
    select_2.options = return_paths(selected_folder)

def get_months_years(path):
    """Collect the years and months covered by the .tif images in a folder.

    Args:
        path: folder searched (non-recursively) for '*.tif' files.

    Returns:
        list | int: [years, months], each a sorted list of unique integers,
        on success. The integer 1 is returned (after printing an error
        message) when the folder has no .tif files or a file name cannot be
        parsed; callers compare the result against 1.
    """
    tifs = glob.glob(f'{path}/*.tif')
    if not tifs:
        print("ERROR: The folder is empty")
        return 1

    try:
        # Parse every file name once and reuse the result for both lists.
        dates = [ps.parse_other_files(image)[4] for image in tifs]
        years = sorted({date.year for date in dates})
        months = sorted({date.month for date in dates})
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate while any parsing failure is reported to the user.
        print("ERROR: The image date name must follow the format: 'YYYY_mm_dd'")
        return 1

    return [years, months]
    
def parse_months(months_list):
    """Pair month numbers with their English names.

    Args:
        months_list: iterable of month numbers (1-12).

    Returns:
        list[tuple[str, int]]: (name, number) tuples in input order,
        e.g. [('January', 1)].

    Raises:
        KeyError: if a number is outside 1-12.
    """
    names = ('January', 'February', 'March', 'April', 'May', 'June',
             'July', 'August', 'September', 'October', 'November', 'December')
    # Mapping lookup (rather than tuple indexing) keeps KeyError for bad input.
    name_by_number = dict(enumerate(names, start=1))

    pairs = []
    for number in months_list:
        pairs.append((name_by_number[number], number))
    return pairs


def get_dimension(image):
    """Return the pixel size of a raster as [width, height].

    Args:
        image: path of the raster, opened with gdal.Open.

    Returns:
        list: [RasterXSize, RasterYSize] of the opened dataset.
    """
    dataset = gdal.Open(image)
    width = dataset.RasterXSize
    height = dataset.RasterYSize
    return [width, height]

def closing_filter(process_path):
    """Run cfilter.raw_to_processed on every .tif found under a folder.

    The folder is walked recursively; the images are handled in sorted order
    of their absolute paths while a progress bar is shown.

    Args:
        process_path: folder that holds the images to filter.

    Returns:
        int: 0 when the images were processed, 1 when there was nothing to
        process (no .tif files, or process_path is not a directory).
    """
    tif_suffix = '.tif'
    rasters = []

    if not os.path.isdir(process_path):
        print(f'ERROR: The {process_path} is not a directory path.')
    else:
        for root, _dirs, names in os.walk(process_path):
            rasters.extend(
                os.path.abspath(os.path.join(root, name))
                for name in names
                if name.endswith(tif_suffix)
            )
        rasters.sort()

    if not rasters:
        return 1

    # The first image is used to report the raster size to the user.
    size = get_dimension(rasters[0])
    print(f'The image dimension is {size[0]} x {size[1]} px')
    print(f'There are {len(rasters)} images to process, please wait...')

    for index in trange(len(rasters)):
        cfilter.raw_to_processed(rasters[index])

    return 0

def time_message(stat, cores, chunks):
    """Compose the "please wait" message shown before a statistic is computed.

    Args:
        stat: statistic name; shown in lower case.
        cores: number of cores reported in the message.
        chunks: chunk size reported in the message.

    Returns:
        str: the message; when stat is exactly 'linear_trend' a second line
        warning about the longer run time is appended.
    """
    message = f'Computing {stat.lower()} using {cores} cores and {chunks} as chunk size, please wait...'
    if stat != 'linear_trend':
        return message

    slow_notice = '\nDepending on the extent and the number of images, this process could take several minutes...'
    return message + slow_notice

def filter_season(tifs, months, years):
    """Keep only the images dated within the given months and years.

    Args:
        tifs: list of image paths. Element 4 of ps.parse_other_files(path) is
            converted to a pandas Timestamp (presumably the acquisition date -
            confirm against the stackcomposed parser).
        months: collection of month numbers to keep.
        years: collection of years to keep.

    Returns:
        list: the matching image paths in chronological order; an empty list
        when tifs is empty.
    """
    # An empty frame has no datetime index, so `.month` would raise
    # AttributeError; there is nothing to filter anyway.
    if not tifs:
        return []

    # One (date, image_name) record per image, indexed and sorted by date.
    records = [(pd.Timestamp(ps.parse_other_files(image)[4]), image) for image in tifs]
    dated = (
        pd.DataFrame(records, columns=['date', 'image_name'])
        .sort_values(['date'])
        .reset_index(drop=True)
        .set_index('date')
    )

    # An image is kept when both its month and its year were requested.
    in_season = dated.index.month.isin(months) & dated.index.year.isin(years)
    return list(dated.loc[in_season, 'image_name'])
    
    
def stack_composed(statistic, feature, field, cores, chunks, 
                   ini_date=None, end_date=None, 
                   season=None, years=None):
    """Compute a per-pixel statistic over the filtered images with stack-composed.

    The image list is written to a temporary text file inside the 'stats'
    folder of PROCESSED_PATH/feature/field, the bundled stack-composed script
    is executed on it with `python3`, and the temporary file is always removed
    afterwards. On success a link to the output folder is displayed; otherwise
    the script's stderr is printed.

    Args:
        statistic: value passed to the script's `-stat` option.
        feature: first-level folder name under PROCESSED_PATH.
        field: second-level folder name; used for the output location and name.
        cores: value passed to `-p` (converted to str).
        chunks: value passed to `-chunks` (converted to str).
        ini_date: optional start date (object with strftime). When given, the
            `-start`/`-end` options are passed and end_date is required.
        end_date: optional end date (object with strftime).
        season: optional sequence of month numbers used to pre-filter the
            images; ignored for the output name when ini_date is given.
        years: sequence of years that accompanies season.

    Returns:
        None
    """
    # A range needs both limits; report it instead of failing on None.strftime.
    if ini_date and not end_date:
        print('ERROR: Please select an end date for the range.')
        return

    cores = str(cores)
    chunks = str(chunks)

    processed_ff_path = os.path.join(PROCESSED_PATH, feature, field)
    processed_f_path = os.path.join(PROCESSED_PATH, feature)
    processed_stat_path = os.path.join(processed_ff_path, 'stats')
    os.makedirs(processed_stat_path, exist_ok=True)

    # Images are gathered from every sub-folder of the feature, not only `field`.
    tifs = glob.glob(f'{processed_f_path}/*/close*.tif')

    if season:
        # Keep only the images of the selected months and years.
        tifs = filter_season(tifs, season, years)

    display(images_summary(tifs))
    print(time_message(statistic, cores, chunks))

    # The output name suffix and the optional date arguments depend on the mode.
    range_args = []
    if ini_date:
        start = ini_date.strftime("%Y-%m-%d")
        end = end_date.strftime("%Y-%m-%d")
        suffix = f'_{start}_{end}'
        range_args = ['-start', start, '-end', end]
    elif season:
        suffix = f'_Y{re_range(years)}_m{re_range(season)}'
    else:
        suffix = ''

    processed_stat_name = os.path.join(
        processed_stat_path,
        f'Stack_{statistic.upper()}_{feature}_{field}{suffix}.tif')

    # File handed to the script as its input: one image path per line.
    tmp_tif_file = os.path.join(processed_stat_path, 'tmp_images.txt')

    command = (['python3',
                f'{os.getcwd()}/scripts/stackcomposed/bin/stack-composed',
                '-stat', statistic,
                '-bands', '1']
               + range_args
               + ['-p', cores,
                  '-chunks', chunks,
                  '-o', processed_stat_name,
                  tmp_tif_file])

    try:
        with open(tmp_tif_file, 'w') as f:
            f.write('\n'.join(tifs))

        tic = time.perf_counter()
        process = subprocess.run(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 universal_newlines=True)
        toc = time.perf_counter()
    finally:
        # Remove the temporary list even when the run itself raised.
        if os.path.exists(tmp_tif_file):
            os.remove(tmp_tif_file)

    if process.returncode == 0:
        elapsed_time = round(toc-tic, 2)
        print(f'Done in {elapsed_time} seconds!')
        link = f"https://sepal.io/api/sandbox/jupyter/tree/pysmm_downloads/1_processed/{feature}/{field}/stats"
        html_link = f'<br>The processed images can be found in <a href = "{link}" target="_blank"> {processed_ff_path}</a>'
        display(HTML(html_link))
    else:
        print(process.stderr)
        
def run_stat_all_ts(statistic, feature, field, cores, chunks):
    """Widget callback: compute the statistic without date or season limits."""
    stack_composed(statistic=statistic, feature=feature, field=field,
                   cores=cores, chunks=chunks)

def run_stat_range(statistic, feature, field, 
                  cores, chunks, ini_date, end_date):
    """Widget callback: compute the statistic between ini_date and end_date."""
    stack_composed(statistic=statistic, feature=feature, field=field,
                   cores=cores, chunks=chunks,
                   ini_date=ini_date, end_date=end_date)

def run_stat_season(statistic, feature, field, 
                  cores, chunks, years, season):
    """Widget callback: compute the statistic for the selected months and years."""
    stack_composed(statistic=statistic, feature=feature, field=field,
                   cores=cores, chunks=chunks,
                   season=season, years=years)

def create_accordion(*args):
    """Wrap widgets in a collapsed, single-section accordion.

    Args:
        *args: the section title followed by the widgets to stack vertically
            inside the section.

    Returns:
        widgets.Accordion: accordion with one titled section and nothing
        selected (selected_index=None).
    """
    title = args[0]
    section_body = VBox(args[1:])

    panel = widgets.Accordion(children=[section_body], selected_index=None)
    panel.set_title(0, title)
    return panel
    
def stats_span(feature, field):
    """Display the three statistics tabs (all series, range, season).

    Each tab wraps one of the run_stat_* callbacks in a manually triggered
    `interactive` container that shares the module-level statistic, cores and
    chunks widgets; feature and field are passed as fixed values.

    Args:
        feature: first-level folder name forwarded to the callbacks.
        field: second-level folder name forwarded to the callbacks.
    """

    def build_tab(callback, controls, button_text, extra_slots=()):
        # Lay out one `interactive` container: statistic (+ extra widgets)
        # next to the run button, advanced settings below, then the output.
        panel = interactive(callback, {'manual':True}, **controls)

        # In manual mode the last two children are the run button and the output.
        button = panel.children[-2]
        button.layout = button_layout
        button.button_style = 'info'
        button.style = BUTTON_STYLE
        button.icon = 'check'
        button.description = button_text

        # Children 1 and 2 are the cores and chunks sliders.
        advanced = create_accordion('Advanced settings',
                                    panel.children[1], panel.children[2])

        selectors = VBox([panel.children[0]] + [panel.children[slot] for slot in extra_slots])
        header = HBox([Label('Select the statistic you want compute:'),
                       selectors,
                       button])

        return VBox([header, advanced, panel.children[-1]])

    shared_controls = {
        'feature' : fixed(feature),
        'field': fixed(field),
        'statistic' : statistic,
        'cores':w_cores,
        'chunks':w_chunks,
    }

    # Entire time series
    w_ts_output = build_tab(run_stat_all_ts, shared_controls, 'Compute time series!')

    # Date range: children 3 and 4 are the start/end date pickers
    range_controls = {**shared_controls, 'ini_date': ini_date, 'end_date': end_date}
    w_range_output = build_tab(run_stat_range, range_controls, 'Compute range!', (3, 4))

    # Season: children 3 and 4 are the year/month selectors
    season_controls = {**shared_controls, 'years': w_season_years, 'season': w_season_months}
    w_season_output = build_tab(run_stat_season, season_controls, 'Compute season!', (3, 4))

    tab = widgets.Tab()
    tab.children = [w_ts_output, w_range_output, w_season_output]
    for position, title in enumerate(('All time series', 'Range', 'Season')):
        tab.set_title(position, title)

    display(tab)
        
def filter_process(feature, field):
    """Filter the raw images of a folder and, on success, show the stats tabs.

    The years and months found in RAW_PATH/feature/field are loaded into the
    season selectors, the closing filter is run on that folder and, when it
    reports success, the description text and the statistics tabs are shown.

    Args:
        feature: first-level folder name under RAW_PATH.
        field: second-level folder name under RAW_PATH.
    """
    print('Postprocessing the soil moisture maps')

    process_path = os.path.join(RAW_PATH, feature, field)

    options = get_months_years(process_path)
    # get_months_years signals a failure with the integer 1.
    if options == 1:
        return

    available_years, available_months = options

    # By default only the first year is selected...
    w_season_years.options = available_years
    w_season_years.value = [available_years[0]]

    # ...while every available month is selected.
    w_season_months.options = parse_months(available_months)
    w_season_months.value = available_months

    # Continue with the stack step only if the closing filter succeeded.
    if closing_filter(process_path) != 0:
        return

    display(HTML(desc_postprocess))
    stats_span(feature, field)

def run():
    """Display the folder selectors and the button that starts the processing.

    When the first selector has no value, a "no images" notice is shown instead.
    """
    if not select_1.value:
        display(HTML(no_images_to_process))
        return

    w = interactive(filter_process, {'manual':True}, feature=select_1, field=select_2)
    feature_selector, field_selector, run_button = w.children[0], w.children[1], w.children[2]

    # The selectors are labelled once by the Label in front of them.
    feature_selector.description = ''
    field_selector.description = ''

    run_button.description = 'Process the folder'
    run_button.button_style = 'info'
    run_button.style = BUTTON_STYLE
    run_button.icon = 'check'

    display(HBox([Label('Select the folder you want process:'), feature_selector, field_selector]))
    display(HBox([run_button]))
    # The last child is the output area of the interactive container.
    display(w.children[-1])

# NOTE(review): not referenced anywhere else in the visible notebook -
# possibly a leftover; confirm before removing.
raw_folders = []

In [8]:
# Statistic passed to stack-composed; shared by the three statistics tabs.
statistic = widgets.Dropdown(
    options=[('Median','median'), ('Mean','mean'), ('Gmean','Gmean'), 
             ('Max','max'), ('Min','min'), ('Std','std'), 
             ('Valid pixels','valid_pixels'), 
             ('Linear trend','linear_trend')],
    value='median',
    disabled=False,
)

# NOTE(review): time_span is not used by any function in the visible notebook.
time_span = widgets.ToggleButtons(
    options=['All time series', 'Range'],
    description='Dates:',
    disabled=False
)

# Limits of the "Range" tab.
ini_date = widgets.DatePicker(
    description='Start date',
    disabled=False
)

end_date = widgets.DatePicker(
    description='End date',
    disabled=False
)

# First-level folders found under RAW_PATH.
select_1 = widgets.Select(
    options=return_paths(RAW_PATH),
)

w_cores = widgets.IntSlider(
    value=cores,
    min=1,
    max=cores,
    step=1,
    description='Processors:',
    orientation='horizontal', 
    readout=True
)

w_chunks = widgets.IntSlider(
    value=200,
    min=0,
    max=1000,
    step=20,
    description='Chunk:',
    orientation='horizontal', 
    readout=True
)

# Second-level folders of the folder selected in select_1.
select_2 = widgets.Select(
)

# React only to selection changes; without `names` the handler would also run
# for every other trait change (index, label, options, ...).
select_1.observe(get_select2, names='value')

# The observer only fires on later changes, so fill the second selector for
# the folder that is selected initially (nothing to do when there is none).
if select_1.value:
    get_select2()

# Options and values are filled in by filter_process.
w_season_years = widgets.SelectMultiple(
    description='Year(s)'
)

w_season_months = widgets.SelectMultiple(
    description="Month(s):"
)

In [9]:
# Show the folder-selection UI, or the "no images" notice when select_1 has no value.
run()

HBox(children=(Label(value='Select the folder you want process:'), Select(options=('103_phu_merged', '107_phu_…

HBox(children=(Button(button_style='info', description='Process the folder', icon='check', style=ButtonStyle(b…

Output()