# <b>MODIS Water Validation Notebook - Compare models</b>

Purpose: Used to perform validation of C61 MOD44W products from different models. Compares those products to the previous version, C6 MOD44W.

*Note: We are following an incremental development lifecycle. This notebook is the first rendition that fits most of the requirements. Expect incremental releases that continue toward fully meeting the requirements and expanding the capabilities available to the user.*

Installation requirements:

```bash
pip install localtileserver
```

TODO:
- ipysheet for user to input comments
- load layers from toolbar
- move everything inside a class to avoid user input

Some references:

- https://towardsdatascience.com/bring-your-jupyter-notebook-to-life-with-interactive-widgets-bc12e03f0916
- https://github.com/giswqs/geodemo/blob/master/geodemo/common.py

Version: 0.0.1
Date: 12/09/2022

*For DSG internal use*

### <b> WARNING </b>

Do not run all cells at once, doing so will shut down the local tile servers before you, the user, can interact.

Uncomment the `# !pip install localtileserver` line if localtileserver is not installed.

## Tile and year selection

Choose which tile (see MODIS grid) and which year. Reference the grid image. 

The `h` followed by two numerical digits represents the <b>horizontal</b> tile ID. Use the column space to determine this ID. 

The `v` followed by two numerical digits represents the <b>vertical</b> tile ID. Use the row space to determine this ID. 

For example, the tile that is 9 columns to the right and 5 rows down is `h09v05`.

Example:
```python
TILE = 'h09v05'
```

![MODIS Grid Overlay](../imgs/modis_overlay.png)

In [20]:
# !pip install localtileserver

In [21]:
import os
import joblib
import tempfile
import ipysheet
import numpy as np
import pandas as pd
import rioxarray as rxr
import xarray as xr
import matplotlib.colors as mcolors
import ipywidgets as widgets
import warnings
from osgeo import gdal
from glob import glob
from ipysheet import from_dataframe
from localtileserver import TileClient, get_leaflet_tile_layer 
from ipyleaflet import Map, Marker, basemaps, ScaleControl, LayersControl#, AwesomeIcon
from ipyleaflet import LegendControl, FullScreenControl, Popup#, MarkerCluster

# Route localtileserver tile requests through the JupyterHub proxy.
# The prefix is only set when the notebook actually runs under JupyterHub;
# elsewhere the variable is absent and localtileserver keeps its defaults
# instead of the whole cell failing with a KeyError.
_hub_service_prefix = os.environ.get('JUPYTERHUB_SERVICE_PREFIX')
if _hub_service_prefix is not None:
    os.environ['LOCALTILESERVER_CLIENT_PREFIX'] = \
        f"{_hub_service_prefix.lstrip('/')}/proxy/{{port}}"

### Data Parameters

In [22]:
# MODIS tile ID (hXXvYY) to validate. It is substituted into the product
# file-name patterns, the C6 path and the marker cache file name.
# Uncomment one of the alternatives to switch tiles.
TILE = 'h09v05'
# TILE = 'h22v01'
# TILE = 'h21v10'
# TILE = 'h12v09'

In [23]:
# Year of the products to load; used in the product file-name patterns
# and in the C6 reference path.
YEAR = 2019

In [24]:
# When True, the SumWater observation layers are built for the
# Even Balance and Percent models (if those models are enabled).
plot_obs = True

In [25]:
# Switches selecting which model outputs are loaded and added to the map:
#   target  - targeted RFA product
#   eb      - Even Balance cluster product (also loads the EB Match product)
#   percent - Percent cluster product
#   noc     - No Cluster product
target = False
eb = True
percent = False
noc = False

In [26]:
# The C6 reference product is read for the same year as the C61 products.
MOD44_C6_YEAR = YEAR

############
#Directory Paths
############
#RFA dir paths
# One base directory per model output; each is combined with the file
# patterns defined further down when the layers are built.
data_dir = '/explore/nobackup/projects/ilab/data/MODIS/PRODUCTION/Amanda_Comparison_04042023/'
TAR_BASEPATH = f'{data_dir}/Targeted_RFA_v421'
EB_BASEPATH = f'{data_dir}/1_2_7_NDVI_v201_Results/EvenBalance_Cluster'
EB_MATCH_BASEPATH = f'{data_dir}/1_2_7_NDVI_v201_Results/EB_Match'
PER_BASEPATH = f'{data_dir}/1_2_7_NDVI_v201_Results/Percent_Cluster'
NOC_BASEPATH = f'{data_dir}/1_2_7_NDVI_v201_Results/No_Cluster'
#C6 dir paths
MOD44W_C6_BASEPATH = '/explore/nobackup/people/mcarrol2/MODIS_water/v5_outputs/'
#Cache file dir
# Holds the cached marker location per tile; created next to the notebook
# if it does not exist yet.
CACHE_DIR = '.cache'
os.makedirs(CACHE_DIR, exist_ok=True)

############
#Specific File Paths
############
#RFA file paths
# Glob patterns (not full paths): they are appended to a model base path
# and expanded with glob when a layer is built.
file_qa_path = f'*{YEAR}*{TILE}*ProductQA.*tif'
file_path = f'*{YEAR}*{TILE}*Product.*tif'
file_sumobs_path = f'*{YEAR}*{TILE}*SumWater*tif'
file_ogmask_path = f'*{YEAR}*{TILE}*-Mask*tif'
#C6 file paths
mod44w_c6_path = f'{MOD44W_C6_BASEPATH}/{str(MOD44_C6_YEAR)}/MOD44W_{TILE}_{MOD44_C6_YEAR}_v5.tif'
# Fail fast: the C6 file is needed both as a map layer and as the source
# of the geotransform used for the temporary rasters.
if not os.path.exists(mod44w_c6_path):
    raise FileNotFoundError(f'Could not find the MOD44W C6 file: {mod44w_c6_path}')

############
#CMAPS and legend
############
# Two-colour map used for the C6 reference layer.
water_c6_cmap: list = ['#194d33', '#7bdc93']
# Four colours for the QA layer; they are the same hex values, in the same
# order, as the entries of qa_water_legend_dict below.
water_rfa_qa_cmap: list = ['#ee82ee', '#FCB900', '#FF6900', '#800080']
# Legend label -> colour for the QA classes.
qa_water_legend_dict = {
    'QA- Perm Water Flipped L->W': '#ee82ee', 
    'QA- Ocean Mask L->W': '#FCB900',
    'QA- Burn Scar W->L': '#FF6900',
    'QA- DEM Slope Change W->L': '#800080'}


############
#Geotransform parameters
############
# WKT definition of the Sinusoidal projection, written into every
# temporary raster.
crs = 'PROJCS["Sinusoidal",GEOGCS["Sphere",DATUM["Sphere",SPHEROID["Sphere",6371000,0]],PRIMEM["Greenwich",0],' + \
    'UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]]],PROJECTION["Sinusoidal"]' + \
    ',PARAMETER["longitude_of_center",0],PARAMETER["false_easting",0],PARAMETER["false_northing",0]' + \
    ',UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'
# The geotransform of the C6 file for this tile is reused for the
# temporary rasters.
mod44w_cs_ds = gdal.Open(mod44w_c6_path)
transform = mod44w_cs_ds.GetGeoTransform()

############
#KWARGS
############
# List handed to open_and_write_temp as files_to_rm.
temporary_files = []
# Keyword arguments forwarded to get_leaflet_tile_layer for each layer type:
#   rfa_kwargs - water product layers (values scaled between 0 and 1)
#   obs_kwargs - observation-count layers (values scaled between 0 and 365)
#   pw_kwargs  - QA layers (values 1-4, the codes produced by parse_fix_qa)
rfa_kwargs = {'nodata':0,'show':False,'vmin':0,'vmax':1,'max_zoom':20}
obs_kwargs = {'nodata':0,'show':False,'vmin':0,'vmax':365,'max_zoom':20}  
pw_kwargs = {'nodata':0,'show':False,'vmin':1,'vmax':4,'max_zoom':20}  
# Common arguments passed to open_and_write_temp for every temporary raster.
transform_kwargs = {'transform':transform, 'projection':crs, 'year':YEAR, 
                        'tile':TILE, 'files_to_rm':temporary_files}


### Functions

In [27]:
def parse_qa(qa_array):
    """Remap raw QA codes to compact class indices.

    Code 0 stays 0; codes 4, 6 and 9 become 1, 2 and 3 respectively;
    every other value becomes -1.
    """
    remapped = xr.where(qa_array == 0, 0, -1)
    for raw_code, class_index in ((4, 1), (6, 2), (9, 3)):
        remapped = xr.where(qa_array == raw_code, class_index, remapped)
    return remapped

def parse_fix_qa(qa_array):
    """Remap raw QA codes to the 1-4 classes used by the QA layer.

    Codes 2, 4, 6 and 9 become 1, 2, 3 and 4 respectively; every other
    value becomes 0.
    """
    remapped = xr.where(qa_array == 2, 1, 0)
    for raw_code, class_index in ((4, 2), (6, 3), (9, 4)):
        remapped = xr.where(qa_array == raw_code, class_index, remapped)
    return remapped

def open_and_write_temp(data_array, transform, projection, 
    year, tile, name = None, files_to_rm = None) -> str:
    """Write the first band of a raster array to a temporary GeoTIFF.

    Args:
        data_array: array-like object exposing ``.data`` with a leading band
            axis (indexed as ``data[0, :, :]``) and a ``.name`` attribute.
        transform: GDAL geotransform to assign to the output raster.
        projection: projection string (WKT) to assign to the output raster.
        year: year embedded in the output file name.
        tile: tile ID embedded in the output file name.
        name: label embedded in the output file name; falls back to
            ``data_array.name`` when empty or None.
        files_to_rm: optional list; the path of the written file is appended
            to it so the caller can delete the file later.

    Returns:
        Path of the GeoTIFF written to the system temporary directory.
    """
    name_to_use = name or data_array.name
    tempfile_name = f'MOD44W.A{year}001.{tile}.061.{name_to_use}.tif'
    tempfile_fp = os.path.join(tempfile.gettempdir(), tempfile_name)
    # Remove a stale file from an earlier run before recreating it.
    if os.path.exists(tempfile_fp):
        os.remove(tempfile_fp)

    band_values = data_array.data[0, :, :]
    # Size the raster from the data instead of assuming a 4800x4800 tile.
    n_rows, n_cols = band_values.shape

    driver = gdal.GetDriverByName('GTiff')
    out_ds = driver.Create(tempfile_fp, n_cols, n_rows,
                           1, gdal.GDT_Float32,
                           options=['COMPRESS=LZW'])
    out_ds.SetGeoTransform(transform)
    out_ds.SetProjection(projection)
    out_band = out_ds.GetRasterBand(1)
    out_band.WriteArray(band_values)
    out_band.SetNoDataValue(250)
    out_ds.FlushCache()
    # Drop the GDAL references so the dataset is released (GDAL Python idiom).
    out_band = None
    out_ds = None
    driver = None

    # Register the file for later cleanup; previously the list was accepted
    # but never filled, so temporary rasters were never tracked.
    if files_to_rm is not None and tempfile_fp not in files_to_rm:
        files_to_rm.append(tempfile_fp)
    return tempfile_fp

def get_location(cache_dir: str, tile: str, def_location: list) -> list:
    """Return the cached marker location for a tile, or the default.

    Args:
        cache_dir: directory holding the cached location files.
        tile: tile ID used to build the cache file name.
        def_location: value returned when no cache file exists.

    Returns:
        The object loaded from the cache file if it exists, otherwise
        ``def_location`` itself.
    """
    cached_file = os.path.join(cache_dir, f'{tile}.marker.location.sv')
    if not os.path.exists(cached_file):
        return def_location
    return joblib.load(cached_file)

def cache_location(tile: str, location: list) -> None:
    """Persist the marker location for a tile under CACHE_DIR.

    Args:
        tile: tile ID used to build the cache file name.
        location: marker location to store.
    """
    joblib.dump(location, os.path.join(CACHE_DIR, f'{tile}.marker.location.sv'))

def initialize_marker(tile: str, location: list, cache_dir: str) -> Marker:
    """Create the location marker for the map.

    The marker starts at the location cached for ``tile`` in ``cache_dir``
    when one exists, otherwise at ``location``.
    """
    label = 'Location Marker'
    start_location = get_location(cache_dir, tile, location)
    return Marker(name=label, title=label, location=start_location)

def initialize_message(location: list) -> widgets.HTML:
    """Return an HTML widget whose text is the string form of ``location``."""
    return widgets.HTML(value=str(location))

In [28]:
def data_plot(product_path, legend_name, data_color = None, pw = False, obs = False):
    """Build a leaflet tile layer for one raster product.

    The first file matching ``product_path`` (in sorted order) is opened,
    written to a temporary GeoTIFF and served through a local tile server.

    Args:
        product_path (str): glob pattern for the product GeoTIFF.
        legend_name (str): display name of the product; used for the layer
            name, the legend entry and the temporary file name.
        data_color (str): named matplotlib colour (a key of
            ``mcolors.cnames``) for a plain product layer. Required when
            both ``pw`` and ``obs`` are False; ignored otherwise.
        pw (bool): if True, remap the raster with ``parse_fix_qa`` and
            render it as a QA layer named ``'<legend_name> QA'``.
        obs (bool): if True, print the raster max/min; when ``pw`` is
            False the layer is rendered with the ``'jet'`` colormap and
            ``obs_kwargs``.

    Returns:
        The tile layer alone when ``pw`` or ``obs`` is True; otherwise a
        tuple ``(layer, legend_dict)`` where ``legend_dict`` maps the layer
        name to its hex colour.

    Raises:
        ValueError: a plain product layer is requested without a valid
            named colour.
        FileNotFoundError: no file matches ``product_path``.
    """
    needs_color = not (pw or obs)
    # Validate up front so a bad colour fails before any raster is written
    # or tile server is started.
    if needs_color and data_color not in mcolors.cnames:
        raise ValueError(
            f'data_color must be a named matplotlib color for a product '
            f'layer, got: {data_color!r}')

    matches = sorted(glob(product_path))
    if not matches:
        raise FileNotFoundError(f'No files match the pattern: {product_path}')
    data_array = rxr.open_rasterio(matches[0])
    if obs:
        print('Max',data_array.max(),'\nMin',data_array.min())

    if pw:
        product_water_array = parse_fix_qa(data_array)
        product_kwargs = pw_kwargs
        tile_name = f'{legend_name} QA'
        tile_cmap = water_rfa_qa_cmap
        qa_mask_name = f'{legend_name} perm mask'.replace(' ','_')
    else:
        product_water_array = data_array
        tile_name = f'{legend_name}'
        if obs:
            tile_cmap = 'jet'
            product_kwargs = obs_kwargs
        else:
            # Same colour twice: the layer is drawn in a single flat colour.
            tile_cmap = [mcolors.cnames[data_color], mcolors.cnames[data_color]]
            product_kwargs = rfa_kwargs
        qa_mask_name = f'{legend_name} mask'.replace(' ','_')

    product_qa = open_and_write_temp(product_water_array, name=qa_mask_name, **transform_kwargs)
    product_client = TileClient(product_qa)
    product_layer = get_leaflet_tile_layer(
            product_client, cmap=tile_cmap,
            name=tile_name, **product_kwargs)

    if pw or obs:
        return product_layer
    product_legend_dict = {tile_name: mcolors.cnames[data_color]}
    return product_layer, product_legend_dict

### Using Functions

In [29]:
# C6 reference product: served straight from its file on disk and drawn
# with the C6 colormap.
c6_client = TileClient(mod44w_c6_path)
c6_water_mask_layer = get_leaflet_tile_layer(
    c6_client,
    name='C6',
    cmap=water_c6_cmap,
    **rfa_kwargs,
)
c6_legend_dict = {'C6': '#7bdc93'}

In [30]:
# Build the product layer, the QA layer and (optionally) the observation
# layer for every enabled model.
if target:
    # targeted rfa trained using v4.2.1 data
    tar_layer, tar_dict = data_plot(
        product_path=f'{TAR_BASEPATH}/{file_path}',
        legend_name='C61', data_color='yellow')
    tar_pw_layer = data_plot(
        product_path=f'{TAR_BASEPATH}/{file_qa_path}',
        legend_name='C61', pw=True)
if eb:
    # clustered model trained using evenly balanced clusters and v2.0.1 data
    eb_layer, eb_dict = data_plot(
        product_path=f'{EB_BASEPATH}/{file_path}',
        legend_name='Even Balance', data_color='gray')
    eb_pw_layer = data_plot(
        product_path=f'{EB_BASEPATH}/{file_qa_path}',
        legend_name='Even Balance', pw=True)
    if plot_obs:
        eb_sumobs_layer = data_plot(
            product_path=f'{EB_BASEPATH}/{file_sumobs_path}',
            legend_name='EB SumObs', obs=True)
    eb_match_layer, eb_match_dict = data_plot(
        product_path=f'{EB_MATCH_BASEPATH}/{file_path}',
        legend_name='EB Match', data_color='indigo')
    eb_match_pw_layer = data_plot(
        product_path=f'{EB_MATCH_BASEPATH}/{file_qa_path}',
        legend_name='EB Match', pw=True)

if percent:
    # clustered model trained using proportional clusters and v2.0.1 data
    per_layer, per_dict = data_plot(
        product_path=f'{PER_BASEPATH}/{file_path}',
        legend_name='Percent', data_color='red')
    per_pw_layer = data_plot(
        product_path=f'{PER_BASEPATH}/{file_qa_path}',
        legend_name='Percent', pw=True)
    if plot_obs:
        per_sumobs_layer = data_plot(
            product_path=f'{PER_BASEPATH}/{file_sumobs_path}',
            legend_name='Per SumObs', obs=True)
if noc:
    # no clusters model trained using v2.0.1 data
    noc_layer, noc_dict = data_plot(
        product_path=f'{NOC_BASEPATH}/{file_path}',
        legend_name='No Cluster', data_color='blue')
    noc_pw_layer = data_plot(
        product_path=f'{NOC_BASEPATH}/{file_qa_path}',
        legend_name='No Cluster', pw=True)

        

[]
[]
[]
[]




Max <xarray.DataArray ()>
array(260, dtype=int16)
Coordinates:
    spatial_ref  int64 0 
Min <xarray.DataArray ()>
array(0, dtype=int16)
Coordinates:
    spatial_ref  int64 0
[]
[]
[]




Max <xarray.DataArray ()>
array(271, dtype=int16)
Coordinates:
    spatial_ref  int64 0 
Min <xarray.DataArray ()>
array(0, dtype=int16)
Coordinates:
    spatial_ref  int64 0
[]


In [31]:
# Assemble the legend: C6 first, then each enabled model, then the QA classes.
legend_dict = {**c6_legend_dict}
if target:
    legend_dict = {**legend_dict, **tar_dict}
if eb:
    legend_dict = {**legend_dict, **eb_dict}
if percent:
    legend_dict = {**legend_dict, **per_dict}
if noc:
    legend_dict = {**legend_dict, **noc_dict}
legend_dict = {**legend_dict, **qa_water_legend_dict}
legend = LegendControl(legend_dict)

In [32]:
#########
#Setup map
#########
# The map is centred on the C6 raster and uses its default zoom level.
m = Map(
    center=c6_client.center(),
    zoom=c6_client.default_zoom,
    basemap=basemaps.Esri.WorldImagery,
    scroll_wheel_zoom=True,
    keyboard=True,
    layout=widgets.Layout(height='600px')
)
marker_location = c6_client.center()
marker = initialize_marker(tile=TILE, location=marker_location, cache_dir=CACHE_DIR)
latlon_message = initialize_message(marker.location)

def handle_click(**kwargs):
    """Show the marker's current location in its popup and cache it."""
    latlon_message.value = str(marker.location)
    marker.popup = latlon_message
    cache_location(tile=TILE, location=marker.location)

m.add_layer(marker)
if target:
    m.add_layer(tar_layer)
    m.add_layer(tar_pw_layer)
if eb:
    # The observation layer only exists when plot_obs is True.
    if plot_obs:
        m.add_layer(eb_sumobs_layer)
    m.add_layer(eb_layer)
    m.add_layer(eb_pw_layer)
if percent:
    # The observation layer only exists when plot_obs is True.
    if plot_obs:
        m.add_layer(per_sumobs_layer)
    m.add_layer(per_layer)
    m.add_layer(per_pw_layer)
if noc:
    m.add_layer(noc_layer)
    m.add_layer(noc_pw_layer)

# The C6 reference layer is added last.
m.add_layer(c6_water_mask_layer)

marker.on_click(handle_click)
# m.add_control(legend)
m.add_control(ScaleControl(position='bottomleft'))
m.add_control(LayersControl(position='topright'))
m.add_control(FullScreenControl())

In [33]:
# Render the interactive map widget in the cell output.
display(m)

Map(center=[-15.00001637063555, 36.51499794207356], controls=(ZoomControl(options=['position', 'zoom_in_text',…

## Save notes

Run the cells below to edit and save notes. Notes are stored in `../notes/` (relative to the current working directory), in one CSV file per tile and user.

In [None]:
userid = !whoami
notes_path = f'../notes/{TILE}-{userid[0]}-notes.csv'
if os.path.exists(notes_path):
    notes_df = pd.read_csv(notes_path)
    notes_df = notes_df.drop(columns=['Unnamed: 0'])
    sheet_notes = ipysheet.from_dataframe(notes_df)
else:
    tile = [' ' for _ in range(75)]
    year = [' ' for _ in range(75)]
    location = [' ' for _ in range(75)]
    note = [' ' for _ in range(75)]
    data = {'Tile': tile, 'Year': year, 'Location': location, 'Note': note}
    notes_df = pd.DataFrame(data=data)
    sheet_notes = ipysheet.from_dataframe(notes_df)
sheet_notes.column_width = [3,3,4,10]
sheet_notes.layout = widgets.Layout(width='100%',height='100%')
sheet_notes

In [None]:
# Write the edited sheet back to the notes CSV. The index is kept in the
# file because the loading cell drops the resulting 'Unnamed: 0' column.
sheet_notes_df = ipysheet.to_dataframe(sheet_notes)
# Create the notes directory on first use so the save does not fail.
os.makedirs(os.path.dirname(notes_path), exist_ok=True)
sheet_notes_df.to_csv(notes_path)

### <b>DO NOT RUN THIS CELL UNTIL FINISHED WITH VALIDATION</b>
*Note: This will shut down the local tile servers*

*Ignore warnings such as:*
```
Server for key (default) not found.
```

In [None]:
# Delete the temporary rasters registered in temporary_files. Iterate over
# a copy: removing entries from the list being looped over skips elements.
for path_to_delete in list(temporary_files):
    if os.path.exists(path_to_delete):
        os.remove(path_to_delete)
    temporary_files.remove(path_to_delete)

# Shut down the tile server through the C6 client, the only client this
# notebook keeps at module level.
# NOTE(review): presumably the per-product clients share the same default
# server, as the "Server for key (default)" warning suggests - confirm.
c6_client.shutdown(True)