# Validation Notebook

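Purpose: interactively validate MOD44W water-mask predictions against MODIS MOD09GQ imagery. Random points are sampled per predicted class, reviewed on an ipyleaflet map alongside an editable ipysheet table, and the validated table is saved to a parquet file.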

Installation requirements:

```bash
pip install ipyfilechooser ipysheet localtileserver
```

TODO:
- temporarily change the no-data value and the starting label value
- specific number of points per field
- walk through the points
- ipysheet to modify the table and value of pixel
- load geopackage with existing validation points
- load layers from toolbar
- color the cells
- change color of marker when checked
- add metrics
- move everything inside a class to avoid user input
- more explicit bands for the user - selection dropdown, or checkbox
- dropdown menu for year to provide a list of filenames
- dropdown menu for location to provide a list of filenames

Some references:

- https://towardsdatascience.com/bring-your-jupyter-notebook-to-life-with-interactive-widgets-bc12e03f0916
- https://github.com/giswqs/geodemo/blob/master/geodemo/common.py

Version: 0.0.1
Date: 08/31/2022

In [None]:
# !pip install ipyfilechooser ipysheet

In [None]:
# !python -m mitoinstaller install

In [1]:
import os
import re
import json
import tempfile
import folium
import ipysheet
import numpy as np
import pandas as pd
import rasterio as rio
import rioxarray as rxr
import geopandas as gpd
import branca.colormap as cm
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import ipywidgets as widgets
import tempfile

from osgeo import gdal
from pprint import pprint

from glob import glob
from pathlib import Path
from folium import plugins
from pyproj import Transformer
from ipyfilechooser import FileChooser
from ipysheet import from_dataframe
from rasterio.warp import calculate_default_transform, reproject, Resampling
from localtileserver import TileClient, get_leaflet_tile_layer, examples
from ipyleaflet import Map, Marker, basemaps, ScaleControl, LayersControl, AwesomeIcon
from ipyleaflet import LegendControl, FullScreenControl, MarkerCluster

# When running under JupyterHub, route localtileserver tile requests through
# the hub proxy. Outside JupyterHub the JUPYTERHUB_SERVICE_PREFIX variable is
# absent, so leave the client prefix untouched instead of failing with a
# KeyError on import.
_hub_prefix = os.environ.get('JUPYTERHUB_SERVICE_PREFIX')
if _hub_prefix is not None:
    os.environ['LOCALTILESERVER_CLIENT_PREFIX'] = \
        f"{_hub_prefix.lstrip('/')}/proxy/{{port}}"

import localtileserver
from localtileserver import get_folium_tile_layer, TileClient

## Notebook Parameters

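- **tiles_basemap**: str = URL template of the tile basemap for the underlying analysis
- **data_bands**: list = indices of the bands to visualize on the basemap
- **data_regex**: str = regex to where data of interest is located (not currently defined in the parameters cell below)
- **cmap**: list = one color per class, used for the label layer and the markers
- **classes**: list = class names, indexed by predicted value
- **val_points_per_class**: int = number of random validation points kept per class
- **icons**: list = marker icon name per class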

In [2]:
# TODO:
# - more explicit bands for the user - selection dropdown, or checkbox

# Basemap tile URL template ({x}, {y}, {z} are filled in by the map client).
tiles_basemap: str = 'https://mt1.google.com/vt/lyrs=s&x={x}&y={y}&z={z}'

# Indices of the bands to visualize on the basemap.
data_bands: list = [1, 2, 1]

# Per-class settings: position i describes the class with predicted value i.
classes: list = ['land', 'water']
cmap: list = ['beige', 'blue']
icons: list = ['tree', 'fa-tint']

# Number of random validation points to keep for each class.
val_points_per_class: int = 150

## File Chooser for Data and Label Files

## Generate Random Points Dataset

In [3]:
# Acquisition to validate; these values are substituted into the
# MOD09GQ / MOD09GA / MOD44W filename patterns built further down.
TILE = 'h30v11'  # tile identifier as it appears in the product filenames
YEAR = 2006
DAY = 225  # zero-padded to three digits in filenames (presumably day of year - confirm)

In [4]:
# Root directories of the input products; a per-year subdirectory is
# appended when the files are located.
MODIS_CSS_BASEPATH = '/css/modis/Collection6.1/L2G/MOD09GQ/'
MODIS_GA_CSS_BASEPATH = '/css/modis/Collection6.1/L2G/MOD09GA/'
MODIS_MOD44_PATH = '/explore/nobackup/people/mcarrol2/MODIS_water/v5_outputs'

# Pieces of the HDF subdataset name: prefix + quoted file path + grid
# suffix + band name (see make_band_str).
HDF_PRSTR = 'HDF4_EOS:EOS_GRID:'
HDF_PSSTR = ':MODIS_Grid_2D:'
HDF_GA_PSSTR = ':MODIS_Grid_500m_2D:'

# Band subdatasets read from the MOD09GQ and MOD09GA files respectively.
HDF_BANDS = ['sur_refl_b01_1', 'sur_refl_b02_1']
HDF_GA_BANDS = ['sur_refl_b03_1', 'sur_refl_b04_1', 'sur_refl_b05_1', 'sur_refl_b06_1', 'sur_refl_b07_1']

# The 2020 imagery is paired with the 2019 MOD44W product
# (presumably no 2020 MOD44W output is available - confirm).
MOD44_YEAR = 2019 if YEAR == 2020 else YEAR

In [5]:
def _first_match(directory, pattern):
    """Return the first (sorted) path in *directory* matching glob *pattern*.

    Raises FileNotFoundError naming the pattern when nothing matches, instead
    of the bare IndexError that indexing an empty glob result would give.
    Sorting makes the choice deterministic when several files match.
    """
    full_pattern = os.path.join(directory, pattern)
    matches = sorted(glob(full_pattern))
    if not matches:
        raise FileNotFoundError(
            'No file matches pattern: {}'.format(full_pattern))
    return matches[0]


# Glob patterns for the three inputs; DAY is zero-padded to three digits.
mod44_filename_rgx = f'MOD44W_{TILE}_{MOD44_YEAR}_v5.tif'
css_filename_rgx = f'MOD09GQ.A{YEAR}{DAY:03}.{TILE}.*.hdf'
ga_filename_rgx = f'MOD09GA.A{YEAR}{DAY:03}.{TILE}.*.hdf'

# Each product is stored under a per-year subdirectory of its base path.
img_path = _first_match(
    os.path.join(MODIS_CSS_BASEPATH, str(YEAR)), css_filename_rgx)
ga_path = _first_match(
    os.path.join(MODIS_GA_CSS_BASEPATH, str(YEAR)), ga_filename_rgx)
mod44_path = _first_match(
    os.path.join(MODIS_MOD44_PATH, str(MOD44_YEAR)), mod44_filename_rgx)
img_path, ga_path, mod44_path

('/css/modis/Collection6.1/L2G/MOD09GQ/2006/MOD09GQ.A2006225.h30v11.061.2020272223202.hdf',
 '/css/modis/Collection6.1/L2G/MOD09GA/2006/MOD09GA.A2006225.h30v11.061.2020272223202.hdf',
 '/explore/nobackup/people/mcarrol2/MODIS_water/v5_outputs/2006/MOD44W_h30v11_2006_v5.tif')

In [6]:
def make_band_str(path, band, gq=True):
    """Build the HDF subdataset name for one band of a MODIS file.

    The result is HDF_PRSTR, the double-quoted file path, the grid suffix
    and the band name, concatenated in that order.

    Args:
        path: path to the HDF file.
        band: name of the band subdataset.
        gq: when True use the HDF_PSSTR grid suffix, otherwise HDF_GA_PSSTR.

    Returns:
        The assembled subdataset name as a string.
    """
    if gq:
        grid_suffix = HDF_PSSTR
    else:
        grid_suffix = HDF_GA_PSSTR
    return f'{HDF_PRSTR}"{path}"{grid_suffix}{band}'

def make_multi_band_modis(path, gq=True):
    """Stack the bands of a MODIS HDF file into a multi-band GeoTIFF.

    A VRT with one band per subdataset is built first and then translated to
    GeoTIFF. Both files are written to the current working directory as
    'MOD09GQ.vrt'/'MOD09GQ.tif' (gq=True) or 'MOD09GA.vrt'/'MOD09GA.tif'
    (gq=False); outputs left over from a previous call are removed first.

    Args:
        path: path to the source HDF file.
        gq: when True use HDF_BANDS, otherwise HDF_GA_BANDS; also passed on
            to make_band_str to pick the grid suffix.

    Returns:
        Path of the GeoTIFF that was written.

    Raises:
        RuntimeError: if GDAL fails to build the VRT or to write the GeoTIFF.
    """
    file_name = 'MOD09GQ' if gq else 'MOD09GA'
    vrt_path = f'{file_name}.vrt'
    tif_path = f'{file_name}.tif'
    bands = HDF_BANDS if gq else HDF_GA_BANDS
    file_bands = [make_band_str(path, band, gq) for band in bands]

    # Remove stale outputs, including the .aux.xml sidecar of the tif.
    for stale_path in (vrt_path, tif_path, f'{tif_path}.aux.xml'):
        try:
            os.remove(stale_path)
        except FileNotFoundError:
            pass

    vrt_opts = gdal.BuildVRTOptions(separate=True)
    vrt_ds = gdal.BuildVRT(vrt_path, file_bands, options=vrt_opts)
    # Without gdal.UseExceptions() a failure is reported by returning None,
    # so check explicitly rather than returning a path that does not exist.
    if vrt_ds is None:
        raise RuntimeError(
            'gdal.BuildVRT failed for {}'.format(path))
    try:
        tr_opts = gdal.TranslateOptions(format="GTiff")
        tif_ds = gdal.Translate(tif_path, vrt_ds, options=tr_opts)
        if tif_ds is None:
            raise RuntimeError(
                'gdal.Translate failed writing {}'.format(tif_path))
    finally:
        # Dropping the references lets GDAL flush and close the datasets.
        tif_ds = None
        vrt_ds = None
    return tif_path

In [7]:
# Read the prediction raster in chunks.
raster_prediction = rxr.open_rasterio(
    mod44_path, chunks={"band": 1, "x": 4800, "y": 4800})

raster_prediction.name = "predicted"
raster_crs = raster_prediction.rio.crs

# Convert to a dataframe with one row per pixel and filter no-data.
raster_prediction = raster_prediction.squeeze().to_dataframe().reset_index()  # convert array to dataframe
raster_prediction = raster_prediction.drop(['band', 'spatial_ref'], axis=1)  # drop unnecessary columns
raster_prediction = raster_prediction[raster_prediction['predicted'] >= 0]  # keep valid values only, remove no-data
raster_prediction = raster_prediction.astype({'predicted': 'int'})  # convert mask into int

# Create random points: for every class, randomly drop the surplus rows so
# that at most val_points_per_class rows remain.
unique_counts = raster_prediction['predicted'].value_counts()
for class_id, class_count in unique_counts.items():
    n_surplus = class_count - val_points_per_class
    if n_surplus <= 0:
        # The class has no more pixels than requested: keep all of them
        # (a negative sample size would raise ValueError).
        continue
    surplus_index = raster_prediction[
        raster_prediction['predicted'] == class_id
    ].sample(n=n_surplus).index
    raster_prediction = raster_prediction.drop(surplus_index)

geometry = gpd.points_from_xy(raster_prediction.x, raster_prediction.y)
raster_prediction = gpd.GeoDataFrame(raster_prediction, crs=raster_crs, geometry=geometry).reset_index(drop=True)
# Discard points whose predicted value is 250
# (presumably a fill / out-of-range code in the product - confirm).
raster_prediction = raster_prediction[raster_prediction['predicted'] != 250].reset_index(drop=True)

  for class_id, class_count in unique_counts.iteritems():


In [8]:
raster_prediction

Unnamed: 0,y,x,predicted,geometry
0,-2.232125e+06,1.352769e+07,1,POINT (13527688.386 -2232124.760)
1,-2.240233e+06,1.381263e+07,0,POINT (13812625.697 -2240232.733)
2,-2.269653e+06,1.346259e+07,1,POINT (13462592.952 -2269653.089)
3,-2.269653e+06,1.346560e+07,1,POINT (13465604.485 -2269653.089)
4,-2.270580e+06,1.345472e+07,1,POINT (13454716.636 -2270579.715)
...,...,...,...,...
295,-3.297512e+06,1.364491e+07,0,POINT (13644906.500 -3297512.314)
296,-3.319751e+06,1.443068e+07,0,POINT (14430684.839 -3319751.324)
297,-3.322531e+06,1.444852e+07,0,POINT (14448522.378 -3322531.200)
298,-3.332029e+06,1.432876e+07,1,POINT (14328756.045 -3332029.110)


In [9]:
# Client - initial client to localize zoom
color_list = [mcolors.rgb2hex(color) for color in cmap]
path = make_multi_band_modis(img_path)
data_client = TileClient(path)
label_client = TileClient(mod44_path)

# Reproject the points to EPSG:4326 (lat/lon) for the map markers, and add
# the columns the operator fills in during validation.
raster_prediction = raster_prediction.to_crs(4326)
raster_prediction['operator'] = 0
raster_prediction['verified'] = 'false'

# Create ipyleaflet TileLayer from that server
data_layer = get_leaflet_tile_layer(
    data_client, show=False, band=data_bands, name="data")
label_layer = get_leaflet_tile_layer(
    label_client, show=False, cmap=color_list, name="label")

# Create ipyleaflet map, add tile layer, and display
m = Map(
    center=data_client.center(),
    zoom=data_client.default_zoom,
    basemap=basemaps.Esri.WorldImagery,
    scroll_wheel_zoom=True,
    keyboard=True
)
m.add_layer(data_layer)
m.add_layer(label_layer)

# Editable table of the validation points. The frame is already in
# EPSG:4326 here, so no second reprojection is needed; the geometry column
# is dropped before building the sheet.
validation_sheet = ipysheet.sheet(from_dataframe(
    raster_prediction.drop(['geometry'], axis=1)
))

# NOTE(review): tags the Dropdown.value trait at class level; no Dropdown
# widget is created in this cell - confirm whether this is still needed.
widgets.Dropdown.value.tag(sync=True)

# Build one map marker per validation point. Each marker's popup holds the
# point ID, the (read-only) predicted class, the operator's validation choice
# and a "Verified" checkbox; the widgets are linked to cells of the sheet.
markers_list = []
for index, point in raster_prediction.iterrows():
        
    # Marker locations are given as (y, x) of the point geometry.
    coordinates = (point['geometry'].y, point['geometry'].x)
    type_color = cmap[point['predicted']]
    type_pred = classes[point['predicted']]

    # Read-only display of the predicted class.
    radio_pred_widget = widgets.RadioButtons(
        options=classes,
        value=type_pred, # preselect the class predicted for this point
        layout={'width': 'max-content'}, # If the items' names are long
        description='Prediction:',
        disabled=True
    )
    
    # Operator's choice for this point.
    radio_check_widget = widgets.RadioButtons(
        options=classes,
        value=classes[0], # initial selection is the first class
        layout={'width': 'max-content'}, # If the items' names are long
        description='Validation:',
        disabled=False
    )
    point_id_widget = widgets.IntText(
        value=index,
        description='ID:',
        disabled=True
    )
    checked_widget = widgets.Checkbox(
        value=False,
        description='Verified',
        disabled=False
    ) 
    popup = widgets.VBox([
        point_id_widget, radio_pred_widget,
        radio_check_widget, checked_widget
    ])

    marker = Marker(
        name=str(index),
        location=coordinates,
        draggable=False,
        icon=AwesomeIcon(
            name=icons[point['predicted']],
            marker_color=type_color,
            icon_color=type_color,
            # spin=True
        ),
        popup=popup
    )

    # Link the popup widgets to the sheet cells of this row. Columns 2, 3 and
    # 4 are presumably 'predicted', 'operator' and 'verified' - confirm
    # against the sheet's column order.
    # NOTE(review): jslink is documented as a two-way link, so the reverse
    # jslink calls below may be redundant - confirm.
    cell = ipysheet.cell(index, 2, type_pred)
    widgets.jslink((cell, 'value'), (radio_pred_widget, 'value'))
    widgets.jslink((radio_pred_widget, 'value'), (cell, 'value'))
    # NOTE(review): 'other' is not one of the radio button options - confirm
    # the intended initial value.
    cell = ipysheet.cell(index, 3, 'other')
    widgets.jslink((cell, 'value'), (radio_check_widget, 'value'))
    widgets.jslink((radio_check_widget, 'value'), (cell, 'value'))
    cell = ipysheet.cell(index, 4, False)
    widgets.jslink((cell, 'value'), (checked_widget, 'value'))

    # append to group of markers
    markers_list.append(marker)

# Group all validation markers into one clustered layer named "validation".
marker_cluster = MarkerCluster(markers=tuple(markers_list), name="validation")
# marker_cluster.on_click(handle_click)
m.add_layer(marker_cluster)

# Map controls: scale bar, layer switcher and full-screen button.
map_controls = (
    ScaleControl(position='bottomleft'),
    LayersControl(position='topright'),
    FullScreenControl(),
)
for map_control in map_controls:
    m.add_control(map_control)

# Legend, currently disabled:
#legend = LegendControl({"low":"#FAA", "medium":"#A55",
# "High":"#500"}, name="Legend", position="bottomright")
#m.add_control(legend)

# Show the map followed by the editable validation table.
display(m)
display(validation_sheet)

Map(center=[-24.999999997753427, 138.9062013279663], controls=(ZoomControl(options=['position', 'zoom_in_text'…

Sheet(cells=(Cell(column_end=0, column_start=0, row_end=299, row_start=0, squeeze_row=False, type='numeric', v…

In [None]:
# Output file for the validated sheet. DAY is zero-padded to three digits so
# the name follows the same A<YEAR><DAY:03> convention as the MOD09 input
# filenames and stays unambiguous for days below 100.
vsheet_name = 'MOD09.Mask.A{}{:03}.{}.validation.parquet.gzip'.format(YEAR, DAY, TILE)
vsheet_path = os.path.join('validated_dfs', vsheet_name)
vsheet_path

In [None]:
# Pull the current state of the sheet back into a dataframe.
df = ipysheet.to_dataframe(validation_sheet)

# Quick summary: number of verified rows and of rows the operator marked
# as water.
print(len(df[df['verified'] == True]))
print(len(df[df['operator'] == 'water']))

# Make sure the output directory exists before writing, and compress with
# gzip so the content matches the '.parquet.gzip' file name.
output_dir = os.path.dirname(vsheet_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_parquet(vsheet_path, compression='gzip')

In [None]:
# Show only the rows whose 'verified' value equals True.
df[df['verified'] == True]