---
# IFSAR CHM and SRLITE CHM CNN workflow 2023
author: "Matt Macander"
date: '2023-09-26'
output: html_document
---

Notebook for method to generate GPKG of cloudmask footprints on ADAPT
https://github.com/pahbs/geoscitools/blob/master/footprint_cloudmask.ipynb

Notebook for method to generate GPKG of GLiHT CHM footprints?

/explore/nobackup/people/pmontesa/srlite/footprints_vhr_cloudmask_alaska_20230720.gpkg

Then, zip GPKG and send to ABR

## Package and send footprint/cloudmask data from ADAPT
```{bash ADAPT}
tar -cvzf $NOBACKUP/footprints_vhr_cloudmask_alaska_20230720.gpkg.tar.gz /explore/nobackup/people/pmontesa/srlite/footprints_vhr_cloudmask_alaska_20230720.gpkg
scp $NOBACKUP/footprints_vhr_cloudmask_alaska_20230720.gpkg.tar.gz mason:/data/gis/gis_projects/2021/.
```

## Ingest At ABR
```{bash ADAPT}
mkdir -p /data/gis/gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak
cd /data/gis/gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak

mv /data/gis/gis_projects/2021/footprints_vhr_cloudmask_alaska_20230720.gpkg.tar.gz .
tar -xvzf footprints_vhr_cloudmask_alaska_20230720.gpkg.tar.gz

```

## Prepare to combine with regular grid of points that will be center points of train/val chips
Point sample frame (512m spacing in EPSG:3338) created by converting 512m raster to points (extent of IFSAR DSM/DTM/CHM)

/data/gis/gis_projects/2021/21-347_NASA_SmallSat/20230522_srlite_ak/alaska_ifsar_512m_pts.shp

VHR cloudfree footprints file from Paul

https://github.com/pahbs/geoscitools/blob/master/footprint_cloudmask.ipynb

/data/gis/gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak/footprints_vhr_cloudmask_alaska_20230720.gpkg



## Convert to 3338 GDB using QGIS or ArcPro
###Convert to fgdb for 100x faster Intersect performance using latest QGIS with FileGDB API GDAL support (via full install or update of OSGeo4W)

/data/gis/gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak/footprints_vhr_cloudmask_alaska_20230720_3338.gpkg

## Generate Antibuffer in QGIS or ArcPro
Run antibuffer by 0.5 * (512^2 + 512^2)^0.5 m (one half of a diagonal of 256x256 pixel or 512x512 m box)
362 m
processing.run("native:buffer", {'INPUT':'W:\\gis_projects\\2021\\21-347_NASA_SmallSat\\20230825_srlite_ak\\footprints_vhr_cloudmask_alaska_20230720_3338.gpkg|layername=footprints_vhr_cloudmask_alaska_20230720_3338','DISTANCE':-362,'SEGMENTS':5,'END_CAP_STYLE':0,'JOIN_STYLE':0,'MITER_LIMIT':2,'DISSOLVE':False,'OUTPUT':'ogr:dbname=\'W:/gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak/footprints_vhr_cloudmask_alaska_20230720_3338_buff_minus362m.gpkg\' table="antibuff" (geom)'})

qgis_process run native:buffer --distance_units=meters --area_units=m2 --ellipsoid=EPSG:7030 --INPUT='W:/gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak/footprints_vhr_cloudmask_alaska_20230720_3338.gpkg|layername=footprints_vhr_cloudmask_alaska_20230720_3338' --DISTANCE=-362 --SEGMENTS=5 --END_CAP_STYLE=0 --JOIN_STYLE=0 --MITER_LIMIT=2 --DISSOLVE=false --OUTPUT='ogr:dbname='\''W:/gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak/footprints_vhr_cloudmask_alaska_20230720_3338_buff_minus362m.gpkg'\'' table="antibuff" (geom)

## GLiHT footprints had extremely complex linework with many holes
###  Simplify command in ArcPro
```
arcpy.EnvManager(transferGDBAttributeProperties="NOT_TRANSFER_GDB_ATTRIBUTE_PROPERTIES"):
    arcpy.cartography.SimplifyPolygon(
        in_features="footprints_gliht_chm_2014_3338",
        out_feature_class=r"W:\gis_projects\2021\21-347_NASA_SmallSat\sample_points\footprints_gliht.gdb\footprints_gliht_chm_2014_3338_simp5m",
        algorithm="POINT_REMOVE",
        tolerance="5 Meters",
        minimum_area="5 SquareMeters",
        error_option="NO_CHECK",
        collapsed_point_option="KEEP_COLLAPSED_POINTS",
        in_barriers=None
    )
```

### Pairwise Buffer in ArcPro (highly parallel, though it crashed computer on non-simplified inputs)
```
arcpy.analysis.PairwiseBuffer(
    in_features="footprints_gliht_chm_2014_3338_simp5m",
    out_feature_class=r"W:\gis_projects\2021\21-347_NASA_SmallSat\sample_points\footprints_gliht.gdb\footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m",
    buffer_distance_or_field="-90.5 Meters",
    dissolve_option="NONE",
    dissolve_field=None,
    method="PLANAR",
    max_deviation="0 Meters"
)
```

### Pairwise Intersect in ArcPro (highly parallel), grid points vs. antibuffered gliht footprints
Yields one point for each gliht footprint intersection (multiple points on top of each other if multiple footprints)
```
arcpy.analysis.PairwiseIntersect(
    in_features="alaska_ifsar_128m_pts;footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m",
    out_feature_class=r"W:\gis_projects\2021\21-347_NASA_SmallSat\sample_points\footprints_gliht.gdb\ak128_x_footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m",
    join_attributes="ALL",
    cluster_tolerance=None,
    output_type="INPUT"
)
```

## Pairwise anti-buffer of VHR cloudmasks for 128m grid spacing (90.5 m)
```
arcpy.analysis.PairwiseBuffer(
    in_features="footprints_vhr_cloudmask_alaska_20230720_3338",
    out_feature_class=r"W:\gis_projects\2021\21-347_NASA_SmallSat\20230825_srlite_ak\arcpro_intersect.gdb\footprints_vhr_cloudmask_alaska_20230720_3338_buff_minus90p5m_fgdb",
    buffer_distance_or_field="-90.5 Meters",
    dissolve_option="NONE",
    dissolve_field=None,
    method="PLANAR",
    max_deviation="0 Meters"
)
```

## Pairwise intersect of sample points from grid vs. GLIHT  with anti-buffered VHR cloudmasks
```
arcpy.analysis.PairwiseIntersect(
    in_features="ak128_x_footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m;footprints_vhr_cloudmask_alaska_20230720_3338_buff_minus90p5m_fgdb",
    out_feature_class=r"W:\gis_projects\2021\21-347_NASA_SmallSat\sample_points\footprints_gliht.gdb\ak128_x_vhr_x_footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m",
    join_attributes="ALL",
    cluster_tolerance=None,
    output_type="INPUT"
)
```


### Calculate lat/lon on table if needed
### Export to CSV
### tar.gz and send to ADAPT
### Unpack on ADAPT



## On ADAPT PRISM JupyterHub
Initialize R

In [1]:
#v2023-09-16 all valid chips (650k)
#v2023-09-19 1% subset, use local terrain as predictor instead of DTM
#v2023-09-25 GLIHT and IFSAR CHM, local terrain as predictor instead of DTM
#v2023-09-26 GLIHT and IFSAR CHM, DTM as predictor for comparison and because pipeline not set up for rDTM yet
library(sf)
library(tidyverse)
library(janitor)
library(magrittr)
library(lubridate)
library(fs)
library(glue)
library(readxl)
library(terra)
library(furrr)
library(exactextractr)
# library(spatialEco)
# library(gfcanalysis)
basePath <- path('/explore/nobackup/people/mmacande/srlite/chm_model')
# outPath <- path('/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm')
outPath <- path('/explore/nobackup/people/mmacande/srlite/chm_model/20230928_chm')
dir_create(outPath)

Linking to GEOS 3.11.1, GDAL 3.6.1, PROJ 9.1.0; sf_use_s2() is TRUE

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.0      [32m✔[39m [34mpurrr  [39m 1.0.0 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.5.0 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test



Attaching package: ‘magrittr’


The following object is masked from ‘package:purrr’:

    set_names


The following object is masked from ‘package:tidyr’:

    ext

## Fix Jupyter / R / GDAL
Code blocks below are dealing with gdal path issue on adapt. Still needed as of 2023-08-31

Run once

In [2]:
secondPath <- sf_proj_search_paths()[[2]]
sf_proj_search_paths(secondPath)

# sf_proj_search_paths('/explore/nobackup/people/mmacande/proj')
# Uncomment line below to see proj search paths
# dir_ls(sf_proj_search_paths())

# Ingest CSV with intersection of regular grid points and footprints
Load point file generated at ABR using SRLite cloud masks eroded (or anti-buffered) by 128 pixel diagonals to ensure all points had full 256x256 chip

In [8]:
# import os; os.environ['PROJ_LIB'] = '/home/mmacande/.conda/envs/r4-base-20230102/share/proj'
# proj error fix above, below did not work
# cp -r /home/mmacande/.conda/envs/r4-base-20230102/share/proj /explore/nobackup/people/mmacande/proj

# points <- read_sf('/explore/nobackup/people/mmacande/srlite/chm_model/chm_sample_100k_chm_1kPerBin_p1_v20230329.csv') %>%
# points <- read_sf(path(basePath, 'sample_points', 'ak128_x_vhr_x_footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m_v20230925.csv')) %>%
    # select(pointid, file_lidar=file, path_lidar=path, BUFF_DIST, lat84, lon84, file_vhr = file_1)

points <- read_sf(path(basePath, 'sample_points', 'ak128_x_vhr_x_dggs_lidar_fp_v20230927_3338_antibuff90p5m_v20230928.csv')) %>%
    select(pointid, lidar_fp_path, BUFF_DIST, lat84, lon84, file_vhr = file) %>%
    mutate(file_lidar = path_file(lidar_fp_path), 
           file_lidar = str_replace(file_lidar, '_fp.gkp', '.tif'),
           path_lidar = path_dir(lidar_fp_path)) %>%
    st_as_sf(coords=c('lon84','lat84'), crs=4326, agr="constant", remove=F)
# mutate(epsg = as.integer(epsg))
      # chipName = str_replace(chipName, 'v20230523', 'v20230829'))

colnames(points)
head(points %>% st_drop_geometry())
# print(points %>% group_by(epsg) %>% tally())
# st_crs(points)

pointid,lidar_fp_path,BUFF_DIST,lat84,lon84,file_vhr,file_lidar,path_lidar
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
25600792,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,-90.5,66.5027313,-145.1144409,WV02_20190601_M1BS_1030010091224E00-toa.cloudmask.tif,yukon_flats_2009_chm_fp.gpkg,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009
25600793,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,-90.5,66.5025787,-145.1116028,WV02_20190601_M1BS_1030010091224E00-toa.cloudmask.tif,yukon_flats_2009_chm_fp.gpkg,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009
25600794,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,-90.5,66.5024261,-145.1087646,WV02_20190601_M1BS_1030010091224E00-toa.cloudmask.tif,yukon_flats_2009_chm_fp.gpkg,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009
25600795,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,-90.5,66.5022659,-145.1059265,WV02_20190601_M1BS_1030010091224E00-toa.cloudmask.tif,yukon_flats_2009_chm_fp.gpkg,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009
25609012,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,-90.5,66.5015869,-145.1148224,WV02_20190601_M1BS_1030010091224E00-toa.cloudmask.tif,yukon_flats_2009_chm_fp.gpkg,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009
25609013,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,-90.5,66.5014343,-145.1119843,WV02_20190601_M1BS_1030010091224E00-toa.cloudmask.tif,yukon_flats_2009_chm_fp.gpkg,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009


## Ancillary layers for spatial joins used to attribute and filter points

In [9]:
ak_yukon_boreal_arctic <- read_sf(path(basePath, 'gis_layers/AKVEG_Regions.gpkg'),
                                  layer='NorthAmericanBeringia_ModelArea_3338')
mrfy <- rast(path(basePath, 'gis_layers', 'mrfy_ak_1940_2022_30m_3338_u16.tif'))

ifsar_metadata <- read_sf(path(basePath, 'gis_layers','ifsar_metadata_x_ifsar_extent_wOcean.shp')) %>%
  rename(ifsar_year = year,
         ifsar_cell = cell) %>%
  mutate(ifsar_year = as.integer(ifsar_year),
         ifsar_year = if_else(is.na(ifsar_year), 0L, ifsar_year)) %>%
  select(ifsar_year, ifsar_cell) %>%
  st_make_valid()


In [12]:
# st_crs(ak_yukon_boreal_arctic)
# st_crs(mrfy)
# st_crs(ifsar_metadata)
nrow(points)

### Spatial joins

In [11]:
points_x_ifsar <- st_join(points, ifsar_metadata, join=st_within)
points_x_ifsar_sa <- st_join(points_x_ifsar, ak_yukon_boreal_arctic %>% st_transform(4326), join=st_within)

st_as_s2(): dropping Z and/or M coordinate



In [13]:
mrfy_max = tibble(mrfy_max = exact_extract(mrfy, points %>% st_transform(3338) %>% st_buffer(64), fun='max'))

mrfy_max <- mrfy_max %>%
  mutate(mrfy_max = if_else(is.na(mrfy_max), 1900, mrfy_max))

Cannot preload entire working area of 1332937329 cells with max_cells_in_memory = 3e+07. Raster values will be read for each feature individually.





In [14]:
points_x_ifsar_sa_fire <- bind_cols(points_x_ifsar_sa, mrfy_max)

In [15]:
# points_x_ifsar_sa_fire %>% st_drop_geometry() %>% group_by(ifsar_year) %>% tally()
# points_x_ifsar_sa_fire %>% st_drop_geometry() %>% group_by(Region) %>% tally()
# points_x_ifsar_sa_fire %>% st_drop_geometry() %>% group_by(mrfy_max) %>% tally()
colnames(points_x_ifsar_sa_fire)

## Get Paths of SRLite inputs to join on catid

In [16]:
#This is out of date
srlite_fp <- read_sf('/explore/nobackup/people/pmontesa/srlite/footprints_vhrmeta_SR_alaska.gpkg') %>%
  mutate(srlitePath = path(path, file))
colnames(srlite_fp)

srlite_fp_fs <- tibble(srlitePath = dir_ls('/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska', recurse=T, glob='*csv*/*-sr-02m.tif')) %>%

#Get list off file system
#Remove dups with OLS processing
filter(!str_detect(srlitePath, '/ols/')) %>%
mutate(srliteFile = path_file(srlitePath),
       catid = str_sub(srliteFile, 20, 35))
nrow(srlite_fp)
nrow(srlite_fp_fs)

srlite_fp_fs %>% group_by(catid) %>% tally() %>% filter(n>1)
#Remove dups, keeping the last generated path in alphabetical order
srlite_fp_fs <- srlite_fp_fs %>% group_by(catid) %>% arrange(desc(srlitePath)) %>% slice_head(n=1)
nrow(srlite_fp_fs)



catid,n
<chr>,<int>
1030010024CC3100,2
1030010092635900,2
103001009367EC00,2
103001009409EB00,2
103001009561CF00,2
10300100A622DE00,2


## Ingest SRLite metrics

In [None]:
srlite_metrics <- read_csv('/explore/nobackup/people/pmontesa/userfs02/projects/ilab/above_shrubs/data/tables/srlite_metrics_alaska_202308291350.csv') %>%
  select(-`...1`)
dups <- srlite_metrics %>%
  dplyr::group_by(model, file, type, footprint_name, catid, sensor, year, month, date, band_name) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::filter(n > 1L) 


srlite_metrics_wide <- srlite_metrics %>%
  filter(band_name %in% c('Blue','Green','Red','NIR')) %>%
  pivot_wider(id_cols=c(model, file, type, footprint_name, catid, sensor, year, month, date), names_from=band_name, values_from=intercept:rmse_norm, values_fn=mean) #%>%

colnames(srlite_metrics_wide)


## Calculate attributes and join SRLite info
## Generate names for image chips

In [None]:
points_fortified <- points_x_ifsar_sa_fire %>%
  st_drop_geometry() %>%
  select(-starts_with('Shape_')) %>%
  mutate(lidarPath = path(path_lidar, file_lidar),
         baseName = str_sub(file_vhr, 0, 35),
         baseNameLidar = path_ext_remove(file_lidar),
         vhr_year = as.integer(str_sub(baseName, 6, 9)),
         vhr_mm = as.integer(str_sub(baseName, 10, 11)),
         vhr_dd = as.integer(str_sub(baseName, 12, 13)),
         sensor = str_sub(baseName, 1, 4),
         catid = str_sub(baseName, 20, 35),
         .before=file_vhr) %>%
  group_by(baseName, baseNameLidar) %>%
  # mutate(chip_gran_id = row_number(FID_fishnet_512m_label),
  mutate(chip_gran_id = row_number(pointid),
         chipName = glue('chip_{baseName}_CHM_{baseNameLidar}_{str_pad(chip_gran_id, 5, "left", "0")}_v20230925'),
         .before=file_vhr) %>%
  mutate(imageYear_minus_ifsarYear = vhr_year - ifsar_year,
         fireYear_minus_imageYear = mrfy_max - vhr_year,
         fireYear_minus_ifsarYear = mrfy_max - ifsar_year,
         fireYear_minus_lidarYear = mrfy_max - 2014,
         imageYear_minus_lidarYear = vhr_year - 2014) %>%
         # fireFlag = between(fireYear_minus_imageYear, -1, 0) | between(fireYear_minus_ifsarYear, -1, 0) | between(mrfy_max, pmin(vhr_year, ifsar_year), pmax(vhr_year, ifsar_year))) %>%
  relocate(starts_with('ifsar'), .before=file_vhr) %>%
  relocate(starts_with('fire'), .before=file_vhr) %>%
  relocate(starts_with('mrfy'), .before=file_vhr) %>%
  ungroup() %>%
  left_join(srlite_fp_fs %>% select(catid, srlitePath)) %>%
  left_join(srlite_metrics_wide %>% select(catid, r2_score_NIR)) %>%
  mutate(random = runif(n()), .before=chipName,
         trainTest = case_when(
            random < 0.90 ~ 'train',
            T ~ 'test'))


In [None]:
points_fortified %>% group_by(vhr_mm) %>% tally()
points_fortified %>% group_by(imageYear_minus_lidarYear) %>% tally()
# points_fortified %>% group_by(fireYear_minus_lidarYear) %>% tally()
points_fortified %>% group_by(mrfy_max) %>% tally()
# points_fortified %>% group_by(group) %>% tally()

write_csv(points_fortified, path(outPath,'fortified_ak128_x_vhr_x_footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m_v20230925.csv'))

## Filter points (July, August, no recent fires) and save CSV

In [None]:
filtered_points <- points_fortified %>%
filter(vhr_mm %in% c(7,8), r2_score_NIR >= 0.75, !between(mrfy_max, 2012, 2014)) %>%
mutate(group = glue('gliht_78_{trainTest}_v20230925'))

nrow(points_fortified)
nrow(filtered_points)
colnames(filtered_points)

write_csv(filtered_points, path(outPath,'fortified_filtered_ak128_x_vhr_x_footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m_v20230925.csv'))

## Start from here if CSVs for chip generation are already saved

In [5]:
filtered_points <- read_csv(path(basePath, '20230925_chm','fortified_filtered_ak128_x_vhr_x_footprints_gliht_chm_2014_3338_simp5m_antibuff90p5m_v20230925.csv'))

[1mRows: [22m[34m61677[39m [1mColumns: [22m[34m31[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (13): file_lidar, path_lidar, lidarPath, baseName, baseNameLidar, sensor...
[32mdbl[39m (18): pointid, BUFF_DIST, lat84, lon84, vhr_year, vhr_mm, vhr_dd, chip_g...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [6]:
#Check for missing srlite path
filtered_points %>% group_by(catid, srlitePath, file_vhr) %>% filter(is.na(srlitePath)) %>% tally()

catid,srlitePath,file_vhr,n
<chr>,<chr>,<chr>,<int>


Filtered points to include only July/August, high NIR R2 score, and exclude 2012-2014 fires. For now including all image years (VHR up to 4 years before 2014, and 6 years after)

Function to Load SRLite, IFSAR DSM/DTM/CHM and crop by bounding box.

Save out to image and label geotiffs.

Save index gpkg of chips

In [7]:
# filtered_points %>% st_drop_geometry() %>% head()
# srlitePath = '/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_8/WV02_20130820_M1BS_10300100260A9600-sr-02m.tif'
# sr_2m_strip <- rast(srlitePath)
# crs(sr_2m_strip, describe=T)$code

## Function to generate train/val chips

In [8]:
crop_by_point_chm <- function(geometry, baseName, chipName, outPrefix, srlitePath, lidarPath, group) {
    sr_2m_strip <- rast(srlitePath)
    epsg <- as.integer(crs(sr_2m_strip, describe=T)$code)
    
    point <- st_sfc(geometry, crs=4326)
    utmPoint = st_transform(point, epsg)
 
    utmBuff = st_buffer(utmPoint, 64)
    # utmBuff = st_buffer(utmPoint, 256)
    utmBB = st_as_sfc(st_bbox(utmBuff))
    BB3338 = st_transform(utmBB, 3338)
    # sr_2m_strip <- rast(path(alaska_2m, glue('{baseName}-sr-02m.tif')))

    
    if (nlyr(sr_2m_strip) == 8) { sr_2m_strip <- subset(sr_2m_strip, c(2,3,5,7)) }
    sr_2m <- crop(sr_2m_strip, utmBB)#, filename=path(outPath, glue('{epsg}_{long}_{lat}.tif')))
    
    names(sr_2m) <- c('01_blue','02_green','03_red','04_nir')
    ifsar_dtm_2m <- project(ifsar_dtm, sr_2m, method='bilinear')
    ifsar_dsm_2m <- project(ifsar_dsm, sr_2m, method='bilinear')
    ifsar_dtm_min <- global(ifsar_dtm_2m, 'min')[[1]]
    local_dem_2m <- ifsar_dtm_2m - ifsar_dtm_min
    names(ifsar_dtm_2m) <- c('05_ifsar_dtm_m')
    names(local_dem_2m) <- c('05_local_dem_m')
    predictor_stack_2m <- c(sr_2m, local_dem_2m)
    predictor_stack_2m <- c(sr_2m, ifsar_dtm_2m)

    ifsar_chm_2m <- ifsar_dsm_2m - ifsar_dtm_2m
    ifsar_chm_2m <- clamp(ifsar_chm_2m, 0, 100)
    names(ifsar_chm_2m) <- c('06_ifsar_chm_m')

    lidar_chm_2m <- project(rast(lidarPath), sr_2m, method='average')
    lidar_chm_2m <- clamp(lidar_chm_2m, 0, 100)
    names(lidar_chm_2m) <- c('06_lidar_chm_m')
    
    #TODO optimize to run once and get all stats
    predictor_isNA <- sum(global(predictor_stack_2m, 'isNA')[[1]])
    ifsar_chm_isNA = global(ifsar_chm_2m, 'isNA')[[1]]
    ifsar_chm_percentiles = global(ifsar_chm_2m, fun=quantile, probs=c(0.00,0.50,0.90,0.98,1.00))
    ifsar_chm_min = ifsar_chm_percentiles[[1]]
    ifsar_chm_p50 = ifsar_chm_percentiles[[2]]
    ifsar_chm_p90 = ifsar_chm_percentiles[[3]]
    ifsar_chm_p98 = ifsar_chm_percentiles[[4]]
    ifsar_chm_max = ifsar_chm_percentiles[[5]]
    ifsar_chm_range = ifsar_chm_max - ifsar_chm_min
    
    lidar_chm_isNA = global(lidar_chm_2m, 'isNA')[[1]]
    lidar_chm_percentiles = global(lidar_chm_2m, fun=quantile, probs=c(0.00,0.50,0.90,0.98,1.00))
    lidar_chm_min = lidar_chm_percentiles[[1]]
    lidar_chm_p50 = lidar_chm_percentiles[[2]]
    lidar_chm_p90 = lidar_chm_percentiles[[3]]
    lidar_chm_p98 = lidar_chm_percentiles[[4]]
    lidar_chm_max = lidar_chm_percentiles[[5]]
    lidar_chm_range = lidar_chm_max - lidar_chm_min
    
    # print(glue('{chm_min}, {chm_p50}, {chm_p90}, {chm_p98}, {chm_max}, {chm_range}'))
    if(ifsar_chm_isNA > 0) {
        # print(glue('{chipName} includes {chm_isNA} nodata pixels in CHM, not exported')) 
        chipStatus = glue('Chip dropped, nodata in CHM')
        } else if (lidar_chm_isNA > 0) {
        chipStatus = glue('Chip dropped, nodata in lidar CHM')
        } else if (predictor_isNA > 0) {
        chipStatus = glue('Chip dropped, nodata in predictor stack')
        # } else if (chm_range <= 0.01) {   
        # print(glue('{chipName} is a constant or near constant chm of {chm_min}, not exported')) 
        # chipStatus = glue('Chip dropped, CHM constant or near constant at {chm_min}')
        } else {
        # print(glue('Exporting {chipName}'))
        writeRaster(predictor_stack_2m, filename=path(outPath, glue('{outPrefix}_images'), glue('{chipName}.tif')), overwrite=T)
        writeRaster(ifsar_chm_2m, filename=path(outPath, glue('{outPrefix}_ifsar_chm'), glue('{chipName}.tif')), overwrite=T)
        writeRaster(lidar_chm_2m, filename=path(outPath, glue('{outPrefix}_lidar_chm'), glue('{chipName}.tif')), overwrite=T)
        chipStatus = 'Chip exported'
    }

    return(tibble(BB3338, baseName, chipName, outPrefix, srlitePath, lidarPath, group, chipStatus, 
                  ifsar_chm_min, ifsar_chm_p50, ifsar_chm_p90, ifsar_chm_p98, ifsar_chm_max, 
                  lidar_chm_min, lidar_chm_p50, lidar_chm_p90, lidar_chm_p98, lidar_chm_max,
                  predictor_isNA, ifsar_chm_isNA, lidar_chm_isNA))
}

## Optional to look at chips that failed to export
Needs to be fixed

In [9]:
#BB Only for chips that errored
bb_by_point <- function(geometry, baseName, chipName, outPrefix, srlitePath, lidarPath, group) {
    # print(epsg)
    # print(geometry)
    point <- st_sfc(geometry, crs=4326)
    # print(point)
    utmPoint = st_transform(point, epsg)
    # print(utmPoint)
    utmBuff = st_buffer(utmPoint, 64)
    utmBB = st_as_sfc(st_bbox(utmBuff))
    BB3338 = st_transform(utmBB, 3338)

    chipStatus = 'Error during chip processing, not exported'

    return(tibble(epsg, BB3338, baseName, chipName, outPrefix, srlitePath, lidarPath, chm_mean_cm, group, chipStatus))
}

## Setup DEM sources and output folders

In [10]:
ifsar_dtm <- rast('/explore/nobackup/projects/dem/AK_IFSAR/alaska_ifsar_dtm_20221222.tif')
ifsar_dsm <- rast('/explore/nobackup/projects/dem/AK_IFSAR/alaska_ifsar_dsm_20221222.tif')

dir_create(path(outPath, 'train_images'))
dir_create(path(outPath, 'train_ifsar_chm'))
dir_create(path(outPath, 'train_lidar_chm'))

dir_create(path(outPath, 'test_images'))
dir_create(path(outPath, 'test_ifsar_chm'))
dir_create(path(outPath, 'test_lidar_chm'))


## Create geometries from lat/lon table

In [11]:
filtered_points_gis <- filtered_points %>%
st_as_sf(coords=c('lon84','lat84'), crs=4326, agr="constant", remove=F)


## Small scale testing trials

In [12]:
test2 <- T
if(test2) {
# filtered_points_train %>% select(epsg, geometry, baseName, chipName, srlitePath, chm_mean_cm) %>% 
filtered_points_gis %>% select(geometry, baseName, chipName, srlitePath, lidarPath, group, outPrefix = trainTest) %>% 
# mutate(group = 'subset trial', outPrefix = 'train') %>% 
st_drop_geometry() %>% 
ungroup() %>% 
slice_sample(n=2)
}

baseName,chipName,srlitePath,lidarPath,group,outPrefix
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
WV02_20150804_M1BS_1030010047414C00,chip_WV02_20150804_M1BS_1030010047414C00_CHM_AK_20140712_DoD1_l11s580_CHM_00002_v20230925,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_5/WV02_20150804_M1BS_1030010047414C00-sr-02m.tif,/explore/nobackup/people/pmontesa/userfs02/data/gliht/chm/2014/AK_20140712_DoD1_l11s580_CHM.tif,gliht_78_train_v20230925,train
WV02_20150815_M1BS_1030010046200300,chip_WV02_20150815_M1BS_1030010046200300_CHM_AK_10Jul2014_l11s627_CHM_00007_v20230925,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20150815_M1BS_1030010046200300-sr-02m.tif,/explore/nobackup/people/pmontesa/userfs02/data/gliht/chm/2014/AK_10Jul2014_l11s627_CHM.tif,gliht_78_train_v20230925,train


In [13]:
if(test2) {
pwalk(filtered_points_gis %>% select(geometry, baseName, chipName, srlitePath, lidarPath, group, outPrefix = trainTest) %>% 
      # mutate(group = 'subset trial', outPrefix = 'train') %>% 
      # st_drop_geometry() %>% 
      ungroup() %>% 
      slice_sample(n=2),
      crop_by_point_chm)
}

In [14]:
if(test2) {
result <- pmap_dfr(filtered_points_gis %>% select(geometry, baseName, chipName, srlitePath, lidarPath, group, outPrefix = trainTest) %>% 
      # st_drop_geometry() %>% 
      ungroup() %>% 
      slice_sample(n=2),
      crop_by_point_chm)
result
}


BB3338,baseName,chipName,outPrefix,srlitePath,lidarPath,group,chipStatus,ifsar_chm_min,ifsar_chm_p50,⋯,ifsar_chm_p98,ifsar_chm_max,lidar_chm_min,lidar_chm_p50,lidar_chm_p90,lidar_chm_p98,lidar_chm_max,predictor_isNA,ifsar_chm_isNA,lidar_chm_isNA
<POLYGON [m]>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
"POLYGON ((591531.6 1489362,...",WV02_20150815_M1BS_103001004769F300,chip_WV02_20150815_M1BS_103001004769F300_CHM_AK_11Jul2014_l1s564_CHM_00054_v20230925,train,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20150815_M1BS_103001004769F300-sr-02m.tif,/explore/nobackup/people/pmontesa/userfs02/data/gliht/chm/2014/AK_11Jul2014_l1s564_CHM.tif,gliht_78_train_v20230925,Chip exported,0,0.5371399,⋯,2.175189,2.688477,0,0.32,2.25625,4.0415,7.98,0,0,0
"POLYGON ((114334.6 1674460,...",WV02_20130807_M1BS_10300100243DE900,chip_WV02_20130807_M1BS_10300100243DE900_CHM_AK_20140725_l4s617_CHM_00007_v20230925,train,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_6_1/WV02_20130807_M1BS_10300100243DE900-sr-02m.tif,/explore/nobackup/people/pmontesa/userfs02/data/gliht/chm/2014/AK_20140725_l4s617_CHM.tif,gliht_78_train_v20230925,Chip exported,0,5.3354607,⋯,13.878938,19.416756,0,6.99125,11.7625,15.39125,26.695,0,0,0


## Set up parallel environment

In [15]:
plan(multicore, workers=36)

In [16]:
plan()

In [17]:
#Don't think random number generation within parallel is an issue here so quiet the warnings. See below
#https://www.r-bloggers.com/2020/09/future-1-19-1-making-sure-proper-random-numbers-are-produced-in-parallel-processing/
options(future.rng.onMisuse = "ignore")
# options()

In [18]:
nrow(filtered_points_gis)

## Batch testing trial (run rm -fr test_* train_* afterwards and run fresh export with testSubset = F)

In [19]:
testSubset = F
if(testSubset) {
    nrow(filtered_points_gis)
    filtered_points_gis <- filtered_points_gis %>%
    slice_sample(n = 1000)
    nrow(filtered_points_gis)
}


## Run full batch parallel chip generation

In [20]:
chips_df <- future_pmap_dfr(filtered_points_gis %>% select(geometry, baseName, chipName, srlitePath, lidarPath, group, outPrefix = trainTest) %>% 
                            ungroup(), 
                            possibly(crop_by_point_chm, otherwise=NULL))

In [21]:
nrow(chips_df)

## Save gpkg of chips with metadata including CHM stats

In [22]:
write_sf(chips_df %>% rename(groupName = group), path(outPath, 'chips_df_v20230926.gpkg'))

## Generate bounding box for chips that did not process due to error
Needs to be fixed

In [None]:
chips_df_dropped <- filtered_points_gis %>% anti_join(chips_df, by='chipName')
nrow(filtered_points_gis)
nrow(chips_df)
nrow(chips_df_dropped)

In [None]:
chips_df_dropped_wBB <- future_pmap_dfr(chips_df_dropped %>% 
                               select(geometry, baseName, chipName, srlitePath, lidarPath, group, outPrefix = trainTest) %>% 
                               ungroup(), 
      possibly(bb_by_point))#, otherwise=NULL))
nrow(chips_df_dropped_wBB)

In [None]:
if(nrow(chips_df_dropped_wBB) > 0) {
    chips_df_full <- bind_rows(
        chips_df,
        chips_df_dropped_wBB) 
    } else { chips_df_full <- chips_df }

nrow(chips_df_full)

In [None]:
# file_delete('/explore/nobackup/people/mmacande/srlite/chm_model/20230829_chm/chips_df_v20230829_full.gpkg')
write_sf(chips_df_full, path(outPath, 'chips_df_v20230919_full.gpkg'))

## Generate List of strips that were used in training / test chips

In [23]:
train_strips <- filtered_points %>%
group_by(srlitePath) %>% tally()
train_strips
write_csv(train_strips %>% select(srlitePath), path(outPath, 'training_strips_gliht_20230926.csv'))

srlitePath,n
<chr>,<int>
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/4_band_csv/WV02_20100815_M1BS_10300100068BF600-sr-02m.tif,1123
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/4_band_csv/WV02_20110707_M1BS_103001000CA39000-sr-02m.tif,292
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20100804_M1BS_103001000617C000-sr-02m.tif,427
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20130814_M1BS_103001002688EB00-sr-02m.tif,2876
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20130826_M1BS_1030010027974900-sr-02m.tif,2652
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20140721_M1BS_103001003407EB00-sr-02m.tif,5859
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20150815_M1BS_1030010046200300-sr-02m.tif,2664
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20150815_M1BS_103001004769F300-sr-02m.tif,2578
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20200708_M1BS_10300100AA782A00-sr-02m.tif,2247
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_5/WV02_20110812_M1BS_103001000C8BBA00-sr-02m.tif,740


## Make it all readable by others

In [None]:
file_chmod(dir_ls(outPath, recurse=T), mode='755')
# file_chmod(dir_ls('/explore/nobackup/people/mmacande/srlite/phys_model/20230131_phys', recurse=T), mode='755')


In [None]:
outPath


# ADAPT GPU workflow once chips exist
```{bash ADAPT GPU}

ssh adaptlogin
screen (or screen -r)
ssh gpulogin1
salloc -N1 -t05-00:00:00 -J chm_cnn

#Run once after gpu salloc is successful
module load singularity;
mkdir -p /lscratch/$USER/container
singularity build --sandbox /lscratch/$USER/container/above-shrubs docker://nasanccs/above-shrubs:dev

#AK SRLite through approx 2023-08-29, inv sqrt weighting with 100% 0 CHM tiles removed
cd /explore/nobackup/people/mmacande/srlite/chm_model/20230916_chm

#setup preprocess
singularity exec --env PYTHONPATH="$NOBACKUP/development/above-shrubs" --nv -B $NOBACKUP,/lscratch,/explore/nobackup/people,/explore/nobackup/projects /lscratch/$USER/container/above-shrubs python $NOBACKUP/development/above-shrubs/above_shrubs/view/chm_pipeline_cnn.py -c /explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/above_shrubs_gliht_cnn_v20230926.yaml -s setup preprocess

singularity exec --env PYTHONPATH="$NOBACKUP/development/above-shrubs" --nv -B $NOBACKUP,/lscratch,/explore/nobackup/people,/explore/nobackup/projects /lscratch/$USER/container/above-shrubs python $NOBACKUP/development/above-shrubs/above_shrubs/view/chm_pipeline_cnn.py -c /explore/nobackup/people/mmacande/srlite/chm_model/20230916_chm/above_shrubs_cnn_v20230916.yaml -s train

singularity exec --env PYTHONPATH="$NOBACKUP/development/above-shrubs" --nv -B $NOBACKUP,/lscratch,/explore/nobackup/people,/explore/nobackup/projects /lscratch/$USER/container/above-shrubs python $NOBACKUP/development/above-shrubs/above_shrubs/view/chm_pipeline_cnn.py -c /explore/nobackup/people/mmacande/srlite/chm_model/20230916_chm/above_shrubs_cnn_v20230916.yaml -s validate

#reorganize validate chips into subfolder
mkdir products/validate
mv products/*.npy products/validate/.

singularity exec --env PYTHONPATH="$NOBACKUP/development/above-shrubs" --nv -B $NOBACKUP,/lscratch,/explore/nobackup/people,/explore/nobackup/projects /lscratch/$USER/container/above-shrubs python $NOBACKUP/development/above-shrubs/above_shrubs/view/chm_pipeline_cnn.py -c /explore/nobackup/people/mmacande/srlite/chm_model/20230829_chm/above_shrubs_cnn_v1.yaml -s predict

singularity exec --env PYTHONPATH="$NOBACKUP/development/above-shrubs" --nv -B $NOBACKUP,/lscratch,/explore/nobackup/people,/explore/nobackup/projects /lscratch/$USER/container/above-shrubs python $NOBACKUP/development/above-shrubs/above_shrubs/view/chm_pipeline_cnn.py -c /explore/nobackup/people/mmacande/srlite/chm_model/20230916_chm/above_shrubs_cnn_v20230916.yaml -s predict

#Parallel inference across the cluster using Slurm:

for i in {1..20}; do sbatch scripts/slurm_predict.sh; done

```

## ADAPT cleanup outputs to re-run
```
cd /explore/nobackup/people/mmacande/srlite/chm_model/20230829_chm

find products -type d -name WV03* |less
find products -type d -name WV03* -exec rm -rf {} +

find products -type d -name WV02* |less
find products -type d -name WV02* -exec rm -rf {} +

find products -type f -name '*.lock'
find products -type f -name '*.lock' -delete

find products -empty -type d
find products -empty -type d -delete
```

# Generate table of CHMs for viewer

In [20]:
cloudmasks <- tibble(cloudmask_path = dir_ls('/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask', glob='*/*cloudmask.tif', recurse=T)) %>%
mutate(cloudmask_file = path_file(cloudmask_path),
       catid = str_sub(cloudmask_file, 20, 35))
nrow(cloudmasks)
cloudmasks %>% head()

cloudmask_path,cloudmask_file,catid
<fs::path>,<chr>,<chr>
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20100502_M1BS_10300100045FA300-toa.cloudmask.tif,WV02_20100502_M1BS_10300100045FA300-toa.cloudmask.tif,10300100045FA300
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20100505_M1BS_1030010005215900-toa.cloudmask.tif,WV02_20100505_M1BS_1030010005215900-toa.cloudmask.tif,1030010005215900
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20100526_M1BS_1030010005890800-toa.cloudmask.tif,WV02_20100526_M1BS_1030010005890800-toa.cloudmask.tif,1030010005890800
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20100529_M1BS_10300100054FCA00-toa.cloudmask.tif,WV02_20100529_M1BS_10300100054FCA00-toa.cloudmask.tif,10300100054FCA00
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20100531_M1BS_10300100053C1900-toa.cloudmask.tif,WV02_20100531_M1BS_10300100053C1900-toa.cloudmask.tif,10300100053C1900
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20100607_M1BS_10300100056B1D00-toa.cloudmask.tif,WV02_20100607_M1BS_10300100056B1D00-toa.cloudmask.tif,10300100056B1D00


In [21]:
# ccdc <- tibble(ccdc_path = dir_ls('/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/ccdc', glob='*/*ccdc.tif', recurse=T)) %>%
ccdc <- bind_rows(
    tibble(ccdc_path = dir_ls('/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska/', glob='*/*ccdc.tif', recurse=T),
           source = '/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska'),
    tibble(ccdc_path = dir_ls('/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230807_alaska_batch23/alaska', glob='*/*ccdc.tif', recurse=T),
           source = '/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230807_alaska_batch23') ) %>%
mutate(ccdc_file = path_file(ccdc_path),
       catid = str_sub(ccdc_file, 20, 35))

nrow(ccdc)
# ccdc %>% head()
dups <- ccdc %>% group_by(catid) %>% tally() %>% filter(n > 1)
print('duplicates:')
nrow(dups)
      
ccdc <- bind_rows(
    ccdc %>% filter(source == '/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska') %>% anti_join(dups),
    ccdc %>% filter(source == '/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230807_alaska_batch23'))
nrow(ccdc)


[1] "duplicates:"


[1m[22mJoining, by = "catid"


In [22]:
toas <- tibble(toa_path = dir_ls('/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa', glob='*/*toa.tif', recurse=T)) %>%
mutate(toa_file = path_file(toa_path),
       catid = str_sub(toa_file, 20, 35))

nrow(toas)
toas %>% head()

toa_path,toa_file,catid
<fs::path>,<chr>,<chr>
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20100502_M1BS_10300100045FA300-toa.tif,WV02_20100502_M1BS_10300100045FA300-toa.tif,10300100045FA300
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20100505_M1BS_1030010005215900-toa.tif,WV02_20100505_M1BS_1030010005215900-toa.tif,1030010005215900
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20100526_M1BS_1030010005890800-toa.tif,WV02_20100526_M1BS_1030010005890800-toa.tif,1030010005890800
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20100529_M1BS_10300100054FCA00-toa.tif,WV02_20100529_M1BS_10300100054FCA00-toa.tif,10300100054FCA00
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20100531_M1BS_10300100053C1900-toa.tif,WV02_20100531_M1BS_10300100053C1900-toa.tif,10300100053C1900
/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20100607_M1BS_10300100056B1D00-toa.tif,WV02_20100607_M1BS_10300100056B1D00-toa.tif,10300100056B1D00


In [30]:
chms_predicted <- tibble(
    chm_path = dir_ls('/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/products_train_strips', glob='*/*.tif', recurse=T)) %>%
    mutate(chm_file = path_file(chm_path),
           catid = str_sub(chm_file,20,35)) %>%
select(catid, chm_file, chm_path) %>%
left_join(srlite_fp %>% select(catid, srlitePath)) %>% #, arcBor)) %>%
rename(srlite_path = srlitePath) %>%
left_join(cloudmasks %>% select(catid, cloudmask_path)) %>%
left_join(toas %>% select(catid, toa_path)) %>%
left_join(ccdc %>% select(catid, ccdc_path)) %>%
st_as_sf()
 
nrow(chms_predicted)
head(chms_predicted %>% st_drop_geometry())
write_csv(chms_predicted %>% st_drop_geometry(), '/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/chm_srlite_list_20230926.csv')
write_sf(chms_predicted, '/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/chm_srlite_list_20230926.gpkg')

[1m[22mJoining, by = "catid"
[1m[22mJoining, by = "catid"
[1m[22mJoining, by = "catid"
[1m[22mJoining, by = "catid"


catid,chm_file,chm_path,srlite_path,cloudmask_path,toa_path,ccdc_path
<chr>,<chr>,<fs::path>,<fs::path>,<fs::path>,<fs::path>,<fs::path>
103001000617C000,WV02_20100804_M1BS_103001000617C000-sr-02m.cnn-chm-v1.tif,/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/products_train_strips/WV02_20100804_M1BS_103001000617C000-sr-02m/WV02_20100804_M1BS_103001000617C000-sr-02m.cnn-chm-v1.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_4/WV02_20100804_M1BS_103001000617C000-sr-02m.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/split_4/WV02_20100804_M1BS_103001000617C000-toa.cloudmask.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/split_4/WV02_20100804_M1BS_103001000617C000-toa.tif,/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska/WV02_20100804_M1BS_103001000617C000-ccdc.tif
10300100068BF600,WV02_20100815_M1BS_10300100068BF600-sr-02m.cnn-chm-v1.tif,/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/products_train_strips/WV02_20100815_M1BS_10300100068BF600-sr-02m/WV02_20100815_M1BS_10300100068BF600-sr-02m.cnn-chm-v1.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/4_band_csv/WV02_20100815_M1BS_10300100068BF600-sr-02m.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20100815_M1BS_10300100068BF600-toa.cloudmask.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20100815_M1BS_10300100068BF600-toa.tif,/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska/WV02_20100815_M1BS_10300100068BF600-ccdc.tif
103001000CA39000,WV02_20110707_M1BS_103001000CA39000-sr-02m.cnn-chm-v1.tif,/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/products_train_strips/WV02_20110707_M1BS_103001000CA39000-sr-02m/WV02_20110707_M1BS_103001000CA39000-sr-02m.cnn-chm-v1.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/4_band_csv/WV02_20110707_M1BS_103001000CA39000-sr-02m.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/4_bands/WV02_20110707_M1BS_103001000CA39000-toa.cloudmask.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/4_bands/WV02_20110707_M1BS_103001000CA39000-toa.tif,/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska/WV02_20110707_M1BS_103001000CA39000-ccdc.tif
103001000D6CEA00,WV02_20110811_M1BS_103001000D6CEA00-sr-02m.cnn-chm-v1.tif,/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/products_train_strips/WV02_20110811_M1BS_103001000D6CEA00-sr-02m/WV02_20110811_M1BS_103001000D6CEA00-sr-02m.cnn-chm-v1.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_6_0/WV02_20110811_M1BS_103001000D6CEA00-sr-02m.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/split_6/WV02_20110811_M1BS_103001000D6CEA00-toa.cloudmask.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/split_6_0/WV02_20110811_M1BS_103001000D6CEA00-toa.tif,/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska/WV02_20110811_M1BS_103001000D6CEA00-ccdc.tif
103001000C8BBA00,WV02_20110812_M1BS_103001000C8BBA00-sr-02m.cnn-chm-v1.tif,/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/products_train_strips/WV02_20110812_M1BS_103001000C8BBA00-sr-02m/WV02_20110812_M1BS_103001000C8BBA00-sr-02m.cnn-chm-v1.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_5/WV02_20110812_M1BS_103001000C8BBA00-sr-02m.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/split_5/WV02_20110812_M1BS_103001000C8BBA00-toa.cloudmask.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/split_5/WV02_20110812_M1BS_103001000C8BBA00-toa.tif,/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska/WV02_20110812_M1BS_103001000C8BBA00-ccdc.tif
103001000D56BA00,WV02_20110812_M1BS_103001000D56BA00-sr-02m.cnn-chm-v1.tif,/explore/nobackup/people/mmacande/srlite/chm_model/20230926_chm_dtm/products_train_strips/WV02_20110812_M1BS_103001000D56BA00-sr-02m/WV02_20110812_M1BS_103001000D56BA00-sr-02m.cnn-chm-v1.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/srlite/alaska/batch_1/8_band_csv/split_6_0/WV02_20110812_M1BS_103001000D56BA00-sr-02m.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/cloudmask/alaska_batch_1/split_6/WV02_20110812_M1BS_103001000D56BA00-toa.cloudmask.tif,/explore/nobackup/projects/ilab/data/srlite/products/srlite_1.0.1/toa/alaska/split_6_0/WV02_20110812_M1BS_103001000D56BA00-toa.tif,/explore/nobackup/people/mmacande/srlite/srlite_shared/ccdc_20230214/alaska/WV02_20110812_M1BS_103001000D56BA00-ccdc.tif


# Get footprints of Other Lidar

In [33]:
lidar_fp <- tibble(lidar_fp_path = dir_ls('/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar', glob='*.gpkg', recurse=T),
                   lidar_fp_file = path_file(lidar_fp_path),
                   lidar_base = str_sub(lidar_fp_file, 0, -13))
lidar_fp
read_sf_add_path <- function(path) {
    fp <- read_sf(path) %>%
    mutate(lidar_fp_path = path) %>%
    st_transform(4326)
    }

lidar_fps <- map_dfr(dir_ls('/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar', glob='*.gpkg', recurse=T), read_sf_add_path) %>%
group_by(lidar_fp_path) %>%
summarize() %>%
mutate(lidar_fp_file = path_file(lidar_fp_path),
       lidar_base = str_sub(lidar_fp_file, 0, -13))


lidar_fp_path,lidar_fp_file,lidar_base
<fs::path>,<chr>,<chr>
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/Kenai_2008/Kenai_2008_chm_fp.gpkg,Kenai_2008_chm_fp.gpkg,Kenai_2008
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/NSB_lidar_Anaktuvuk_Pass_2019/NSB_lidar_Anaktuvuk_Pass_2019_chm_fp.gpkg,NSB_lidar_Anaktuvuk_Pass_2019_chm_fp.gpkg,NSB_lidar_Anaktuvuk_Pass_2019
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/Unalakleet_2019/Unalakleet_2019_chm_fp.gpkg,Unalakleet_2019_chm_fp.gpkg,Unalakleet_2019
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/fairbanks_ql1_2017/fairbanks_ql1_2017_chm_fp.gpkg,fairbanks_ql1_2017_chm_fp.gpkg,fairbanks_ql1_2017
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/fairbanks_ql2_2017/fairbanks_ql2_2017_chm_fp.gpkg,fairbanks_ql2_2017_chm_fp.gpkg,fairbanks_ql2_2017
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/infrastructure_2011/infrastructure_2011_26905_chm_fp.gpkg,infrastructure_2011_26905_chm_fp.gpkg,infrastructure_2011_26905
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/infrastructure_2011/infrastructure_2011_26906_chm_fp.gpkg,infrastructure_2011_26906_chm_fp.gpkg,infrastructure_2011_26906
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/infrastructure_2011/infrastructure_2011_26907_chm_fp.gpkg,infrastructure_2011_26907_chm_fp.gpkg,infrastructure_2011_26907
/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,yukon_flats_2009_chm_fp.gpkg,yukon_flats_2009


In [36]:
lidar_fps <- lidar_fps %>%
st_transform(3338) %>%
mutate(area = st_area(geom),
      area_sqkm = as.numeric(area) / 1000000) %>%
st_transform(4326)

In [38]:
lidar_fps %>% st_drop_geometry()
write_sf(lidar_fps, '/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/dggs_lidar_fp_v20230927.gpkg')

Unnamed: 0_level_0,lidar_fp_path,lidar_fp_file,lidar_base,area,area_sqkm
Unnamed: 0_level_1,<fs::path>,<chr>,<chr>,<[m^2]>,<dbl>
1,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/fairbanks_ql1_2017/fairbanks_ql1_2017_chm_fp.gpkg,fairbanks_ql1_2017_chm_fp.gpkg,fairbanks_ql1_2017,1005917390 [m^2],1005.91739
2,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/fairbanks_ql2_2017/fairbanks_ql2_2017_chm_fp.gpkg,fairbanks_ql2_2017_chm_fp.gpkg,fairbanks_ql2_2017,4805728376 [m^2],4805.72838
3,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/infrastructure_2011/infrastructure_2011_26905_chm_fp.gpkg,infrastructure_2011_26905_chm_fp.gpkg,infrastructure_2011_26905,958302097 [m^2],958.3021
4,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/infrastructure_2011/infrastructure_2011_26906_chm_fp.gpkg,infrastructure_2011_26906_chm_fp.gpkg,infrastructure_2011_26906,6417262908 [m^2],6417.26291
5,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/infrastructure_2011/infrastructure_2011_26907_chm_fp.gpkg,infrastructure_2011_26907_chm_fp.gpkg,infrastructure_2011_26907,990567400 [m^2],990.5674
6,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/Kenai_2008/Kenai_2008_chm_fp.gpkg,Kenai_2008_chm_fp.gpkg,Kenai_2008,11374106688 [m^2],11374.10669
7,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/NSB_lidar_Anaktuvuk_Pass_2019/NSB_lidar_Anaktuvuk_Pass_2019_chm_fp.gpkg,NSB_lidar_Anaktuvuk_Pass_2019_chm_fp.gpkg,NSB_lidar_Anaktuvuk_Pass_2019,140384482 [m^2],140.38448
8,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/Unalakleet_2019/Unalakleet_2019_chm_fp.gpkg,Unalakleet_2019_chm_fp.gpkg,Unalakleet_2019,67332086 [m^2],67.33209
9,/explore/nobackup/projects/dem/AK_IFSAR/dggs.alaska.gov/public_lidar/yukon_flats_2009/yukon_flats_2009_chm_fp.gpkg,yukon_flats_2009_chm_fp.gpkg,yukon_flats_2009,2600500090 [m^2],2600.50009


## get raw chm summary vat of chips
```
#JupyterLinks/nobackup/srlite/chm_model/20230829_chm/extract_chip_samples.ipynb
# scp chm_train_chip_tbl.csv.tar.gz mason:/data/gis/gis_projects/2021/.
# On Mason
# mv ../../chm_train_chip_tbl.csv.tar.gz .
# tar xzvf chm_train_chip_tbl.csv.tar.gz 
chm_chips_freq <- read_csv(path(gis, 'gis_projects/2021/21-347_NASA_SmallSat/20230825_srlite_ak/chm_train_chip_tbl_transposed.csv'),
                           col_names=c('chm_m', 'count'), skip=0) %>%
  mutate(chm_cm = round(chm_m * 100),
         chm_dm = round(chm_m * 10),
         chm_m = round(chm_m))

chm_chips_x_cm <- chm_chips_freq %>%
  group_by(chm_cm) %>%
  summarize(count = sum(count)) %>%
  mutate(freq = count / sum(count))

chm_chips_x_dm <- chm_chips_freq %>%
  group_by(chm_dm) %>%
  summarize(count = sum(count)) %>%
  mutate(freq = count / sum(count))

chm_chips_x_m <- chm_chips_freq %>%
  group_by(chm_m) %>%
  summarize(count = sum(count)) %>%
  mutate(freq = count / sum(count))

```

# OLD Working below here

In [None]:
sceneList <- chips_df_full %>% select(srlitePath) %>% distinct() %>% arrange(srlitePath) %>% rename(srlite_path = srlitePath)
write_csv(sceneList, '/explore/nobackup/people/mmacande/srlite/chm_model/20230829_chm/list_srlite_path_for_predstack_v20230829.csv')

Review Outputs and Construct Table for interactive viz

In [None]:
srlite_fp <- read_sf('/explore/nobackup/people/pmontesa/srlite/footprints_vhrmeta_SR_alaska.gpkg') %>%
  mutate(srlite_path = path(path, file))


In [None]:
srlite_summer <- srlite_fp %>% st_drop_geometry() %>% filter(month %in% c(6,7,8,9))
srlite_summer %>% group_by(month) %>% tally()
srlite_summer %>% head()

In [None]:
srlite_metrics <- read_csv('/explore/nobackup/people/pmontesa/userfs02/projects/ilab/above_shrubs/data/tables/srlite_metrics_alaska_202308291350.csv') %>%
  select(-`...1`)
dups <- srlite_metrics %>%
  dplyr::group_by(model, file, type, footprint_name, catid, sensor, year, month, date, band_name) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  dplyr::filter(n > 1L) 


srlite_metrics_wide <- srlite_metrics %>%
  filter(band_name %in% c('Blue','Green','Red','NIR')) %>%
  pivot_wider(id_cols=c(model, file, type, footprint_name, catid, sensor, year, month, date), names_from=band_name, values_from=intercept:rmse_norm, values_fn=mean) #%>%

srlite_metrics_wide_78 <- srlite_metrics_wide %>%
  filter(month %in% c(7,8))

srlite_metrics_wide_78_r2filt <- srlite_metrics_wide_78 %>%
  filter(r2_score_NIR >= 0.75)

srlite_metrics_wide_r2filt <- srlite_metrics_wide %>%
  filter(r2_score_NIR >= 0.75)

nrow(srlite_metrics_wide_78_r2filt)

srlite_metrics_wide_r2filt %>% head()

In [None]:
srlite_summer_r2_filt <- srlite_summer %>%
semi_join(srlite_metrics_wide_r2filt, by='catid')

# srlite_summer %>% head()
# srlite_metrics_wide_r2filt %>% head()

srlite_summer_r2_filt %>% group_by(month) %>% tally()

In [None]:
predicted_set1 <- read_csv('/explore/nobackup/people/mmacande/srlite/chm_model/20230829_chm/list_srlite_path_for_predstack_v20230829.csv') %>%
mutate(srlite_file = path_file(srlite_path),
       catid = str_sub(srlite_file,20,35))

predicted_set1 %>% head()

srlite_batch2 <- srlite_summer_r2_filt %>%
anti_join(predicted_set1)

srlite_batch2 %>% group_by(month) %>% tally()


In [None]:
chms_predicted <- tibble(
    chm_path = dir_ls('/explore/nobackup/people/mmacande/srlite/chm_model/20230829_chm/products', glob='*/*.tif', recurse=T))
nrow(chms_predicted)
chms_predicted %>% head()