# Extract subset of latest NGA footprints
## Setup

In [1]:
library(sf)
library(tidyverse)
library(janitor)
library(magrittr)
library(lubridate)
library(fs)
library(glue)
library(readxl)
library(terra)
library(furrr)
library(DT)
library(tictoc)
library(gdalUtilities)
# library(mapview)
# library(spatialEco)
# library(gfcanalysis)
srliteBase <- path('/explore/nobackup/projects/above/misc/ABoVE_Shrubs')

Linking to GEOS 3.11.1, GDAL 3.6.1, PROJ 9.1.0; sf_use_s2() is TRUE

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.0      [32m✔[39m [34mpurrr  [39m 1.0.0 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.1      [32m✔[39m [34mstringr[39m 1.5.0 
[32m✔[39m [34mreadr  [39m 2.1.3      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test



Attaching package: ‘magrittr’


The following object is masked from ‘package:purrr’:

    set_names


The following object is masked from ‘package:tidyr’:

    ext

In [2]:
#GDAL path fixup on ADAPT, may just be me
secondPath <- sf_proj_search_paths()[[2]]
secondPath
sf_proj_search_paths(secondPath)

## Find latest ADAPT NGA footprints file and get info

In [3]:
dir_ls('/css/nga/INDEX/')
dir_ls('/css/nga/INDEX/20231027')
dir_ls('/css/nga/INDEX/current')

st_layers('/css/nga/INDEX/20231027/nga_footprint_20231027.gdb')

Driver: OpenFileGDB 
Available layers:
                layer_name geometry_type features fields
1 nga_footprint_archive_v2 Multi Polygon  9604140     48
2  nga_footprint_master_v2 Multi Polygon 26279175     44

## Extract and Save Spatial Subset(s)

In [4]:
# nga_fp_w <- st_read('/css/nga/INDEX/20231027/nga_footprint_20231027.gdb', 
nga_fp_w <- st_read('/css/nga/INDEX/20240202/nga_footprint_20240202.gdb', 
                    query = "SELECT * FROM \"nga_footprint_master_v2\" WHERE cent_long < -122 AND cent_lat > 50 AND cent_lat < 72")

Reading query `SELECT * FROM "nga_footprint_master_v2" WHERE cent_long < -122 AND cent_lat > 50 AND cent_lat < 72' from data source `/nfs4m/css/nga/data/INDEX/20240202/nga_footprint_20240202.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 1231141 features and 44 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -180 ymin: 49.89032 xmax: 180.0001 ymax: 72.13867
Geodetic CRS:  WGS 84


In [10]:
nrow(nga_fp_w)

In [5]:
write_sf(nga_fp_w, '/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subsets.gpkg', layer='nga_footprint_master_v2_ak_west')

In [8]:
nga_fp_e <- st_read('/css/nga/INDEX/20240202/nga_footprint_20240202.gdb', 
                    # layer='nga_footprint_master_v2', 
                    query = "SELECT * FROM \"nga_footprint_master_v2\" WHERE cent_long > 172 AND cent_lat > 51 AND cent_lat < 54")

Reading query `SELECT * FROM "nga_footprint_master_v2" WHERE cent_long > 172 AND cent_lat > 51 AND cent_lat < 54' from data source `/nfs4m/css/nga/data/INDEX/20240202/nga_footprint_20240202.gdb' 
  using driver `OpenFileGDB'
Simple feature collection with 19515 features and 44 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -180 ymin: 50.91949 xmax: 180.0001 ymax: 54.05672
Geodetic CRS:  WGS 84


In [9]:
nrow(nga_fp_e)
write_sf(nga_fp_e, '/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subsets.gpkg', layer='nga_footprint_master_v2_ak_east')

In [17]:
# nga_fp_e <- read_sf('/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subsets.gpkg', layer='nga_footprint_master_v2_ak_east')
# nrow(nga_fp_e)

In [11]:
# nga_fp_w <- read_sf('/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subsets.gpkg', layer='nga_footprint_master_v2_ak_west')
# nrow(nga_fp_w)

In [18]:
# colnames(nga_fp_e)

In [19]:
# colnames(nga_fp_w)

In [20]:
nga_fp_ak <- bind_rows(nga_fp_w, nga_fp_e)

In [21]:
nrow(nga_fp_ak)
write_sf(nga_fp_ak, '/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subset_ak.gpkg', layer='nga_footprint_master_v2_ak')


## Filter Subsetted NGA Footprints

In [22]:
nga_fp_ak <- read_sf('/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subset_ak.gpkg', layer='nga_footprint_master_v2_ak')  %>%
rename(catid = catalog_id)

In [23]:
# nga_fp_ak <- nga_fp_ak

colnames(nga_fp_ak)
datatable(head(nga_fp_ak))
nrow(nga_fp_ak)


In [24]:
# nga_fp_ak %>% st_drop_geometry() %>% filter(between(acq_month, 6, 9)) %>% group_by(acq_year, acq_month) %>% tally()
nga_fp_ak %>% st_drop_geometry() %>% filter(between(acq_month, 7,8), sensor != 'WV01') %>% group_by(acq_year, sensor) %>% tally() %>% pivot_wider(names_from=sensor, values_from=n, values_fill=0)

acq_year,IK01,QB02,GE01,WV02,WV03
<int>,<int>,<int>,<int>,<int>,<int>
2000,104,0,0,0,0
2001,460,0,0,0,0
2002,322,446,0,0,0
2003,383,394,0,0,0
2004,310,757,0,0,0
2005,300,875,0,0,0
2006,515,527,0,0,0
2007,370,566,0,0,0
2008,521,1347,0,0,0
2009,707,2061,1646,0,0


In [26]:
nga_fp_ak_tbl <- nga_fp_ak %>% st_drop_geometry()

### Filter to Multispectral M1BS Only

In [27]:
nga_fp_ak %>% st_drop_geometry() %>% group_by(spec_type) %>% tally()

nga_fp_ak_filt <- nga_fp_ak %>%
filter(spec_type == 'Multispectral')
nrow(nga_fp_ak_filt)

spec_type,n
<chr>,<int>
Multispectral,507854
Panchromatic,742762
Pansharpened,20
SWIR,20


In [28]:
nga_fp_ak_filt %>% st_drop_geometry() %>% group_by(prod_code, sensor) %>% tally()
nga_fp_ak_filt <- nga_fp_ak_filt %>%
filter(prod_code == 'M1BS')
nrow(nga_fp_ak_filt)

prod_code,sensor,n
<chr>,<chr>,<int>
A1BS,WV03,3740
C1BA,WV03,15
C1BB,WV03,17
M1BS,GE01,59864
M1BS,IK01,11910
M1BS,QB02,32746
M1BS,WV02,281133
M1BS,WV03,110184
M1BS,WV04,75
m2as,WV02,6


### Filter out midnight sun scenes (summer months, low sun elevation)

In [29]:
nga_fp_ak_filt <- nga_fp_ak_filt %>%
filter((between(acq_month,4,8) & sun_elev > 17) | (acq_month < 4 | acq_month > 8)) %>%
# filter(sun_elev >= 17) %>%
filter(!is.na(sun_elev))

nga_fp_ak_filt %>%
st_drop_geometry() %>% 
group_by(acq_month) %>% 
summarize(min(sun_elev), max(sun_elev), n())
nrow(nga_fp_ak_filt)

acq_month,min(sun_elev),max(sun_elev),n()
<int>,<dbl>,<dbl>,<int>
1,0.1,22.4,14349
2,0.9,31.4,37503
3,10.3,44.1,68707
4,21.3,54.6,64884
5,32.1,61.4,55774
6,37.5,63.6,48077
7,34.3,62.5,41291
8,24.9,57.6,37464
9,14.9,47.4,36880
10,3.1,35.8,35534


In [30]:
nga_fp_ak_filt <- nga_fp_ak_filt %>%
mutate(s_pathonly = path_dir(s_filepath))

In [31]:
str(nga_fp_ak_filt)

sf [473,019 × 46] (S3: sf/tbl_df/tbl/data.frame)
 $ fpuid       : chr [1:473019] "{699FE9CB-AA92-4925-8768-8BFDB069A482}" "{B9D2D6CD-3736-4A59-86AE-BE60EE11BC06}" "{E3F0368A-18FD-4422-AFBC-4821DC034A6F}" "{AAA3B333-E9CD-4D0A-9633-9C256D9E4841}" ...
 $ strip_id    : chr [1:473019] "WV02_10300100F072D900_M1BS_508006054100_01" "WV03_104001008F11AB00_M1BS_508009123070_01" "WV03_104001008F11AB00_M1BS_508009123070_01" "WV03_104001008F11AB00_M1BS_508009123070_01" ...
 $ scene_id    : chr [1:473019] "WV02_20231021195402_10300100F072D900_23OCT21195402-M1BS-508006054100_01_P003" "WV03_20231119210714_104001008F11AB00_23NOV19210714-M1BS-508009123070_01_P001" "WV03_20231119210716_104001008F11AB00_23NOV19210716-M1BS-508009123070_01_P002" "WV03_20231119210717_104001008F11AB00_23NOV19210717-M1BS-508009123070_01_P003" ...
 $ status      : chr [1:473019] "pending" "pending" "pending" "pending" ...
 $ catid       : chr [1:473019] "10300100F072D900" "104001008F11AB00" "104001008F11AB00" "104001008F11AB00"

# Dissolve on catid and folder and include attributes (as mean if numeric and varying)

In [47]:
sf_use_s2(FALSE)

In [33]:
suppressMessages(
    nga_fp_ak_filt_diss <- nga_fp_ak_filt %>%
    st_make_valid() %>%
    # st_drop_geometry() %>%
    group_by(strip_id, catid, sensor, acq_year, acq_month, spec_type, prod_code, prod_short, file_fmt, s_pathonly, previewurl, zone_id, source) %>%
    summarize(n_scenes = n(),
             min_acq_time = min(acq_time),
             max_acq_time = max(acq_time),
             cloudcover = mean(cloudcover),
             cent_lat = mean(cent_lat),
             cent_long = mean(cent_long),
             bits_pixel = mean(bits_pixel),
             off_nadir = mean(off_nadir),
             sun_elev = mean(sun_elev),
             prod_gsd = mean(prod_gsd),
             ref_height = mean(ref_height),
             xtrackva = mean(xtrackva),
             bands = mean(bands),
             min_added_date = min(added_date),
             max_added_date = max(added_date),
             file_sz = sum(file_sz),
             avsunazim = mean(avsunazim),
             avtargetaz = mean(avtargetaz),
             det_pitch = mean(det_pitch),
             intrackva = mean(intrackva),
             stereopair = first(stereopair),
             pairname = first(pairname))
)

In [34]:
nrow(nga_fp_ak_filt_diss)

# Review and de-duplicated duplicate catids

In [36]:
nga_fp_ak_filt_diss_nGt1 <- nga_fp_ak_filt_diss %>% st_drop_geometry() %>% group_by(catid) %>% tally() %>% filter(n > 1)
print(c('duplicate catids, ', nrow(nga_fp_ak_filt_diss_nGt1)))

nga_fp_ak_filt_diss_nGt1_check <- nga_fp_ak_filt_diss %>%
semi_join(nga_fp_ak_filt_diss_nGt1)

datatable(head(nga_fp_ak_filt_diss_nGt1_check))

[1] "duplicate catids, " "642"               


[1m[22mJoining, by = "catid"


In [37]:
write_sf(nga_fp_ak_filt_diss_nGt1_check, '/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subset_ak_filtered.gpkg', layer='nga_footprint_master_v2_ak_m1bs_dupCatid')


### Remove duplicates. First pick strip with the most scenes. If tied, then pick strip added most recently. No ties on second test to ensure no dups

In [38]:
nga_fp_ak_filt_diss <- nga_fp_ak_filt_diss %>%
group_by(catid) %>%
slice_max(n_scenes, n=1, with_ties=T) %>%
slice_max(max_added_date, n=1, with_ties=F)

nrow(nga_fp_ak_filt_diss)

In [42]:
print(c('duplicate catids after removal'))
nga_fp_ak_filt_diss %>% st_drop_geometry() %>% group_by(catid) %>% tally() %>% filter(n > 1)


[1] "duplicate catids after removal"


catid,n
<chr>,<int>


In [39]:
write_sf(nga_fp_ak_filt_diss, '/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subset_ak_filtered.gpkg', layer='nga_footprint_master_v2_ak_m1bs_noDupCatids')


In [40]:
nga_fp_ak_filt_diss %>% st_drop_geometry() %>% group_by(round(cloudcover * 10)) %>% tally()
nga_fp_ak_filt_diss <- nga_fp_ak_filt_diss %>%
filter(cloudcover <= 0.5)

nrow(nga_fp_ak_filt_diss)

round(cloudcover * 10),n
<dbl>,<int>
-9990,716
-8991,1
-8990,1
-7990,1
-6993,1
0,49336
1,11381
2,9165
3,5749
4,2296


In [41]:
nga_fp_ak_filt_diss %>% st_drop_geometry() %>% group_by(acq_month) %>% tally()
nga_fp_ak_filt_diss <- nga_fp_ak_filt_diss %>%
filter(between(acq_month, 6, 9))

nrow(nga_fp_ak_filt_diss)

acq_month,n
<int>,<int>
1,1881
2,5587
3,11560
4,10961
5,9216
6,8602
7,7951
8,6938
9,6689
10,5561


In [43]:
nga_fp_ak_filt_diss %>% st_drop_geometry() %>% group_by(sensor) %>% tally()

sensor,n
<chr>,<int>
GE01,5102
IK01,1288
QB02,2795
WV02,14405
WV03,6579
WV04,11


In [44]:
srlite_202310 <- tibble(srlitePath = dir_ls('/explore/nobackup/projects/above/misc/ABoVE_Shrubs/srlite/002m', glob='*.tif')) %>%
mutate(srliteFile = path_file(srlitePath),
       catid = str_sub(srliteFile, 20, 35))
nrow(srlite_202310)

In [45]:
nga_fp_ak_filt_wSrlite <- nga_fp_ak_filt_diss %>%
left_join(srlite_202310)

[1m[22mJoining, by = "catid"


In [46]:
datatable(head(nga_fp_ak_filt_wSrlite))
nrow(nga_fp_ak_filt_wSrlite)

In [48]:
# file_delete('/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20231027_subset_ak_filtered.gpkg')
write_sf(nga_fp_ak_filt_wSrlite, '/explore/nobackup/projects/above/misc/ABoVE_Shrubs/footprints/nga_footprint_20240202_subset_ak_filtered.gpkg', layer='nga_footprint_master_v2_ak_m1bs_ccLtp5_sunelevGt17_6789_wSRLite')
