# Footprint the extents of raster input and output for SR-Lite

sync to:
/adapt/nobackup/people/pmontesa/userfs02/projects/srlite/notebooks/footprint_srlite.ipynb


In [78]:
# Copy this notebook from the geoscitools code checkout to the SR-Lite project notebooks directory.
!rsync /home/pmontesa/code/geoscitools/footprint_srlite.ipynb /adapt/nobackup/people/pmontesa/userfs02/projects/srlite/notebooks/footprint_srlite.ipynb

In [1]:
import rasterio
import rasterio.features
import rasterio.warp
from rasterio.plot import show
from rasterio import Affine, MemoryFile
from rasterio.enums import Resampling

import pandas as pd
import geopandas as gpd
import shapely
from shapely.geometry import box
import fiona
from fiona.crs import from_epsg
import pprint

import glob
import os

import sys
sys.path.append('/efs/pmontesa/code/geoscitools')
sys.path.append('/home/pmontesa/code/geoscitools')
import maplib
import footprintlib
from footprintlib import *

# https://gis.stackexchange.com/questions/375577/how-do-i-write-out-a-mixed-geometry-geodataframe-to-a-geopackage
# https://gis.stackexchange.com/questions/187877/how-to-polygonize-raster-to-shapely-polygons

### Set up dictionaries of parameters

## Build a dictionary of the raster datasets for footprinting

In [2]:
def _local_footprint_config(data_dir, data_source, image_type, search_tail_string,
                            get_batch, out_footprint_dir):
    """Build one footprinting parameter dict for rasters stored on the local filesystem.

    Only the values that differ between datasets are arguments; every other
    key carries the value shared by all entries of ``dict_list``.

    Parameters
    ----------
    data_dir : str
        Top directory that is searched for rasters.
    data_source : str
        Short label of the dataset; used in the output GeoPackage file name.
    image_type : str
        Middle part of the glob pattern ('**' for a recursive search, '' for none).
    search_tail_string : str
        Trailing part of the glob pattern that matches the raster file names.
    get_batch : bool
        Stored under 'GET_BATCH'; when true the footprinting cell adds a
        'batch_name' column.
    out_footprint_dir : str
        Directory where the footprint GeoPackage is written.

    Returns
    -------
    dict
        Parameter dictionary with the same keys, in the same order, for every dataset.
    """
    return {
        'location': 'local',
        'data_dir': data_dir,
        'data_source': data_source,
        'image_type': image_type,
        'search_tail_string': search_tail_string,
        'search_link_string': '',
        'GET_BATCH': get_batch,
        'DO_DATAMASK': False,
        'GET_ONLY_DATASETMASK': True,
        'MANY_CRS': True,
        'out_footprint_dir': out_footprint_dir,
        'footprint_name_SPLIT_CHAR': '_',
        'footprint_name_SPLIT_POS': 0,
        'aws_credential_fn': None,
    }


dict_list = [
    # EVHR TOA: /adapt/nobackup/projects/ilab/projects/srlite/input
    _local_footprint_config(
        data_dir="/adapt/nobackup/projects/ilab/projects/srlite/input",
        data_source='EVHR',
        image_type='**',
        search_tail_string='/*toa.tif',
        get_batch=True,
        out_footprint_dir='/adapt/nobackup/projects/ilab/projects/srlite/input',
    ),
    # CCDC input: /adapt/nobackup/projects/ilab/data/srlite/ccdc/CCDC_ALL
    # (alternative output dir previously used: '/adapt/nobackup/people/pmontesa/userfs02/_share')
    _local_footprint_config(
        data_dir="/adapt/nobackup/projects/ilab/data/srlite/ccdc/CCDC_ALL",
        data_source='CCDC',
        image_type='',
        search_tail_string='/*ccdc.tif',
        get_batch=False,
        out_footprint_dir='/adapt/nobackup/projects/ilab/data/srlite/ccdc',
    ),
    # SR-Lite cloudmasks
    _local_footprint_config(
        data_dir="/adapt/nobackup/projects/ilab/data/srlite/cloudmask",
        data_source='Cloudmask',
        image_type='**',
        search_tail_string='/*toa.cloudmask.v1.2.tif',
        get_batch=True,
        out_footprint_dir='/adapt/nobackup/projects/ilab/data/srlite/cloudmask',
    ),
    # SR-lite output
    # (previous product dir: "/adapt/nobackup/projects/ilab/data/srlite/products/srlite-0.9.9-06132022-reproject_ALL_only_once_to_toa")
    _local_footprint_config(
        data_dir="/adapt/nobackup/projects/ilab/data/srlite/products/srlite-0.9.11-07152022-rma-mode-average/07152022",
        data_source='srlite-0.9.11',
        image_type='**',
        search_tail_string='/*sr-02m.tif',
        get_batch=True,
        out_footprint_dir='/adapt/nobackup/people/pmontesa/userfs02/projects/srlite',
    ),
]

### Run the footprinting code over a data list to return a geodataframe and a geopackage **updated**

https://stackoverflow.com/questions/67812512/rasterio-does-not-exist-in-the-file-system-and-is-not-recognized-as-a-support

In [3]:
######################
# Run settings for the footprinting cell below.
# TEST: when True, only the first two matched rasters are footprinted.
TEST = False
# d: the dataset parameter dictionary to process; the last entry of dict_list
# is the SR-Lite output product.
d = dict_list[-1]
######################
# Echo the selected configuration so the run is self-describing
d

{'location': 'local',
 'data_dir': '/adapt/nobackup/projects/ilab/data/srlite/products/srlite-0.9.11-07152022-rma-mode-average/07152022',
 'data_source': 'srlite-0.9.11',
 'image_type': '**',
 'search_tail_string': '/*sr-02m.tif',
 'search_link_string': '',
 'GET_BATCH': True,
 'DO_DATAMASK': False,
 'GET_ONLY_DATASETMASK': True,
 'MANY_CRS': True,
 'out_footprint_dir': '/adapt/nobackup/people/pmontesa/userfs02/projects/srlite',
 'footprint_name_SPLIT_CHAR': '_',
 'footprint_name_SPLIT_POS': 0,
 'aws_credential_fn': None}

In [4]:
%%time

footprint_gdf_list = list() 

if d['image_type'] == '**' or d['image_type'] == '':
    layer_name = 'all'
else:
    layer_name = d['image_type']
    
#filename = 'footprints_' + d['data_source']+'_'+d['image_type']+'.gpkg'
filename = 'footprints_' + d['data_source']+'.gpkg'

#SEARCH_STR_LIST = ["/", sensor, image_type, ".tif"]
SEARCH_STR_LIST = ["/", d['image_type'], d['search_tail_string']]
keystring = d['search_link_string'].join(SEARCH_STR_LIST)
print(d['data_dir'] + keystring)

# Raster list
if d['location'] == 'local':
    r_fn_list = glob.glob(d['data_dir'] + keystring, recursive=True)
else:
    print('on s3...')
    r_fn_list = s3.glob(d['data_dir'] + keystring)
    r_fn_list = ['s3://' + f for f in r_fn_list]

if TEST:
    r_fn_list = r_fn_list[0:2]
    print(r_fn_list)
    filename = 'TEST_footprints_' + d['data_source']+'_'+d['image_type']+'.gpkg'

outprint_fn = os.path.join(d['out_footprint_dir'], filename)

print(f'{len(r_fn_list)} total inputs from ' + d['data_source'] )   

if d['location'] == 'local':
    # Raster files are local
    # Get a list geodataframes of indiv raster footprint geometries (both data and mask)
    f_gdf_lst = [raster_footprint(r_fn, DO_DATAMASK=d['DO_DATAMASK'], GET_ONLY_DATASETMASK=d['GET_ONLY_DATASETMASK'], R_READ_MODE='r', MANY_CRS=d['MANY_CRS']) for r_fn in r_fn_list]
if d['location'] == 's3':
    # Raster files are on s3
    # Get the rio AWS session needed to access the s3 rasters in order to footprint
    aws_session = get_rio_aws_session_from_creds(d['aws_credential_fn'])
    with rio.Env(aws_session):
        # Get a list geodataframes of indiv raster footprint geometries (both data and mask)
        f_gdf_lst = [raster_footprint(r_fn, DO_DATAMASK=d['DO_DATAMASK'], GET_ONLY_DATASETMASK=d['GET_ONLY_DATASETMASK'], R_READ_MODE='r') for r_fn in r_fn_list]
        
if len(f_gdf_lst)==0:
    print('\nNothing was footprinted.\n')
else:
    # Build a footprint database geodataframe from all rasters
    footprint_gdf = build_footprint_db(f_gdf_lst, TO_GCS=False, WRITE_GPKG=True, OUT_F_NAME=outprint_fn, OUT_LYR_NAME=layer_name)
    footprint_gdf['footprint_name'] = footprint_gdf['file'].str.split(d['footprint_name_SPLIT_CHAR'], expand=True)[d['footprint_name_SPLIT_POS']]
    if d['GET_BATCH']:
        footprint_gdf['batch_name'] = footprint_gdf['path'].apply(lambda x: os.path.splitext(os.path.basename(x))[0])
    footprint_gdf_list.append(footprint_gdf)


/adapt/nobackup/projects/ilab/data/srlite/products/srlite-0.9.11-07152022-rma-mode-average/07152022/**/*sr-02m.tif
203 total inputs from srlite-0.9.11
Building GDF from list...
Wrote out spatial footprints to /adapt/nobackup/people/pmontesa/userfs02/projects/srlite/footprints_srlite-0.9.11.gpkg
CPU times: user 17.5 s, sys: 1.65 s, total: 19.2 s
Wall time: 26.6 s


### Summarize the footprints gdf

In [5]:
# Summarize the footprints: total count, then number of files per group.
print(f"Total # of {d['data_source'] } files:  {footprint_gdf.shape[0]}")

# 'batch_name' is only added when the selected configuration has GET_BATCH set,
# so group by it only when the column exists (otherwise this cell raised KeyError).
group_cols = ['footprint_name']
if 'batch_name' in footprint_gdf.columns:
    group_cols.insert(0, 'batch_name')

# Count of files ('path' entries) per group
footprint_gdf[group_cols + ['path']].groupby(group_cols).count()


Total # of srlite-0.9.11 files:  203


Unnamed: 0_level_0,Unnamed: 1_level_0,path
batch_name,footprint_name,Unnamed: 2_level_1
Alaska,WV02,118
Alaska,WV03,21
Howland,GE01,3
Howland,WV02,7
Howland,WV03,1
Laselva,GE01,5
Laselva,QB02,8
Senegal,WV02,4
Siberia,WV02,22
Siberia,WV03,1


In [18]:
# Reload the srlite-0.9.9 footprints from disk (this replaces the in-memory footprint_gdf)
footprint_gdf = gpd.read_file('/adapt/nobackup/people/pmontesa/userfs02/projects/srlite/footprints_srlite-0.9.9.gpkg')

# Derive batch_name from the last component of each 'path' value, minus any extension
footprint_gdf['batch_name'] = footprint_gdf['path'].map(lambda p: os.path.splitext(os.path.basename(p))[0])

# Overall extent of one batch; footprint_gdf.bounds would give per-feature extents instead
is_howland_wv02 = footprint_gdf['batch_name'] == 'Howland-WV02'
footprint_gdf.loc[is_howland_wv02].total_bounds

array([-68.90757601,  45.08967624, -68.54069091,  45.29153413])

In [75]:
# Map all footprints, starting the view at the mean centroid of the 'Senegal' batch
senegal_centroids = footprint_gdf[footprint_gdf.batch_name == 'Senegal'].geometry.centroid

maplib.MAP_LAYER_FOLIUM(
    footprint_gdf,
    LAYER_COL_NAME='file',
    LAYER_NAME=d['data_source'],
    zoom_start=6,
    lon_start=senegal_centroids.x.mean(),
    lat_start=senegal_centroids.y.mean(),
    fig_h=350,
)


  

  This is separate from the ipykernel package so we can avoid doing imports until


In [76]:
# Map the srlite-0.9.9 footprints stored under misc/ as a semi-transparent layer
srlite_099_footprints = gpd.read_file('/adapt/nobackup/people/pmontesa/userfs02/projects/srlite/misc/footprints_srlite-0.9.9.gpkg')
srlite_layer_style = {'fillColor': 'black', 'color': 'blue', 'weight' : 0.75, 'opacity': 1, 'fillOpacity': 0.5}

maplib.MAP_LAYER_FOLIUM(
    srlite_099_footprints,
    LAYER_COL_NAME='file',
    foliumMap=None,
    zoom_start=2,
    lon_start=0,
    fig_h=350,
    LAYER_NAME='SR-Lite',
    LAYER_STYLE_DICT=srlite_layer_style,
)