# Footprint the extents of rasters

In [2]:
import rasterio
import rasterio.features
import rasterio.warp
from rasterio.plot import show
from rasterio import Affine, MemoryFile
from rasterio.enums import Resampling

import pandas as pd
import geopandas as gpd
import shapely
from shapely.geometry import box
import fiona
from fiona.crs import from_epsg
import pprint

import glob
import os

import sys
sys.path.append('/efs/pmontesa/code/geoscitools')
sys.path.append('/home/pmontesa/code/geoscitools')
import maplib
import footprintlib
from footprintlib import *

# https://gis.stackexchange.com/questions/375577/how-do-i-write-out-a-mixed-geometry-geodataframe-to-a-geopackage
# https://gis.stackexchange.com/questions/187877/how-to-polygonize-raster-to-shapely-polygons

### Set up dictionaries of parameters

NameError: name 'credentials_fn' is not defined

Set up an AWSSession

In [3]:
import configparser
config = configparser.ConfigParser()
config.read('/efs/pmontesa/credentials')
print(config.sections()[0])
[print(key) for key in config['boreal_pub']]


IndexError: list index out of range

In [5]:
parse_aws_creds(credentials_fn)[0]

'boreal_pub'

## Build a dictionary of the raster datasets for footprinting

In [51]:
dict_list = [

    {'location':'s3',  #'local' or 's3'
     'data_dir': "s3://terrapulse-pub-data/boreal-forest-data",
     'data_source': 'terrapulse-pub-data',
     'image_type': 'tcc_2020',
     'search_tail_string': '/**/*.tif',
     'search_link_string': '',
     'DO_DATAMASK': False,
     'GET_ONLY_DATASETMASK': True,
     'out_footprint_dir': '/efs/projects/forest_height/test_out',
     'footprint_name_SPLIT_CHAR': '_',
     'footprint_name_SPLIT_POS': 0,
     'aws_credential_fn': '/efs/pmontesa/credentials'
    },
    {'location':'local', #'local' or 's3'
     'data_dir': "/efs/projects/data/hls_tiles",
     'data_source': 'hls_tiles',
     'image_type': 'boreal_tiles_v0003',
     'search_tail_string': '/*.tif',
     'search_link_string': '',
     'DO_DATAMASK': False,
     'GET_ONLY_DATASETMASK': True,
     'out_footprint_dir': '/efs/projects/forest_height/test_out',
     'footprint_name_SPLIT_CHAR': '_',
     'footprint_name_SPLIT_POS': 0,
     'aws_credential_fn': None
    },
    {'location':'local', #'local' or 's3'
     'data_dir': "/att/nobackup/pmontesa/userfs02/data/worldclim/1km/bioclim",
     'data_source': 'worldclim',
     'image_type': 'wc2.0_30s_bio',
     'search_tail_string': '/wc2.0_bio_30s*.tif',
     'search_link_string': '',
     'DO_DATAMASK': False,
     'GET_ONLY_DATASETMASK': True,
     'out_footprint_dir': '/att/nobackup/pmontesa/userfs02/projects/ilab/boreal_height',
     'footprint_name_SPLIT_CHAR': '_',
     'footprint_name_SPLIT_POS': 0,
     'aws_credential_fn': None
    },   
    {'location':'local', #'local' or 's3'
     'data_dir': "/att/nobackup/pmontesa/userfs02/data",
     'data_source': 'soil_grids',
     'image_type': 'soil_grids',
     'search_tail_string': '/*.tif',
     'search_link_string': '',
     'DO_DATAMASK': False,
     'GET_ONLY_DATASETMASK': True,
     'out_footprint_dir': '/att/nobackup/pmontesa/userfs02/projects/ilab/boreal_height',
     'footprint_name_SPLIT_CHAR': '_',
     'footprint_name_SPLIT_POS': 0,
     'aws_credential_fn': None
    }, 
    {'location':'local', #'local' or 's3'
     'data_dir': "/att/nobackup/pmontesa/userfs02/data",
     'data_source': 'awi',
     'image_type': 'MAGT',
     'search_tail_string': '/*PERPROB*.tif',
     'search_link_string': '',
     'DO_DATAMASK': False,
     'GET_ONLY_DATASETMASK': True,
     'out_footprint_dir': '/att/nobackup/pmontesa/userfs02/projects/ilab/boreal_height',
     'footprint_name_SPLIT_CHAR': '_',
     'footprint_name_SPLIT_POS': 0,
     'aws_credential_fn': None
    },    
]

In [59]:
if False:
    for d in dict_list:
        print('\n', d['data_source'])

        if d['location'] == 's3':
            import s3fs
            # Use s3fs to access file remote s3 system
            s3 = footprintlib.get_s3_fs_from_creds('/home/pmontesa/credentials_test') # '/efs/pmontesa/credentials'
            print(s3.ls(d['data_dir']))

### Run the footprinting code over an s3 datalist to return and geodataframe and a geopackage **updated** 

https://stackoverflow.com/questions/67812512/rasterio-does-not-exist-in-the-file-system-and-is-not-recognized-as-a-support

In [52]:
%%time

######################
# Select a dictionary
d = dict_list[3]
TEST=False
######################


footprint_gdf_list = list() 
filename = 'footprints_' + d['data_source']+'_'+d['image_type']+'.gpkg'

#SEARCH_STR_LIST = ["/", sensor, image_type, ".tif"]
SEARCH_STR_LIST = ["/", d['image_type'], d['search_tail_string']]
keystring = d['search_link_string'].join(SEARCH_STR_LIST)
print(d['data_dir'] + keystring)

# Raster list
if d['location'] == 'local':
    r_fn_list = glob.glob(d['data_dir'] + keystring)
else:
    print('on s3...')
    r_fn_list = s3.glob(d['data_dir'] + keystring)
    r_fn_list = ['s3://' + f for f in r_fn_list]

if TEST:
    r_fn_list = r_fn_list[0:2]
    print(r_fn_list)
    filename = 'TEST_footprints_' + d['data_source']+'_'+d['image_type']+'.gpkg'


outprint_fn = os.path.join(d['out_footprint_dir'], filename)

print(f'{len(r_fn_list)} total inputs from ' + d['data_source'] )    

if d['location'] == 'local':
    # Raster files are local
    # Get a list geodataframes of indiv raster footprint geometries (both data and mask)
    f_gdf_lst = [raster_footprint(r_fn, DO_DATAMASK=d['DO_DATAMASK'], GET_ONLY_DATASETMASK=d['GET_ONLY_DATASETMASK'], R_READ_MODE='r') for r_fn in r_fn_list]
if d['location'] == 's3':
    # Raster files are on s3
    # Get the rio AWS session needed to access the s3 rasters in order to footprint
    aws_session = get_rio_aws_session_from_creds(d['aws_credential_fn'])
    with rio.Env(aws_session):
        # Get a list geodataframes of indiv raster footprint geometries (both data and mask)
        f_gdf_lst = [raster_footprint(r_fn, DO_DATAMASK=d['DO_DATAMASK'], GET_ONLY_DATASETMASK=d['GET_ONLY_DATASETMASK'], R_READ_MODE='r') for r_fn in r_fn_list]
if len(f_gdf_lst)==0:
    print('\nNothing was footprinted.\n')
else:
    # Build a footprint database geodataframe from all rasters
    footprint_gdf = build_footprint_db(f_gdf_lst, TO_GCS=True, WRITE_GPKG=True, OUT_F_NAME=outprint_fn, OUT_LYR_NAME=d['image_type'])
    footprint_gdf['footprint_name'] = footprint_gdf['file'].str.split(d['footprint_name_SPLIT_CHAR'], expand=True)[d['footprint_name_SPLIT_POS']]
    footprint_gdf_list.append(footprint_gdf)


/att/nobackup/pmontesa/userfs02/data/soil_grids/*.tif
74 total inputs from soil_grids
Building GDF from list...
Converting to GCS...
Wrote out spatial footprints to /att/nobackup/pmontesa/userfs02/projects/ilab/boreal_height/footprints_soil_grids_soil_grids.gpkg
CPU times: user 2.99 s, sys: 314 ms, total: 3.3 s
Wall time: 10.6 s


  return _prepare_from_string(" ".join(pjargs))


#### Test access to rasters on s3

In [None]:
test_terraPulse_file = r_fn_list[1]

import rioxarray as rxr

with rio.Env(aws_session):
    #arr= rxr.open_rasterio(test_terraPulse_file,  masked=False)
    
    with rio.open(test_terraPulse_file) as src:
        profile = src.profile
        arr = src.read(1)
        #arr.plot()
        from rasterio.plot import show_hist
        show_hist(
            src, bins=50, lw=0.0, stacked=False, alpha=0.3,
            histtype='stepfilled', title="Histogram")

### Select the footprints gdf

In [53]:
footprint_gdf = footprint_gdf_list[0]
footprint_gdf

Unnamed: 0,geometry,footprint_name,path,file
0,"POLYGON ((179.99994 -56.00081, 179.99994 83.99...",BLDFIE,/att/nobackup/pmontesa/userfs02/data/soil_grids,BLDFIE_M_sl1_250m_ll.tif
1,"POLYGON ((179.99994 -56.00081, 179.99994 83.99...",BLDFIE,/att/nobackup/pmontesa/userfs02/data/soil_grids,BLDFIE_M_sl5_250m_ll.tif
2,"POLYGON ((179.99994 -56.00081, 179.99994 83.99...",BDRICM,/att/nobackup/pmontesa/userfs02/data/soil_grids,BDRICM_M_250m_ll.tif
3,"POLYGON ((179.99994 -62.00081, 179.99994 87.37...",CLYPPT,/att/nobackup/pmontesa/userfs02/data/soil_grids,CLYPPT_M_sl3_250m_ll.tif
4,"POLYGON ((179.99994 -56.00081, 179.99994 83.99...",AWCtS,/att/nobackup/pmontesa/userfs02/data/soil_grids,AWCtS_M_sl3_250m_ll.tif
...,...,...,...,...
69,"POLYGON ((179.99994 -56.00081, 179.99994 83.99...",AWCh1,/att/nobackup/pmontesa/userfs02/data/soil_grids,AWCh1_M_sl6_250m_ll.tif
70,"POLYGON ((179.99994 -56.00081, 179.99994 83.99...",AWCh1,/att/nobackup/pmontesa/userfs02/data/soil_grids,AWCh1_M_sl2_250m_ll.tif
71,"POLYGON ((179.99994 -62.00081, 179.99994 87.37...",SLTPPT,/att/nobackup/pmontesa/userfs02/data/soil_grids,SLTPPT_M_sl4_250m_ll.tif
72,"POLYGON ((179.99994 -56.00081, 179.99994 83.99...",WWP,/att/nobackup/pmontesa/userfs02/data/soil_grids,WWP_M_sl3_250m_ll.tif


## Map the footprints

In [55]:
#maplib.MAP_LAYER_FOLIUM(footprint_gdf.iloc[[3,5]], zoom_start=2)
maplib.MAP_LAYER_FOLIUM(footprint_gdf, LAYER_COL_NAME='file', zoom_start=2, lon_start=0)

### Manually check footprint for a single image

In [30]:
r_fn = '/att/nobackup/pmontesa/userfs02/data/MAGT/UiO_PEX_PERPROB_3.0_20171201_2000_2016.tif'
r_fn = '/att/nobackup/pmontesa/userfs02/data/soil_grids/CECSOL_M_sl2_250m_ll.tif'
r_fn = '/att/nobackup/pmontesa/userfs02/data/worldclim/1km/bioclim/wc2.0_30s_bio/wc2.0_bio_30s_01.tif'
footprint_gdf = raster_footprint(r_fn, DO_DATAMASK=d['DO_DATAMASK'], GET_ONLY_DATASETMASK=d['GET_ONLY_DATASETMASK'], R_READ_MODE='r+')
maplib.MAP_LAYER_FOLIUM(footprint_gdf, LAYER_COL_NAME='path', zoom_start=2, lon_start=0)