# Explore data in stac catalog

In [None]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import pyproj
import geopandas as gpd
from pystac_client import Client
import rich.table

%matplotlib inline

In [None]:
# Add the helper directory (relative to this notebook) to the module search path
# so that `file_checks` can be imported.
sys.path.append(r"../LUCinSA_helpers")
# NOTE(review): star import — `get_img_list_from_cat`, used further down, is not
# defined in this notebook and presumably comes from here; confirm.
from file_checks import *

In [None]:
'''
PARAMETERS: modify in notebook_params notebook, then run that notebook and this cell to update here
DO NOT modify this cell
'''

%store -r basic_config
print("Basic Parameters: \n raw_dir = {} \n local_dir = {} \n  gridCell = {}  \n yr_range = {} \n image_type = {} \n"
      .format(basic_config['raw_dir'],basic_config['local_dir'],basic_config['grid_cell'], basic_config['yr_range'], basic_config['image_type']))

In [None]:
# Notebook-local settings. Each value is hard-coded here; the commented-out
# lines are alternatives, including the corresponding basic_config entries.

# Grid vector file read with geopandas further down.
#grid_file = basic_config['grid_file']
#grid_file = f'/home/sandbox-cel/capeTown/vector/cape_grid_utm32S.gpkg'
grid_file = "/home/klwalker/data/LUCinLA_grid_8858.gpkg"

# Grid cell to explore (matched against the grid's UNQ column further down).
#cell = basic_config['grid_cell']
#cell = '3972'
cell = "4000"

# [first year, last year] of the search window.
#yrs = basic_config['yr_range']
#yrs = [2022,2022]
yrs = [2018, 2019]

# Image type passed to get_img_list_from_cat.
#img_type = basic_config['image_type']
#img_type = 'l8'
img_type = "s2"

## Explore properties of stac catalog

In [None]:
# STAC API endpoints used throughout this notebook.
ELEMENT84_V1_URL = "https://earth-search.aws.element84.com/v1"
ELEMENT84_V0_URL = "https://earth-search.aws.element84.com/v0"
PLANETARY_URL = "https://planetarycomputer.microsoft.com/api/stac/v1/"

# Element84: current (v1) and archival (v0) catalogs.
s2_cat_default = Client.open(ELEMENT84_V1_URL)
s2_cat_archive2023 = Client.open(ELEMENT84_V0_URL)
# Register the ITEM_SEARCH conformance class on the v0 client, which is
# searched further down in this notebook.
s2_cat_archive2023.add_conforms_to("ITEM_SEARCH")

# Planetary Computer: the pre-2017 Sentinel-2 client and the Landsat client
# are two separate clients opened on the same endpoint.
s2_cat_pre2017 = Client.open(PLANETARY_URL)
l_cat_default = Client.open(PLANETARY_URL)

In [None]:
## view Planetary hub (default for Landsat):
# Print every collection returned by the `l_cat_default` client, one per line.
collections = l_cat_default.get_collections()
for collection in collections:
    print(collection)

In [None]:
## default for Sentinel-2 (after 2023)
# Print every collection returned by the `s2_cat_default` client, one per line.
collections = s2_cat_default.get_collections()
for collection in collections:
    print(collection)

In [None]:
## Sentinel archival2023
# Print every collection returned by the `s2_cat_archive2023` client, one per line.
collections = s2_cat_archive2023.get_collections()
for collection in collections:
    print(collection)

## Get list of images meeting criteria from default stac catalog
Note: `cc=90` (maximum cloud cover) is the default; pass a different `cc` value to filter images by another maximum cloud cover.

In [None]:
# The commented call queries cat='default'; the active call queries cat='planetary'.
#imgs = get_img_list_from_cat(img_type, cell, grid_file, yrs=yrs, cat='default', cc=90)
# NOTE(review): `get_img_list_from_cat` is not defined in this notebook; it
# presumably comes from the star import of file_checks — confirm.
imgs = get_img_list_from_cat(img_type, cell, grid_file, yrs=yrs, cat='planetary', cc=90)
# Sort in place by the 'obs' column, then preview the first rows.
imgs.sort_values(by='obs', inplace=True)
imgs.head()

## Get images from archival catalog
(The Element84 STAC catalog v0 was used to download Sentinel images until 2023; there are no S2 images in that catalog after 4/2023.)

In [None]:
# Same helper as for `imgs`, but fixed to 's2' and cat='archival2023'; no `cc`
# argument is passed here.
oldimgs = get_img_list_from_cat('s2', cell, grid_file, yrs=yrs, cat='archival2023')
# Sort in place by the 'obs' column, then preview the last rows.
oldimgs.sort_values(by='obs', inplace=True)
oldimgs.tail()

In [None]:
# Mean of the 'cloudcov' column of `imgs`, rounded to two decimals.
avg_cc = imgs['cloudcov'].mean().round(2)
print(f'avg cloud_cover = {avg_cc}%')

#### To see the specs of a particular collection:

In [None]:
# Fetch the "sentinel-2-l2a" collection from the Element84 v1 client and display it.
s2prod = s2_cat_default.get_collection("sentinel-2-l2a")
s2prod

In [None]:
# Fetch the "sentinel-s2-l2a" collection from the Element84 v0 client and display it.
# NOTE(review): the item search on this catalog further down uses
# 'sentinel-s2-l2a-cogs' — confirm which collection is intended here.
olds2prod = s2_cat_archive2023.get_collection("sentinel-s2-l2a")
olds2prod

In [None]:
# Fetch the "sentinel-2-l2a" collection from the Planetary Computer client and display it.
# NOTE: `s2pre2017` (this collection) is distinct from `s2_pre2017`, which a
# later cell binds to a single scene.
s2pre2017 = s2_cat_pre2017.get_collection("sentinel-2-l2a")
s2pre2017

In [None]:
def make_asset_table(scene):
    """Build a two-column rich table listing the assets of a scene.

    Parameters
    ----------
    scene
        Object exposing an ``assets`` mapping of asset key -> asset, where
        each asset has a ``title`` attribute.

    Returns
    -------
    rich.table.Table
        Table with "Asset Key" and "Description" columns and one row per
        asset; the description is the asset's ``title``.
    """
    table = rich.table.Table("Asset Key", "Description")
    assets = scene.assets
    for key in assets:
        table.add_row(key, assets[key].title)
    return table

In [None]:
# Load the grid and reproject it to EPSG:4326 if it is in any other CRS.
grid = gpd.read_file(grid_file)
if grid.crs != pyproj.CRS.from_epsg(4326):
    grid = grid.to_crs('epsg:4326')
# Bounds (minx, miny, maxx, maxy) of the grid row(s) whose UNQ value matches `cell`.
bb = grid.query(f'UNQ == {cell}').geometry.total_bounds

# Build the "start/end" datetime range used by the catalog searches.
# Identity test (`is None`) rather than `== None`, which can be overridden by
# the compared object.
if yrs is None:
    time_slice = "2010-01-01/2022-12-30"
else:
    time_slice = f"{yrs[0]}-01-01/{yrs[1]}-12-30"
# NOTE(review): both ranges end on 12-30, so Dec 31 falls outside the window —
# confirm this is intended.

In [None]:
# Search the 'landsat-c2-l2' collection on the Planetary Computer client for
# the cell's bounding box and time slice (cloud cover below 90, at most 10 items).
p_search = l_cat_default.search(
    collections=["landsat-c2-l2"],
    bbox=bb,
    datetime=time_slice,
    query=["eo:cloud_cover<90"],
    max_items=10,
)
# Keep the first returned item as the sample Landsat scene.
lscene = p_search.item_collection()[0]

In [None]:
# Search the 'sentinel-2-l2a' collection on the Element84 v1 client for the
# cell's bounding box and time slice (cloud cover below 90, at most 10 items).
s2_search = s2_cat_default.search(
    collections=["sentinel-2-l2a"],
    bbox=bb,
    datetime=time_slice,
    query=["eo:cloud_cover<90"],
    max_items=10,
)
# Keep the first returned item as the sample Sentinel-2 scene.
s2scene = s2_search.item_collection()[0]

In [None]:
# Search the 'sentinel-s2-l2a-cogs' collection on the Element84 v0 client for
# the cell's bounding box and time slice (cloud cover below 90, at most 10 items).
s2_old_search = s2_cat_archive2023.search(
    collections=["sentinel-s2-l2a-cogs"],
    bbox=bb,
    datetime=time_slice,
    query=["eo:cloud_cover<90"],
    max_items=10,
)
# Keep the first returned item as the sample archival Sentinel-2 scene.
s2_old_scene = s2_old_search.item_collection()[0]

In [None]:
# Search the 'sentinel-2-l2a' collection on the Planetary Computer client for
# the cell's bounding box and time slice (cloud cover below 90, at most 10 items).
s2_pre2017_search = s2_cat_pre2017.search(
    collections=["sentinel-2-l2a"],
    bbox=bb,
    datetime=time_slice,
    query=["eo:cloud_cover<90"],
    max_items=10,
)
# Keep the first returned item; note that `s2_pre2017` is a scene, whereas
# `s2pre2017` (no underscore after "s2") is the collection fetched earlier.
s2_pre2017 = s2_pre2017_search.item_collection()[0]

## compare assets for old and new sentinel images

In [None]:
# Asset table for the sample scene from the 'landsat-c2-l2' search.
make_asset_table(lscene)

In [None]:
# Asset table for the sample scene from the Element84 v1 'sentinel-2-l2a' search.
make_asset_table(s2scene)

In [None]:
# Asset table for the sample scene from the Planetary Computer 'sentinel-2-l2a' search.
make_asset_table(s2_pre2017)

In [None]:
# Asset table for the sample scene from the Element84 v0 'sentinel-s2-l2a-cogs' search.
make_asset_table(s2_old_scene)

## Compare properties for old and new sentinel images

In [None]:
# Show the Element84 v1 scene's properties as a table with one row per property key.
pd.DataFrame.from_dict(s2scene.properties, orient='index')

In [None]:
# Show the Element84 v0 scene's properties as a table with one row per property key.
pd.DataFrame.from_dict(s2_old_scene.properties, orient='index')

### To save search results:

In [None]:
#scenes.save_object(os.path.join(basic_config['local_dir'], f'stacsearch_{cell}_{yrs}_{img_type}.json'))

## get url to download a band/asset

In [None]:
# For Planetary:

In [None]:
import planetary_computer as pc

# Sign the Landsat search results, then print the href of the first item's
# "nir08" asset.
items = pc.sign(p_search)
test_item = items[0]
test_url = test_item.assets["nir08"].href
print(test_url)

For Element84 (Sentinel):

In [None]:
# href of the "nir" asset of the Element84 v1 scene; the commented line is an
# alternative lookup ("B8A") on the archival v0 scene.
url=s2scene.assets["nir"].href
#url=s2_old_scene.assets["B8A"].href
print(url)

## View thumbnail:

In [None]:
import io

import matplotlib.pyplot as plt
import requests
from PIL import Image

# Download the scene's thumbnail. The timeout keeps the cell from hanging
# indefinitely, and raise_for_status() reports a failed download as an HTTP
# error instead of letting PIL fail on a non-image response body.
response = requests.get(s2scene.assets["thumbnail"].href, timeout=30)
response.raise_for_status()
samp_img = response.content

# Decode the downloaded bytes in memory and show them in a 5x5 inch figure.
plt.figure(figsize=(5, 5))
plt.imshow(Image.open(io.BytesIO(samp_img)))

#### Explore scene classification map

In [None]:
import rioxarray
# Open the "scl" asset of the Element84 v1 scene directly from its href and display it.
scl = rioxarray.open_rasterio(s2scene.assets["scl"].href)
scl

## Check for missing images in db

In [None]:
#missing_local_s, missing_remote_s, missing_from_localdb_s = compare_files_to_db('s', 'both', basic_config['raw_dir'], basic_config['grid_cell'], basic_config['grid_file'], yrs=basic_config['yr_range'],data_source=basic_config['data_source'])
#print('{} images from sentinel catalog have not been processed'.format(len(missing_from_localdb_s)))
#print(missing_from_localdb_s)

### explore difference between STAC catalogs

In [None]:
# Disabled code kept inside a string literal. The result was assigned to
# `catfiff` but read back as `catdiff`; the assignment now uses `catdiff`.
# NOTE(review): the snippet still uses the older `basicConfig` / 'gridCell' /
# 'gridFile' names, whereas the rest of this notebook uses `basic_config` with
# 'grid_cell' / 'grid_file' — update before re-enabling.
'''
catdiff = ComparePlanetaryHub_w_Element84 ('Sentinel', basicConfig['gridCell'],basicConfig['gridFile'],Yrs = [2000,2022])
diff0 = pd.DataFrame(catdiff[0])
diff0['date'] = diff0.apply(lambda x: x[0].split("_")[2], axis=1)
diff0.sort_values(by=['date'], inplace=True)
diff1 = pd.DataFrame(catdiff[1])
diff1['date'] = diff1.apply(lambda x: x[0].split("_")[2], axis=1)
diff1.sort_values(by=['date'], inplace=True)
print('{} Images in Element84 but not Planetary: {}'.format(len(catdiff[0]),diff0))
print('{} Images in Planetary but not Element84: {}'.format(len(catdiff[1]),diff1))
'''

## To save an html copy of this notebook with all outputs:

In [None]:
'''
### uncomment and Run to print output as html
out_name = str(basic_config['country']+'1aa_StacData_'+str(basic_config['grid_cell']))
!jupyter nbconvert --output-dir='./Outputs' --to html --no-input --output=$out_name 1aa_StacData_FileContent.ipynb
'''