<a href="https://colab.research.google.com/github/jshogland/SpatialModelingTutorials/blob/main/Notebooks/getting_stac_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Getting cloud-based data
## This notebook demonstrates how to query OpenStreetMap for national forest boundaries and Planetary Computer's data catalog for seamless elevation and Sentinel-2 Level-2A processed data.

Author: John Hogland, 12/1/2023

## Install various packages

In [None]:
# Install the geospatial / STAC stack this notebook relies on (the notebook links to Colab, so a fresh runtime is assumed).
# Versions are not pinned, so each run installs whatever pip resolves at that time.
# NOTE(review): mapclassify and folium are presumably required by GeoDataFrame.explore(); datascience and gym
# are not imported in any visible cell -- confirm they are still needed.
!pip install --upgrade numba
!pip install --upgrade geopandas
!pip install mapclassify
!pip install --upgrade datascience
!pip install --upgrade gym
!pip install --upgrade folium
# Packages imported directly by the cells below.
!pip install osmnx
!pip install raster_tools
!pip install pystac_client
!pip install stackstac
!pip install planetary_computer

## Import packages

In [None]:
from raster_tools import Raster, clipping
import numpy as np, geopandas as gpd, pandas as pd, osmnx as ox

## Get the forest boundary from OpenStreetMap and build a buffered extent for downloading elevation and Sentinel-2 Level-2A processed imagery

In [None]:
# Study-area settings: FOREST_QUERY can be changed to any forest OpenStreetMap can geocode
# (the result is only used to create a geometry for selecting the extent of the study area).
FOREST_QUERY = 'Malheur National Forest, OR, USA'
# Buffer distance in the units of the boundary's CRS.
# NOTE(review): the boundary is presumably lon/lat (get_stac_data expects WGS84), so this is 0.25 degrees, not metres -- confirm.
BUFFER_DIST = 0.25

snf = ox.geocode_to_gdf(FOREST_QUERY)  # forest boundary as a GeoDataFrame
ply = snf.buffer(BUFFER_DIST)  # buffered boundary used as the download extent
geo = ply.iloc[0]  # first (positional) geometry, independent of the index labels

## Import packages and define functions to download and mosaic STAC data

In [None]:
import pystac_client
import stackstac
import planetary_computer

def make_mosaic(xra):
    '''
    mosaics a stacked xarray data array into a single raster

    xra = (xarray data array) stack of items, e.g. the array returned by get_stac_data

    returns a raster_tools Raster wrapping stackstac.mosaic(xra)
    '''
    mosaicked = stackstac.mosaic(xra)
    return Raster(mosaicked)

def get_stac_data(geo,url="https://planetarycomputer.microsoft.com/api/stac/v1",name="3dep-seamless",qry=None,res=None,crs=5070,dt=None,limit=1000):
    '''
    searches a STAC catalog (planetary computer by default) and stacks the matching items

    geo = (polygon) geometry the returned items must intersect (WGS84)
    url = (string) base url of the STAC api
    name = (string) catalog collection id
    qry = (dictionary) of property filters, e.g. {'gsd':{'eq':30}}
    res = (tuple of numbers) output resolution (x,y)
    crs = (int) output crs (EPSG code)
    dt = (string) date time or interval, e.g. '2022-06' or '2022/2023'
    limit = (int) page size hint passed to the search (items per request);
            NOTE(review): in pystac_client this is not a cap on the total number of items returned

    returns (stac item collection, geodataframe of tiles, and xarray data array)

    raises ValueError when the search matches no items
    '''
    # sign_inplace attaches access tokens to the asset links so the data can be read
    catalog = pystac_client.Client.open(url, modifier=planetary_computer.sign_inplace)
    srch = catalog.search(collections=name, intersects=geo, query=qry, datetime=dt, limit=limit)
    ic = srch.item_collection()
    # Fail early with a readable message instead of an opaque error from the stacking step
    if len(ic) == 0:
        raise ValueError(
            f"STAC search of collection {name!r} at {url} returned no items; "
            "check the geometry, query, and datetime filters"
        )
    # One row per item, with the item footprint as the geometry
    df = gpd.GeoDataFrame.from_features(ic.to_dict(), crs="epsg:4326")
    xra = stackstac.stack(ic, resolution=res, epsg=crs)
    return ic, df, xra


## Get 3DEP seamless 30-meter resolution elevation data

In [None]:
# Search for the 30 meter 3DEP seamless elevation tiles that intersect the buffered extent
url = "https://planetarycomputer.microsoft.com/api/stac/v1"  # planetary computer STAC endpoint
nm = "3dep-seamless"  # catalog (collection) name
qry = {"gsd": {"eq": 30}}  # keep only items whose gsd equals 30
ic, df, xra = get_stac_data(geo, url=url, name=nm, qry=qry, res=(30, 30), crs=5070)
dem = make_mosaic(xra)  # combine the tiles into a single elevation surface

## Visualize the footprint of each DEM tile and overlay the national forest boundary


In [None]:
# Tile footprints drawn as outlines (no fill) with the forest boundary layered on top in orange
m = snf.explore(
    m=df.explore(style_kwds={"fillOpacity": 0}),
    color='orange',
)
m

### Clip elevation to national forest boundary and plot (note, only pixels for the extent of the national forest are downloaded when plotting)

In [None]:
# Reproject the forest boundary to the DEM's crs before using it as the clip geometry
snf_p = snf.to_crs(dem.crs)
dem_c = clipping.clip(snf_p, dem)
dem_c.plot(cmap='terrain', figsize=(15, 15))

## Get Sentinel 2 level 2a processed data for the extent of the boundary and the month of June 2022

In [None]:
# Sentinel-2 L2A scenes for June 2022 with eo:cloud_cover below 1 (presumably percent -- confirm)
# NOTE: reuses `url` defined in the elevation cell above
nm = 'sentinel-2-l2a'
qry = {'eo:cloud_cover': {'lt': 1}}
dt = '2022-06'
ic, df, xra = get_stac_data(geo, url=url, name=nm, qry=qry, res=(10, 10), crs=5070, dt=dt)

## Visualize the various scenes and project area

In [None]:
# Scene footprints drawn as outlines (no fill) with the forest boundary layered on top in orange
m = snf.explore(
    m=df.explore(style_kwds={"fillOpacity": 0}),
    color='orange',
)
m

## Look at the xarray data array. Any attribute or coordinate variable can be used to subset the array.

In [None]:
# Display the Sentinel-2 data array returned by get_stac_data so its dimensions and coordinates can be inspected
xra

## Extract band B02 (blue) and mosaic the various scene dates to produce a 1-band raster. One could do various things here to subset the data and produce a seamless raster, but we are going to simply mosaic based on the order of the rasters in the item collection.

In [None]:
# Keep only the B02 band, then mosaic the scenes into a single raster
xra2 = xra.sel(band="B02")
sent2 = make_mosaic(xra2)
sent2.xdata

## Take a subset of the image and plot it

In [None]:
# Plot a 2000 x 2000 pixel window of the mosaicked B02 raster; presumably only the pixels needed for this window are downloaded
sent2.xdata[0, 10000:12000, 10000:12000].plot(figsize=(15, 15), robust=True)