In [1]:
%%bash
# Record where this notebook ran: host name + DNS lookup, CPU summary, memory.

host_name="$(hostname)"
echo "$host_name"
nslookup "$host_name"

printf '%s\n' '########################'

# Keep only the lscpu rows that mention "cpu(" (case-insensitive)
lscpu | grep -i 'cpu('
printf '%s\n' '########################'

# Keep only the memory row of free's human-readable table
free -h | grep -i mem

ip-10-12-69-251.us-west-2.compute.internal
Server:		10.12.64.2
Address:	10.12.64.2#53

Non-authoritative answer:
Name:	ip-10-12-69-251.us-west-2.compute.internal
Address: 10.12.69.251

########################
CPU(s):              8
On-line CPU(s) list: 0-7
NUMA node0 CPU(s):   0-7
########################
Mem:            31G        1.0G        243M        1.0M         29G         29G


In [2]:
import geopandas as gpd
from shapely.geometry import mapping
from pystac_client import Client
from pystac import ItemCollection

In [3]:
def make_geom(aoi_filename):
    '''Return the geometry of the first feature in a vector file as a GeoJSON-like dict.

    Only the first feature is used; any further features in the file are ignored.
    The result is what ``shapely.geometry.mapping`` produces for that geometry
    and is passed on to the STAC search as its area of interest.

    Parameters
    ----------
    aoi_filename : path-like or str
        File readable by ``geopandas.read_file`` (a GeoJSON file in this notebook).

    Returns
    -------
    dict
        Mapping of the first feature's geometry.

    Raises
    ------
    ValueError
        If the file contains no features.
    '''
    # read in AOI as a GeoDataFrame
    aoi = gpd.read_file(aoi_filename)
    if aoi.empty:
        raise ValueError(f"no features found in {aoi_filename!r}")

    # Take the first geometry by position instead of converting the whole
    # frame to a dict and looking up index label 0.
    first_geometry = aoi.geometry.iloc[0]

    # get the geometry of the AOI as a dictionary for use with PySTAC Client
    return mapping(first_geometry)

In [4]:
aoi_geojson_file = 'siouxFalls.geojson'

In [5]:
geom = make_geom(aoi_geojson_file)

In [6]:
geom['coordinates']

(((-96.8609619140625, 43.44893105587766),
  (-96.5863037109375, 43.44893105587766),
  (-96.5863037109375, 43.632099415557754),
  (-96.8609619140625, 43.632099415557754),
  (-96.8609619140625, 43.44893105587766)),)

In [7]:
def get_stac_records_sentinel_search(geom, date_range="2020-05-01/2021-12-31",
                                     max_cloud_cover=5, min_data_coverage=88):
    """Search the Earth Search STAC API for Sentinel-2 L2A COG scenes over an AOI.

    Prints the number of matching scenes and returns the search object so the
    caller can decide how to retrieve the items.

    Parameters
    ----------
    geom : dict
        GeoJSON-like geometry the scenes must intersect.
    date_range : str, optional
        Value sent as the search's ``datetime`` parameter
        (default ``"2020-05-01/2021-12-31"``).
    max_cloud_cover : int or float, optional
        Upper bound used in the ``eo:cloud_cover<...`` query (default 5).
    min_data_coverage : int or float, optional
        Lower bound used in the ``sentinel:data_coverage>...`` query (default 88).

    Returns
    -------
    The object returned by ``Client.search`` (displayed in this notebook as
    ``pystac_client.item_search.ItemSearch``).
    """
    # STAC API - Earth Search v0, Sentinel-2 L2A COGs (not Landsat)
    url = "https://earth-search.aws.element84.com/v0"

    # Search parameters
    params = {
        "collections": ["sentinel-s2-l2a-cogs"],
        "intersects": geom,
        "datetime": date_range,
        "limit": 100,
        "query": [
            f"eo:cloud_cover<{max_cloud_cover}",
            f"sentinel:data_coverage>{min_data_coverage}",
        ],
    }
    cat = Client.open(url)
    search = cat.search(**params)

    # Ask for the match count once and reuse it; the original called
    # matched() twice and discarded the first result.
    matched = search.matched()
    print(f"{matched} scenes found")
    return search


In [8]:
# Run the search for the AOI; the helper prints the match count and returns the search object.
thing = get_stac_records_sentinel_search(geom)

38 scenes found


In [9]:
type(thing)

pystac_client.item_search.ItemSearch

In [10]:
dir(thing)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_format_bbox',
 '_format_collections',
 '_format_datetime',
 '_format_fields',
 '_format_filter',
 '_format_ids',
 '_format_intersects',
 '_format_query',
 '_format_sortby',
 '_max_items',
 '_parameters',
 '_stac_io',
 'client',
 'get_all_items',
 'get_all_items_as_dict',
 'get_item_collections',
 'get_items',
 'get_parameters',
 'matched',
 'method',
 'url']

In [11]:
# Pull the search results in two forms: the list under the 'features' key of the
# dict representation, and the item objects themselves.
# NOTE(review): these look like two independent retrievals of the same results —
# confirm whether one could be derived from the other to avoid fetching twice.
search_dict = thing.get_all_items_as_dict()['features']
search_items = thing.get_all_items()

In [12]:
#(items_dict, item_collection) = get_stac_records_sentinel(geom)

In [13]:
search_items[0]

<Item id=S2B_14TPP_20211123_0_L2A>

In [14]:
len(search_dict)

38

In [15]:
search_dict[0]['assets'].keys()

dict_keys(['thumbnail', 'overview', 'info', 'metadata', 'visual', 'B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12', 'AOT', 'WVP', 'SCL'])

In [16]:
import yaml

from odc import stac
from pyproj import CRS
from pystac.extensions.projection import ProjectionExtension

def open_odc(items, crs=None, resolution=None, bands=None):
    """Build a chunked data cube from STAC items via ``odc.stac``.

    Parameters
    ----------
    items : indexable collection of STAC items
        Items to load. ``items[0]`` is consulted for the default CRS.
    crs : str, optional
        Output CRS passed to ``dc_load``. When omitted it is derived from the
        first item's ``proj:epsg`` property as ``"EPSG:<code>"``.
    resolution : tuple, optional
        Output resolution passed to ``dc_load``. Defaults to ``(-10, 10)``.
        NOTE(review): presumably (y, x) pixel size in output-CRS units — confirm.
    bands : list of str, optional
        Asset names to load. Defaults to ``['B04', 'B03', 'B02', 'B09']``.

    Returns
    -------
    Whatever ``stac.dc_load`` returns, chunked 1024 x 1024 along x/y.
    """
    # NOTE(review): this configuration is keyed on 'landsat-c2l2-sr', while the
    # notebook feeds it Sentinel-2 items and the loaded arrays display as uint16
    # rather than float32 — it presumably does not apply to them. Confirm the
    # intended collection key before relying on dtype/nodata from here.
    configuration_str = """---
        landsat-c2l2-sr:
          measurements:
            '*':
              dtype: float32
              nodata: 0
              units: 'm'
        """
    # safe_load gives the same safe parsing without requiring the libyaml C extension
    configuration = yaml.safe_load(configuration_str)
    datasets = list(stac.stac2ds(items, configuration))

    # Honour caller-supplied values; previously both arguments were silently
    # overwritten with the defaults below.
    if crs is None:
        crs = f"EPSG:{items[0].properties['proj:epsg']}"
    if resolution is None:
        resolution = (-10, 10)
    if bands is None:
        bands = ['B04', 'B03', 'B02', 'B09']

    data = stac.dc_load(
        datasets,
        bands=bands,
        chunks={"x": 1024, "y": 1024},
        output_crs=crs,
        resolution=resolution,
    )
    return data

In [17]:
#!conda install -y odc-stac

In [18]:
 _datacube = open_odc(search_items)

In [19]:
_datacube

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 9.35 GiB 2.00 MiB Shape (38, 11491, 11492) (1, 1024, 1024) Count 5513 Tasks 5472 Chunks Type uint16 numpy.ndarray",11492  11491  38,

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 9.35 GiB 2.00 MiB Shape (38, 11491, 11492) (1, 1024, 1024) Count 5513 Tasks 5472 Chunks Type uint16 numpy.ndarray",11492  11491  38,

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 9.35 GiB 2.00 MiB Shape (38, 11491, 11492) (1, 1024, 1024) Count 5513 Tasks 5472 Chunks Type uint16 numpy.ndarray",11492  11491  38,

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 9.35 GiB 2.00 MiB Shape (38, 11491, 11492) (1, 1024, 1024) Count 5513 Tasks 5472 Chunks Type uint16 numpy.ndarray",11492  11491  38,

Unnamed: 0,Array,Chunk
Bytes,9.35 GiB,2.00 MiB
Shape,"(38, 11491, 11492)","(1, 1024, 1024)"
Count,5513 Tasks,5472 Chunks
Type,uint16,numpy.ndarray


In [20]:
# NOTE(review): rioxarray is never referenced by name; presumably it is imported
# for its side effect of enabling the `.rio` accessor used on the next line.
import rioxarray
# Clip the cube to the AOI polygon. The polygon is passed with crs='epsg:4326'
# because its coordinates (from the GeoJSON file) are longitude/latitude pairs.
datacube = _datacube.rio.clip([geom], crs='epsg:4326')

In [21]:
datacube

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 345.45 MiB 2.00 MiB Shape (38, 2095, 2275) (1, 1024, 1024) Count 18456 Tasks 342 Chunks Type uint16 numpy.ndarray",2275  2095  38,

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 345.45 MiB 2.00 MiB Shape (38, 2095, 2275) (1, 1024, 1024) Count 18456 Tasks 342 Chunks Type uint16 numpy.ndarray",2275  2095  38,

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 345.45 MiB 2.00 MiB Shape (38, 2095, 2275) (1, 1024, 1024) Count 18456 Tasks 342 Chunks Type uint16 numpy.ndarray",2275  2095  38,

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray
"Array Chunk Bytes 345.45 MiB 2.00 MiB Shape (38, 2095, 2275) (1, 1024, 1024) Count 18456 Tasks 342 Chunks Type uint16 numpy.ndarray",2275  2095  38,

Unnamed: 0,Array,Chunk
Bytes,345.45 MiB,2.00 MiB
Shape,"(38, 2095, 2275)","(1, 1024, 1024)"
Count,18456 Tasks,342 Chunks
Type,uint16,numpy.ndarray


In [22]:
def nc_from_ds(DS, filename):
    """Write a dataset to a NetCDF file, leaving the caller's dataset untouched.

    Before writing, the attributes of the ``time`` coordinate are cleared and
    the ``spatial_ref`` variable is dropped. Both steps are applied to a
    shallow copy, so ``DS`` itself keeps its attributes and variables.

    Parameters
    ----------
    DS : xarray.Dataset
        Dataset with a ``time`` coordinate and a ``spatial_ref`` variable.
    filename : str or path-like
        Destination passed to ``Dataset.to_netcdf``.

    Raises
    ------
    ValueError
        From ``drop_vars`` if ``DS`` has no ``spatial_ref`` variable.
    """
    # Shallow copy: array data is shared, but attribute dicts are not, so
    # clearing them below does not leak back into the caller's object.
    ds1 = DS.copy()
    ds1.time.attrs = {}  #this allowed the nc to be written
    #DS.SCL.attrs = {}
    # drop_vars is the non-deprecated spelling of Dataset.drop(labels=...)
    ds1 = ds1.drop_vars('spatial_ref')
    ds1.to_netcdf(filename)

In [23]:
%%time
nc_from_ds(datacube, 'siouxFallsPruned.nc')

CPU times: user 2min 12s, sys: 1min 7s, total: 3min 20s
Wall time: 1min 32s


In [24]:
#! ls ~/.aws/

In [25]:
#! echo '*.nc' > .gitignore

# Download the netcdf tony

In [26]:
! date; ls -lh *.nc

Sat Jan  8 03:54:41 UTC 2022
-rw-rw-r-- 1 ec2-user ec2-user 1.4G Jan  8 03:54 siouxFallsPruned.nc


In [27]:
#! cp siouxFallsPruned.nc siouxFallsPruned1.nc

In [28]:
! conda list odc-stac

# packages in environment at /home/ec2-user/miniconda3/envs/cole:
#
# Name                    Version                   Build  Channel
odc-stac                  0.2.2              pyhd8ed1ab_0    conda-forge


In [29]:
#!conda install -y  odc-stac

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/ec2-user/miniconda3/envs/cole

  added / updated specs:
    - odc-stac


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    odc-stac-0.2.3             |     pyhd8ed1ab_0          38 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          38 KB

The following packages will be UPDATED:

  odc-stac                               0.2.2-pyhd8ed1ab_0 --> 0.2.3-pyhd8ed1ab_0



Downloading and Extracting Packages
odc-stac-0.2.3       | 38 KB     | ##################################### | 100% 
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
