In [8]:
import pystac
from pystac.stac_io import DefaultStacIO, StacIO
import botocore
import boto3
from urllib.parse import urlparse


In [14]:
class CustomStacIO(DefaultStacIO):
    """Custom STAC IO class that uses boto3 to read from S3."""

    def __init__(self):
        self.session = botocore.session.Session()
        self.s3_client = self.session.create_client(
            service_name="s3",
            region_name="us-east-1",
            endpoint_url="http://eoap-zoo-project-localstack.eoap-zoo-project.svc.cluster.local:4566",
            aws_access_key_id="test",
            aws_secret_access_key="test",
            #verify=True,
            #use_ssl=True,
            #config=Config(s3={"addressing_style": "path", "signature_version": "s3v4"}),
        )

    def read_text(self, source, *args, **kwargs):
        parsed = urlparse(source)
        if parsed.scheme == "s3":
            return (
                self.s3_client.get_object(Bucket=parsed.netloc, Key=parsed.path[1:])[
                    "Body"
                ]
                .read()
                .decode("utf-8")
            )
        else:
            return super().read_text(source, *args, **kwargs)

    def write_text(self, dest, txt, *args, **kwargs):
        parsed = urlparse(dest)
        if parsed.scheme == "s3":
            self.s3_client.put_object(
                Body=txt.encode("UTF-8"),
                Bucket=parsed.netloc,
                Key=parsed.path[1:],
                ContentType="application/geo+json",
            )
        else:
            super().write_text(dest, txt, *args, **kwargs)

StacIO.set_default(CustomStacIO)

In [61]:
cat = pystac.read_file("s3://results/90f980c6-8bac-11ef-ae11-0ac1619c2c37/catalog.json")

collection_id = "sub_path"

logger.info(f"Create collection with ID {collection_id}")

collection = None

collection = next(cat.get_all_collections())

logger.info("Got collection from outputs")

        
items = []

for item in collection.get_all_items():

    logger.info("Processing item {item.id}")
    
    for asset_key in item.assets.keys():
        logger.info(f"Processing asset {asset_key}")
        temp_asset = item.assets[asset_key].to_dict()
        temp_asset["storage:platform"] = "EOEPCA"
        temp_asset["storage:requester_pays"] = False
        temp_asset["storage:tier"] = "Standard"
        temp_asset["storage:region"] = "region_name"
        temp_asset["storage:endpoint"] = "endpoint_url"
        item.assets[asset_key] = item.assets[asset_key].from_dict(temp_asset)
    
    item.collection_id = collection_id

    items.append(item.clone())

item_collection = ItemCollection(items=items)

logger.info("Created collection from items")

# Trap the case of no output collection
if item_collection is None:
    logger.error("The output collection is empty")
    feature_collection = json.dumps({}, indent=2)

# Set the feature collection to be returned
results = item_collection.to_dict()
results["id"] = collection_id

[32m2024-10-16 12:56:18.198[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m5[0m - [1mCreate collection with ID sub_path[0m
[32m2024-10-16 12:56:18.208[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [1mGot collection from outputs[0m
[32m2024-10-16 12:56:18.216[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1mProcessing item {item.id}[0m
[32m2024-10-16 12:56:18.218[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mProcessing asset data[0m
[32m2024-10-16 12:56:18.227[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1mProcessing item {item.id}[0m
[32m2024-10-16 12:56:18.229[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m21[0m - [1mProcessing asset data[0m
[32m2024-10-16 12:56:18.231[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m36[0m - [1mCreated collection from items[0m


In [71]:
collection.id

'90f980c6-8bac-11ef-ae11-0ac1619c2c37'

In [62]:
results

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'stac_version': '1.0.0',
   'id': 'S2B_10TFK_20210713_0_L2A',
   'properties': {'proj:epsg': 32610,
    'proj:geometry': {'type': 'Polygon',
     'coordinates': [[[636990.0, 4410550.0],
       [691590.0, 4410550.0],
       [691590.0, 4482600.0],
       [636990.0, 4482600.0],
       [636990.0, 4410550.0]]]},
    'proj:bbox': [636990.0, 4410550.0, 691590.0, 4482600.0],
    'proj:shape': [7205, 5460],
    'proj:transform': [10.0,
     0.0,
     636990.0,
     0.0,
     -10.0,
     4482600.0,
     0.0,
     0.0,
     1.0],
    'proj:projjson': {'$schema': 'https://proj.org/schemas/v0.4/projjson.schema.json',
     'type': 'ProjectedCRS',
     'name': 'WGS 84 / UTM zone 10N',
     'base_crs': {'name': 'WGS 84',
      'datum': {'type': 'GeodeticReferenceFrame',
       'name': 'World Geodetic System 1984',
       'ellipsoid': {'name': 'WGS 84',
        'semi_major_axis': 6378137,
        'inverse_flattening': 298.257223563}},
 

In [68]:

dir(ItemCollection.from_dict(results))

['__abstractmethods__',
 '__add__',
 '__annotations__',
 '__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__orig_bases__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_repr_html_',
 'clone',
 'extra_fields',
 'from_dict',
 'from_file',
 'is_item_collection',
 'items',
 'save_object',
 'to_dict']

In [70]:
ItemCollection.from_dict(results).items[0].get_assets()

{'data': <Asset href=s3://results/90f980c6-8bac-11ef-ae11-0ac1619c2c37/90f980c6-8bac-11ef-ae11-0ac1619c2c37/S2B_10TFK_20210713_0_L2A/otsu.tif>}

In [46]:
cat = pystac.read_file("s3://results/90f980c6-8bac-11ef-ae11-0ac1619c2c37/catalog.json")

In [47]:
cat.describe()

* <Catalog id=catalog>
    * <Collection id=90f980c6-8bac-11ef-ae11-0ac1619c2c37>
      * <Item id=S2B_10TFK_20210713_0_L2A>
      * <Item id=S2A_10TFK_20220524_0_L2A>


In [48]:
collection = next(cat.get_all_collections())

In [49]:
collection

In [51]:
collection.describe()

* <Collection id=90f980c6-8bac-11ef-ae11-0ac1619c2c37>
  * <Item id=S2B_10TFK_20210713_0_L2A>
  * <Item id=S2A_10TFK_20220524_0_L2A>


In [40]:
next(collection.get_all_items())

In [52]:
items = collection.get_all_items()

In [53]:
items

<itertools.chain at 0x73befdc29430>

In [54]:

collection_id = "ccc"
itemFinal = []
for i in items:
    logger.info(f"Processing item {i.id}")
    for a in i.assets.keys():
        logger.info(f"Processing asset {a}")
        cDict = i.assets[a].to_dict()
        cDict["storage:platform"] = "EOEPCA"
        cDict["storage:requester_pays"] = False
        cDict["storage:tier"] = "Standard"
        cDict["storage:region"] = "aaa"
        cDict["storage:endpoint"] = "bb"
        i.assets[a] = i.assets[a].from_dict(cDict)
    i.collection_id = collection_id
    itemFinal += [i.clone()]
colln = ItemCollection(items=itemFinal)

[32m2024-10-16 12:47:43.130[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mProcessing item S2B_10TFK_20210713_0_L2A[0m
[32m2024-10-16 12:47:43.131[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mProcessing asset data[0m
[32m2024-10-16 12:47:43.132[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m4[0m - [1mProcessing item S2A_10TFK_20220524_0_L2A[0m
[32m2024-10-16 12:47:43.133[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mProcessing asset data[0m


In [45]:
collection.to_dict()

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'stac_version': '1.0.0',
   'id': 'S2A_10TFK_20220524_0_L2A',
   'properties': {'proj:epsg': 32610,
    'proj:geometry': {'type': 'Polygon',
     'coordinates': [[[636990.0, 4410550.0],
       [691590.0, 4410550.0],
       [691590.0, 4482600.0],
       [636990.0, 4482600.0],
       [636990.0, 4410550.0]]]},
    'proj:bbox': [636990.0, 4410550.0, 691590.0, 4482600.0],
    'proj:shape': [7205, 5460],
    'proj:transform': [10.0,
     0.0,
     636990.0,
     0.0,
     -10.0,
     4482600.0,
     0.0,
     0.0,
     1.0],
    'proj:projjson': {'$schema': 'https://proj.org/schemas/v0.4/projjson.schema.json',
     'type': 'ProjectedCRS',
     'name': 'WGS 84 / UTM zone 10N',
     'base_crs': {'name': 'WGS 84',
      'datum': {'type': 'GeodeticReferenceFrame',
       'name': 'World Geodetic System 1984',
       'ellipsoid': {'name': 'WGS 84',
        'semi_major_axis': 6378137,
        'inverse_flattening': 298.257223563}},
 