#### Import necessary Python modules

In [15]:
# HTTP requests
import requests


# utility libraries
from datetime import date
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium

import getpass

# JSON parser
import json

# XML parser
import xml.etree.ElementTree as ET

# system modules
import os
import re
import sys
import random

# data manipulation
import pandas as pd
import numpy as np

# geo data manipulation
import geopandas as gpd
import shapely

# EO data manipulation
import rasterio
import xarray as xr
import rioxarray as riox

# visualization product
import hvplot.pandas
import matplotlib.pyplot as plt
import matplotlib.image
from rasterio.windows import Window

# file manipulation
from pathlib import Path
import fsspec

In [4]:
# Load the catchment outline that defines the area of interest (AOI).
# No `crs` keyword is passed to read_file: it is forwarded to the I/O engine
# rather than overriding the CRS, and the previous value ("EPGS:4326") was a
# misspelling of the EPSG code anyway.
aoi = gpd.read_file('data/catchment_outline.geojson')

# Make sure the AOI is expressed in lon/lat (EPSG:4326): the catalogue query
# built later embeds its bounding box with SRID=4326.
if aoi.crs is None:
    aoi = aoi.set_crs("EPSG:4326")
else:
    aoi = aoi.to_crs("EPSG:4326")

# Interactive map of the AOI drawn over a tiled basemap.
plot = aoi.hvplot(geo=True,
                  tiles='OSM',  # tiles='EsriImagery' is an alternative basemap
                  frame_width=800,
                  frame_height=600,
                  alpha=0.3,
                  line_width=4
                  )
                                

In [5]:
# Display the AOI map object created from `aoi.hvplot(...)`.
plot

In [6]:
# Get the bounds of the whole AOI (over all of its geometries), unpacked as
# min/max x and y. These four values are the corners of the rectangular
# search polygon built in the search-parameters cell.
minx, miny, maxx, maxy = aoi.total_bounds

In [7]:
# Root endpoint of the product catalogue (OData API)
catalogue_odata_url = "https://catalogue.dataspace.copernicus.eu/odata/v1"

# --- search parameters -------------------------------------------------------
collection_name = "SENTINEL-2"
product_type = "S2MSI2A"
max_cloud_cover = 1

# Closed rectangular ring around the AOI bounding box; the first corner is
# repeated at the end.
bbox_ring = [
    (minx, miny),
    (minx, maxy),
    (maxx, maxy),
    (maxx, miny),
    (minx, miny),
]
# NOTE(review): from here on `aoi` is the string form of the bounding polygon
# and no longer the GeoDataFrame loaded earlier, so the bounds cell cannot be
# re-run after this one without reloading the AOI.
aoi = str(shapely.geometry.Polygon(bbox_ring))

# Acquisition-start window used by the query
search_period_start = "2018-02-01T00:00:00.000Z"
search_period_end = "2018-06-30T00:00:00.000Z"

In [8]:
# Individual $filter conditions; they are AND-ed together below.
filter_clauses = [
    # restrict to the requested collection
    f"Collection/Name eq '{collection_name}'",
    # skip products whose name contains 'SAFE'
    "contains(Name, 'SAFE') ne true",
    # restrict to the requested product type
    f"Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq '{product_type}')",
    # footprint must intersect the AOI bounding polygon
    f"OData.CSC.Intersects(area=geography'SRID=4326;{aoi}')",
    # acquisition start must fall strictly inside the search period
    f"ContentDate/Start gt {search_period_start}",
    f"ContentDate/Start lt {search_period_end}",
]

search_query = f"{catalogue_odata_url}/Products?$filter=" + " and ".join(filter_clauses)

In [9]:
# Run the catalogue query. Fail loudly on an HTTP error instead of letting a
# rejected request surface later as an obscure `KeyError: 'value'`, and bound
# the wait so a stalled connection cannot hang the notebook.
http_response = requests.get(search_query, timeout=60)
http_response.raise_for_status()

# `response` is the decoded JSON payload; its "value" entry holds the records
# that become the rows of `result`.
response = http_response.json()
result = pd.DataFrame.from_dict(response["value"])

In [41]:
# Location of the S3 credentials file. It holds a single "<key>:<secret>"
# line. The path defaults to ~/.s3_CDSE_passwd and can be overridden with the
# CDSE_S3_CREDENTIALS_FILE environment variable, so the notebook is not tied
# to one user's home directory.
credentials_file = Path(os.environ.get("CDSE_S3_CREDENTIALS_FILE",
                                       Path.home() / ".s3_CDSE_passwd"))

# Read through a context manager so the file handle is always closed, and
# split on the first ':' only so a secret containing ':' stays intact.
with open(credentials_file) as credentials_handle:
    credential = credentials_handle.read().strip().split(':', 1)

if len(credential) != 2:
    raise ValueError(f"{credentials_file} must contain a single '<key>:<secret>' line")

# Authenticated S3 filesystem pointing at the eodata endpoint.
fs = fsspec.filesystem('s3',
                       key=credential[0],
                       secret=credential[1],
                       endpoint_url='https://eodata.dataspace.copernicus.eu',
                       anon=False)

In [69]:
# Bands to collect for every product; these are also the columns of `df`.
band_names = ['B03', 'B11']


def _pick_band_files(paths):
    """Map each name in `band_names` to the first path ending in '_<band>_20m.jp2'.

    A band with no matching path maps to None, so one incomplete product
    neither shifts values between columns nor aborts the whole cell.
    """
    return {
        band: next((p for p in paths if p.endswith(f"_{band}_20m.jp2")), None)
        for band in band_names
    }


# In a glob, `[03,11]` is a character class (any ONE of '0', '3', ',', '1'),
# not an alternation, so it would also accept e.g. a B01 file. Narrow the
# listing to `_B[01][13]_20m.jp2` candidates with a single glob per product
# and select the wanted bands by exact suffix afterwards.
bands = result['S3Path'].apply(lambda x: fs.glob(f"s3:/{x}/GRANULE/**/*_B[01][13]_20m.jp2"))

# One row per product (same index as `result`), one column per band. Columns
# are assigned by band name rather than by position in the glob output.
df = pd.DataFrame([_pick_band_files(paths) for paths in bands],
                  index=bands.index,
                  columns=band_names)

In [74]:
# Flat list of file paths: every B03 entry first, followed by every B11 entry.
flist = [*df['B03'].to_list(), *df['B11'].to_list()]

In [0]:
# Second filesystem handle; gen_json uses it (`fs2.open(outf, 'wb')`) to write
# the generated JSON files.
# NOTE(review): the empty protocol string presumably resolves to fsspec's
# default (local) filesystem — confirm; an explicit protocol would be clearer.
fs2 = fsspec.filesystem('')

In [79]:
# Peek at the first collected path to check its layout.
flist[0]

'eodata/Sentinel-2/MSI/L2A/2018/02/28/S2A_MSIL2A_20180228T101021_N9999_R022_T32TPS_20221108T224604/GRANULE/L2A_T32TPS_A014033_20180228T101021/IMG_DATA/R20m/T32TPS_20180228T101021_B03_20m.jp2'

In [76]:
# gen_json below uses `SingleHdf5ToZarr`; the class lives in `kerchunk.hdf`.
# The previous import named a truncated identifier (`SingleHdf5ToZa`) from
# `kerchunk.xarray_backend`, which does not provide it.
from kerchunk.hdf import SingleHdf5ToZarr
from pathlib import Path
import os
import ujson

# Keyword arguments for `fs.open()` (used by gen_json).
# `anon`, `default_fill_cache` and `default_cache_type` are options of the
# filesystem constructor; passed to `open()` on an existing filesystem they do
# not configure anything (and `anon=True` contradicted the authenticated `fs`).
# The per-file equivalents are `fill_cache` and `cache_type`.
# fill_cache=False avoids caching data in between file chunks, which lowers
# memory usage.
so = dict(mode='rb', fill_cache=False, cache_type='first')

def gen_json(file_url):
    """Write a kerchunk reference JSON for one remote file.

    The file is opened through `fs` with the options in `so`, scanned with
    `SingleHdf5ToZarr`, and the resulting references are written through `fs2`
    to `<month>_<file stem>.json`.

    Parameters
    ----------
    file_url : str
        Path of the file to index. The month is taken from the
        `/YYYY/MM/DD/` segment of the path (e.g.
        'eodata/Sentinel-2/MSI/L2A/2018/02/28/...' gives '02'); 'unknown' is
        used when the path has no such segment.

    NOTE(review): `SingleHdf5ToZarr` targets HDF5 input, while the paths
    collected in `flist` end in `.jp2` — confirm this function is meant to be
    applied to them.
    """
    # Output name parts. The month was previously read from a fixed position
    # (`split('/')[2]`), which yields 'MSI' for the layout above instead of
    # the month; locate the date segment explicitly instead.
    variable = file_url.split('/')[-1].split('.')[0]
    date_match = re.search(r'/(\d{4})/(\d{2})/(\d{2})/', file_url)
    month = date_match.group(2) if date_match else 'unknown'
    outf = f'{month}_{variable}.json'  # file name to save json to

    with fs.open(file_url, **so) as infile:
        # inline_threshold adjusts the size below which binary blocks are
        # included directly in the output; a higher threshold can result in a
        # larger json file but faster loading time.
        h5chunks = SingleHdf5ToZarr(infile, file_url, inline_threshold=300)
        # Build the references before touching the output file so that a
        # failed scan does not leave an empty JSON file behind.
        references = ujson.dumps(h5chunks.translate()).encode()

    with fs2.open(outf, 'wb') as f:
        f.write(references)

In [None]:
max_cloud_cover = 100.00

# Leading part of the cloud-cover condition. It is appended as the last clause
# of the query, so anything from this marker onwards is a cloud-cover clause
# left behind by a previous run of this cell.
cloud_clause_head = " and Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover'"

# Strip such a leftover clause first. This makes the cell idempotent:
# re-running it (possibly with another threshold) replaces the condition
# instead of stacking a new one on top of the old one.
base_query = search_query.split(cloud_clause_head, 1)[0]
search_query = (f"{base_query}{cloud_clause_head} "
                f"and att/OData.CSC.DoubleAttribute/Value le {max_cloud_cover})")

# print(f"""\n{search_query.replace(' ', "%20")}\n""")

# Fail loudly on an HTTP error instead of an obscure `KeyError: 'value'`, and
# bound the wait so a stalled connection cannot hang the notebook.
http_response = requests.get(search_query, timeout=60)
http_response.raise_for_status()
response = http_response.json()
result = pd.DataFrame.from_dict(response["value"])

In [227]:
# Display the catalogue search result table.
result

Unnamed: 0,@odata.mediaContentType,Id,Name,ContentType,ContentLength,OriginDate,PublicationDate,ModificationDate,Online,EvictionDate,S3Path,Checksum,ContentDate,Footprint,GeoFootprint
0,application/octet-stream,4243afa5-a55e-5fcd-a140-5481f3e69381,S2A_MSIL2A_20180228T101021_N9999_R022_T32TPS_2...,application/octet-stream,1430998754,2022-11-08T21:44:25.213Z,2022-11-08T23:39:32.613Z,2022-11-09T16:24:11.631Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/28/S2A_MSIL...,[{}],"{'Start': '2018-02-28T10:10:21.026Z', 'End': '...",geography'SRID=4326;POLYGON ((10.3140352803744...,"{'type': 'Polygon', 'coordinates': [[[10.31403..."
1,application/octet-stream,3373447b-48a0-5283-a0d5-bddd465482b9,S2B_MSIL2A_20180206T102209_N9999_R065_T32TPS_2...,application/octet-stream,872861419,2022-11-09T00:09:40.933Z,2022-11-09T01:56:46.871Z,2022-11-09T17:01:17.925Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/06/S2B_MSIL...,[{}],"{'Start': '2018-02-06T10:22:09.027Z', 'End': '...",geography'SRID=4326;POLYGON ((11.7519401846995...,"{'type': 'Polygon', 'coordinates': [[[11.75194..."
2,application/octet-stream,ad0833be-5c85-5222-bcb4-9f5f5ab5705a,S2A_MSIL2A_20180208T101151_N9999_R022_T32TPT_2...,application/octet-stream,913210352,2022-11-08T14:20:47.458Z,2022-11-08T16:13:19.060Z,2022-11-09T14:40:03.581Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/08/S2A_MSIL...,[{}],"{'Start': '2018-02-08T10:11:51.026Z', 'End': '...",geography'SRID=4326;POLYGON ((10.3146581512162...,"{'type': 'Polygon', 'coordinates': [[[10.31465..."
3,application/octet-stream,e25240c9-ad2f-51f7-bcf4-f255466fea9d,S2A_MSIL2A_20180221T102031_N9999_R065_T32TPT_2...,application/octet-stream,1159614561,2022-11-08T14:19:25.405Z,2022-11-08T16:10:12.650Z,2022-11-09T14:39:17.103Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/21/S2A_MSIL...,[{}],"{'Start': '2018-02-21T10:20:31.026Z', 'End': '...",geography'SRID=4326;POLYGON ((11.7524028466031...,"{'type': 'Polygon', 'coordinates': [[[11.75240..."
4,application/octet-stream,724a74ef-2eb5-5b7d-9ce2-ccab32a25ada,S2B_MSIL2A_20180213T101119_N9999_R022_T32TPS_2...,application/octet-stream,1320650637,2022-11-09T04:29:01.776Z,2022-11-09T06:17:39.272Z,2022-11-09T17:57:28.877Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/13/S2B_MSIL...,[{}],"{'Start': '2018-02-13T10:11:19.027Z', 'End': '...",geography'SRID=4326;POLYGON ((10.3140352803744...,"{'type': 'Polygon', 'coordinates': [[[10.31403..."
5,application/octet-stream,28d86255-ec9f-5bff-9d5d-97028255a3fc,S2A_MSIL2A_20180201T102241_N9999_R065_T32TPT_2...,application/octet-stream,801787735,2022-11-08T14:19:39.495Z,2022-11-08T16:12:04.248Z,2022-11-09T14:39:50.617Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/01/S2A_MSIL...,[{}],"{'Start': '2018-02-01T10:22:41.026Z', 'End': '...",geography'SRID=4326;POLYGON ((11.7514858548926...,"{'type': 'Polygon', 'coordinates': [[[11.75148..."
6,application/octet-stream,62353d28-00f1-5a39-8430-8581da4ba5aa,S2A_MSIL2A_20180211T102141_N9999_R065_T32TPT_2...,application/octet-stream,944211998,2022-11-08T14:19:41.718Z,2022-11-08T16:08:36.522Z,2022-11-09T14:38:59.896Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/11/S2A_MSIL...,[{}],"{'Start': '2018-02-11T10:21:41.026Z', 'End': '...",geography'SRID=4326;POLYGON ((11.7518992847344...,"{'type': 'Polygon', 'coordinates': [[[11.75189..."
7,application/octet-stream,d40e33ce-38fb-5fb0-ab43-8e039cfc065f,S2B_MSIL2A_20180216T102059_N9999_R065_T32TPS_2...,application/octet-stream,1132708897,2022-11-09T04:33:59.923Z,2022-11-09T06:30:25.256Z,2022-11-09T18:01:13.044Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/16/S2B_MSIL...,[{}],"{'Start': '2018-02-16T10:20:59.027Z', 'End': '...",geography'SRID=4326;POLYGON ((11.7516153472605...,"{'type': 'Polygon', 'coordinates': [[[11.75161..."
8,application/octet-stream,02dc2da1-8a31-5e0c-adda-c03817e53c3d,S2A_MSIL2A_20180218T101051_N9999_R022_T32TPT_2...,application/octet-stream,817457143,2022-11-08T14:19:40.466Z,2022-11-08T16:07:35.413Z,2022-11-09T14:38:48.046Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/18/S2A_MSIL...,[{}],"{'Start': '2018-02-18T10:10:51.026Z', 'End': '...",geography'SRID=4326;POLYGON ((10.3148104952024...,"{'type': 'Polygon', 'coordinates': [[[10.31481..."
9,application/octet-stream,eabe3dd7-bee2-5a88-a42b-cadf911071ca,S2B_MSIL2A_20180213T101119_N9999_R022_T32TPT_2...,application/octet-stream,1282584573,2022-11-08T14:19:11.222Z,2022-11-08T16:11:34.045Z,2022-11-09T14:39:46.460Z,True,,/eodata/Sentinel-2/MSI/L2A/2018/02/13/S2B_MSIL...,[{}],"{'Start': '2018-02-13T10:11:19.027Z', 'End': '...",geography'SRID=4326;POLYGON ((10.31467721264 4...,"{'type': 'Polygon', 'coordinates': [[[10.31467..."


In [219]:
def _footprint_wkt(footprint):
    """Return the text after the first ';' of a footprint string, without single quotes."""
    after_srid = footprint.split(';')[1]
    return after_srid.replace("'", "")


# One cleaned geometry string per product footprint
geometry_str = result['Footprint'].map(_footprint_wkt)

In [220]:
# Parse the cleaned footprint strings (WKT) into a GeoSeries of geometries.
geom = gpd.GeoSeries.from_wkt(geometry_str)

In [221]:
# Product names paired with their footprint geometries, declared as EPSG:4326
gdf = gpd.GeoDataFrame(
    {'Name': result['Name']},
    geometry=geom,
    crs="EPSG:4326",
)

In [222]:
# Display the product names together with their footprint geometries.
gdf

Unnamed: 0,Name,geometry
0,S2A_MSIL2A_20180228T101021_N9999_R022_T32TPS_2...,"POLYGON ((10.31404 46.94617, 10.29048 45.95826..."
1,S2B_MSIL2A_20180206T102209_N9999_R065_T32TPS_2...,"POLYGON ((11.75194 46.84761, 11.75558 46.92054..."
2,S2A_MSIL2A_20180208T101151_N9999_R022_T32TPT_2...,"POLYGON ((10.31466 46.96894, 10.31189 46.85818..."
3,S2A_MSIL2A_20180221T102031_N9999_R065_T32TPT_2...,"POLYGON ((11.75240 46.85775, 11.80285 47.81947..."
4,S2B_MSIL2A_20180213T101119_N9999_R022_T32TPS_2...,"POLYGON ((10.31404 46.94617, 10.29048 45.95826..."
5,S2A_MSIL2A_20180201T102241_N9999_R065_T32TPT_2...,"POLYGON ((11.75149 46.84027, 11.80285 47.81947..."
6,S2A_MSIL2A_20180211T102141_N9999_R065_T32TPT_2...,"POLYGON ((11.75190 46.84815, 11.80285 47.81947..."
7,S2B_MSIL2A_20180216T102059_N9999_R065_T32TPS_2...,"POLYGON ((11.75162 46.84111, 11.75558 46.92054..."
8,S2A_MSIL2A_20180218T101051_N9999_R022_T32TPT_2...,"POLYGON ((10.31481 46.97503, 10.31189 46.85818..."
9,S2B_MSIL2A_20180213T101119_N9999_R022_T32TPT_2...,"POLYGON ((10.31468 46.96971, 10.31189 46.85818..."


In [226]:
# Draw the product footprints as semi-transparent polygons over OSM tiles
footprint_plot_options = dict(
    geo=True,
    tiles='OSM',
    frame_height=600,
    frame_width=600,
    alpha=0.2,
)
plot = gdf.hvplot.polygons(**footprint_plot_options)
plot