### Searching and downloading Sentinel scenes

Latest Copernicus API (OData) for searching and downloading Sentinel scenes, 2024-08-05

In [None]:
import os
import datetime
import pandas as pd
import requests
import shapely
import geopandas as gpd
#from creds import *

##package from github: https://github.com/MrChebur/Copernicus-OData-wrapper
##installation failed, so the package's scripts were copied into the same folder as this script
from copernicus_odata_wrapper.query import Query
from copernicus_odata_wrapper.filter import Filter
from copernicus_odata_wrapper import attributes



### Search parameters

In [None]:
##time span to search (sensing dates)
start_date = datetime.datetime(year=2024, month=1, day=1)
end_date = datetime.datetime(year=2024, month=12, day=31)

##collection name and area of interest (WKT polygon string)
data_collection = "SENTINEL-2"
aoi = "POLYGON((12.11 46.29,12.14 46.29,12.14 46.26,12.11 46.26,12.11 46.29))"

##output folder for the footprint shapefile and the downloaded archives
##(alternative used earlier: "F:/RS/2022")
outpath = "E:/summer_school_2025/S2"

##minimum file size in bytes (ca. 60MB); smaller products are dropped
minfilesize = 60_000_000

##maximum cloud cover for the whole scene (presumably percent -- TODO confirm);
##alternative value used earlier: 10.0
maxcloudcover = 30.0

##minimum processing baseline, compared against the number parsed from the
##4th "_"-separated part of the product name
minbaseline = 440

##fill in credentials (user name and password for the token request)
bn = ""
pw = ""

### Search for scenes
Workaround: a single request returns at most 20 results, so the search is run separately for each day. This takes a long time.

In [None]:
#json = requests.get(f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq '{data_collection}' and OData.CSC.Intersects(area=geography'SRID=4326;{aoi}) and ContentDate/Start gt {start_date}T00:00:00.000Z and ContentDate/Start lt {end_date}T00:00:00.000Z").json()
#resp=pd.DataFrame.from_dict(json['value'])
#resp.head(5)
#resp.tail(5)
#print(len(resp.index))

##workaround because only 20 results max per request: search day by day
##(slow, one request per day)
dates=pd.date_range(start_date,end_date,freq="D")

##collect the daily result tables and concatenate once at the end;
##concatenating inside the loop would re-copy the growing table every day
daily_frames=[]
for start_date_i,end_date_i in zip(dates[:-1],dates[1:]):

    ##setting up the filter
    f=Filter()
    ##filter by sensing dates
    f.by_sensing_date(start_date_i,end_date_i,full_day=True)
    f.And()
    ##filter by coordinates
    f.by_geographic_criteria(aoi)
    f.And()
    ##filter by collection name (taken from the search parameters)
    f.collection(data_collection)
    f.And()
    ##consider cloud cover<maxcloudcover and only l2a data
    queries = [
        attributes.CloudCover()<maxcloudcover,
        attributes.ProductType()=='S2MSI2A' ##L2A data (BOA)
    ]
    f.by_attribute(queries)

    query=Query()
    query.set_filter(f)
    ##sort results
    query.set_orderby('ContentDate/Start',ascending=True)
    query.set_expand(attributes=True,assets=True)
    response=query.send()

    response_df=pd.DataFrame.from_dict(response['value'])
    ##keep (and report) only days that returned at least one product
    if len(response_df.index)>0:
        daily_frames.append(response_df)
        print(start_date_i)

##one table for all days with a clean 0..n-1 index; empty table if nothing was found
resp=pd.concat(daily_frames,ignore_index=True) if daily_frames else pd.DataFrame()

##NOTE(review): consecutive search windows share a boundary day, so the same
##product may be returned twice -- verify against Filter.by_sensing_date.
##Dropping repeated product ids is harmless either way.
if "Id" in resp.columns:
    resp=resp.drop_duplicates(subset="Id").reset_index(drop=True)

#resp.to_csv(os.path.join(outpath,"out.csv"),index=False)
##re-read (not working!)
#resp=pd.DataFrame(pd.read_csv(os.path.join(outpath,"out.csv")))


### Extract scene footprints and export

In [None]:
##lower limit for file size; .copy() so that the columns added below are
##written to an independent table and not to a slice of resp
##(avoids pandas' SettingWithCopyWarning)
resp_size=resp[resp["ContentLength"]>minfilesize].copy()
#print(resp_size.head(5))

##one row per scene built from the "ContentDate" entries; only needed for
##the optional date filter below
temp=pd.DataFrame(list(resp_size["ContentDate"]))

# ##not required
# temp["Start"]=pd.to_datetime(temp["Start"],format='%Y-%m-%dT%H:%M:%S.%fZ')
# choice=pd.Series(temp["Start"]>=start_date)
# resp_year=resp_size[choice.values]
resp_year=resp_size

##split the product name once; part 2 is used as date string, part 3 as baseline
name_parts=resp_year["Name"].str.split("_")
resp_year['date']=name_parts.str[2]
resp_year['year']=resp_year['date'].str[0:4]
resp_year['month']=resp_year['date'].str[4:6]
#print(resp_year.date.unique())
print(resp_year['month'].unique())

##check processing baseline: drop the leading character of part 3 and read
##the rest as integer; keep minbaseline <= baseline < 9999
resp_year["baseline"]=name_parts.str[3].str[1:].astype(int)
resp_year=resp_year[(resp_year.baseline>=minbaseline)&(resp_year.baseline<9999)]
print(resp_year.head(5))

##extract footprints of scenes: "GeoFootprint" holds a GeoJSON-like mapping
##(keys 'type' and 'coordinates'); shapely.geometry.shape converts it directly
##and also handles interior rings and multi-part geometries
polygons=[shapely.geometry.shape(footprint) for footprint in resp_year["GeoFootprint"]]

##create polygon layer and save (create the output folder if it is missing)
polygons_out=gpd.GeoDataFrame(crs="epsg:4326",geometry=polygons)
os.makedirs(outpath,exist_ok=True)
polygons_out.to_file(os.path.join(outpath,"scenes_selected.shp"))

#print(resp_year)
#print(resp_year.)

### Init session and credentials and download

In [None]:

##set credentials
def get_keycloak(username: str, password: str) -> str:
    """Request an access token from the Copernicus identity service.

    Sends a password-grant request for the public client "cdse-public".

    Args:
        username: Account user name.
        password: Account password.

    Returns:
        The value of "access_token" in the JSON response.

    Raises:
        Exception: If the request could not be sent or the server answered
            with an error status. The original error is chained as the cause.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
        }
    try:
        r = requests.post("https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
        data=data,
        timeout=60,  # do not hang forever if the server does not answer
        )
    except requests.RequestException as e:
        # No response object exists here, so report the transport error itself.
        raise Exception(
            f"Keycloak token creation failed. The request could not be sent: {e}"
            ) from e

    try:
        r.raise_for_status()
    except requests.HTTPError as e:
        # The error body is usually JSON, but fall back to the raw text so that
        # a non-JSON answer does not hide the real problem.
        try:
            detail = r.json()
        except ValueError:
            detail = r.text
        raise Exception(
            f"Keycloak token creation failed. Response from the server was: {detail}"
            ) from e
    return r.json()["access_token"]


def init_session():
    """Create a requests session that sends a fresh bearer token.

    The token is requested with the module-level credentials ``bn`` and ``pw``.

    Returns:
        A ``requests.Session`` whose "Authorization" header is set to
        "Bearer <token>".
    """
    token = get_keycloak(bn, pw)
    authorized = requests.Session()
    authorized.headers["Authorization"] = f"Bearer {token}"
    return authorized



##multiple downloads: one zip archive per selected scene, skipping existing files
redirect_codes=(301,302,303,307,308)
chunk_size=1024*1024 ##write the archive in 1 MiB pieces instead of holding it in memory
request_timeout=60 ##seconds; avoids hanging forever on a dead connection

for productid,name in zip(resp_year['Id'],resp_year['Name']):
    outfile=os.path.join(outpath,f"{name}.zip")

    if os.path.exists(outfile):
        print(f"{productid} already downloaded.")
        continue

    url=f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products({productid})/$value"

    ##new session (= new token) per product; closed again by the with-block
    with init_session() as session:
        ##redirects are followed by hand so that every request goes through
        ##the session and therefore carries the Authorization header
        response=session.get(url,allow_redirects=False,stream=True,timeout=request_timeout)
        print(response.status_code)
        if response.status_code not in redirect_codes:
            ##as before: nothing is downloaded unless the catalogue redirects
            print(f"skipping {name}: no redirect to a download location")
            response.close()
            continue

        print(f"downloading {name}")
        while response.status_code in redirect_codes:
            url=response.headers['Location']
            response.close()
            response=session.get(url,allow_redirects=False,stream=True,timeout=request_timeout)
            print(response.status_code)

        if response.status_code!=200:
            ##stop the whole run on a failed download
            print(f"download of {name} failed with status {response.status_code}, stopping.")
            response.close()
            break

        ##stream to a temporary name and rename when complete, so that an
        ##interrupted download is not mistaken for a finished file on the next
        ##run (a leftover .part file is simply overwritten)
        partfile=outfile+".part"
        with response,open(partfile,'wb') as p:
            for chunk in response.iter_content(chunk_size=chunk_size):
                p.write(chunk)
        os.replace(partfile,outfile)

        # ##alternative via curl (not faster)
        # cmd=f'curl -H "Authorization: Bearer {keycloak_token}" "https://catalogue.dataspace.copernicus.eu/odata/v1/Products({productid})/$value" --location-trusted --output {outfile}'
        # os.system(cmd)


