# Import required packages

In [1]:
import os
import sys
import pandas as pd
import requests
import json
from datetime import datetime, date
from tqdm import tqdm
from dotenv import load_dotenv


# Credentials & Token

Save your cdse log in credentials to the .env file as:
CDSE_email = "your_email"
CDSE_password = "your_password"

In [2]:
load_dotenv()
username=os.getenv("CDSE_email")
password=os.getenv("CDSE_password")

# check that the credentials are set correctly 
if not password or not username:
    print("Please provide your Sentinel Hub credentials in the .env file.")
    exit(1)

In [3]:
def get_access_token(username: str, password: str) -> str:
    data = {
        "client_id": "cdse-public",
        "username":username,
        "password":password,
        "grant_type": "password",
        }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
        )
        r.raise_for_status()
    except Exception as e:
        raise Exception(
            f"Access token creation failed. Reponse from the server was: {r.json()}"
            )
    print("Access token created successfully!")
    return r.json()["access_token"]

In [4]:
# Get access token
token = get_access_token(username, password)

Access token created successfully!


# Set Search Criteria

In [5]:
data_collection = "SENTINEL-2"
bbox = [12.45, 44.825, 12.7, 45.055]    # Po River
#bbox = [9.6, 42.95, 9.9, 43.155]       # North East Corsica
#bbox = [16.5, 38.35, 16.755, 38.555]   # South East Calabria
start_date =  "2019-07-26"
end_date = "2019-07-31"
output_dir = "../data/SAFE"

In [None]:
https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq 'CCM' and OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((12.655118166047592 47.44667197521409,21.39065656328509 48.347694733853245,28.334291357162826 41.877123516783655,17.47086198383573 40.35854475076158,12.655118166047592 47.44667197521409))')&$top=20

In [None]:
def get_url(data_collection, bbox, start_date, end_date, token):
  cloud_cover = 20
  requests.get(
    f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq '{data_collection}' "  
    f"and OData.CSC.Intersects(area=geography'SRID=4326;{bbox}) "  
    f"and ContentDate/Start ge {start_date}T00:00:00.000Z and ContentDate/Start le {end_date}T00:00:00.000Z "  
    f"and Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover' and att/OData.CSC.DoubleAttribute/Value lt {cloud_cover}) "  
    f"and Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq 'productType' and att/Value eq 'S2MSI1C')",
    headers=headers ).json()
  
  headers = {"Authorization": f"Bearer {token}"}
  response = requests.get(url, headers=headers)

  response.raise_for_status()
  
  print("Query URL:", url)
  return response.json()

In [13]:
url = (
f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?"
f"$filter=Collection/Name eq'{data_collection}' and "
f"OData.CSC.Intersects(area=geography'SRID=4326;"
f"POLYGON(({str(bbox[0]) + " " + str(bbox[1]) + "," + str(bbox[2]) + " " + str(bbox[1]) + "," + str(bbox[2]) + " " + str(bbox[3]) + ","  + str(bbox[0]) + " " + str(bbox[3]) + "," + str(bbox[0]) + " " + str(bbox[1])}'))') and "
f"ContentDate/Start gt {start_date}T00:00:00.000Z and "
f"ContentDate/Start lt {end_date}T00:00:00.000Z)"
)
print(url)

https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq'SENTINEL-2' and OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((12.45 44.825,12.7 44.825,12.7 45.055,12.45 45.055,12.45 44.825'))') and ContentDate/Start gt 2019-07-26T00:00:00.000Z and ContentDate/Start lt 2019-07-31T00:00:00.000Z)


In [18]:
def download_product(token, product_id, output_path):
    url = f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value"
    headers = {"Authorization": f"Bearer {token}"}
    session = requests.Session()
    session.headers.update(headers)

    response = session.get(url, allow_redirects=False)
    while response.status_code in (301, 302, 303, 307):
        url = response.headers["Location"]
        response = session.get(url, allow_redirects=False)

    final_response = session.get(url, stream=True)
    final_response.raise_for_status()

    with open(output_path, "wb") as f:
        for chunk in final_response.iter_content(chunk_size=8192):
            f.write(chunk)
    print(f"Downloaded to {output_path}")

# run download

In [19]:
result = get_url(data_collection, bbox, start_date, end_date, token)
print("Full API response:", result)
if not result["value"]:
    print("No Sentinel-2 L1C products found for the specified criteria.")
    exit()

product = result["value"][0]
product_id = product["Id"]
product_name = product["Name"]
output_zip = f"{product_name}.zip"

# Download the product
download_product(token, product_id, output_zip)


HTTPError: 403 Client Error: Forbidden for url: https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name%20eq'SENTINEL-2'%20and%20OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((12.45%2044.825,12.7%2044.825,12.7%2045.055,12.45%2045.055,12.45%2044.825'))')%20and%20ContentDate/Start%20gt%202019-07-26T00:00:00.000Z%20and%20ContentDate/Start%20lt%202019-07-31T00:00:00.000Z)&$top=20

# Iterate (just notes)

In [6]:
http_rs = [] 
for i in matching_ids:
    
    satellite = "Sentinel-2"
    contains = "S2MSI1C"
    contains2 = i['id'][11:26]
    http_request = get_https_request(satellite, contains, start_date, end_date)
    http_rs.append(http_request)
    return http_rs

SyntaxError: 'return' outside function (1571520283.py, line 9)

In [None]:
# Load the CSV file
matched_products_df = pd.read_csv("../matched_s2_products.csv")
unique_products_df = matched_products_df.drop_duplicates(subset="product_id")

# Create the dictionary
filter_products_dict = {
    "Name" : [{product_id} for product_id in unique_products_df["product_id"]]
}

len(filter_products_dict["Name"])

3977

can also search by product name (see [docs](https://documentation.dataspace.copernicus.eu/APIs/OData.html)), but two issues:
- matched_s2_products has 3977 tiles, s2_product_unique has 4472 (but outdated N0XXX in product name),
i.e. needs to be limited spatially/temporally at minimum
- need to change functions to deal with iterating through dict values 

possible accepted url options:

In [None]:
filter_products_dict.keys()
filter_products_dict.values()

In [None]:

satellite = "Sentinel-2"
contains = "L1C"
contains2 = "T32TQQ"    # option to be replaced with unique tiles or strtime iterating through 
                        # litter rows csv within matching_ids, i.e. iterated over