# Download L1B, in safe format, then transform to ZARR




## Download the safe format file
### Import dependencies for downloading Safe format. 

In [1]:
import os
import boto3
from dotenv import load_dotenv

### Setup and Environment Configuration

First, we'll import the required libraries and set up our environment. Make sure your Copernicus credentials are stored in your environment variables or a `.env` file.


In [7]:
import boto3
import os

# Get credentials from environment variables
load_dotenv("config.env")
ACCESS_KEY_ID = os.environ.get("ACCESS_KEY_ID")
SECRET_ACCESS_KEY = os.environ.get("SECRET_ACCESS_KEY")
#print("AK:", ACCESS_KEY_ID, "SK:", "--------")

session = boto3.session.Session()
s3 = boto3.resource(
    's3',
    endpoint_url='https://eodata.dataspace.copernicus.eu',
    aws_access_key_id=ACCESS_KEY_ID,
    aws_secret_access_key=SECRET_ACCESS_KEY,
    region_name='default'
)  # generated secrets

def download(bucket, product: str, target: str = "") -> None:
    """
    Downloads every file in bucket with provided product as prefix

    Raises FileNotFoundError if the product was not found

    Args:
        bucket: boto3 Resource bucket object
        product: Path to product
        target: Local catalog for downloaded files. Should end with an `/`. Default current directory.
    """
    files = bucket.objects.filter(Prefix=product)
    print(files)
    if not list(files):
        print(f"Could not find any files for {product}")
        
    for file in files:
        os.makedirs(os.path.dirname(file.key), exist_ok=True)
        if not os.path.isdir(file.key):
            bucket.download_file(file.key, f"{target}{file.key}")

    return None

In [3]:
%%time
# path to the product to download
files = download(
    s3.Bucket("eodata"), 
    #"Sentinel-1/SAR/SLC/2019/10/13/S1B_IW_SLC__1SDV_20191013T155948_20191013T160015_018459_022C6B_13A2.SAFE/",
    "Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T145646_S20250724T112818_D07_N05.11.tar/"
)

s3.Bucket.objectsCollection(s3.Bucket(name='eodata'), s3.ObjectSummary)
Could not find any files for Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T145646_S20250724T112818_D07_N05.11.tar/
CPU times: user 46.6 ms, sys: 45.9 ms, total: 92.5 ms
Wall time: 516 ms


In [11]:
for file in files:
    os.makedirs(os.path.dirname(file.key), exist_ok=True)
    if not os.path.isdir(file.key):
        bucket.download_file(file.key, f"{target}{file.key}")

TypeError: 'NoneType' object is not iterable

### Connecting to Copernicus Data Space Ecosystem

Now let's establish connections to both the S3 storage and STAC catalog services using our credentials.


In [4]:
!ls Sentinel-2

ls: cannot access 'Sentinel-2': No such file or directory


In [5]:
import boto3
import os
from dotenv import load_dotenv

# Load credentials from .env
load_dotenv()
ACCESS_KEY_ID = os.environ.get("ACCESS_KEY_ID")
SECRET_ACCESS_KEY = os.environ.get("SECRET_ACCESS_KEY")

# Initialize S3 resource for CDSE
session = boto3.session.Session()
s3 = session.resource(
    's3',
    endpoint_url='https://eodata.dataspace.copernicus.eu',
    aws_access_key_id=ACCESS_KEY_ID,
    aws_secret_access_key=SECRET_ACCESS_KEY,
    region_name='default'
)
def download_all_under_prefix(bucket, prefix: str, target: str = "") -> None:
    """
    Recursively downloads all files under a given prefix (folder-like path)

    Args:
        bucket: boto3 Bucket resource
        prefix: S3 prefix (folder path) to mirror
        target: Local base folder to store downloaded files
    """
    files = bucket.objects.filter(Prefix=prefix)
    count = 0
    for file in files:
        # Full local path
        local_path = os.path.join(target, file.key)
        # Ensure destination directory exists
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        # Skip directories (S3 can list empty 'folders')
        if not file.key.endswith("/"):
            print(f"⬇️  Downloading {file.key}")
            bucket.download_file(file.key, local_path)
            count += 1
    if count == 0:
        raise FileNotFoundError(f"❌ No files found under {prefix}")
    print(f"✅ Downloaded {count} files under {prefix}")


In [None]:
# Call the function on your desired folder
download_all_under_prefix(
    bucket=s3.Bucket("eodata"),
    prefix="Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/",
    target="S2_L1B_20250724"  # Local folder
)

⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D02_N05.11.tar
⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D12_N05.11.tar
⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D04_N05.11.tar
⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D10_N05.11.tar
⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D06_N05.11.tar
⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D08_N05.11.tar
⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D07_N05.11.tar
⬇️  Downloading Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/S2A_OPER_MSI_L1B_GR_2APS_20250724T011624_S20250724T000002_D05_N05

In [1]:
import os
s3_prefix = os.environ["JUPYTERHUB_USER"]

In [5]:
from dotenv import load_dotenv

In [6]:
load_dotenv("config.env")
ACCESS_KEY = os.environ.get("access_key")
SECRET_KEY = os.environ.get("secret_key")

In [8]:
#ACCESS_KEY, SECRET_KEY

In [9]:
import s3fs

In [13]:
s3_bucket = s3_prefix + '-' + 's2l1b'

In [16]:
client_kwargs={'endpoint_url': 'https://pangeo-eosc-minioapi.vm.fedcloud.eu/'}

#s3 = s3fs.S3FileSystem(anon=False, client_kwargs=client_kwargs) # Works only when using s3 in this Notebook, not with distributed.
s3 = s3fs.S3FileSystem(key=None, secret=None, client_kwargs=client_kwargs)

In [17]:
s3.ls(s3_bucket)

[]

In [6]:
import boto3
import os
from dotenv import load_dotenv

# Load credentials
load_dotenv()
ACCESS_KEY_ID = os.environ.get("ACCESS_KEY_ID")
SECRET_ACCESS_KEY = os.environ.get("SECRET_ACCESS_KEY")

# Initialize S3 session
session = boto3.session.Session()
s3 = session.resource(
    's3',
    endpoint_url='https://eodata.dataspace.copernicus.eu',
    aws_access_key_id=ACCESS_KEY_ID,
    aws_secret_access_key=SECRET_ACCESS_KEY,
    region_name='default'
)

def get_total_size(bucket, prefix):
    """
    Returns the total size (in bytes) and file count under a given prefix.
    """
    total_bytes = 0
    file_count = 0

    for obj in bucket.objects.filter(Prefix=prefix):
        if not obj.key.endswith("/"):  # skip folders
            total_bytes += obj.size
            file_count += 1

    return total_bytes, file_count

# Run it
prefix = "Sentinel-2/MSI/MSI_L1B_GR/2025/07/24/"
bucket = s3.Bucket("eodata")
total_bytes, file_count = get_total_size(bucket, prefix)

# Print nicely
print(f"📦 Total files: {file_count}")
print(f"🧮 Total size: {total_bytes / (1024**3):.2f} GB")

📦 Total files: 147330
🧮 Total size: 3102.43 GB


In [None]:
# Run it
prefix = "Sentinel-2/MSI/MSI_L1B_GR/2025/07/23/"
bucket = s3.Bucket("eodata")
total_bytes, file_count = get_total_size(bucket, prefix)

# Print nicely
print(f"📦 Total files: {file_count}")
print(f"🧮 Total size: {total_bytes / (1024**3):.2f} GB")