In [1]:
import boto3

In [2]:
# --- Configuration for the Specific Bucket ---
# The bucket name is the part after the host in the path
BUCKET_NAME = 'chelsav2'
# The prefix is the folder path you want to list
PREFIX = 'GLOBAL/monthly/'
# This is the custom endpoint URL for SWITCH Cloud Object Storage
ENDPOINT_URL = 'https://os.zhdk.cloud.switch.ch'
# Region is often required, but you can use a placeholder for public buckets
REGION_NAME = 'us-east-1' 

def list_all_files_in_s3(bucket_name, prefix, endpoint_url, region_name):
    """
    Lists all object keys (file paths) in an S3-compatible bucket,
    handling pagination automatically.
    """
    
    # Initialize the S3 client
    # Since this is a public bucket, we often don't need credentials,
    # but the endpoint_url and region_name are mandatory for non-AWS S3.
    s3_client = boto3.client(
        's3',
        endpoint_url=endpoint_url,
        region_name=region_name
        # If the bucket were private, you'd add: 
        # aws_access_key_id='YOUR_KEY', 
        # aws_secret_access_key='YOUR_SECRET'
    )
    
    # Use the Paginator to automatically handle large numbers of files
    paginator = s3_client.get_paginator('list_objects_v2')
    
    pages = paginator.paginate(
        Bucket=bucket_name,
        Prefix=prefix
    )
    
    all_files = []
    
    for page in pages:
        if 'Contents' in page:
            # 'Contents' is a list of dictionaries, one for each file
            for obj in page['Contents']:
                # The 'Key' is the full file path/name
                all_files.append(obj['Key'])
                
    return all_files

# --- Execute the function ---
print(f"Listing files in bucket: {BUCKET_NAME} with prefix: {PREFIX}")
try:
    file_list = list_all_files_in_s3(BUCKET_NAME, PREFIX, ENDPOINT_URL, REGION_NAME)

    if file_list:
        print(f"\nSuccessfully found {len(file_list)} files.")
        print("\n--- First 5 Files ---")
        for file_key in file_list[:5]:
            print(f"- {file_key}")
        print("---------------------\n")
        
        # Now you can use this `file_list` in your URL generator test
        # to ensure the files exist before attempting to download them.

    else:
        print("No files found with the specified prefix.")

except Exception as e:
    print(f"An error occurred: {e}")
    print("Double-check the BUCKET_NAME, PREFIX, and ENDPOINT_URL.")

Listing files in bucket: chelsav2 with prefix: GLOBAL/monthly/
An error occurred: Unable to locate credentials
Double-check the BUCKET_NAME, PREFIX, and ENDPOINT_URL.


In [3]:
ls

 Volume in drive C is OS
 Volume Serial Number is 1C25-B250

 Directory of C:\Users\niels\Documents\Repositories\BmC\tests\datasource\chelsa

03/10/2025  11:04    <DIR>          .
02/10/2025  15:39    <DIR>          ..
03/10/2025  11:02    <DIR>          .ipynb_checkpoints
02/10/2025  16:08    <DIR>          __pycache__
02/10/2025  16:19                35 test_s3.py
03/10/2025  11:04             4.285 Untitled.ipynb
03/10/2025  11:05    <DIR>          urls
               2 File(s)          4.320 bytes
               5 Dir(s)  233.413.570.560 bytes free


In [2]:
import os

os.listdir("urls/monthly")

['clt.txt',
 'cmi.txt',
 'hurs.txt',
 'pet.txt',
 'pr.txt',
 'rsds.txt',
 'sfcWind.txt',
 'tas.txt',
 'tasmax.txt',
 'tasmin.txt',
 'vas.txt']

In [3]:
variable_url_lists = os.listdir("urls/monthly")
[file_name.split(".")[0] for file_name in variable_url_lists]

['clt',
 'cmi',
 'hurs',
 'pet',
 'pr',
 'rsds',
 'sfcWind',
 'tas',
 'tasmax',
 'tasmin',
 'vas']

In [4]:
for filename in variable_url_lists:
    filepath = os.path.join("urls/monthly", filename)
    with open(filepath, "r") as f:
        urls = f.readlines()

In [7]:
urls[0][:-2]

'https://os.zhdk.cloud.switch.ch/chelsav2/GLOBAL/monthly/vpd/CHELSA_vpd_01_1980_V.2.1.tif'

In [11]:
import numpy as np
import rasterio
from rasterio.transform import from_origin
from rasterio.windows import from_bounds
from affine import Affine
def make_dummy_tif(path, width=100, height=80, ulx=1000.0, uly=2000.0, pixel_size=1.0):
    """Create a deterministic single-band GeoTIFF: values = row*width + col (i.e. np.arange)"""
    arr = np.arange(width * height, dtype=np.int32).reshape((height, width))
    transform = from_origin(ulx, uly, pixel_size, pixel_size)
    profile = {
        "driver": "GTiff",
        "dtype": arr.dtype,
        "count": 1,
        "width": width,
        "height": height,
        "crs": "EPSG:4326",
        "transform": transform,
    }
    with rasterio.open(path, "w", **profile) as dst:
        dst.write(arr, 1)
    return transform

In [12]:
test = make_dummy_tif("test")

RasterioIOError: Attempt to create new tiff file '' failed: No such file or directory