In [None]:
# This notebook demonstrates reading a simple text file from the DestinE S3 area using boto3

In [1]:
import codecs
import json

import boto3

In [4]:
# Credentials live in a JSON file in the user's own space rather than in this
# notebook, so the access key and secret key cannot be committed to GitHub by
# accident. Later cells read the "accessKey" and "secretKey" entries.
with open("access_keys.json", "r") as keys_file:
    s3_keys = json.loads(keys_file.read())

In [12]:
# Endpoint URL, copied from the DestinE Fresh Data Pool interface.
s3_endpoint = "https://s3.central.data.destination-earth.eu"

# Build an S3 client that talks to that endpoint using the stored key pair.
s3_client = boto3.client(
    "s3",
    endpoint_url=s3_endpoint,
    aws_access_key_id=s3_keys["accessKey"],
    aws_secret_access_key=s3_keys["secretKey"],
)

# Ask the service which buckets exist and print the record for each one.
response = s3_client.list_buckets()
for bucket_record in response["Buckets"]:
    print(bucket_record)

{'Name': 'neils-test-data', 'CreationDate': datetime.datetime(2025, 10, 22, 14, 48, 40, 633000, tzinfo=tzlocal())}


In [17]:
# List the contents of "neils-test-data" and print the record for each object.
# Note: a single list_objects call returns at most 1,000 keys; a larger bucket
# would need pagination.
bucket_name = "neils-test-data"
response = s3_client.list_objects(Bucket=bucket_name)
# The "Contents" key is omitted from the response when the bucket holds no
# objects, so fall back to an empty list instead of raising KeyError.
for o in response.get("Contents", []):
    print(o)

{'Key': 'tas_Amon_HadGEM3-GC31-LL_piControl_r1i1p1f1_gn_195001-204912.nc', 'LastModified': datetime.datetime(2025, 10, 22, 14, 59, 20, 615000, tzinfo=tzlocal()), 'ETag': '"e94023df8265a6ad36a2003e5f6ee52d-5"', 'Size': 71469230, 'StorageClass': 'STANDARD', 'Owner': {'DisplayName': '8d48d597-ad88-4aff-b84f-ba52a542e794', 'ID': 'cbebc788-4bc3-49ef-889a-5b283b9fc754'}}
{'Key': 'text-file.txt', 'LastModified': datetime.datetime(2025, 10, 27, 14, 20, 7, 48000, tzinfo=tzlocal()), 'ETag': '"bca686258317266e3337dba6f8644541"', 'Size': 19, 'StorageClass': 'STANDARD', 'Owner': {'DisplayName': '8d48d597-ad88-4aff-b84f-ba52a542e794', 'ID': 'cbebc788-4bc3-49ef-889a-5b283b9fc754'}}


In [34]:
# Request the object; the returned dictionary carries a streaming body that
# has not yet been read into memory.
object_name = "text-file.txt"
response = s3_client.get_object(Bucket=bucket_name, Key=object_name)

In [35]:
# The payload is a StreamingBody object held under the "Body" key of the
# response dictionary.
data = response["Body"]
# With no arguments, read() returns the entire remaining stream as bytes.
# (read also accepts amt=<number of bytes> for partial reads.)
text_binary = data.read()
# Convert the raw bytes to a string and show it.
text = text_binary.decode("utf-8")
print(text)

A simple text file



In [45]:
# Stream the object in fixed-size pieces instead of reading it in one go.
# read can have the keyword amt=<number of bytes>.
# Reading again will resume from the last read position.
# .tell() can be used to find out where the stream pointer is
# see https://botocore.amazonaws.com/v1/documentation/api/latest/reference/response.html
# have to reload the file, as there is no seek(0)
response = s3_client.get_object(Key=object_name, Bucket=bucket_name)
print(response)
data = response["Body"]
buf_size = 8
# casting of file_size is imperative, as it is a string in the response headers
file_size = int(response["ResponseMetadata"]["HTTPHeaders"]["content-length"])
# Decode incrementally: a multi-byte UTF-8 character may be split across two
# chunks, and decoding each chunk on its own would raise UnicodeDecodeError.
# The incremental decoder holds back the partial bytes until the rest arrives.
decoder = codecs.getincrementaldecoder("utf-8")()
while True:
    text_binary = data.read(amt=buf_size)
    if not text_binary:
        # An empty read marks the end of the stream. Flush the decoder so a
        # character left incomplete at the very end is reported as an error
        # rather than silently dropped.
        decoder.decode(b"", final=True)
        break
    text = decoder.decode(text_binary)
    # Show how far through the object the stream pointer is, then the chunk.
    print(data.tell(), file_size)
    print(text)

{'ResponseMetadata': {'RequestId': 'tx00000eda6efa49ee638f4-0068ff8364-363378a1-default', 'HostId': '', 'HTTPStatusCode': 200, 'HTTPHeaders': {'server': 'nginx/1.27.2', 'date': 'Mon, 27 Oct 2025 14:36:20 GMT', 'content-type': 'text/plain', 'content-length': '19', 'accept-ranges': 'bytes', 'last-modified': 'Mon, 27 Oct 2025 14:20:07 GMT', 'x-rgw-object-type': 'Normal', 'etag': '"bca686258317266e3337dba6f8644541"', 'x-amz-request-id': 'tx00000eda6efa49ee638f4-0068ff8364-363378a1-default'}, 'RetryAttempts': 0}, 'AcceptRanges': 'bytes', 'LastModified': datetime.datetime(2025, 10, 27, 14, 20, 7, tzinfo=tzutc()), 'ContentLength': 19, 'ETag': '"bca686258317266e3337dba6f8644541"', 'ContentType': 'text/plain', 'Metadata': {}, 'Body': <botocore.response.StreamingBody object at 0x7c6e4d85ed40>}
8 19
A simple
16 19
 text fi
19 19
le

