In [None]:
import boto3
import numpy as np

from dataplug import CloudObject
from dataplug.formats.metabolomics.imzml import ImzML, partition_chunks_strategy

# Resolve AWS credentials from the default boto3 session.
session = boto3.Session()
resolved_credentials = session.get_credentials()
if resolved_credentials is None:
    # get_credentials() yields None when nothing is configured; fail here with
    # a readable message instead of an AttributeError on the next call.
    raise RuntimeError(
        "No AWS credentials found; configure them before running this notebook."
    )
creds = resolved_credentials.get_frozen_credentials()

# Storage configuration passed to CloudObject.from_s3 below.
s3_config = {
    "credentials": {
        "AccessKeyId": creds.access_key,
        "SecretAccessKey": creds.secret_key,
        "SessionToken": creds.token,
    },
    "region_name": "us-east-1",
}

In [None]:
# Replace with your ID
# The ID is interpolated into an S3 bucket name further down, so drop stray
# whitespace and reject an empty value before it produces an invalid name.
my_id = input().strip()
if not my_id:
    raise ValueError("An ID is required")
print("Your ID is", my_id)

In [None]:
# Dataset location; the metadata bucket name is suffixed with the user's ID
imzml_url = "s3://scipy-tutorial-data/HR2MSImouseurinarybladderS096.ibd"
co = CloudObject.from_s3(
    ImzML,
    imzml_url,
    s3_config=s3_config,
    metadata_bucket=f"scipy-tutorial-meta-{my_id!s}",
)

In [None]:
# Data must be pre-processed first ==> This only needs to be done once per dataset
# Preprocessing will create reusable indexes to repartition
# the data many times in many chunk sizes
# NOTE(review): force=True presumably re-runs preprocessing even when the
# indexes already exist -- confirm this is intended given the note above.
co.preprocess(force=True)

# Partition the object
# This does not move data around, it only creates data slices from the indexes
data_slices = co.partition(partition_chunks_strategy, chunk_size=256 * 1024**2)

for data_slice in data_slices:
    # Evaluate the data_slice, which will perform the
    # actual HTTP GET requests to get the partition data
    raw_data = data_slice.get()
    # Slice through a memoryview so each array is built without first
    # copying its byte range out of the downloaded payload
    raw_view = memoryview(raw_data)

    # The attributes object does not change inside the loop; look it up once
    attributes = data_slice.cloud_object.attributes

    # Load the data from bytes in memory
    # 1. read the common m/z array
    mz_length_bytes = data_slice.mz_lengths[0] * attributes.mz_size
    mz_array = np.frombuffer(raw_view[:mz_length_bytes], dtype=attributes.mz_precision)

    for i, int_offset in enumerate(data_slice.int_offsets):
        # Offset within this slice -> index into the object's coordinates
        spectrum_index = data_slice.spectrum_index + i
        coordinates = attributes.coordinates[spectrum_index]
        print(f"Spectrum index {spectrum_index}, pixel coordinates: {coordinates}")

        # 2. read intensity array
        int_length_bytes = data_slice.int_lengths[i] * attributes.int_size
        int_array = np.frombuffer(
            raw_view[int_offset : int_offset + int_length_bytes],
            dtype=attributes.int_precision,
        )