In [None]:
import boto3

from dataplug import CloudObject
from dataplug.formats.genomics.fastq import FASTQGZip, partition_reads_batches

# Resolve AWS credentials through boto3's default session.
session = boto3.Session()
creds = session.get_credentials()
if creds is None:
    # get_credentials() yields None when no credentials are configured;
    # fail here with a clear message instead of an AttributeError on the
    # next line.
    raise RuntimeError(
        "No AWS credentials found. Configure them (environment variables, "
        "~/.aws/credentials, or an attached role) before running this notebook."
    )
# Take one consistent snapshot of access key, secret key and token.
creds = creds.get_frozen_credentials()

# S3 client settings handed to CloudObject.from_s3 via its s3_config argument.
s3_config = {
    "credentials": {
        "AccessKeyId": creds.access_key,
        "SecretAccessKey": creds.secret_key,
        "SessionToken": creds.token,
    },
    "region_name": "us-east-1",
}

In [None]:
# Type your personal ID into the input box. It is used later as the suffix of
# the metadata bucket name, so surrounding whitespace is removed and an empty
# value is rejected up front.
my_id = input().strip()
if not my_id:
    raise ValueError("ID must not be empty")
print("Your ID is", my_id)

In [None]:
# Build a CloudObject for the gzipped FASTQ file stored in S3.
# The metadata bucket name is derived from the ID entered above
# (presumably this is where dataplug keeps its preprocessing output -- confirm).
fastqgz_url = "s3://scipy-tutorial-data/SRR6052133.fastq.gz"
co = CloudObject.from_s3(
    FASTQGZip,
    fastqgz_url,
    s3_config=s3_config,
    metadata_bucket=f"scipy-tutorial-meta-{my_id!s}",
)

In [None]:
# Pre-process the object before partitioning it. This is a one-time step per
# dataset: it builds reusable indexes, so the same data can later be
# re-partitioned as often as needed and with any chunk size.
co.preprocess()

# Split the object into 20 batches of reads. No data is moved at this point;
# the slices are only derived from the indexes.
data_slices = co.partition(partition_reads_batches, num_batches=20)

# Preview the first five slices, showing the first five entries of each.
for data_slice in data_slices[:5]:
    # Evaluating the slice is what actually issues the HTTP GET requests
    # that download this partition's data.
    batch = data_slice.get()
    for entry in batch[:5]:
        print(entry)
    print("-----")