# Imports

In [1]:
import sys
import os

In [2]:
import math

In [3]:
import numpy as np

In [4]:
import matplotlib.pyplot as plt

In [5]:
import pandas as pd

In [6]:
import boto3

# Functions

In [7]:
def head(key, bucket='hcp-openaccess'):
    """Report whether an object exists in an S3 bucket.

    Args:
        key: Full key of the object to probe.
        bucket: Name of the bucket to look in.

    Returns:
        True if the HEAD request for the object succeeds, False if S3
        reports that the object does not exist.

    Raises:
        botocore.exceptions.ClientError: For any failure other than
            "not found" (for example, access denied).
    """
    try:
        client.head_object(Bucket=bucket, Key=key)
    except client.exceptions.ClientError as err:
        # head_object signals a missing key by raising, never by returning a
        # falsy value. HEAD responses carry no body, so the error code is
        # normally the bare HTTP status "404".
        code = err.response.get('Error', {}).get('Code')
        if code in ('404', 'NoSuchKey', 'NotFound'):
            return False
        raise
    return True

In [8]:
def ls(path='', bucket='hcp-openaccess', maxkeys=1300, delimiter=False, endslash=True):
    """List keys or "folder" prefixes stored under a path in an S3 bucket.

    Args:
        path: Key prefix to list. An empty string lists from the bucket root.
        bucket: Name of the bucket to list.
        maxkeys: Upper bound on the number of entries requested from S3
            (keys and common prefixes both count towards it).
        delimiter: If True, list one level only and return the common
            prefixes ("sub-folders") below `path`. If False, return every
            object key that starts with `path`.
        endslash: When `delimiter` is True, append a trailing '/' to a
            non-empty `path` that lacks one, so that the listing descends
            into that "folder".

    Returns:
        A list of prefixes (delimiter=True) or object keys (delimiter=False),
        or False, after printing a message, when nothing was found.
    """
    needs_slash = path and not path.endswith('/') and delimiter and endslash
    path_ = path + '/' if needs_slash else path

    request = {'Bucket': bucket, 'Prefix': path_}
    if delimiter:
        request['Delimiter'] = '/'

    found = []
    remaining = maxkeys
    while remaining > 0:
        # S3 returns at most 1000 entries per request, so larger limits have
        # to be served by following continuation tokens.
        page_size = min(remaining, 1000)
        res = client.list_objects_v2(MaxKeys=page_size, **request)

        if delimiter:
            found.extend(entry['Prefix'] for entry in res.get('CommonPrefixes', []))
        else:
            found.extend(entry['Key'] for entry in res.get('Contents', []))

        token = res.get('NextContinuationToken')
        if not res.get('IsTruncated') or not token:
            break
        request['ContinuationToken'] = token
        # Fall back to the page size so the loop always makes progress.
        remaining -= res.get('KeyCount') or page_size

    if not found:
        print(f"Invalid Path '{path_}' (if it is not, set `delimiter` to False)")
        return False
    return found

In [9]:
def get(key, bucket='hcp-openaccess'):
    """Fetch an object from an S3 bucket.

    The object is requested directly instead of being looked up through a
    prefix listing first: a listing also matches keys that merely start with
    `key`, and it costs an extra round trip.

    Args:
        key: Full key of the object to fetch.
        bucket: Name of the bucket holding the object.

    Returns:
        The `get_object` response dict, or None (after printing a message)
        when no object with exactly this key exists.
    """
    try:
        return client.get_object(Bucket=bucket, Key=key)
    except client.exceptions.NoSuchKey:
        print(f"Object '{key}' not found in bucket '{bucket}'")
        return None

In [10]:
def download(prefix, local='data', bucket='hcp-openaccess'):
    """Download every object under a key prefix into a local directory tree.

    Objects are written to `<local>/<bucket>/<key>`, recreating the key's
    '/'-separated hierarchy as directories. Keys ending in '/' are treated as
    directory placeholders and only produce an (empty) directory.

    Args:
        prefix: Key prefix selecting the objects to download.
        local: Local base directory for the downloaded files.
        bucket: Name of the bucket to download from.

    Returns:
        None. Nothing is downloaded when no key matches `prefix`.
    """
    # Build the destination with the platform's separator rather than a
    # hard-coded backslash, which only forms a sub-directory on Windows.
    local = os.path.join(local, bucket)

    keys = []
    dirs = []
    request = {'Bucket': bucket, 'Prefix': prefix}
    while True:
        results = client.list_objects_v2(**request)
        # 'Contents' is absent from the response when nothing matches.
        for entry in results.get('Contents', []):
            key = entry['Key']
            (dirs if key.endswith('/') else keys).append(key)
        next_token = results.get('NextContinuationToken')
        if next_token is None:
            break
        request['ContinuationToken'] = next_token

    for d in dirs:
        # S3 keys always use '/', so split them to get native path components.
        os.makedirs(os.path.join(local, *d.rstrip('/').split('/')), exist_ok=True)

    for k in keys:
        dest_pathname = os.path.join(local, *k.split('/'))
        os.makedirs(os.path.dirname(dest_pathname), exist_ok=True)
        client.download_file(bucket, k, dest_pathname)

# Data

In [11]:
# Low-level S3 client. The helper functions defined above (head, ls, get,
# download) read this module-level name when they are called.
# NOTE(review): presumably relies on AWS credentials configured in the
# environment — confirm.
client = boto3.client('s3')

In [12]:
# Resource-style S3 handle; not referenced in the visible part of this notebook.
resource = boto3.resource('s3')

---

<div class="alert alert-success">
    Buckets
</div>

In [13]:
# Names of the buckets reported by `client.list_buckets()`.
[bucket["Name"] for bucket in client.list_buckets()["Buckets"]]

['hcp-openaccess',
 'hcp-openaccess-logfiles',
 'hcp-openaccess-logs-temp',
 'hcp-openaccess-logstorage-temp',
 'hcp-openaccess-test']

<div class="alert alert-success">
    Top-Level Folders in <i><code>hcp-openaccess</code></i> Bucket
</div>

In [14]:
# Empty path with the '/' delimiter: returns the top-level prefixes of the default bucket.
ls(delimiter=True)

['HCP/',
 'HCP_1200/',
 'HCP_900/',
 'HCP_Resources/',
 'HCP_Retest/',
 'HCP_WB_Tutorial_1.0/']

In [15]:
# Fetch every object whose key starts with this prefix into download()'s default local destination.
download('HCP_WB_Tutorial_1.0/.DS_Store')