In [None]:
# This notebook copies files from AWS S3 buckets to /home/jovyan/data (i.e. local to the Jupyter workspace).
#   This *should* be done with the 'boto3' library, which is the successor to (and not the same thing as)
#   the original 'boto' library. The cells below still use 'boto', which is deprecated in favor of boto3.
#
# Here is what Stack Overflow has to say: 
#
# The boto package is the hand-coded Python library that has been around 
# since 2006. It is very popular and is fully supported by AWS but because 
# it is hand-coded and there are so many services available (with more 
# appearing all the time) it is difficult to maintain.
#
# So, boto3 is a new version of the boto library based on botocore. All 
# of the low-level interfaces to AWS are driven from JSON service descriptions 
# that are generated automatically from the canonical descriptions of the services. 
# So, the interfaces are always correct and always up to date. There is a 
# resource layer on top of the client-layer that provides a nicer, more Pythonic interface.
#
# The boto3 library is being actively developed by AWS and is the one I would 
# recommend people use if they are starting new development.

In [5]:
# Don't run unless you want to grab glodap files from S3
# This may take a couple minutes to run
#
# Downloads the GLODAP salinity, temperature and oxygen NetCDF files from the
# public 'himatdata' bucket into data_dir, using anonymous (unsigned) access.

import os

import boto

data_dir = '/home/jovyan/data/glodap/'
local_salinity_filename = data_dir + 'glodap_salinity.nc'
local_temperature_filename = data_dir + 'glodap_temperature.nc'
local_oxygen_filename = data_dir + 'glodap_oxygen.nc'

# Checked in this order; the first keyword found in a key name decides the target file.
glodap_targets = (
    ('salinity', local_salinity_filename),
    ('temperature', local_temperature_filename),
    ('oxygen', local_oxygen_filename),
)

# The download writes straight to the given path, so the directory must exist
# beforehand (it does not on a fresh workspace).
os.makedirs(data_dir, exist_ok=True)

connection = boto.connect_s3(anon=True)
bucket = connection.get_bucket('himatdata')
for key in bucket.list():
    # key.name is already text; str(name.encode('utf-8')) would give the bytes repr "b'...'" on Python 3.
    keyname = key.name
    if 'glodap/' not in keyname:
        continue
    for variable, local_filename in glodap_targets:
        if variable in keyname:
            key.get_contents_to_filename(local_filename)
            break

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.44 µs


In [7]:
# Don't run unless you want to grab a set of nine ARGO profile netcdf files (9 ARGO platforms (drifters (floats)))
# The key name is used as-is for the path below /home/jovyan/data/. Earlier versions cast the encoded
# name to str and then strip()ped quote/'b' characters; strip() removes *character sets* from both ends,
# so it also ate a trailing 'b' from real file names.
# This may take a couple minutes to run

import os

import boto

# Not used in this cell; left assigned in case other cells read it -- TODO confirm and remove.
data_dir = '/home/jovyan/data/glodap/'

connection = boto.connect_s3(anon=True)
bucket = connection.get_bucket('himatdata')
for key in bucket.list():
    keyname = key.name
    if 'argo-profiles' not in keyname:
        continue
    if keyname.endswith('/'):
        # "Folder" placeholder object: nothing to download, and the path is a directory.
        continue
    ff = '/home/jovyan/data/' + keyname
    # Create the sub-directory implied by the key name before writing into it.
    os.makedirs(os.path.dirname(ff), exist_ok=True)
    key.get_contents_to_filename(ff)


CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 6.44 µs


In [8]:
# Don't run unless you want to grab a large (800MB) tar file from S3 bucket 'oceanhackweek' to the local directory
# This contains a bunch of different sub-dirs and data files as it un-tars into the 'data' directory.
# This takes less than a minute to run.
#
# Every key whose name contains 'data.tar' is written to the same local path f,
# so if several keys match, the last one listed wins.

import boto

f = '/home/jovyan/data.tar'
connection = boto.connect_s3(anon=True)
bucket = connection.get_bucket('oceanhackweek')
for key in bucket.list():
    # Match on the key name itself rather than on the repr of its UTF-8 bytes.
    keyname = key.name
    if 'data.tar' in keyname:
        key.get_contents_to_filename(f)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 6.91 µs
