In [None]:
# This notebook demonstrates reading a netCDF file from the DestinE S3 area using the netCDF4 library

In [1]:
import netCDF4 as nc
import json
import s3fs

In [3]:
# Credentials live in a JSON file in the user's own space rather than in this
# notebook, so the keys cannot be committed to GitHub by accident.
# Later cells look up two entries in it: "accessKey" and "secretKey".
with open("access_keys.json", mode="r") as key_file:
    s3_keys = json.load(key_file)

In [2]:
# Endpoint URL, as listed in the DestinE Fresh Data Pool interface
s3_endpoint = "https://s3.central.data.destination-earth.eu"
# Bucket and object (key) that identify the netCDF file to read
bucket_name = "neils-test-data"
object_name = "tas_Amon_HadGEM3-GC31-LL_piControl_r1i1p1f1_gn_195001-204912.nc"
# Path of the dataset inside the store, in "<bucket>/<object>" form
s3_url = "/".join((bucket_name, object_name))
print(s3_url)

neils-test-data/tas_Amon_HadGEM3-GC31-LL_piControl_r1i1p1f1_gn_195001-204912.nc


In [17]:
# Build an s3fs filesystem bound to the DestinE endpoint. The credentials come
# from the key file loaded earlier, so no secret is written in this notebook.
fs = s3fs.S3FileSystem(
    key=s3_keys["accessKey"],
    secret=s3_keys["secretKey"],
    endpoint_url=s3_endpoint,
)
# Obtain a file-like handle on the object; its contents are read in a later cell.
# NOTE(review): this handle is never closed anywhere in the notebook.
fo = fs.open(s3_url)

In [18]:
# Stream the entire object into memory; the netCDF dataset is opened from these
# bytes in the next step, so the whole file has to fit in RAM.
# Rewind first: after one full read the handle sits at end-of-file, so
# re-running this cell without the seek would return empty bytes and the
# subsequent nc.Dataset call would fail.
fo.seek(0)
nc_bytes = fo.read()

In [19]:
# Open a netCDF dataset directly from the downloaded bytes via the `memory`
# keyword; "inmemory.nc" is the name given to the dataset (presumably just a
# label, since the data comes from `nc_bytes` - confirm against netCDF4 docs).
# This is probably not the most efficient approach - xarray could well be better.
ncd = nc.Dataset("inmemory.nc", memory=nc_bytes)
# Print the dataset summary (data model, global attributes, ...)
print(ncd)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    Conventions: CF-1.7 CMIP-6.2
    activity_id: CMIP
    branch_method: standard
    branch_time_in_child: 0.0
    branch_time_in_parent: 267840.0
    creation_date: 2019-06-20T14:08:01Z
    cv_version: 6.2.20.1
    data_specs_version: 01.00.29
    experiment: pre-industrial control
    experiment_id: piControl
    external_variables: areacella
    forcing_index: 1
    frequency: mon
    further_info_url: https://furtherinfo.es-doc.org/CMIP6.MOHC.HadGEM3-GC31-LL.piControl.none.r1i1p1f1
    grid: Native N96 grid; 192 x 144 longitude/latitude
    grid_label: gn
    history: 2019-06-20T13:42:01Z ; CMOR rewrote data to be consistent with CMIP6, CF-1.7 CMIP-6.2 and CF standards.;
2019-06-20T13:41:40Z MIP Convert v1.1.0, Python v2.7.12, Iris v1.13.0, Numpy v1.13.3, netcdftime v1.4.1.
    initialization_index: 1
    institution: Met Office Hadley Centre, Fitzroy Road, Exeter, Devon, EX1 3PB, UK
   

In [20]:
# A second approach: fetch the same object with boto3 instead of s3fs
import boto3

In [21]:
# Create a boto3 S3 client that talks to the DestinE endpoint, using the same
# access/secret key pair that was loaded from the key file.
s3_client = boto3.client(
    "s3",
    endpoint_url=s3_endpoint,
    aws_access_key_id=s3_keys["accessKey"],
    aws_secret_access_key=s3_keys["secretKey"],
)

In [24]:
# Request the object from the bucket, then pull the whole response body into
# memory as bytes.
response = s3_client.get_object(
    Bucket=bucket_name,
    Key=object_name,
)
nc_bytes2 = response["Body"].read()

In [25]:
# Open a second in-memory netCDF dataset, this time from the bytes fetched
# with boto3, and print its summary for comparison with the s3fs result.
ncd2 = nc.Dataset("inmemory.nc", memory=nc_bytes2)
print(ncd2)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4_CLASSIC data model, file format HDF5):
    Conventions: CF-1.7 CMIP-6.2
    activity_id: CMIP
    branch_method: standard
    branch_time_in_child: 0.0
    branch_time_in_parent: 267840.0
    creation_date: 2019-06-20T14:08:01Z
    cv_version: 6.2.20.1
    data_specs_version: 01.00.29
    experiment: pre-industrial control
    experiment_id: piControl
    external_variables: areacella
    forcing_index: 1
    frequency: mon
    further_info_url: https://furtherinfo.es-doc.org/CMIP6.MOHC.HadGEM3-GC31-LL.piControl.none.r1i1p1f1
    grid: Native N96 grid; 192 x 144 longitude/latitude
    grid_label: gn
    history: 2019-06-20T13:42:01Z ; CMOR rewrote data to be consistent with CMIP6, CF-1.7 CMIP-6.2 and CF standards.;
2019-06-20T13:41:40Z MIP Convert v1.1.0, Python v2.7.12, Iris v1.13.0, Numpy v1.13.3, netcdftime v1.4.1.
    initialization_index: 1
    institution: Met Office Hadley Centre, Fitzroy Road, Exeter, Devon, EX1 3PB, UK
   