In [1]:
# Position of the file to load within the selected hour. Further down, this
# value indexes the list of S3 keys found for each requested day/hour.
# NOTE(review): the note on this line says "usually 11 index values", while the
# comment at the point of use says "between 0 and 11" - confirm the valid range.
time_idx = 0 ## the time index value within the hour - usually there are 11 index values

**Load python libraries**

In [2]:
%matplotlib inline
import boto3
import botocore
import requests
from dask.distributed import Client, progress
import netCDF4
import xarray as xr
import numpy as np

import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import matplotlib.ticker as mticker
from IPython.display import Image, display

from botocore import UNSIGNED
from botocore.config import Config

import datetime

# path to modules
import sys
sys.path.append('../modules')

from GOESR_functions import goes_lat_lon_reproj#, get_s3_keys
from plotter import loadCPT

import pytz

In [3]:
def get_s3_keys(bucket, s3_client, prefix = ''):
    """
    Generate the keys in an S3 bucket.

    Pages through ``list_objects_v2`` responses, following
    ``NextContinuationToken`` until the listing is exhausted. Yields nothing
    (instead of raising ``KeyError``) when no object matches.

    :param bucket: Name of the S3 bucket.
    :param s3_client: Client object exposing ``list_objects_v2(**kwargs)``
        (e.g. a boto3 S3 client).
    :param prefix: Only fetch keys that start with this prefix (optional).
        A string is also sent to S3 as the server-side ``Prefix`` filter; a
        tuple of strings is applied client-side only; ``None`` disables
        filtering.
    :return: Generator yielding the matching key names as strings.
    """

    kwargs = {'Bucket': bucket}

    # Only a plain string can be passed to S3 as a server-side prefix filter.
    if isinstance(prefix, str):
        kwargs['Prefix'] = prefix

    while True:
        resp = s3_client.list_objects_v2(**kwargs)

        # 'Contents' is absent from the response when the page holds no
        # objects, so fall back to an empty page rather than indexing directly.
        for obj in resp.get('Contents', []):
            key = obj['Key']
            if prefix is None or key.startswith(prefix):
                yield key

        # A missing continuation token marks the last page.
        if 'NextContinuationToken' not in resp:
            break
        kwargs['ContinuationToken'] = resp['NextContinuationToken']

**Connect to Dask Distributed Cluster**

Set the `memory_limit` parameter in `Client()` if Dask does not auto-detect your memory limit accurately. You will know this is happening if worker processes start getting killed due to memory-limit errors later in the notebook.

In [4]:
# Start a Dask client configured for 3 worker processes, with memory_limit set
# explicitly to '8GB' instead of relying on auto-detection.
# NOTE(review): `Client` is already imported in the imports cell at the top of
# the notebook, so this re-import is redundant.
from dask.distributed import Client
daskclient = Client(processes=True, n_workers=3, memory_limit='8GB')
# Bare last expression so the notebook renders the client/cluster summary.
daskclient

0,1
Client  Scheduler: tcp://127.0.0.1:40373  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 3  Cores: 48  Memory: 22.35 GiB


**Set up access to S3 bucket using `boto3` and a low-level client**

Rather than setting up an AWS access key ID and secret access key, we will use a low-level client that sends unsigned requests, so the data can be read anonymously.

In [5]:
# No AWS keys required
# Anonymous S3 access: requests are sent unsigned, so no credentials are read.
anonymous_config = Config(signature_version=UNSIGNED)
client = boto3.client('s3', config=anonymous_config)

Let's check out what is in the NOAA GOES AWS bucket. For more information on each of the products, see this [documentation](https://docs.opendata.aws/noaa-goes16/cics-readme.html). 

In [6]:
# Name of the S3 bucket that the rest of the notebook reads from.
bucket_name = 'noaa-goes16'

# Build a paginated listing of the bucket grouped on the '/' delimiter.
# `result` is only consumed by the loop below, which is currently disabled.
paginator = client.get_paginator('list_objects')
result = paginator.paginate(Bucket=bucket_name, Delimiter='/')
# Uncomment to print each top-level prefix returned by the listing:
# for prefix in result.search('CommonPrefixes'):
#     print(prefix.get('Prefix'))

In [7]:
# --- Selection of the scenes to download --------------------------------------
product_name = 'ABI-L2-CMIPC'
year = 2017
band = 2

# One scene is fetched per listed day, all at the same hour.
days_of_year = [142, 143, 144]
hour = 15

# Anonymous (unsigned) S3 client. It does not depend on the day being
# processed, so it is created once instead of once per loop iteration.
s3_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))

# One CMI DataArray per day, concatenated along 't' after the loop.
datas = []

for day_of_year in days_of_year:

    # Key prefix: <product>/<year>/<day-of-year>/<hour>/OR_<product>-M3C<band>.
    # NOTE(review): '-M3C' is hard-coded (presumably the ABI scan mode); keys
    # carrying a different token at that position will not match - confirm.
    prefix = product_name + '/' + str(year) + '/' + str(day_of_year).zfill(3)  \
    + '/' + str(hour).zfill(2) + '/OR_'+ product_name + '-M3C' + str(band).zfill(2)

    keys = list(get_s3_keys(bucket_name, s3_client, prefix = prefix))

    # Fail with an explicit message when the requested index does not exist
    # for this hour (negative indices remain allowed, as with plain indexing).
    if not -len(keys) <= time_idx < len(keys):
        raise IndexError('time_idx=' + str(time_idx) + ' is out of range: '
                         + str(len(keys)) + ' key(s) found for prefix ' + prefix)

    s3_data_key = keys[time_idx] ## specific time within the hour

    # Download the whole file over HTTPS. Check the status so that a failed
    # request is reported here rather than as an unreadable-netCDF error below.
    resp = requests.get('https://' + bucket_name + '.s3.amazonaws.com/' + s3_data_key,
                        timeout = 60)
    resp.raise_for_status()

    # Open the downloaded bytes in memory (nothing is written to disk);
    # file_name is the key's base name with everything after the first '.' dropped.
    file_name = s3_data_key.split('/')[-1].split('.')[0]
    nc4_ds = netCDF4.Dataset(file_name, memory = resp.content)
    store = xr.backends.NetCDF4DataStore(nc4_ds)
    DS = xr.open_dataset(store)
    print('Opened file:', file_name)

    #Convert radiance scan x, y data into derived latitude and longitude values using [this tutorial](https://makersportal.com/blog/2018/11/25/goes-r-satellite-latitude-and-longitude-grid-projection-algorithm)
    lats, lons = goes_lat_lon_reproj(DS)

    # Attach the derived 2-D latitude/longitude grids to the CMI variable.
    cmi = DS.CMI
    cmi['lats'] = (('y', 'x'), lats)
    cmi['lons'] = (('y', 'x'), lons)

    datas.append(cmi)

# Stack the per-day scenes along the 't' dimension.
sat_data = xr.concat(datas, dim = 't')

Opened file: OR_ABI-L2-CMIPC-M3C02_G16_s20171421502078_e20171421504451_c20171421504558
Opened file: OR_ABI-L2-CMIPC-M3C02_G16_s20171431502078_e20171431504451_c20171431504552
Opened file: OR_ABI-L2-CMIPC-M3C02_G16_s20171441502189_e20171441504562_c20171441508287


In [8]:
# Values currently stored on the `t` coordinate of the stacked scenes.
timez = sat_data.t.values

# Source and target time zones, looked up once.
utc_zone = pytz.timezone('UTC')
pacific_zone = pytz.timezone('US/Pacific')

# Both lists receive the same converted values.
dates = []
dates_list = []

# Re-express every timestamp as a US/Pacific wall-clock time.
for stamp in timez:

    # Drop the last 10 characters of the text form before parsing.
    # NOTE(review): assumes a nanosecond-resolution repr, i.e. that the
    # dropped part is the fractional seconds - confirm.
    naive_utc = datetime.datetime.strptime(str(stamp)[:-10], '%Y-%m-%dT%H:%M:%S')

    # Tag the parsed time as UTC, then convert it to US/Pacific.
    pacific_time = naive_utc.replace(tzinfo = utc_zone).astimezone(pacific_zone)

    # Keep only the first 16 characters ('YYYY-MM-DD HH:MM'), which discards
    # the seconds and the UTC offset.
    datetime64_PDT = np.datetime64(str(pacific_time)[:16])

    for target in (dates, dates_list):
        target.append(datetime64_PDT)

# Overwrite the `t` coordinate with the converted times.
sat_data["t"] = dates_list

In [12]:
def _hour_stamp(time_value):
    """Return the first 13 characters of str(time_value) with 'T' and '-' replaced by '_'."""
    return str(time_value)[:13].replace('T', '_').replace('-', '_')


# Output name: GOES16_<product>_band_<band>_<first time>-<last time>
first_stamp = _hour_stamp(sat_data.t[0].values)
last_stamp = _hour_stamp(sat_data.t[-1].values)
filename_out = 'GOES16_' + product_name + '_band_' + str(band) + '_' + first_stamp + '-' + last_stamp

# NOTE(review): absolute, user-specific directory - adjust before running elsewhere.
dat_outdir = '/home/sbarc/students/coello/repos/classes/spring_2021/geog_288cj/goes_images/outputs/data/'

In [13]:
# Destination is the output directory followed directly by the file name.
out_path = dat_outdir + filename_out

# Write the stacked scenes to disk as netCDF, then report where they went.
sat_data.to_netcdf(path = out_path)

print('Saved Files As: \n', out_path)

  return dataset.to_netcdf(*args, **kwargs)
  return dataset.to_netcdf(*args, **kwargs)


Saved Files As: 
 /home/sbarc/students/coello/repos/classes/spring_2021/geog_288cj/goes_images/outputs/data/GOES16_ABI-L2-CMIPC_band_2_2017_05_22_08-2017_05_24_08
