In [18]:
from google.cloud import storage
import os
import netCDF4
import numpy as np
import numpy.ma as ma
from dask import delayed
import dask.array as da
from dask.distributed import Client
import glob
import tempfile
import subprocess
import datetime, time
from urllib import request
from multiprocessing import Pool
import json
import gc
import boto3
import botocore
import itertools

storage_client = storage.Client.from_service_account_json('/home/jovyan/work/credentials.json')
bucket = storage_client.get_bucket('nex-gddp')

loca_bucket = 'nasanex'
base_key_path = 'LOCA'
base_url = 'ftp://gdo-dcp.ucllnl.org/pub/dcp/archive/cmip5/loca/LOCA_2016-04-02/'
all_models = ["ACCESS1-0","ACCESS1-3","CCSM4","CESM1-BGC","CESM1-CAM5","CMCC-CM","CMCC-CMS","CNRM-CM5","CSIRO-Mk3-6-0","CanESM2","EC-EARTH","FGOALS-g2","GFDL-CM3","GFDL-ESM2G","GFDL-ESM2M","GISS-E2-H","GISS-E2-R","HadGEM2-AO","HadGEM2-CC","HadGEM2-ES","IPSL-CM5A-LR","IPSL-CM5A-MR","MIROC-ESM","MIROC-ESM-CHEM","MIROC5","MPI-ESM-LR","MPI-ESM-MR","MRI-CGCM3","NorESM1-M","bcc-csm1-1","bcc-csm1-1-m","inmcm4"]
some_models = ["ACCESS1-0","ACCESS1-3","CCSM4"]

client = Client('scheduler:8786')

s3 = boto3.resource('s3')


# Begin here
def process_model_year(model, scenario, year):
    process_year_temps(model, scenario, year)

def process_year_temps(model, scenario, year):
    print(f"Processing temperatures for {model} {year} ({scenario})")
    ids = (gen_netcdf_id(model, scenario, year, 'tasmax'), gen_netcdf_id(model, scenario, year, 'tasmin'))
    print(f"File ids are: {ids}")
    tasmax_file, tasmin_file = list(map(download_file, ids))
    tasmax_dataset, tasmin_dataset = netCDF4.Dataset(tasmax_file), netCDF4.Dataset(tasmin_file)
    print(tasmax_file)
    print(tasmax_dataset)
    print(tasmax_dataset['tasmax'])
    print(tasmax_dataset['tasmax'].shape)
    pass

def gen_netcdf_id(model, scenario, year, var):
    id = f'LOCA/{model}/16th/{scenario}/r1i1p1/{var}/{var}_day_{model}_{scenario}_r1i1p1_{str(year)}0101-{str(year)}1231.LOCA_2016-04-02.16th.nc'
    return id

def download_file(file_id, loca_bucket = loca_bucket, download_location = '/temp'):
    filename = f'{download_location}/{file_id.split("/")[-1]}'
    print(f"Downloading {filename}")
    try:
        s3.Bucket(loca_bucket).download_file(file_id, filename)
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            file_id = file_id.replace('r1i1p1', 'r6i1p1')
            s3.Bucket(loca_bucket).download_file(file_id, filename)
        else:
            raise
    except:
        filename = None
        raise
    return filename

def cleanup():
    for file in glob.glob('/temp/*'):
        os.remove(file)

## BASELINE

# Average of models per year
def process_baseline_year_per_model(model, var, download = True):
    if download:
        print("Generating ids")
        ids = [ gen_netcdf_id(model, 'historical', year, var) for year in range(1971, 2001)]
        print("Downloading files")
        pool = Pool()
        filenames = pool.map(download_file, ids)
    else:
        pass
    # shape = (365, 490, 960)
    print("Stacking files")
    #arr_list = list(map(lambda fn: da.from_array(netCDF4.Dataset(fn)['tasmax'], chunks = (366, 245, 240)), filenames))
    #arr_stack = da.stack(arr_list)
    #print("Calculating average")
    #year_avg = np.mean(arr_stack, axis = 0).compute()
    #result_filename = f"/temp/{file_prefix}_{year}_tasmax_baseline_average.npy"
    #np.save(result_filename, year_avg)
    #blob = bucket.blob(f'baseline/tasmax/intermediate/{result_filename.split("/")[-1]}')
    #blob.upload_from_filename(result_filename)
    #print(result_filename)
    #return None

In [20]:
process_baseline_year_per_model("ACCESS1-0", "tasmax")

Generating ids
Downloading files
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19710101-19711231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19740101-19741231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19720101-19721231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19760101-19761231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19730101-19731231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19770101-19771231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19750101-19751231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19780101-19781231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19790101-19791231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19800101-19801231.L

Exception in thread Thread-18:
Traceback (most recent call last):
  File "/opt/conda/envs/dask/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/opt/conda/envs/dask/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/envs/dask/lib/python3.6/multiprocessing/pool.py", line 463, in _handle_results
    task = get()
  File "/opt/conda/envs/dask/lib/python3.6/multiprocessing/connection.py", line 251, in recv
    return _ForkingPickler.loads(buf.getbuffer())
TypeError: __init__() missing 1 required positional argument: 'operation_name'



Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19870101-19871231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19880101-19881231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19890101-19891231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19900101-19901231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19910101-19911231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19920101-19921231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19930101-19931231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19940101-19941231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19950101-19951231.LOCA_2016-04-02.16th.nc


Process ForkPoolWorker-66:
Process ForkPoolWorker-68:
Process ForkPoolWorker-71:
Process ForkPoolWorker-67:
Process ForkPoolWorker-69:
Process ForkPoolWorker-65:
  File "/opt/conda/envs/dask/lib/python3.6/multiprocessing/queues.py", line 346, in put
    with self._wlock:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-70:
Process ForkPoolWorker-72:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/conda/envs/dask/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/opt/conda/envs/dask/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/envs/dask/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/opt/conda/envs/dask/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap


KeyboardInterrupt: 

Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19960101-19961231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19970101-19971231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19980101-19981231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_19990101-19991231.LOCA_2016-04-02.16th.nc
Downloading /temp/tasmax_day_ACCESS1-0_historical_r1i1p1_20000101-20001231.LOCA_2016-04-02.16th.nc


In [17]:
s3.Bucket(loca_bucket).download_file(gen_netcdf_id("casdf-0", 'historical', 1971, "tasmax"), '/temp/test.nc')

ClientError: An error occurred (400) when calling the HeadObject operation: Bad Request

In [13]:
s3.Bucket(loca_bucket).download_file(gen_netcdf_id("ACCESS1-0", 'historical', 1971, "tasmax"), '/temp/test.nc')


ClientError: An error occurred (400) when calling the HeadObject operation: Bad Request