In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
from google.cloud import storage
import os
import netCDF4
import numpy as np
import numpy.ma as ma
from dask import delayed
import dask.array as da
from dask.distributed import Client
import glob
import tempfile
import subprocess
import datetime, time
from urllib import request
from multiprocessing import Pool
import json
import gc
import boto3
import botocore

# Google Cloud Storage: authenticate with a service-account key file and open
# the 'nex-gddp' bucket.
storage_client = storage.Client.from_service_account_json('/home/jovyan/work/credentials.json')
bucket = storage_client.get_bucket('nex-gddp')

# Name of the S3 bucket the NetCDF inputs are downloaded from.
nex_bucket = 'nasanex'


def base_key_path(var):
    """Return the S3 key prefix (ending in '/') under which files for `var` live."""
    return f'NEX-GDDP/BCSD/historical/day/atmos/{var}/r1i1p1/v1.0/'


# Model identifiers as they appear in the NetCDF file names.
all_models = [
    "ACCESS1-0",
    "BNU-ESM",
    "CCSM4",
    "CESM1-BGC",
    "CNRM-CM5",
    "CSIRO-Mk3-6-0",
    "CanESM2",
    "GFDL-CM3",
    "GFDL-ESM2G",
    "GFDL-ESM2M",
    "IPSL-CM5A-LR",
    "IPSL-CM5A-MR",
    "MIROC-ESM-CHEM",
    "MIROC-ESM",
    "MIROC5",
    "MPI-ESM-LR",
    "MPI-ESM-MR",
    "MRI-CGCM3",
    "NorESM1-M",
    "bcc-csm1-1",
    "inmcm4",
]
# The first three entries of all_models.
some_models = ["ACCESS1-0", "BNU-ESM", "CCSM4"]

# Dask distributed client attached to the scheduler at 'scheduler:8786'.
client = Client('scheduler:8786')
# boto3 S3 resource used for all downloads.
s3 = boto3.resource('s3')

# Average of models per year
def gen_netcdf_id(model, scenario, year, var):
    """Build the object key of one NetCDF file.

    The key is the prefix returned by ``base_key_path(var)`` followed by the
    file name ``{var}_day_BCSD_{scenario}_r1i1p1_{model}_{year}.nc``.

    Note: ``scenario`` only affects the file name; the directory prefix is
    produced by ``base_key_path``, which is called with ``var`` alone.
    """
    key_prefix = base_key_path(var)
    netcdf_name = f'{var}_day_BCSD_{scenario}_r1i1p1_{model}_{year!s}.nc'
    return key_prefix + netcdf_name

def download_file(file_id, nex_bucket = nex_bucket, download_location = '/temp'):
    """Download one object from S3 into a local directory.

    Args:
        file_id: key of the object inside the bucket; the text after its last
            "/" is used as the local file name.
        nex_bucket: name of the S3 bucket to read from. Defaults to the value
            the module-level ``nex_bucket`` had when this function was defined.
        download_location: directory the file is written into.

    Returns:
        The local path the object was downloaded to.

    Errors raised by the S3 download are not caught here.
    """
    local_name = file_id.rsplit("/", 1)[-1]
    filename = f'{download_location}/{local_name}'
    print(f"Downloading {filename}")
    source_bucket = s3.Bucket(nex_bucket)
    source_bucket.download_file(file_id, filename)
    # A disabled fallback used to live here: on a botocore ClientError with
    # code "404" it retried with 'r1i1p1' replaced by 'r6i1p1' in the key, and
    # on any other error it set the returned filename to None. It referred to a
    # `loca_bucket` name and is intentionally not active.
    return filename



def cleanup(download_location = '/temp'):
    """Delete the files sitting directly inside ``download_location``.

    Args:
        download_location: directory to empty. Defaults to '/temp', the same
            default directory ``download_file`` writes into, so a bare
            ``cleanup()`` keeps its previous meaning.

    Entries are found with the glob pattern ``<download_location>/*``, so names
    starting with '.' are not matched. Each match is removed with
    ``os.remove``; a matched sub-directory therefore raises ``OSError``.
    """
    # Escape the directory so glob metacharacters in its name are taken literally.
    pattern = os.path.join(glob.escape(download_location), '*')
    for path in glob.glob(pattern):
        os.remove(path)

def process_baseline_year(year, var, models = all_models, file_prefix = ""):
    """Average one variable across models for one historical year and upload it.

    Steps:
      1. Build the 'historical' NetCDF key for every model in ``models``.
      2. Download all files in parallel with a multiprocessing pool.
      3. Wrap each file's ``var`` variable in a dask array, stack the arrays
         along a new leading axis and take the mean over that axis (i.e. the
         element-wise mean across models).
      4. Save the result to
         ``/temp/{file_prefix}_{year}_{var}_baseline_average.npy`` and upload
         it to the GCS bucket under
         ``{file_prefix}/baseline/{var}/intermediate/``.

    Args:
        year: year to process (used in the file names).
        var: variable name; used both in the key and as the NetCDF variable.
        models: iterable of model identifiers (defaults to ``all_models``).
        file_prefix: prefix of the result file name and of the upload path.

    Returns:
        None. The path of the saved ``.npy`` file is printed.

    NOTE(review): the downloaded NetCDF inputs and the ``.npy`` result are left
    on disk; ``cleanup()`` is not called from here.
    """
    print("Generating ids")
    ids = [gen_netcdf_id(model, 'historical', year, var) for model in models]
    print(ids)
    print("Downloading files")
    # Use the pool as a context manager so its worker processes are released
    # as soon as the (blocking) map has returned, instead of one pool being
    # left alive per processed year.
    with Pool() as pool:
        filenames = pool.map(download_file, ids)
    # shape = (365, 490, 960)  (per-file array shape noted by the original author - TODO confirm)
    print("Stacking files")
    datasets = []
    try:
        for fn in filenames:
            datasets.append(netCDF4.Dataset(fn))
        arr_list = [da.from_array(ds[var], chunks = (366, 245, 240)) for ds in datasets]
        arr_stack = da.stack(arr_list)
        print("Calculating average")
        # The dask arrays read lazily from the open datasets, so the datasets
        # must stay open until compute() has produced the in-memory result.
        year_avg = np.mean(arr_stack, axis = 0).compute()
    finally:
        # Release the NetCDF file handles even if stacking/averaging failed.
        for ds in datasets:
            ds.close()
    result_filename = f"/temp/{file_prefix}_{year}_{var}_baseline_average.npy"
    np.save(result_filename, year_avg)
    blob = bucket.blob(f'{file_prefix}/baseline/{var}/intermediate/{result_filename.split("/")[-1]}')
    blob.upload_from_filename(result_filename)
    print(result_filename)

In [None]:
# Compute and upload the multi-model "tasmax" baseline average for each year
# from 1981 through 2000.
for year in range(1981, 2001):
    try:
        process_baseline_year(year, "tasmax", file_prefix = "nexgddp_baseline_v2_tasmax")
    except ValueError as e:
        # Best effort: carry on with the remaining years, but report the year
        # that was skipped instead of discarding the error silently.
        # Any other exception type propagates and stops the loop.
        print(f"Skipping year {year}: {e!r}")
            

Generating ids
['NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_ACCESS1-0_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_BNU-ESM_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_CCSM4_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_CESM1-BGC_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_CNRM-CM5_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_CSIRO-Mk3-6-0_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_CanESM2_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_GFDL-CM3_1981.nc', 'NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_GFDL-ESM2G_1981.nc', 'NEX-GDD

In [3]:
# Manual one-off download of a single file (tasmax, ACCESS1-0, 1971) straight
# through the S3 resource, written to '/temp/test.nc'.
# The helper-based variant below is disabled; `id_test` is not defined in the
# cells shown here.
#download_file(id_test)
s3.Bucket(nex_bucket).download_file(
    Key='NEX-GDDP/BCSD/historical/day/atmos/tasmax/r1i1p1/v1.0/tasmax_day_BCSD_historical_r1i1p1_ACCESS1-0_1971.nc',
    Filename='/temp/test.nc',
)