In [18]:
from pathlib import Path

import intake
import numpy as np
import pandas as pd

# need AWS access keys set up in environ to pull zarr files
# Maps a short "<scenario>_<variable>" key to the zarr store holding that dataset.
zarr_urls = {
    'rcp45_prec':'s3://ncar-na-cordex/day/prec.rcp45.day.NAM-44i.raw.zarr',
    'rcp85_prec':'s3://ncar-na-cordex/day/prec.rcp85.day.NAM-44i.raw.zarr',
    'rcp45_temp':'s3://ncar-na-cordex/day/temp.rcp45.day.NAM-44i.raw.zarr',
    'rcp85_temp':'s3://ncar-na-cordex/day/temp.rcp85.day.NAM-44i.raw.zarr'
}

# Successfully opened datasets, keyed like `zarr_urls`. A store that fails to
# open is reported and left out, so downstream code must not assume every key exists.
datasets = {}

for key, url in zarr_urls.items():

    try:
        # Source construction lives inside the try as well, so a failure at any
        # step is reported for this URL and the remaining URLs are still attempted.
        zarr_cat = intake.open_zarr(url)
        zarr_source = zarr_cat()

        # Attempt to load the dataset
        dataset = zarr_source.to_dask()

    except Exception as e:
        print(f"Error loading dataset from {url}: {e}")

    else:
        # Only register and announce the dataset once every step has succeeded.
        datasets[key] = dataset
        print(f"Dataset from {url} loaded successfully")

Dataset from s3://ncar-na-cordex/day/prec.rcp45.day.NAM-44i.raw.zarr loaded successfully
Dataset from s3://ncar-na-cordex/day/prec.rcp85.day.NAM-44i.raw.zarr loaded successfully
Dataset from s3://ncar-na-cordex/day/temp.rcp45.day.NAM-44i.raw.zarr loaded successfully
Dataset from s3://ncar-na-cordex/day/temp.rcp85.day.NAM-44i.raw.zarr loaded successfully


In [59]:
simulations = ['CanESM2.CanRCM4', 'CanESM2.RCA4']

# Bounding box of the study region, expressed in the datasets' own
# `lat` / `lon` coordinate values.
LAT_BOUNDS = slice(37, 41)
LON_BOUNDS = slice(-105, -102)


def regional_mean(dataset, member_ids, lat_bounds=LAT_BOUNDS, lon_bounds=LON_BOUNDS):
    """Subset a dataset to a lat/lon box and average it over that box.

    Parameters
    ----------
    dataset
        Dataset with `lat`, `lon`, `member_id` and `bnds` dimensions and a
        `time_bnds` variable.
    member_ids
        `member_id` labels (simulations) to keep.
    lat_bounds, lon_bounds
        Label-based slices selecting the box along `lat` and `lon`.

    Returns
    -------
    The selected subset with `time_bnds` removed, averaged over `lat` and `lon`.
    """
    return (
        dataset
            .sel(lat=lat_bounds, lon=lon_bounds, member_id=member_ids, bnds=0)
            # `drop_vars` replaces the deprecated `Dataset.drop` for removing variables.
            .drop_vars('time_bnds')
            .mean(dim=['lat', 'lon'])
    )


# Get dataset, filter, aggregate with xarray.
# `datasets[...]` is indexed rather than `.get()`-ed: a dataset that failed to
# load then raises a KeyError naming the missing key, not an AttributeError on None.
ds_dict = {
    f'{scenario}_{variable}_ds': regional_mean(datasets[f'{scenario}_{variable}'], simulations)
    for scenario in ('rcp45', 'rcp85')
    for variable in ('prec', 'temp')
}

# Individual names kept for any cell that refers to them directly.
rcp45_prec_ds = ds_dict['rcp45_prec_ds']
rcp85_prec_ds = ds_dict['rcp85_prec_ds']
rcp45_temp_ds = ds_dict['rcp45_temp_ds']
rcp85_temp_ds = ds_dict['rcp85_temp_ds']

In [83]:
# Write one parquet file per (aggregated dataset, simulation) pair.
# The target directory is created up front so a fresh checkout does not fail
# on the first write.
output_dir = Path('data/scenarios')
output_dir.mkdir(parents=True, exist_ok=True)

for name, ds in ds_dict.items():
    # Iterate over the plain label values; the loop variable is deliberately not
    # called `id`, which would shadow the builtin for the rest of the kernel session.
    for member in ds.member_id.values:
        (
            ds.sel(member_id=member)
            # After selecting a single member the coordinate is a constant; drop
            # it so it does not become a redundant dataframe column.
            .drop_vars('member_id')
            .to_dataframe()
            .to_parquet(output_dir / f'{name}_{member}.parquet')
        )