In [1]:
# Packages needed generally
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd

# Packages needed for CESM1 example
import pprint
import intake
import requests
import aiohttp
import s3fs

# Packages needed for CMIP6
import zarr
import gcsfs
import dask

In [2]:
# Open original collection description file: CESM1 LENS
cat_url = "https://ncar-cesm-lens.s3-us-west-2.amazonaws.com/catalogs/aws-cesm1-le.json"
col = intake.open_esm_datastore(cat_url)
col

# Location of file containing grid (lat/lon) information for CESM1 LENS
grid_url="s3://ncar-cesm-lens/ocn/static/grid.zarr"

In [3]:
# Search for the TEMP variable, display the first few entries in the resulting data frame
col.search(variable="TEMP").df

Unnamed: 0,variable,long_name,component,experiment,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,TEMP,potential temperature,ocn,20C,monthly,60.0,global_ocean,degC,1920-01-16 12:00:00,2005-12-16 12:00:00,s3://ncar-cesm-lens/ocn/monthly/cesmLE-20C-TEM...
1,TEMP,potential temperature,ocn,CTRL,monthly,60.0,global_ocean,degC,0400-01-16 12:00:00,2200-12-16 12:00:00,s3://ncar-cesm-lens/ocn/monthly/cesmLE-CTRL-TE...
2,TEMP,potential temperature,ocn,HIST,monthly,60.0,global_ocean,degC,1850-01-16 12:00:00,1919-12-16 12:00:00,s3://ncar-cesm-lens/ocn/monthly/cesmLE-HIST-TE...
3,TEMP,potential temperature,ocn,RCP85,monthly,60.0,global_ocean,degC,2006-01-16 12:00:00,2100-12-16 12:00:00,s3://ncar-cesm-lens/ocn/monthly/cesmLE-RCP85-T...


In [4]:
# Get more detailed: search for monthly output for the 20th century and RCP8.5 
# ("HIST" is the 1850-1919 period, which is only in the first ensemble member, and "20C" is 1920-2005 which is common across all the other members)
col_ocntemp = col.search(
    frequency=["monthly"],
    component="ocn",
    variable="TEMP",
    experiment=["20C", "RCP85"],  
)

col_ocntemp.df

Unnamed: 0,variable,long_name,component,experiment,frequency,vertical_levels,spatial_domain,units,start_time,end_time,path
0,TEMP,potential temperature,ocn,20C,monthly,60.0,global_ocean,degC,1920-01-16 12:00:00,2005-12-16 12:00:00,s3://ncar-cesm-lens/ocn/monthly/cesmLE-20C-TEM...
1,TEMP,potential temperature,ocn,RCP85,monthly,60.0,global_ocean,degC,2006-01-16 12:00:00,2100-12-16 12:00:00,s3://ncar-cesm-lens/ocn/monthly/cesmLE-RCP85-T...


In [5]:
# Load catalog entries for subset into a dictionary of xarray datasets
dsets = col_ocntemp.to_dataset_dict(
    zarr_kwargs={"consolidated": True}, storage_options={"anon": True}
)
print(f"\nDataset dictionary keys:\n {dsets.keys()}")


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.experiment.frequency'


Cannot find the ecCodes library
Cannot find the ecCodes library



Dataset dictionary keys:
 dict_keys(['ocn.RCP85.monthly', 'ocn.20C.monthly'])


In [6]:
# Get file containing lat/lon grid information: s3://ncar-cesm-lens/ocn/static/grid.zarr
fs = s3fs.S3FileSystem(anon=True)
grid = xr.open_zarr(fs.get_mapper(grid_url), consolidated=True)

In [7]:
# Define Xarray datasets corresponding to the two experiments
ds_20C = dsets["ocn.20C.monthly"]
ds_RCP85 = dsets["ocn.RCP85.monthly"]

In [8]:
ds_20C_mean=ds_20C.mean(dim="member_id")

In [9]:
ds_20C_mean.TEMP.sel(time=slice("1950", "2000"))

Unnamed: 0,Array,Chunk
Bytes,16.81 GiB,168.75 MiB
Shape,"(612, 60, 384, 320)","(6, 60, 384, 320)"
Dask graph,102 chunks in 7 graph layers,102 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 16.81 GiB 168.75 MiB Shape (612, 60, 384, 320) (6, 60, 384, 320) Dask graph 102 chunks in 7 graph layers Data type float32 numpy.ndarray",612  1  320  384  60,

Unnamed: 0,Array,Chunk
Bytes,16.81 GiB,168.75 MiB
Shape,"(612, 60, 384, 320)","(6, 60, 384, 320)"
Dask graph,102 chunks in 7 graph layers,102 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [22]:
# Define specific latitude and longitude points
specific_latitudes = [33.94138, 33.96776, 34.02607, 34.07339, 34.10185, 34.11523, 34.11611, 34.11434, 34.11712, 34.11664]  # Add your 113 specific latitudes
specific_longitudes = [-119.27422, -119.25010, -119.23642, -119.25686, -119.29178, -119.33040, -119.39120, -119.40212, -119.42896, -119.44844]  # Add your 113 specific longitudes

# Create a DataFrame with the specific latitude and longitude points
points_df = pd.DataFrame({'nlat': specific_latitudes, 'nlon': specific_longitudes})

# Subset the ds_20C dataset based on the specific latitude and longitude points
grid_subset_points = grid.sel(
    lat=points_df['nlat'], 
    lon=points_df['nlon'], 
    method='nearest'
)

# Print the resulting subset dataset based on specific points
print(ds_20C_subset_points)

#_______________________________________________________________________________
# Subset the grid dataset based on the defined latitude and longitude ranges
#grid_subset = grid.sel(lat=lat_range, lon=lon_range)

# Print the resulting subset grid dataset
#print(grid_subset)

KeyError: "'lat' is not a valid dimension or coordinate"