### Sample use of the CSV/DataFrame catalog for the new CMIP6 stores in the gs://pangeo-cmip6 bucket

In [1]:
import pandas as pd
import xarray as xr
import gcsfs
import qgrid   # please use old kernel (e.g. pangeo-Jun2019)  until qgrid is updated for jupyterlab >= 1.0.0
# Display the installed gcsfs version.
gcsfs.__version__

'0.3.0'

In [2]:
# define a simple search by keywords

def search_df(df, **search):
    """Return the rows of ``df`` that match every ``column=value`` keyword.

    Each keyword names a column of ``df`` and gives the value that column
    must equal. The filters are applied one after another, so a row is kept
    only if it satisfies all of them. When no keywords are given, ``df``
    itself is returned.
    """
    matches = df
    for column, wanted in search.items():
        matches = matches[matches[column] == wanted]
    return matches

In [3]:
# Create the GCSFileSystem handle (anonymous token, read-only access) and
# load the pre-made catalog CSV into a DataFrame.

gcs = gcsfs.GCSFileSystem(
    project='pangeo-181919',
    token='anon',
    access='read_only',
)
dfcat = pd.read_csv(
    'http://fletcher.ldeo.columbia.edu/catalogs/pangeo-cmip6.csv'
)

In [4]:
# List the distinct source_id and experiment_id values present in the catalog.

dfcat['source_id'].unique(), dfcat['experiment_id'].unique()

(array(['UKESM1-0-LL', 'MIROC6', 'CAMS-CSM1-0', 'MPI-ESM1-2-HR', 'CanESM5',
        'CNRM-CM6-1', 'CNRM-ESM2-1', 'GFDL-CM4', 'GFDL-ESM4', 'MRI-ESM2-0',
        'CESM2', 'BCC-CSM2-MR', 'IPSL-CM6A-LR', 'HadGEM3-GC31-LL',
        'GISS-E2-1-G', 'GISS-E2-1-H', 'SAM0-UNICON', 'AWI-CM-1-1-MR',
        'EC-Earth3-LR', 'E3SM-1-0', 'CESM2-WACCM', 'BCC-ESM1', 'EC-Earth3',
        'MIROC-ES2L', 'EC-Earth3-Veg', 'NESM3'], dtype=object),
 array(['ssp585', 'ssp245', 'piControl', 'historical'], dtype=object))

In [5]:
# Filter the catalog on experiment, source and variable, then show the
# zstore entries of the matching rows.

dfs = search_df(
    dfcat,
    experiment_id='historical',
    source_id='GISS-E2-1-H',
    variable_id='ts',
)
dfs['zstore'].values

array(['gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r10i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r4i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r1i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r7i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r8i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r6i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r9i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r2i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r3i1p1f1/Amon/ts/gn/',
       'gcs://pangeo-cmip6/AR6_WG1/CMIP/NASA-GISS/GISS-E2-1-H/historical/r5i1p1f1/Amon/ts/gn/'],
      dtype=object)

In [6]:
# open one of these zarr stores: (requires gcsfs version >= 0.3.0 )
# The statements are live (not commented out) so that a fresh
# Restart & Run All reproduces the dataset shown in this cell's output.

store = dfs.zstore.values[0]    # first store returned by the search
mapper = gcs.get_mapper(store)  # named `mapper` so the builtin `map` is not shadowed
xr.open_zarr(mapper)

<xarray.Dataset>
Dimensions:    (bnds: 2, lat: 90, lon: 144, time: 1980)
Coordinates:
  * lat        (lat) float64 -89.0 -87.0 -85.0 -83.0 ... 83.0 85.0 87.0 89.0
    lat_bnds   (lat, bnds) float64 dask.array<shape=(90, 2), chunksize=(90, 2)>
  * lon        (lon) float64 1.25 3.75 6.25 8.75 ... 351.2 353.8 356.2 358.8
    lon_bnds   (lon, bnds) float64 dask.array<shape=(144, 2), chunksize=(144, 2)>
  * time       (time) object 1850-01-16 12:00:00 ... 2014-12-16 12:00:00
    time_bnds  (time, bnds) object dask.array<shape=(1980, 2), chunksize=(1980, 2)>
Dimensions without coordinates: bnds
Data variables:
    ts         (time, lat, lon) float32 dask.array<shape=(1980, 90, 144), chunksize=(600, 90, 144)>
Attributes:
    Conventions:            CF-1.7 CMIP-6.2
    activity_id:            CMIP
    branch_method:          standard
    branch_time_in_child:   0.0
    branch_time_in_parent:  65700.0
    cmor_version:           3.3.2
    contact:                Kenneth Lo (cdkkl@giss.nasa.gov)

## New method to view, sort, and select zarr stores

In [8]:
# Show the catalog in an interactive qgrid table; 'editable': False keeps
# every field from being changed.
widget = qgrid.show_grid(
    dfcat,
    column_options={'editable': False},
)
widget

just before QgridWidget call


QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

In [9]:
# Print the instructions in one call; sep="\n" puts each message on its own line.
print(
    "Choose zarr stores above by filtering or selecting:",
    "f - filter from available choices at the top of each column (default)",
    "s - select a few rows above (click and then <Shift> click or <Command> click)",
    "CHOOSE METHOD HERE:",
    sep="\n",
)
# 'f' = use the column filters, 's' = use the selected rows.
method = 'f'

Choose zarr stores above by filtering or selecting:
f - filter from available choices at the top of each column (default)
s - select a few rows above (click and then <Shift> click or <Command> click)
CHOOSE METHOD HERE:


In [10]:
# method == 's': take the rows selected in the grid (click, then <Shift> click
#   or <Command> click); if no zstores are listed, nothing was selected.
# otherwise: take the grid contents after using the filters at the top of
#   each column.
dfnew = widget.get_selected_df() if method == 's' else widget.get_changed_df()
    

In [11]:
# Print the zstore entries of the chosen rows.
print(dfnew['zstore'].values)

['gcs://pangeo-cmip6/AR6_WG1/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp585/r1i1p1f1/3hr/pr/gr/'
 'gcs://pangeo-cmip6/AR6_WG1/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp245/r1i1p1f1/3hr/pr/gr/'
 'gcs://pangeo-cmip6/AR6_WG1/CMIP/IPSL/IPSL-CM6A-LR/historical/r10i1p1f1/3hr/pr/gr/']


In [None]:
# Preview the first rows of the chosen subset. The original cell ended in a
# dangling `dfnew.`, which is a SyntaxError when the notebook is run end to end.
dfnew.head()