## Demonstration: How to use the CSV catalog of CMIP6 data in GCS

In [None]:
import pandas as pd
import xarray as xr
import gcsfs
# Show the installed gcsfs version; the get_mapper access used at the end of
# this notebook requires gcsfs >= 0.3.0.
gcsfs.__version__

In [None]:
# Open an anonymous, read-only connection to Google Cloud Storage (GCS)
gcs = gcsfs.GCSFileSystem(
    project='pangeo-181919',
    token='anon',
    access='read_only',
)

# The CSV catalog of the existing CMIP6 zstores on GCS is maintained on an
#   LDEO-local machine; load it into a pandas dataframe
catalog_url = 'http://fletcher.ldeo.columbia.edu/catalogs/pangeo-cmip6.csv'
dfcat = pd.read_csv(catalog_url)

# One catalog row per zstore, so the row count is the number of zstores
print('number of zstores:', len(dfcat))

### View the CSV catalog:
- use the 'qgrid' package
- don't just scroll through: <b>sort</b> and <b>filter</b> in the qgrid widget to explore and subselect
- then use (Shift)(mouse click) or (Command)(mouse click) to <b>select rows</b>

In [None]:
# Show the catalog dataframe in an interactive qgrid widget that can be
# sorted, filtered, and have rows selected.
import qgrid
widget = qgrid.show_grid(dfcat)
# Bare last expression so the notebook displays the widget as the cell output.
widget

### After you have filtered and/or selected

In [None]:
# If you have only filtered and sorted in the widget, take the dataframe
# as it was changed there:
dfnew = widget.get_changed_df()

# If you have instead selected specific rows, comment out the line above
# and use this one:
#dfnew = widget.get_selected_df()

In [None]:
# Now you can list the URLs of the data you have selected
# (only the first ten entries are shown):
dfnew.zstore.values[0:10]

### and to access the GCS zstores:
 - note that this requires `gcsfs.__version__ >= 0.3.0`, since the old method (using `gcsfs.GCSMap`) is deprecated
 - we will just grab the first from your list

In [None]:
# Take the first zstore URL from the filtered/selected catalog rows
zstore = dfnew.zstore.values[0]
print(zstore,':')

# Wrap the GCS store in a mapping that xarray can read (needs gcsfs >= 0.3.0).
# Named `mapper` rather than `map` so the Python builtin `map` is not shadowed
# for the remainder of the kernel session.
mapper = gcs.get_mapper(zstore)

# Open the zarr store as an xarray dataset and display it
ds = xr.open_zarr(mapper)
ds