In [1]:
import pandas as pd
import numpy as np
from glob import glob
import gcsfs
# Set the pandas text display width to 1000 characters.
pd.set_option('display.width', 1000)

### Find datasets which have a new version since a prior date
 - Specify the date of the archived catalog to compare against the current catalog (the archived catalog closest to that date is used)
 - Specify the catalog type (`'-noQC'` or `''`)

In [2]:
# Anonymous, read-only handle on Google Cloud Storage.
fs = gcsfs.GCSFileSystem(token='anon', access='read_only', cache_timeout=-1)

date = '20210119'   # target catalog date, YYYYMMDD
cat_type = '-noQC'  # catalog type: '-noQC' or ''

# List the archived catalogs of the requested type.
files = fs.glob(f'gs://cmip6/old_catalogs/pangeo-cmip6-*{cat_type}.csv.gz')

# Extract the date stamp from each file name by slicing off the fixed prefix
# and suffix. str.split(cat_type) cannot be used here: it raises ValueError
# for cat_type == ''. The glob for '' also matches the '-noQC' archives, so
# anything that is not a plain 8-digit stamp is skipped.
name_prefix = 'pangeo-cmip6-'
name_suffix = f'{cat_type}.csv.gz'
stamps = (path.rsplit('/', 1)[-1][len(name_prefix):-len(name_suffix)] for path in files)
dates = [int(stamp) for stamp in stamps if len(stamp) == 8 and stamp.isdigit()]
if not dates:
    raise ValueError(f'no archived catalogs found for cat_type={cat_type!r}')

# Find the closest catalog to the specified date. Distances are measured in
# calendar time, not as integer differences of YYYYMMDD stamps (which would
# put 20201231 and 20210105 8874 apart instead of 5 days).
target_day = pd.to_datetime(date, format='%Y%m%d')
prior_date = str(
    min(dates, key=lambda d: abs(pd.to_datetime(str(d), format='%Y%m%d') - target_day))
)
prior_date

'20210119'

In [3]:
# Load the archived catalog (df_old) and the current catalog (df); every
# column is read with dtype='unicode'.
old_catalog_url = f'https://cmip6.storage.googleapis.com/old_catalogs/pangeo-cmip6-{prior_date}{cat_type}.csv.gz'
current_catalog_url = f'https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores{cat_type}.csv'

df_old = pd.read_csv(old_catalog_url, dtype='unicode')
df = pd.read_csv(current_catalog_url, dtype='unicode')

In [4]:
# Attach the current catalog's version to every row of the old catalog,
# matching on the store path; rows without a match get a null version_new.
current_versions = df[['zstore', 'version']]
dfn = df_old.merge(
    current_versions,
    how='left',
    on='zstore',
    sort=False,
    suffixes=('_old', '_new'),
)

# Keep rows whose store still exists in the current catalog but whose
# version string differs from the old one.
version_changed = dfn.version_old != dfn.version_new
still_present = dfn.version_new.notnull()
df_replaced = dfn[version_changed & still_present]

In [5]:
# Render the old and new version of every replaced store.
report_columns = ['zstore', 'version_old', 'version_new']
df_replaced[report_columns].style

Unnamed: 0,zstore,version_old,version_new
35856,gs://cmip6/CMIP/CNRM-CERFACS/CNRM-CM6-1/historical/r29i1p1f2/Amon/clt/gr/,20191004,20200529
45531,gs://cmip6/CMIP/E3SM-Project/E3SM-1-1-ECA/piControl/r1i1p1f1/Amon/clt/gr/,20191216,20201001
45672,gs://cmip6/CMIP/E3SM-Project/E3SM-1-1/piControl/r1i1p1f1/Amon/clt/gr/,20191029,20201123
45996,gs://cmip6/CMIP/EC-Earth-Consortium/EC-Earth3-LR/piControl/r1i1p1f1/Amon/clt/gr/,20190103,20200409
47067,gs://cmip6/CMIP/EC-Earth-Consortium/EC-Earth3-Veg/piControl/r1i1p1f1/Amon/clt/gr/,20190619,20200226
50487,gs://cmip6/CMIP/EC-Earth-Consortium/EC-Earth3/piControl/r1i1p1f1/Amon/clt/gr/,20190712,20200312
