In [1]:
import pandas as pd
import numpy as np
from glob import glob
import gcsfs
# Widen pandas' text display so long rows are not wrapped.
pd.options.display.width = 1000

### Find datasets that have a new version since a prior date
 - Specify the date of the prior catalog to compare against the current catalog (the archived catalog closest to that date is used)
 - Specify the catalog type (`'-noQC'` or `''`)

In [2]:
# Anonymous, read-only file-system handle used to list the archived catalogs.
fs = gcsfs.GCSFileSystem(token='anon', access='read_only', cache_timeout=-1)

date = '20210210'   # target date (YYYYMMDD) of the prior catalog
cat_type = '-noQC'  # catalog type: '-noQC' or ''

# Find the archived catalog closest to the specified date.
files = fs.glob(f'gs://cmip6/CMIP6/old_catalogs/pangeo-cmip6-*{cat_type}.csv.gz')

# Extract the 8-digit date stamp from each file name by slicing off the known
# prefix and suffix. str.split(cat_type) cannot be used here: it raises
# ValueError for cat_type == '', and with an empty cat_type the glob also
# matches the '-noQC' catalogs, whose names must be skipped.
name_prefix = 'pangeo-cmip6-'
name_suffix = f'{cat_type}.csv.gz'
dates = []
for path in files:
    name = path.rsplit('/', 1)[-1]
    if not (name.startswith(name_prefix) and name.endswith(name_suffix)):
        continue
    stamp = name[len(name_prefix):len(name) - len(name_suffix)]
    if len(stamp) == 8 and stamp.isdecimal():
        dates.append(int(stamp))

if not dates:
    raise ValueError(f'no archived catalog found for cat_type={cat_type!r}')

# Measure closeness in calendar days: the difference of two YYYYMMDD integers
# is not a day count (20210201 - 20210131 == 70), which can select the wrong
# catalog across month or year boundaries.
target = pd.to_datetime(date, format='%Y%m%d')
prior_date = str(min(dates, key=lambda d: abs(pd.to_datetime(str(d), format='%Y%m%d') - target)))
prior_date

'20210209'

In [3]:
def _version_free_store(zstore):
    """Strip the bucket prefix and the version suffix from store URLs.

    Parameters
    ----------
    zstore : pandas.Series
        Store URLs taken from a catalog's ``zstore`` column.

    Returns
    -------
    pandas.Series
        For each URL, the text after the last 'CMIP6/' and before the first
        'v20' that follows it.
    """
    # Vectorized string accessors replace the row-wise DataFrame.apply, which
    # ran a Python lambda once per catalog row.
    return zstore.str.split('CMIP6/').str[-1].str.split('v20').str[0]


# Read the catalogs; 'cstore' is the key used to match the same dataset across
# the two catalogs regardless of its version.
df_old = pd.read_csv(f'https://cmip6.storage.googleapis.com/CMIP6/old_catalogs/pangeo-cmip6-{prior_date}{cat_type}.csv.gz', dtype='unicode')
df_old['cstore'] = _version_free_store(df_old['zstore'])
df = pd.read_csv(f'https://cmip6.storage.googleapis.com/cmip6-zarr-consolidated-stores{cat_type}.csv', dtype='unicode')
df['cstore'] = _version_free_store(df['zstore'])

In [4]:
# Attach the current catalog's version to every row of the prior catalog,
# matching on the version-free store key.
current_versions = df[['cstore', 'version']]
dfn = df_old.merge(
    current_versions,
    how='left',
    on='cstore',
    sort=False,
    suffixes=('_old', '_new'),
)

# Keep rows that still have a match in the current catalog and whose version
# differs from the one in the prior catalog.
has_current = dfn['version_new'].notna()
version_changed = dfn['version_old'].ne(dfn['version_new'])
df_replaced = dfn[version_changed & has_current]

len(df_replaced), df_replaced.experiment_id.unique(), df_replaced.table_id.unique()

(427,
 array(['piControl', 'ssp245', 'historical', 'ssp370', 'ssp119', 'ssp585',
        'abrupt-4xCO2', '1pctCO2', 'ssp126', 'ssp245-covid',
        'ssp245-cov-strgreen', 'ssp245-cov-modgreen', 'ssp245-cov-fossil'],
       dtype=object),
 array(['Amon', 'SImon', 'Omon', 'Oday', 'Oyr'], dtype=object))

In [5]:
# Show the store path together with its old and new version for every replaced dataset.
columns_to_show = ['zstore', 'version_old', 'version_new']
df_replaced[columns_to_show].style

Unnamed: 0,zstore,version_old,version_new
43032,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-Earth3-LR/piControl/r1i1p1f1/Amon/ta/gr/v20190103/,20190103,20200409
43045,gs://cmip6/CMIP6/CMIP/EC-Earth-Consortium/EC-Earth3-LR/piControl/r1i1p1f1/SImon/sivol/gn/v20190103/,20190103,20200919
46061,gs://cmip6/CMIP6/ScenarioMIP/IPSL/IPSL-CM6A-LR/ssp245/r2i1p1f1/SImon/sisnthick/gn/v20190119/,20190119,20190516
52030,gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/historical/r2i1p1f1/Omon/vsf/gn/v20190227/,20190227,20190917
52407,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp370/r4i1p1f1/Amon/ua/gn/v20190306/,20190306,20190429
52420,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp370/r4i1p1f1/Amon/va/gn/v20190306/,20190306,20190429
52638,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp119/r5i1p1f1/Amon/ua/gn/v20190306/,20190306,20190429
52639,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp119/r5i1p1f1/Amon/va/gn/v20190306/,20190306,20190429
52648,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp119/r4i1p1f1/Amon/ua/gn/v20190306/,20190306,20190429
52649,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp119/r4i1p1f1/Amon/va/gn/v20190306/,20190306,20190429


In [None]:
# Inspect the first store URL of the current catalog.
df['zstore'].values[0]

In [None]:
# List the distinct version strings present in the prior catalog.
df_old['version'].unique()