(REF)
- CMIP variables: https://pcmdi.llnl.gov/mips/cmip3/variableList.html
- SSP: https://en.wikipedia.org/wiki/Shared_Socioeconomic_Pathways


In [77]:
import s3fs
import pandas as pd
pd.options.display.max_rows = 200

In [3]:
# Google vs. AWS
# The same catalog file name (pangeo-cmip6.csv) is read from two hosts so the
# two copies can be compared in the cells below.

catalog_urls = {
    "Google": "https://cmip6.storage.googleapis.com/pangeo-cmip6.csv",  # Google Cloud
    "AWS": "https://cmip6-pds.s3.amazonaws.com/pangeo-cmip6.csv",       # AWS S3
}
cmip_gc = pd.read_csv(catalog_urls["Google"])
cmip_aws = pd.read_csv(catalog_urls["AWS"])

# Header row followed by the (rows, columns) shape of each catalog, in the same order.
print(*catalog_urls)
print(cmip_gc.shape, cmip_aws.shape)

Google AWS
(514818, 11) (522217, 11)


In [91]:
# Side-by-side comparison of the two catalogs. For every section the AWS
# catalog is printed first and the Google catalog second.
catalogs = (cmip_aws, cmip_gc)
activities = ('CMIP', 'ScenarioMIP')

# Activity ID: every distinct activity present in each catalog
print("Activity ID")
for catalog in catalogs:
    print(sorted(set(catalog['activity_id'])))

# The number of entries (rows) belonging to each activity of interest
for activity in activities:
    print(f"\nNum entries ({activity})")
    for catalog in catalogs:
        print(len(catalog[catalog['activity_id'] == activity]))

# Experiment ID: distinct experiments available within each activity of interest
for activity in activities:
    print(f"\nExperiment ID ({activity})")
    for catalog in catalogs:
        in_activity = catalog[catalog['activity_id'] == activity]
        print(sorted(set(in_activity['experiment_id'])))



Activity ID
['AerChemMIP', 'C4MIP', 'CDRMIP', 'CFMIP', 'CMIP', 'DAMIP', 'DCPP', 'FAFMIP', 'GMMIP', 'HighResMIP', 'ISMIP6', 'LS3MIP', 'LUMIP', 'OMIP', 'PAMIP', 'PMIP', 'RFMIP', 'ScenarioMIP']
['AerChemMIP', 'C4MIP', 'CDRMIP', 'CFMIP', 'CMIP', 'DAMIP', 'DCPP', 'FAFMIP', 'GMMIP', 'HighResMIP', 'ISMIP6', 'LS3MIP', 'LUMIP', 'OMIP', 'PAMIP', 'PMIP', 'RFMIP', 'ScenarioMIP']

Num entries (CMIP)
136689
134576

Num entries (ScenarioMIP)
152478
148581

Experiment ID (CMIP)
['1pctCO2', 'abrupt-4xCO2', 'amip', 'amip-hist', 'esm-hist', 'esm-piControl', 'esm-piControl-spinup', 'hist-GHG', 'hist-nat', 'historical', 'historical-cmip5', 'historical-ext', 'piControl', 'piControl-cmip5', 'piControl-spinup']
['1pctCO2', 'abrupt-4xCO2', 'amip', 'esm-hist', 'esm-piControl', 'esm-piControl-spinup', 'hist-GHG', 'hist-nat', 'historical', 'historical-cmip5', 'historical-ext', 'piControl', 'piControl-cmip5', 'piControl-spinup']

Experiment ID (ScenarioMIP)
['rcp26-cmip5', 'rcp45-cmip5', 'rcp85-cmip5', 'ssp119', '

In [132]:
# Keep only the rows whose column value is in the allowed list, one column at a
# time (activity, then variable, then table, then experiment).
allowed_values = {
    'activity_id': ['CMIP', 'ScenarioMIP'],
    'variable_id': ['tas', 'ts', 'pr', 'hus', 'mrso', 'uas', 'vas', 'ps'],
    'table_id': ['Amon', 'day'],
    'experiment_id': ['historical', 'ssp126', 'ssp245', 'ssp370', 'ssp585'],
}
a = cmip_aws
for column, allowed in allowed_values.items():
    a = a[a[column].isin(allowed)]

# Drop the columns that are not of interest here, then count the distinct values
# of each remaining column per (experiment, variable, table) combination.
a = (
    a.drop(['activity_id', 'institution_id', 'dcpp_init_year', 'version'], axis=1)
     .groupby(['experiment_id', 'variable_id', 'table_id'])
     .nunique()
)
a

# NOTE
# - source_id is the model name
# - zstore shows the total number of available individual files.
# - historical: 1850 to 2014
# - SSP: 2015 to 2100
# - hus: specific humidity, mrso: soil moisture, pr: precip, ps: surface P, tas: surface air T, ts: skin T, {u,v}as: wind speed

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,source_id,member_id,grid_label,zstore
experiment_id,variable_id,table_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
historical,hus,Amon,64,200,3,632
historical,hus,day,46,78,4,282
historical,mrso,day,35,65,4,133
historical,pr,Amon,63,200,3,630
historical,pr,day,51,163,4,489
historical,ps,Amon,63,152,3,565
historical,tas,Amon,64,200,3,635
historical,tas,day,49,163,4,462
historical,ts,Amon,64,152,3,584
historical,uas,Amon,50,152,3,508


Tables (work in progress)

In [96]:
# For each (activity, experiment) pair, count the distinct values found in every
# other catalog column.
group_keys = ['activity_id', 'experiment_id']
cmip_aws.groupby(group_keys).nunique()

Unnamed: 0_level_0,Unnamed: 1_level_0,institution_id,source_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
activity_id,experiment_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AerChemMIP,hist-1950HC,6,6,7,12,251,4,409,0,11
AerChemMIP,hist-piAer,3,3,3,6,228,2,243,0,5
AerChemMIP,hist-piNTCF,7,7,8,9,239,3,273,0,10
AerChemMIP,histSST,10,10,3,6,215,4,322,0,19
AerChemMIP,histSST-1950HC,3,3,1,10,248,3,282,0,3
AerChemMIP,histSST-piAer,1,1,1,4,195,2,198,0,1
AerChemMIP,histSST-piCH4,1,1,1,9,241,2,260,0,1
AerChemMIP,histSST-piNTCF,2,2,1,9,217,2,233,0,2
AerChemMIP,histSST-piO3,1,1,1,4,195,2,198,0,1
AerChemMIP,piClim-2xDMS,2,2,1,5,61,3,110,0,2


In [129]:
# Select the CMIP / historical / 'tas' / 'day' rows of the AWS catalog.
# The conditions are combined with the logical `&` operator rather than
# arithmetic `*`: both give the same boolean mask, but `&` states the intent
# and is the operator pandas recommends for boolean Series.
is_daily_historical_tas = (
    (cmip_aws['activity_id'] == 'CMIP')
    & (cmip_aws['experiment_id'] == 'historical')
    & (cmip_aws['variable_id'] == 'tas')
    & (cmip_aws['table_id'] == 'day')
)
a = cmip_aws[is_daily_historical_tas]

# The filter columns are now constant, so drop them (plus institution_id) and
# count the distinct values of the remaining columns per (model, grid) pair.
a = a.drop(['activity_id', 'experiment_id', 'variable_id', 'table_id', 'institution_id'], axis=1)
a = a.groupby(['source_id', 'grid_label']).nunique()

# Append a 'total' row holding the column-wise sum of the per-group counts.
# NOTE(review): the rendered output shows the (source_id, grid_label) index as
# flat tuples after this assignment — confirm that presentation is acceptable.
a.loc['total'] = a.sum(numeric_only=True,axis=0)

a

Unnamed: 0,member_id,zstore,dcpp_init_year,version
"(ACCESS-CM2, gn)",3,3,0,3
"(ACCESS-ESM1-5, gn)",30,30,0,8
"(AWI-CM-1-1-MR, gn)",5,5,0,1
"(AWI-ESM-1-1-LR, gn)",1,1,0,1
"(BCC-CSM2-MR, gn)",3,3,0,3
"(BCC-ESM1, gn)",3,3,0,1
"(CAMS-CSM1-0, gn)",1,1,0,1
"(CESM2, gn)",11,11,0,4
"(CESM2-FV2, gn)",3,3,0,2
"(CESM2-WACCM, gn)",3,3,0,1
