# CMIP6 Historical model runs
This script opens up all the CMIP6 historical model run files with data on precipitation pr and surface temperature tas and loops over the files.


In [1]:
import xarray as xr, matplotlib.pyplot as plt
#import sys 
#sys.path.append('') # use this if the function file is in a different directory to the notebook
from importlib import reload # need to use this if I edit a function file
import anomaly_function as func # imports the function
import os

In [2]:
# use this to reload the function file if I make any edits to it
func = reload(func) 

In [3]:
!ls /g/data/lp01/CMIP6/CMIP

AS-RCEC  CCCma	       E3SM-Project	    IPSL   NASA-GISS  NUIST
AWI	 CCCR-IITM     EC-Earth-Consortium  MIROC  NCAR       SNU
BCC	 CNRM-CERFACS  FIO-QLNM		    MOHC   NCC	      THU
CAMS	 CSIRO	       HAMMOZ-Consortium    MPI-M  NIMS-KMA   UA
CAS	 CSIRO-ARCCSS  INM		    MRI    NOAA-GFDL


In [4]:
# store each section of the directory as a string
institution_dir = '/g/data/lp01/CMIP6/CMIP/'
variable_dir = '/historical/r1i1p1f1/Amon/tas/gr1.5/'
print(institution_dir, variable_dir)

/g/data/lp01/CMIP6/CMIP/ /historical/r1i1p1f1/Amon/tas/gr1.5/


In [5]:
# store all institutions found in the 
institution_list = os.listdir(institution_dir)

In [6]:
institution_list


['HAMMOZ-Consortium',
 'NUIST',
 'UA',
 'CSIRO-ARCCSS',
 'MPI-M',
 'IPSL',
 'CAS',
 'BCC',
 'THU',
 'AS-RCEC',
 'NIMS-KMA',
 'NASA-GISS',
 'AWI',
 'MIROC',
 'MRI',
 'FIO-QLNM',
 'CCCma',
 'CSIRO',
 'NOAA-GFDL',
 'MOHC',
 'CNRM-CERFACS',
 'E3SM-Project',
 'INM',
 'EC-Earth-Consortium',
 'NCC',
 'CAMS',
 'NCAR',
 'SNU',
 'CCCR-IITM']

In [7]:
# creates a dictionary containing the model and model path
models = {}

# find the models from each instituion and store them in a list
for institution in institution_list:
    model_list = os.listdir(f'{institution_dir}{institution}')
    # find the 
    for model in model_list:
        # check if the historical model with the right variable exists and if so save the version number for the file
        if os.path.exists(f'{institution_dir}{institution}/{model}{variable_dir}'):
            version = os.listdir(f'{institution_dir}{institution}/{model}{variable_dir}')
            # for each version, call model_path to make the path and then store with the model in a dictionary 'models'
            for v in version:
                path = func.model_path(institution, model, v)
                if os.path.exists(f'{path}'):
                    models[model] = path
               


In [8]:
models

{'MPI-ESM-1-2-HAM': '/g/data/lp01/CMIP6/CMIP/HAMMOZ-Consortium/MPI-ESM-1-2-HAM/historical/r1i1p1f1/Amon/tas/gr1.5/v20190627/tas_Amon_MPI-ESM-1-2-HAM_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'NESM3': '/g/data/lp01/CMIP6/CMIP/NUIST/NESM3/historical/r1i1p1f1/Amon/tas/gr1.5/v20190630/tas_Amon_NESM3_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'ACCESS-CM2': '/g/data/lp01/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2/historical/r1i1p1f1/Amon/tas/gr1.5/v20190919/tas_Amon_ACCESS-CM2_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'MPI-ESM1-2-HR': '/g/data/lp01/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR/historical/r1i1p1f1/Amon/tas/gr1.5/v20190710/tas_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'MPI-ESM1-2-LR': '/g/data/lp01/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/historical/r1i1p1f1/Amon/tas/gr1.5/v20190710/tas_Amon_MPI-ESM1-2-LR_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'IPSL-CM6A-LR': '/g/data/lp01/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/historical/r1i1p1f1/Amon/tas/gr1.5/v20180803/tas_Amon_IPSL-CM6A-LR_h

In [10]:
len(models)

43

In [11]:
# names = []
# ds = []

# for name, path in models.items():
#     ds.append(xr.open_mfdataset(path, combine='by_coords'))
#     names.append(name)

In [12]:
# Now open each dataset, and store the dataset and model name in two arrays
# The try-except method allows all other files to be read even if one file does not exist or has some issues. 
names = []
ds = []
    
for name, path in models.items():
    try:
        ds.append(xr.open_mfdataset(path, combine='by_coords'))
        names.append(name)
    except OSError:
        # No files read, move on to the next
        continue



In [None]:
print(ds)

In [14]:
# Combine the individual data sets into a single dataset, with the co-ordinate 
# 'model' representing the source model
# By using a for loop with try-except and deleting the time and height coordinates we are removing any differences in 
# height and time.  (We can add the time back later). 
for name, path in models.items():
    try:
        d = xr.open_mfdataset(path, combine='by_coords')
        del d['time']
        #del d['height']
        ds.append(d)
        names.append(name)
    except OSError:
        # No files read, move on to the next
        continue


In [None]:
# # Combine the individual data sets into a single dataset, with the co-ordinate
# # 'model' representing the source model

# multi_model = xr.concat(ds, dim='model')
# multi_model.coords['model'] = names

# multi_model

In [None]:
# A dictionary with the paths to the data for each model
# In reality these runs are not compatible (they use different calendars)
# so the paths here are not correct

dictionary = {
    'AS-RCEC':'/g/data/lp01/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/tas/gr1.5/v20200218/*.nc',
    'BCC-CSM2-MR':'/g/data/lp01/CMIP6/CMIP/BCC/BCC-CSM2-MR/historical/r1i1p1f1/Amon/tas/gr1.5/v20181126/*.nc',
    'BCC-ESM1':'/g/data/lp01/CMIP6/CMIP/BCC/BCC-ESM1/historical/r1i1p1f1/Amon/tas/gr1.5/v20181214/*.nc',
    'CAMS':'/g/data/lp01/CMIP6/CMIP/CAMS/CAMS-CSM1-0/historical/r1i1p1f1/Amon/tas/gr1.5/v20190708/*.nc',
    
}

In [None]:
dictionary

In [None]:
# You can loop over the names and values in a dictionary

for name, path in dictionary.items():
    print(name, path)

In [None]:
# Now open each dataset, and store the dataset and model name in two arrays

names = []
ds = []
for name, path in dictionary.items():
    ds.append(xr.open_mfdataset(path, combine='by_coords'))
    names.append(name)

In [None]:
# Combine the individual data sets into a single dataset, with the co-ordinate
# 'model' representing the source model

multi_model = xr.concat(ds, dim='model')
multi_model.coords['model'] = names

multi_model

Right now I have all the models stored in one array so from hereafter I can calculate anomalies etc.  

In [None]:
# calculate an anomaly for each model
count=0

for model in multi_model:
    count = count+1
    print(count)

tas_aus = multi_model.tas.sel(lat=slice(-50,-5), lon=slice(110,160))b

In [None]:
tas_aus

In [None]:
func.monthly_anom(tas_aus, '18500101', '19001231')

In [None]:
# calculate monthly anomalies
# group the data into months
tas_aus_mon = multi_model.tas.groupby('time.month')
# calculate the mean climatology along each month for the time period 1850-1900 
tas_aus_clim = multi_model.tas.sel(time=slice('18500101','19001231')).groupby('time.month').mean(dim='time')
# caclulate the anomalies for each month
tas_anom_mon = (tas_aus_mon - tas_aus_clim)
tas_anom_mon.mean(dim=('lat','lon')).plot

In [None]:
print(tas_anom_mon.mean(dim=('lat','lon')))


In [None]:
# plot the temperature anomalies for each month throughout the time period
fig, ax = plt.subplots() # make a figure

tas_anom_mon.plot(color = 'red', ax=ax) #plot the anomalies
plt.title('Monthly temperature anomalies', fontsize=16)
plt.ylabel('Temperature anomaly [C]') # this overrides the label which comes on automatically using the xarray title

ax.grid(which='major', linestyle='-', linewidth='0.5', color='black') # customise major grid
ax.minorticks_on() # need this line in order to get the minor grid lines 
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='black')

fig.set_figheight(8) # these two parameters change the figure height and width 
fig.set_figwidth(15)
#fig.suptitle('example figure title', fontsize=16)

In [None]:
# Mean over the model co-ordinate

multi_model.mean('model')