# CMIP6 Historical model runs
This script opens up all the CMIP6 historical model run files with data on precipitation pr and surface temperature tas and loops over the files.


In [1]:
import xarray as xr, matplotlib.pyplot as plt
#import sys 
#sys.path.append('') # use this if the function file is in a different directory to the notebook
from importlib import reload # need to use this if I edit a function file
import anomaly_function as func # imports the function
import os
import numpy as np

In [2]:
# use this to reload the function file if I make any edits to it
func = reload(func) 

In [3]:
!ls /g/data/lp01/CMIP6/CMIP

AS-RCEC  CCCma	       CSIRO-ARCCSS	    INM    MRI	      NOAA-GFDL
AWI	 CCCR-IITM     E3SM-Project	    IPSL   NASA-GISS  NUIST
BCC	 CMCC	       EC-Earth-Consortium  MIROC  NCAR       SNU
CAMS	 CNRM-CERFACS  FIO-QLNM		    MOHC   NCC	      THU
CAS	 CSIRO	       HAMMOZ-Consortium    MPI-M  NIMS-KMA   UA


In [4]:
# store each section of the directory as a string
institution_dir = '/g/data/lp01/CMIP6/CMIP/'
variable_dir = '/historical/r1i1p1f1/Amon/tas/gr1.5/'
print(institution_dir, variable_dir)

/g/data/lp01/CMIP6/CMIP/ /historical/r1i1p1f1/Amon/tas/gr1.5/


In [5]:
# store all institutions found in the 
institution_list = os.listdir(institution_dir)

In [6]:
institution_list


['HAMMOZ-Consortium',
 'NUIST',
 'UA',
 'CSIRO-ARCCSS',
 'MPI-M',
 'IPSL',
 'CAS',
 'BCC',
 'THU',
 'AS-RCEC',
 'NIMS-KMA',
 'CMCC',
 'NASA-GISS',
 'AWI',
 'MIROC',
 'MRI',
 'FIO-QLNM',
 'CCCma',
 'CSIRO',
 'NOAA-GFDL',
 'MOHC',
 'CNRM-CERFACS',
 'E3SM-Project',
 'INM',
 'EC-Earth-Consortium',
 'NCC',
 'CAMS',
 'NCAR',
 'SNU',
 'CCCR-IITM']

In [43]:
# creates a dictionary containing the model and model path
models = {}

# find the models from each instituion and store them in a list
for institution in institution_list:
    model_list = os.listdir(f'{institution_dir}{institution}')
    # find the 
    for model in model_list:
        # check if the historical model with the right variable exists and if so save the version number for the file
        if os.path.exists(f'{institution_dir}{institution}/{model}{variable_dir}'):
            version = os.listdir(f'{institution_dir}{institution}/{model}{variable_dir}')
            # for each version, call model_path to make the path and then store with the model in a dictionary 'models'
            for v in version:
                #path = func.model_path(institution, model, v)
                path = f'{institution_dir}{institution}/{model}/{variable_dir}{v}/tas_Amon_{model}_historical_r1i1p1f1_gr1.5_185001-201412.nc'
                if os.path.exists(f'{path}'):
                    models[model] = path
                else:
                    # If the model path does not exist, then let me know by printing it here
                    print(f'Path for {model} does not exist')
                    
# Prints the number of models loaded into the dictionary
print(f'There are {len(models)} model paths loaded into the dictionary "models"')

Path for MCM-UA-1-0 does not exist
There are 44 model paths loaded into the dictionary "models"


In [37]:
models

{'MPI-ESM-1-2-HAM': '/g/data/lp01/CMIP6/CMIP/HAMMOZ-Consortium/MPI-ESM-1-2-HAM//historical/r1i1p1f1/Amon/tas/gr1.5/v20190627/tas_Amon_MPI-ESM-1-2-HAM_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'NESM3': '/g/data/lp01/CMIP6/CMIP/NUIST/NESM3//historical/r1i1p1f1/Amon/tas/gr1.5/v20190630/tas_Amon_NESM3_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'ACCESS-CM2': '/g/data/lp01/CMIP6/CMIP/CSIRO-ARCCSS/ACCESS-CM2//historical/r1i1p1f1/Amon/tas/gr1.5/v20190919/tas_Amon_ACCESS-CM2_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'MPI-ESM1-2-HR': '/g/data/lp01/CMIP6/CMIP/MPI-M/MPI-ESM1-2-HR//historical/r1i1p1f1/Amon/tas/gr1.5/v20190710/tas_Amon_MPI-ESM1-2-HR_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'MPI-ESM1-2-LR': '/g/data/lp01/CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR//historical/r1i1p1f1/Amon/tas/gr1.5/v20190710/tas_Amon_MPI-ESM1-2-LR_historical_r1i1p1f1_gr1.5_185001-201412.nc',
 'IPSL-CM6A-LR': '/g/data/lp01/CMIP6/CMIP/IPSL/IPSL-CM6A-LR//historical/r1i1p1f1/Amon/tas/gr1.5/v20180803/tas_Amon_IPSL-CM6

In [10]:
# names = []
# ds = []

# for name, path in models.items():
#     ds.append(xr.open_mfdataset(path, combine='by_coords'))
#     names.append(name)

In [11]:
# Now open each dataset, and store the dataset and model name in two arrays
# The try-except method allows all other files to be read even if one file does not exist or has some issues. 
names = []
ds = []
    
for name, path in models.items():
    try:
        ds.append(xr.open_mfdataset(path, combine='by_coords'))
        names.append(name)
    except OSError:
        # No files read, move on to the next
        continue



In [12]:
print(ds)

[<xarray.Dataset>
Dimensions:    (bnds: 2, lat: 120, lon: 240, time: 1980)
Coordinates:
  * time       (time) datetime64[ns] 1850-01-16T12:00:00 ... 2014-12-16T12:00:00
  * lon        (lon) float64 0.0 1.5 3.0 4.5 6.0 ... 354.0 355.5 357.0 358.5
  * lat        (lat) float64 -89.25 -87.75 -86.25 -84.75 ... 86.25 87.75 89.25
    height     float64 ...
Dimensions without coordinates: bnds
Data variables:
    time_bnds  (time, bnds) datetime64[ns] dask.array<chunksize=(1980, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(1980, 120, 240), meta=np.ndarray>
Attributes:
    CDI:                       Climate Data Interface version 1.9.7.1 (http:/...
    history:                   Fri Jan 10 11:57:16 2020: cdo -O -L -P 28 -rem...
    source:                    MPI-ESM1.2-HAM (2017): \naerosol: HAM2.3\natmo...
    institution:               ETH Zurich, Switzerland; Max Planck Institut f...
    Conventions:               CF-1.7 CMIP-6.2
    activity_id:        

In [70]:
# Now open each dataset, and store the dataset and model name in two arrays
# The try-except method allows all other files to be read even if one file does not exist or has some issues. 
# By deleting the time and height coordinates we are removing any differences in height and time.  
# (We can add the time back later). 
names = []
ds = []

for name, path in models.items():
    try:
        d = xr.open_mfdataset(path, combine='by_coords')
        d = d.sel(time= '18500101','19001231')
        del d['time']
        #del d['time']
        #del d['height']
        ds.append(d)
        names.append(name)
    except OSError:
        # No files read, move on to the next
        continue

        



SyntaxError: positional argument follows keyword argument (<ipython-input-70-c5803f3cde01>, line 11)

In [68]:
# Combine the individual data sets into a single dataset, with the co-ordinate
# 'model' representing the source model

multi_model = xr.concat(ds, dim='model')
multi_model.coords['model'] = names

multi_model

ValueError: must supply at least one object to concatenate

In [None]:
multi_model.coords['time'] = xr.open_mfdataset(models['ACCESS-CM2'], combine='by_coords')['time']

In [71]:
# A dictionary with the paths to the data for each model
# In reality these runs are not compatible (they use different calendars)
# so the paths here are not correct

dictionary = {
    'AS-RCEC':'/g/data/lp01/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/tas/gr1.5/v20200218/*.nc',
    'BCC-CSM2-MR':'/g/data/lp01/CMIP6/CMIP/BCC/BCC-CSM2-MR/historical/r1i1p1f1/Amon/tas/gr1.5/v20181126/*.nc',
    'BCC-ESM1':'/g/data/lp01/CMIP6/CMIP/BCC/BCC-ESM1/historical/r1i1p1f1/Amon/tas/gr1.5/v20181214/*.nc',
    'CAMS':'/g/data/lp01/CMIP6/CMIP/CAMS/CAMS-CSM1-0/historical/r1i1p1f1/Amon/tas/gr1.5/v20190708/*.nc',
    
}

In [72]:
# You can loop over the names and values in a dictionary

for name2, path2 in dictionary.items():
    print(name2, path2)

AS-RCEC /g/data/lp01/CMIP6/CMIP/AS-RCEC/TaiESM1/historical/r1i1p1f1/Amon/tas/gr1.5/v20200218/*.nc
BCC-CSM2-MR /g/data/lp01/CMIP6/CMIP/BCC/BCC-CSM2-MR/historical/r1i1p1f1/Amon/tas/gr1.5/v20181126/*.nc
BCC-ESM1 /g/data/lp01/CMIP6/CMIP/BCC/BCC-ESM1/historical/r1i1p1f1/Amon/tas/gr1.5/v20181214/*.nc
CAMS /g/data/lp01/CMIP6/CMIP/CAMS/CAMS-CSM1-0/historical/r1i1p1f1/Amon/tas/gr1.5/v20190708/*.nc


In [73]:
# Now open each dataset, and store the dataset and model name in two arrays

names2 = []
ds2 = []
for name2, path2 in dictionary.items():
    ds2.append(xr.open_mfdataset(path2, combine='by_coords'))
    names2.append(name2)

In [75]:
# Combine the individual data sets into a single dataset, with the co-ordinate
# 'model' representing the source model

multi_model2 = xr.concat(ds2, dim='model')
multi_model2.coords['model'] = names2

multi_model2

Unnamed: 0,Array,Chunk
Bytes,126.72 kB,31.68 kB
Shape,"(4, 1980, 2)","(1, 1980, 2)"
Count,16 Tasks,4 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 126.72 kB 31.68 kB Shape (4, 1980, 2) (1, 1980, 2) Count 16 Tasks 4 Chunks Type object numpy.ndarray",2  1980  4,

Unnamed: 0,Array,Chunk
Bytes,126.72 kB,31.68 kB
Shape,"(4, 1980, 2)","(1, 1980, 2)"
Count,16 Tasks,4 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,912.38 MB,228.10 MB
Shape,"(4, 1980, 120, 240)","(1, 1980, 120, 240)"
Count,16 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 912.38 MB 228.10 MB Shape (4, 1980, 120, 240) (1, 1980, 120, 240) Count 16 Tasks 4 Chunks Type float32 numpy.ndarray",4  1  240  120  1980,

Unnamed: 0,Array,Chunk
Bytes,912.38 MB,228.10 MB
Shape,"(4, 1980, 120, 240)","(1, 1980, 120, 240)"
Count,16 Tasks,4 Chunks
Type,float32,numpy.ndarray


In [76]:
multi_model2.transpose('model',...,True)

Unnamed: 0,Array,Chunk
Bytes,126.72 kB,31.68 kB
Shape,"(4, 1980, 2)","(1, 1980, 2)"
Count,16 Tasks,4 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 126.72 kB 31.68 kB Shape (4, 1980, 2) (1, 1980, 2) Count 16 Tasks 4 Chunks Type object numpy.ndarray",2  1980  4,

Unnamed: 0,Array,Chunk
Bytes,126.72 kB,31.68 kB
Shape,"(4, 1980, 2)","(1, 1980, 2)"
Count,16 Tasks,4 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,912.38 MB,228.10 MB
Shape,"(4, 1980, 120, 240)","(1, 1980, 120, 240)"
Count,16 Tasks,4 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 912.38 MB 228.10 MB Shape (4, 1980, 120, 240) (1, 1980, 120, 240) Count 16 Tasks 4 Chunks Type float32 numpy.ndarray",4  1  240  120  1980,

Unnamed: 0,Array,Chunk
Bytes,912.38 MB,228.10 MB
Shape,"(4, 1980, 120, 240)","(1, 1980, 120, 240)"
Count,16 Tasks,4 Chunks
Type,float32,numpy.ndarray


Right now I have all the models stored in one array so from hereafter I can calculate anomalies etc.  

In [108]:
# calculate an anomaly for each model
monthly_anomaly=[]
seasonal_anomaly=[]
count = 0
for m in multi_model2.model:
    monthly_anomaly.append(func.monthly_anom(multi_model2.sel(model=m), '18500101', '19001231'))
    seasonal_anomaly.append(func.seasonal_anom(multi_model2.sel(model=m), '18500101', '19001231'))
    #print(multi_model2.sel(model=m))
    print('hey')
    count = count + 1

#tas = multi_model2.tas.sel(lat=slice(-50,-5), lon=slice(110,160))

  return self.array[key]


hey


  return self.array[key]


hey


  return self.array[key]


hey


  return self.array[key]


hey


In [113]:
# Combine the individual data sets into a single dataset, with the co-ordinate
# 'model' representing the source model

multi_monthly_anom = xr.concat(monthly_anomaly, dim='model')
#multi_monthly_anom.coords['model'] = names2


multi_seasonal_anom = xr.concat(seasonal_anomaly, dim='model')
#multi_seasonal_anom.coords['model'] = names2

In [119]:
# take the mean in longitutde and latitude
mean_mon = multi_monthly_anom.mean(dim=('lat','lon'))
multi_seasonal_anom.mean(dim=('lat','lon'))

#trying to plot but it ain't working
for m in mean_mon.model:
    mean_mon.sel(model=m).tas.plot
    

In [121]:
mean_mon.sel(model=m).tas


Unnamed: 0,Array,Chunk
Bytes,7.92 kB,4 B
Shape,"(1980,)","(1,)"
Count,42084 Tasks,1980 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 7.92 kB 4 B Shape (1980,) (1,) Count 42084 Tasks 1980 Chunks Type float32 numpy.ndarray",1980  1,

Unnamed: 0,Array,Chunk
Bytes,7.92 kB,4 B
Shape,"(1980,)","(1,)"
Count,42084 Tasks,1980 Chunks
Type,float32,numpy.ndarray


In [None]:
# plot the temperature anomalies for each month throughout the time period
fig, ax = plt.subplots() # make a figure

tas_anom_mon.plot(color = 'red', ax=ax) #plot the anomalies
plt.title('Monthly temperature anomalies', fontsize=16)
plt.ylabel('Temperature anomaly [C]') # this overrides the label which comes on automatically using the xarray title

ax.grid(which='major', linestyle='-', linewidth='0.5', color='black') # customise major grid
ax.minorticks_on() # need this line in order to get the minor grid lines 
ax.grid(which='minor', linestyle=':', linewidth='0.5', color='black')

fig.set_figheight(8) # these two parameters change the figure height and width 
fig.set_figwidth(15)
#fig.suptitle('example figure title', fontsize=16)

In [102]:
# Mean over the model co-ordinate

seasonal_anomaly.mean('model')

ValueError: Dataset does not contain the dimensions: ['model']