# get CMIP6 data

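Notebook to search ESGF for monthly-mean (`Amon`) CMIP6 output and process it. For each model and each of the `piControl`, `historical` and `abrupt-4xCO2` experiments, it opens `tas`, `rlut`, `rsut`, `rlutcs` and `rsutcs` over OPeNDAP, averages them over longitude, and saves the result for five latitude bands under `data/`.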

## 1. Set-up

Modified from the "first way" in Ryan Abernathey's "CMIP6 in the Cloud Five Ways", see:
https://medium.com/pangeo/cmip6-in-the-cloud-five-ways-96b177abe396

In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
#from mpl_toolkits.basemap import Basemap

%matplotlib inline
%config InlineBackend.figure_format = 'retina' 

In [2]:
#!/usr/bin/env python
from __future__ import print_function
import requests
import xml.etree.ElementTree as ET
import numpy

# Author: Unknown
# I got the original version from a word document published by ESGF
# https://docs.google.com/document/d/1pxz1Kd3JHfFp8vR2JCVBfApbsHmbUQQstifhGNdc6U0/edit?usp=sharing

# API AT: https://github.com/ESGF/esgf.github.io/wiki/ESGF_Search_REST_API#results-pagination

def esgf_search(server="https://esgf-node.llnl.gov/esg-search/search",
                files_type="OPENDAP", local_node=True, project="CMIP6",
                verbose=False, format="application%2Fsolr%2Bjson",
                use_csrf=False, **search):
    """Query the ESGF search REST API for files and return their URLs.

    Parameters
    ----------
    server : str
        Base URL of the search endpoint.
    files_type : str
        Access-method tag to keep (compared upper-cased against the last
        ``|``-separated field of each entry in a document's ``url`` list).
    local_node : bool
        When true, ``distrib=false`` is added to the query.
    project : str
        Value sent as the ``project`` search facet.
    verbose : bool
        When true, every key/value of every returned document is printed.
    format : str
        Value sent as the ``format`` query parameter. It is inserted into the
        URL verbatim, so it must already be percent-encoded.
    use_csrf : bool
        When true, a first GET is made to ``server`` and the CSRF cookie it
        sets is sent back as ``csrfmiddlewaretoken``.
    **search
        Additional facets, appended to the query string as ``key=value``.

    Returns
    -------
    list of str
        Sorted URLs of the matching access method, with anything from the
        first ``.html`` onwards stripped.

    Raises
    ------
    requests.HTTPError
        If a search request returns an error status.
    """
    # Copy so the query is assembled on our own dict; key order is kept because
    # it determines the order of parameters in the (printed) request URL.
    payload = dict(search)
    payload["project"] = project
    payload["type"] = "File"
    if local_node:
        payload["distrib"] = "false"

    files_type = files_type.upper()
    all_files = []

    # The context manager closes the session (and its pooled connections)
    # even when a request raises.
    with requests.session() as client:
        if use_csrf:
            client.get(server)
            if 'csrftoken' in client.cookies:
                # Django 1.6 and up
                csrftoken = client.cookies['csrftoken']
            else:
                # older versions
                csrftoken = client.cookies['csrf']
            payload["csrfmiddlewaretoken"] = csrftoken

        payload["format"] = format

        offset = 0
        num_found = None  # unknown until the first page has been read
        while num_found is None or offset < num_found:
            payload["offset"] = offset
            # Values are joined without URL-encoding on purpose: the default
            # ``format`` is already percent-encoded.
            query = "&".join("{}={}".format(k, v) for k, v in payload.items())
            url = "{}/?{}".format(server, query)
            print(url)
            r = client.get(url)
            r.raise_for_status()
            resp = r.json()["response"]
            num_found = int(resp["numFound"])
            docs = resp["docs"]
            if not docs:
                # An empty page while more results are advertised would never
                # advance ``offset``; stop instead of looping forever.
                break
            offset += len(docs)
            for doc in docs:
                if verbose:
                    for k in doc:
                        print("{}: {}".format(k, doc[k]))
                for entry in doc.get("url", []):
                    parts = entry.split("|")
                    if parts[-1] == files_type:
                        all_files.append(parts[0].split(".html")[0])
    return sorted(all_files)

In [3]:
def sdat(c, F):
    """Save ``c`` under the key ``u`` with ``np.savez`` and return 0.

    Parameters
    ----------
    c : array-like
        Data to store; it is written as the archive member ``u``.
    F : str, path-like or file object
        Destination handed to ``np.savez``. For string/path targets NumPy
        appends ``.npz`` when the name does not already end with it, so a
        target such as ``"x.dat"`` ends up on disk as ``"x.dat.npz"``.

    Returns
    -------
    int
        Always 0.
    """
    import os  # local import: only needed to prepare the output directory

    # Create the parent directory for path-like targets so a fresh checkout
    # (without e.g. ``data/``) does not fail; file objects are left alone.
    if isinstance(F, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(F))
        if parent:
            os.makedirs(parent, exist_ok=True)

    np.savez(F, u=c)
    return 0
    
import scipy.stats as ss
def lin_regression(var1, var2):
    """
    Least-squares linear fit computed by ``scipy.stats.linregress``.

    ``var1`` is passed as the first (x) argument and ``var2`` as the second
    (y) argument, i.e. the fit is ``var2 ~ slope * var1 + intercept``.

    Returns: slope, intercept, r_value, p_value, std_err
    """
    fit = ss.linregress(var1, var2)
    return fit


In [4]:
# Models for which data is available. The position of each name matters: the
# per-experiment file-index lists used by the download cells are aligned with it.
model = [
    "CanESM5",
    "CESM2",
    "CNRM-CM6-1",
    "MRI-ESM2-0",
    "CNRM-ESM2-1",
    "IPSL-CM6A-LR",
    "UKESM1-0-LL",
    "CESM2-WACCM",
    "GFDL-CM4",
    "MIROC-ES2L",
    "HadGEM3-GC31-LL",
    "GISS-E2-1-H",
    "GFDL-ESM4",
    "GISS-E2-1-G",
    "BCC-CSM2-MR",
    "BCC-ESM1",
    "INM-CM4-8",
    "NorESM2-LM",
    "MPI-ESM1-2-HR",
    "MIROC6",
    "NorESM2-MM",
    "FGOALS-f3-L",
]
m = len(model)

# Edges of the latitude bands used as `lat` slice bounds when saving:
# lim1 holds the first bound of each band, lim2 the second.
_lat_edges = [-90, -60, -30, 30, 60, 90]
lim1 = _lat_edges[:-1]
lim2 = _lat_edges[1:]

## First do piControl

In [5]:
# Once the list of files has been pulled up, download files[lev:lev2].
# One (start, stop) pair per model, in the same order as `model`.
_pi_ranges = [
    (5, 8),       # CanESM5
    (12, 24),     # CESM2
    (0, 1),       # CNRM-CM6-1
    (2, 3),       # MRI-ESM2-0
    (0, 1),       # CNRM-ESM2-1
    (0, 1),       # IPSL-CM6A-LR
    (8, 14),      # UKESM1-0-LL
    (7, 14),      # CESM2-WACCM
    (5, 10),      # GFDL-CM4
    (3, 6),       # MIROC-ES2L
    (0, 10),      # HadGEM3-GC31-LL
    (14, 20),     # GISS-E2-1-H
    (5, 10),      # GFDL-ESM4
    (36, 46),     # GISS-E2-1-G
    (0, 1),       # BCC-CSM2-MR
    (0, 1),       # BCC-ESM1
    (5, 10),      # INM-CM4-8
    (0, 50),      # NorESM2-LM
    (100, 200),   # MPI-ESM1-2-HR
    (8, 13),      # MIROC6
    (0, 50),      # NorESM2-MM
    (0, 1),       # FGOALS-f3-L
]
lev = [start for start, _ in _pi_ranges]
lev2 = [stop for _, stop in _pi_ranges]

In [None]:
#For checking availability and format of data
# `d` selects the model to inspect (index into `model`). It has to be set here:
# nothing earlier in the notebook defines it, so on a fresh kernel the cell
# would otherwise stop with a NameError. Change it to look at another model.
d = 21
ids = ['tas', 'rlut', 'rsut', 'rlutcs', 'rsutcs']
# Only the first variable is checked here; widen the range to check more.
for i in range(1):
    result3 = esgf_search(activity_id='CMIP', table_id='Amon', source_id=model[d], 
                          variable_id=ids[i], experiment_id='piControl')

    print(result3)
    files_to_open = result3[lev[d]:lev2[d]]
    ds = xr.open_mfdataset(files_to_open)


In [7]:
#Download data
# For every model: look up the piControl files of each variable, open the
# selected file range, average over longitude and save one file per latitude band.
variables = ['tas', 'rlut', 'rsut', 'rlutcs', 'rsutcs']

for d in range( m ):
    print("Doing: ", model[d])

    # One ESGF query per variable; each returns a sorted list of file URLs.
    results = [esgf_search(activity_id='CMIP', table_id='Amon', source_id=model[d],
                           variable_id=var, experiment_id='piControl')
               for var in variables]

    first, last = lev[d], lev2[d]
    if last - first > 1:
        # Several files: combine them into one dataset per variable.
        datasets = [xr.open_mfdataset(urls[first:last]) for urls in results]
    else:
        # A single file: opened directly, chunked along time and without
        # decoding the time axis (unlike the multi-file branch).
        datasets = [xr.open_dataset(urls[first], chunks={'time': '100MB'}, decode_times=False)
                    for urls in results]

    # Zonal (longitude) mean of each variable.
    zonal_means = [ds[var].mean(dim='lon') for var, ds in zip(variables, datasets)]

    #Save regional data
    # np.savez (inside sdat) appends ".npz" to these names, so the files on
    # disk end in ".dat.npz".
    for south, north in zip(lim1, lim2):
        for var, field in zip(variables, zonal_means):
            sdat( field.sel(lat=slice(south, north)),
                  "data/" + model[d] + "_PI_" + var + "_" + str(south) + "_" + str(north) + ".dat")


Doing:  NorESM2-MM
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable_id=tas&experiment_id=piControl&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable_id=tas&experiment_id=piControl&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=10
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable_id=tas&experiment_id=piControl&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=20
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable_id=tas&experiment_id=piControl&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=30
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable

OSError: [Errno -68] NetCDF: I/O failure: b'http://esgf-data3.diasjp.net/thredds/dodsC/esg_dataroot/CMIP6/CMIP/NCC/NorESM2-MM/piControl/r1i1p1f1/Amon/tas/gn/v20191108/tas_Amon_NorESM2-MM_piControl_r1i1p1f1_gn_128001-128912.nc'

## Historical

In [3]:
# Once the list of files has been pulled up, download files[hlev:hlev2].
# One (start, stop) pair per model, in the same order as `model`.
_hist_ranges = [
    (40, 41),             # CanESM5
    (40, 44),             # CESM2
    (29, 30),             # CNRM-CM6-1
    (6, 7),               # MRI-ESM2-0
    (0, 1),               # CNRM-ESM2-1
    (0, 1),               # IPSL-CM6A-LR
    (2, 4),               # UKESM1-0-LL
    (0, 1),               # CESM2-WACCM
    (3, 5),               # GFDL-CM4
    (0, 1),               # MIROC-ES2L
    (0, 2),               # HadGEM3-GC31-LL
    (80, 84),             # GISS-E2-1-H
    (0, 2),               # GFDL-ESM4
    (80, 84),             # GISS-E2-1-G
    (0, 1),               # BCC-CSM2-MR
    (0, 1),               # BCC-ESM1
    (3, 6),               # INM-CM4-8
    (34, 51),             # NorESM2-LM
    (33 * 10, 33 * 11),   # MPI-ESM1-2-HR
    (2, 4),               # MIROC6
    (17, 34),             # NorESM2-MM
    (0, 1),               # FGOALS-f3-L
]
hlev = [start for start, _ in _hist_ranges]
hlev2 = [stop for _, stop in _hist_ranges]


In [63]:
# Availability/format check for the historical run of a single model:
# list the files found for every variable and try to open the selected range.
d = 21
ids = ['tas', 'rlut', 'rsut', 'rlutcs', 'rsutcs']
first, last = hlev[d], hlev2[d]
for i, variable in enumerate(ids):
    result3 = esgf_search(
        activity_id='CMIP',
        table_id='Amon',
        source_id=model[d],
        variable_id=variable,
        experiment_id='historical',
    )
    print(result3)
    files_to_open = result3[first:last]
    ds = xr.open_mfdataset(files_to_open)


https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=tas&experiment_id=historical&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
['http://esg.lasg.ac.cn/thredds/dodsC/esg_dataroot/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Amon/tas/gr/v20190927/tas_Amon_FGOALS-f3-L_historical_r1i1p1f1_gr_185001-201412.nc', 'http://esg.lasg.ac.cn/thredds/dodsC/esg_dataroot/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r2i1p1f1/Amon/tas/gr/v20190927/tas_Amon_FGOALS-f3-L_historical_r2i1p1f1_gr_185001-201412.nc', 'http://esg.lasg.ac.cn/thredds/dodsC/esg_dataroot/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r3i1p1f1/Amon/tas/gr/v20190927/tas_Amon_FGOALS-f3-L_historical_r3i1p1f1_gr_185001-201412.nc', 'http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/CAS/FGOALS-f3-L/historical/r1i1p1f1/Amon/tas/gr/v20190927/tas_Amon_FGOALS-f3-L_historical_r1i1p1f1_gr_185001-201412.nc', 'http://esgf-data1.llnl.gov/thredds/dodsC/css03_

In [64]:
#Download data
# For every model: look up the historical files of each variable, open the
# selected file range, average over longitude and save one file per latitude band.
variables = ['tas', 'rlut', 'rsut', 'rlutcs', 'rsutcs']

for d in range( m ):
    print("Doing: ", model[d])

    # One ESGF query per variable; each returns a sorted list of file URLs.
    results = [esgf_search(activity_id='CMIP', table_id='Amon', source_id=model[d],
                           variable_id=var, experiment_id='historical')
               for var in variables]

    # The same file range is used for all five variables.
    first, last = hlev[d], hlev2[d]
    datasets = [xr.open_mfdataset(urls[first:last]) for urls in results]

    # Zonal (longitude) mean of each variable.
    zonal_means = [ds[var].mean(dim='lon') for var, ds in zip(variables, datasets)]

    # np.savez (inside sdat) appends ".npz" to these names, so the files on
    # disk end in ".dat.npz".
    for south, north in zip(lim1, lim2):
        for var, field in zip(variables, zonal_means):
            sdat( field.sel(lat=slice(south, north)),
                  "data/" + model[d] + "_historical_" + var + "_" + str(south) + "_" + str(north) + ".dat")


Doing:  FGOALS-f3-L
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=tas&experiment_id=historical&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=rlut&experiment_id=historical&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=rsut&experiment_id=historical&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=rlutcs&experiment_id=historical&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f

## Abrupt 4xCO2

In [65]:
# Once the list of files has been pulled up, download files[start:stop].
# The download cell uses alev:alev2 for tas, alev3:alev4 for rlut and
# alev5:alev6 for rsut, rlutcs and rsutcs. Entries follow the order of `model`.
alev = [2, 2, 6, 0, 0, 3, 0, 4, 3, 0, 2,
        3, 0, 6, 0, 0, 2, 50, 33, 0, 15, 0]
alev2 = [3, 3, 7, 1, 1, 4, 2, 7, 6, 1, 4,
         6, 2, 9, 1, 1, 4, 65, 66, 2, 30, 1]

# rlut: same as tas except for model 0 (CanESM5) and model 5 (IPSL-CM6A-LR).
alev3 = list(alev)
alev3[0], alev3[5] = 4, 11
alev4 = list(alev2)
alev4[0], alev4[5] = 5, 12

# rsut / rlutcs / rsutcs: same as tas except for model 0 (CanESM5).
alev5 = list(alev)
alev5[0] = 4
alev6 = list(alev2)
alev6[0] = 5

In [28]:
# Availability/format check for the abrupt-4xCO2 run of a single model:
# show the selected file range for every variable and try to open it.
d = 21
ids = ['tas', 'rlut', 'rsut', 'rlutcs', 'rsutcs']
first, last = alev[d], alev2[d]
for i, variable in enumerate(ids):
    result3 = esgf_search(
        activity_id='CMIP',
        table_id='Amon',
        source_id=model[d],
        variable_id=variable,
        experiment_id='abrupt-4xCO2',
    )
    files_to_open = result3[first:last]
    print(files_to_open)
    ds = xr.open_mfdataset(files_to_open)


https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable_id=tas&experiment_id=abrupt-4xCO2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable_id=tas&experiment_id=abrupt-4xCO2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=10
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=NorESM2-MM&variable_id=tas&experiment_id=abrupt-4xCO2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=20
['http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCC/NorESM2-MM/abrupt-4xCO2/r1i1p1f1/Amon/tas/gn/v20191108/tas_Amon_NorESM2-MM_abrupt-4xCO2_r1i1p1f1_gn_000101-001012.nc', 'http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCC/NorESM2-MM/abrupt-4xCO2/r1i1p1f1/Amon/tas/gn/v20191108/tas_Amon_NorESM2-MM_abru

OSError: [Errno -68] NetCDF: I/O failure: b'http://esgf-data1.llnl.gov/thredds/dodsC/css03_data/CMIP6/CMIP/NCC/NorESM2-MM/abrupt-4xCO2/r1i1p1f1/Amon/rsut/gn/v20191108/rsut_Amon_NorESM2-MM_abrupt-4xCO2_r1i1p1f1_gn_009101-010012.nc'

In [66]:
#Download data
# For every model: look up the abrupt-4xCO2 files of each variable, open the
# selected file range, average over longitude and save one file per latitude band.
# Each entry is (variable, start indices, stop indices); the index lists are
# aligned with `model`. tas and rlut have their own ranges, the rest share one.
selection = [
    ('tas', alev, alev2),
    ('rlut', alev3, alev4),
    ('rsut', alev5, alev6),
    ('rlutcs', alev5, alev6),
    ('rsutcs', alev5, alev6),
]

for d in range( m ):
    print("Doing: ", model[d])

    # One ESGF query per variable; each returns a sorted list of file URLs.
    results = [esgf_search(activity_id='CMIP', table_id='Amon', source_id=model[d],
                           variable_id=var, experiment_id='abrupt-4xCO2')
               for var, _, _ in selection]

    datasets = [xr.open_mfdataset(urls[start[d]:stop[d]])
                for urls, (_, start, stop) in zip(results, selection)]

    # Zonal (longitude) mean of each variable, kept together with its name.
    zonal_means = [(var, ds[var].mean(dim='lon'))
                   for (var, _, _), ds in zip(selection, datasets)]

    # np.savez (inside sdat) appends ".npz" to these names, so the files on
    # disk end in ".dat.npz".
    for south, north in zip(lim1, lim2):
        for var, field in zonal_means:
            sdat( field.sel(lat=slice(south, north)),
                  "data/" + model[d] + "_abrupt-4xCO2_" + var + "_" + str(south) + "_" + str(north) + ".dat")


Doing:  FGOALS-f3-L
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=tas&experiment_id=abrupt-4xCO2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=rlut&experiment_id=abrupt-4xCO2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=rsut&experiment_id=abrupt-4xCO2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=FGOALS-f3-L&variable_id=rlutcs&experiment_id=abrupt-4xCO2&project=CMIP6&type=File&distrib=false&format=application%2Fsolr%2Bjson&offset=0
https://esgf-node.llnl.gov/esg-search/search/?activity_id=CMIP&table_id=Amon&source_id=