In [43]:
# imports
import xarray as xr
import numpy as np
import glob
import warnings

Functions for preprocessing data.

In [2]:
# Coordinate labels for the coarsened grid:
# 96 latitudes spanning -90..90 (both poles included) and
# 144 longitudes starting at 0 in 2.5-degree steps (360 itself excluded).
coarse_latitude = np.linspace(start=-90, stop=90, num=96)
coarse_longitude = np.linspace(start=0, stop=360, num=144, endpoint=False)


# function to reduce the gridding of latitude and longitude in half
def coarsen_lat_lon(dataset):
    """Halve the lat/lon resolution of ``dataset`` and relabel the coarse grid.

    Every 2x2 block of (lat, lon) cells is replaced by its mean. The resulting
    ``lat`` and ``lon`` coordinates are then overwritten with the module-level
    ``coarse_latitude`` and ``coarse_longitude`` arrays and tagged with
    ``units="degrees"``.

    Parameters
    ----------
    dataset : xarray.Dataset
        Dataset with ``lat`` and ``lon`` dimensions. The coarsened grid must
        match the lengths of ``coarse_latitude`` (96) and ``coarse_longitude``
        (144), so the input is expected to have 192 latitudes and 288
        longitudes.

    Returns
    -------
    xarray.Dataset
        A new, coarsened dataset; the input is not modified.
    """
    coarse = dataset.coarsen(lat=2, lon=2).mean()
    # ``assign_attrs`` returns a new object instead of modifying in place, so
    # calling it and discarding the result never set the units. Attach the
    # attributes while assigning the coordinates instead.
    return coarse.assign_coords(
        lat=("lat", coarse_latitude, {"units": "degrees"}),
        lon=("lon", coarse_longitude, {"units": "degrees"}),
    )

In [3]:
# process data into desired format
def preprocess(dataset, variable):
    """Reduce a raw dataset to yearly means of ``variable`` on the coarse grid.

    Steps, in order: drop everything that uses the ``nbnd`` dimension, coarsen
    the lat/lon grid with ``coarsen_lat_lon``, average over time within each
    calendar year, keep only ``variable`` and its coordinates, rename the
    ``year`` coordinate to ``time``, load the data into memory as float64 and
    clear the dataset-level attributes.

    Parameters
    ----------
    dataset : xarray.Dataset
        Dataset containing ``variable`` with ``time``, ``lat``, ``lon`` and an
        ``nbnd`` dimension.
    variable : str
        Name of the data variable to keep.

    Returns
    -------
    xarray.Dataset
        Dataset holding only ``variable``, indexed by ``time`` (integer
        years), ``lon`` and ``lat``.
    """
    # remove the bounds dimension and every variable defined on it
    without_bands = dataset.drop_dims('nbnd')
    coarse = coarsen_lat_lon(without_bands)
    # one value per calendar year; the grouping key becomes the 'year' coordinate
    yearly = coarse.groupby("time.year").mean("time")
    yearly = yearly[['year', 'lon', 'lat', variable]].rename(year="time")
    # evaluate any lazy computation now, then store as float64
    yearly[variable] = yearly[variable].compute().astype(np.float64)
    # discard dataset-level (global) attributes
    yearly.attrs = {}
    return yearly

Functions and variables to get data.

In [4]:
# scenarios to process: four SSP projections plus the historical run
scenarios = [
    'ssp126',
    'ssp245',
    'ssp370',
    'ssp585',
    'historical',
]
# ensemble members used for every non-historical scenario
simulations = [
    'r10i1p1f1',
    'r11i1p1f1',
    'r4i1p1f1',
]
# ensemble members used for the historical scenario only: r1i1p1f1 .. r11i1p1f1
historical_simulations = [
    f'r{n}i1p1f1'
    for n in range(1, 12)
]

In [5]:
# get MIP associated with particular scenario
def get_MIP(scenario):
    """Return the MIP name for ``scenario``.

    The 'historical' scenario maps to 'CMIP'; every other scenario maps to
    'ScenarioMIP'.
    """
    return 'CMIP' if scenario == 'historical' else 'ScenarioMIP'

In [6]:
# get file based on params
def get_file(scenario, simulation, variable):
    """List the NetCDF files for one scenario / simulation / variable.

    Builds a glob pattern under the CMIP6 collection directory, using the MIP
    returned by ``get_MIP(scenario)``, and returns every matching ``*.nc``
    path.

    Returns
    -------
    list of str
        Matching file paths, in the order ``glob.glob`` yields them; empty if
        nothing matches.
    """
    mip = get_MIP(scenario)
    pattern = f"/glade/collections/cmip/CMIP6/{mip}/NCAR/CESM2/{scenario}/{simulation}/Amon/{variable}/gn/latest/*.nc"
    matches = glob.glob(pattern)
    return matches

In [7]:
# get mean of all ensemble members for each scenario for a given variable
def get_variable_data(variable):
    """Compute the ensemble-mean, preprocessed dataset of ``variable`` per scenario.

    For every entry of the module-level ``scenarios`` list, each ensemble
    member's files are opened, run through ``preprocess`` and averaged
    together. The 'historical' scenario uses ``historical_simulations``; all
    other scenarios use ``simulations``.

    Parameters
    ----------
    variable : str
        Name of the variable to load (used both in the file path and as the
        data variable name).

    Returns
    -------
    dict
        Maps each scenario name to the mean of its preprocessed ensemble
        members.

    Raises
    ------
    FileNotFoundError
        If no files match for some scenario / simulation / variable
        combination. This is a subclass of ``OSError``, which is what
        ``xr.open_mfdataset`` raises for an empty file list.
    """
    var_dict = {}
    for scenario in scenarios:
        # the historical scenario has its own list of ensemble members
        if scenario == 'historical':
            simulation_list = historical_simulations
        else:
            simulation_list = simulations

        ensembles = []
        for sim in simulation_list:
            files = get_file(scenario, sim, variable)
            if not files:
                # fail early with enough context to find the missing directory
                raise FileNotFoundError(
                    f"no files found for variable {variable!r}, "
                    f"scenario {scenario!r}, simulation {sim!r}"
                )
            # silence warnings emitted while opening the files
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                dataset = xr.open_mfdataset(files)
            # preprocess() loads the selected variable into memory, so the
            # underlying files can be closed as soon as it returns
            with dataset:
                ensembles.append(preprocess(dataset, variable))

        # mean over all ensemble members
        var_dict[scenario] = sum(ensembles) / len(ensembles)
    return var_dict

Get and save data.

In [8]:
# get data
# Each call returns a dict mapping every scenario name to the ensemble-mean
# dataset of that variable. The two calls are kept as separate statements so
# that hurs_data is still defined if loading 'tas' fails.
hurs_data = get_variable_data('hurs')
tas_data = get_variable_data('tas')

In [44]:
# save data
# One NetCDF file per variable and scenario, written to the current working
# directory: all hurs files first, then all tas files.
for scenario in scenarios:
    hurs_path = f'hurs_{scenario}.nc'
    hurs_data[scenario].to_netcdf(hurs_path)
for scenario in scenarios:
    tas_path = f'tas_{scenario}.nc'
    tas_data[scenario].to_netcdf(tas_path)

In [None]:
# Units of the saved data:
# - tas is in Kelvin
# - hurs is in percent (values may exceed 100%)