In [1]:
# imports
import xarray as xr
import numpy as np
import glob
import warnings

Functions for preprocessing data.

In [2]:
# target grid after coarsening: 96 latitudes spanning [-90, 90] (both poles
# included) and 144 longitudes spanning [0, 360) (360 itself excluded)
coarse_latitude = np.linspace(-90, 90, num=96, endpoint=True)
coarse_longitude = np.linspace(0, 360, num=144, endpoint=False)


# function to reduce the gridding of latitude and longitude in half
def coarsen_lat_lon(dataset):
    """Halve the lat/lon resolution of ``dataset`` and relabel the grid.

    Every 2x2 window of (lat, lon) cells is averaged, then the ``lat`` and
    ``lon`` coordinates are overwritten with the module-level
    ``coarse_latitude`` / ``coarse_longitude`` arrays and tagged with
    ``units="degrees"``.

    Parameters
    ----------
    dataset : object with ``lat`` and ``lon`` dimensions
        Must coarsen to exactly ``len(coarse_latitude)`` latitudes and
        ``len(coarse_longitude)`` longitudes, otherwise the coordinate
        assignment does not fit.

    Returns
    -------
    The coarsened dataset with relabelled, unit-tagged coordinates.
    """
    dataset = dataset.coarsen(lat=2, lon=2).mean()
    for name, values in (('lat', coarse_latitude), ('lon', coarse_longitude)):
        dataset[name] = values
        # assign_attrs() returns a *new* object and leaves the dataset
        # untouched, so the units have to be written into attrs in place
        dataset[name].attrs['units'] = "degrees"
    return dataset

In [3]:
# process data into desired format
def preprocess(dataset, variable):
    """Reduce a raw dataset to yearly means of ``variable`` on the coarse grid.

    Steps, in order: drop the ``nbnd`` dimension (and everything defined on
    it), coarsen lat/lon with ``coarsen_lat_lon``, average over each calendar
    year, keep only the year/lon/lat coordinates plus ``variable``, rename
    ``year`` to ``time``, load ``variable`` into memory as float64 and clear
    the dataset-level attributes.

    Note that the yearly mean is a plain mean over the time steps in each
    year: every time step carries the same weight.
    """
    coarse = coarsen_lat_lon(dataset.drop_dims('nbnd'))
    # one value per calendar year; the grouping coordinate is called "year"
    yearly = coarse.groupby("time.year").mean("time")
    yearly = yearly[['year', 'lon', 'lat', variable]].rename(year="time")
    # materialise the (possibly lazy) data, then force double precision
    yearly[variable] = yearly[variable].compute().astype(np.float64)
    # discard global metadata inherited from the source files
    yearly.attrs = {}
    return yearly

Functions and variables to get data.

In [4]:
# scenarios: the training subset, and the full list (training + ssp245)
train_scenarios = ['historical', 'ssp126', 'ssp370', 'ssp585']
scenarios = train_scenarios + ['ssp245']

# ensemble members (variant labels) to use for each experiment
simulation_dict = {
    'hist-aer': [f'r{n}i1p1f1' for n in (1, 3)],
    'hist-GHG': [f'r{n}i1p1f1' for n in range(1, 4)],
    'ssp370-lowNTCF': [f'r{n}i2p1f1' for n in (2, 3)],
    'historical': [f'r{n}i1p1f1' for n in range(1, 12)],
}

# every SSP scenario uses the single member r4i1p1f1
for scenario in ('ssp126', 'ssp245', 'ssp370', 'ssp585'):
    simulation_dict[scenario] = ['r4i1p1f1']

In [5]:
simulation_dict

{'hist-aer': ['r1i1p1f1', 'r3i1p1f1'],
 'hist-GHG': ['r1i1p1f1', 'r2i1p1f1', 'r3i1p1f1'],
 'ssp370-lowNTCF': ['r2i2p1f1', 'r3i2p1f1'],
 'historical': ['r1i1p1f1',
  'r2i1p1f1',
  'r3i1p1f1',
  'r4i1p1f1',
  'r5i1p1f1',
  'r6i1p1f1',
  'r7i1p1f1',
  'r8i1p1f1',
  'r9i1p1f1',
  'r10i1p1f1',
  'r11i1p1f1'],
 'ssp126': ['r4i1p1f1'],
 'ssp245': ['r4i1p1f1'],
 'ssp370': ['r4i1p1f1'],
 'ssp585': ['r4i1p1f1']}

In [6]:
# get MIP associated with particular scenario
def get_MIP(scenario):
    """Return the name of the MIP that a scenario belongs to.

    'historical' maps to 'CMIP', any other name starting with 'hist' maps to
    'DAMIP', 'ssp370-lowNTCF' maps to 'AerChemMIP', and everything else maps
    to 'ScenarioMIP'.
    """
    # the exact match must be tested before the 'hist' prefix check,
    # because 'historical' itself starts with 'hist'
    if scenario == 'historical':
        return 'CMIP'
    if scenario.startswith('hist'):
        return 'DAMIP'
    return 'AerChemMIP' if scenario == 'ssp370-lowNTCF' else 'ScenarioMIP'

In [7]:
# get files based on params
def get_file(scenario, simulation, variable, root="/glade/collections/cmip/CMIP6"):
    """Return the sorted list of netCDF paths for one scenario/simulation/variable.

    The search pattern is
    ``{root}/{mip}/NCAR/CESM2/{scenario}/{simulation}/Amon/{variable}/gn/latest/*.nc``
    where ``mip`` comes from ``get_MIP(scenario)``.

    Parameters
    ----------
    scenario, simulation, variable : str
        Path components identifying the run and the variable.
    root : str, optional
        Top-level directory of the archive. Defaults to the location that
        was previously hard-coded, so existing callers are unaffected.

    Returns
    -------
    list of str
        Matching paths in sorted order; empty when nothing matches.
    """
    mip = get_MIP(scenario)
    pattern = f"{root}/{mip}/NCAR/CESM2/{scenario}/{simulation}/Amon/{variable}/gn/latest/*.nc"
    # glob returns matches in filesystem-dependent order; sort so that the
    # result is reproducible from run to run
    return sorted(glob.glob(pattern))

In [8]:
# get mean of all ensemble members for each scenario for a given variable
def get_variable_data(variable, scenarios, simulation_dict):
    """Build the ensemble-mean dataset of ``variable`` for every scenario.

    Parameters
    ----------
    variable : str
        Name of the variable to load (used both in the file path and as the
        data variable name).
    scenarios : iterable of str
        Scenarios to process.
    simulation_dict : mapping of str to list of str
        For each scenario, the simulations (ensemble members) to average.

    Returns
    -------
    dict
        Maps each scenario to the mean of its preprocessed ensemble members.

    Raises
    ------
    KeyError
        If a scenario is missing from ``simulation_dict``.
    ValueError
        If a scenario has no simulations listed.
    FileNotFoundError
        If no files are found for a scenario/simulation/variable.
    """
    var_dict = {}
    for scenario in scenarios:
        # get list of all simulations available
        simulation_list = simulation_dict[scenario]
        if not simulation_list:
            raise ValueError(
                f"no simulations listed for scenario {scenario!r}"
            )
        ensembles = []
        for sim in simulation_list:
            files = get_file(scenario, sim, variable)
            if not files:
                raise FileNotFoundError(
                    f"no files found for scenario {scenario!r}, "
                    f"simulation {sim!r}, variable {variable!r}"
                )
            # warnings are silenced only while the files are being opened
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                dataset = xr.open_mfdataset(files)
            # preprocess() loads the selected variable into memory, so the
            # underlying files can be closed as soon as it returns
            with dataset:
                ensembles.append(preprocess(dataset, variable))
        # mean over all ensemble members
        var_dict[scenario] = sum(ensembles) / len(ensembles)
    return var_dict

Get and save data.

In [9]:
# build the per-scenario ensemble means for each variable
hurs_data = get_variable_data('hurs', scenarios=scenarios, simulation_dict=simulation_dict)
tas_data = get_variable_data('tas', scenarios=scenarios, simulation_dict=simulation_dict)

In [10]:
tas_data

{'historical': <xarray.Dataset>
 Dimensions:  (time: 165, lon: 144, lat: 96)
 Coordinates:
   * time     (time) int64 1850 1851 1852 1853 1854 ... 2010 2011 2012 2013 2014
   * lon      (lon) float64 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5
   * lat      (lat) float64 -90.0 -88.11 -86.21 -84.32 ... 84.32 86.21 88.11 90.0
 Data variables:
     tas      (time, lat, lon) float64 225.2 225.1 225.1 ... 260.4 260.4 260.4,
 'ssp126': <xarray.Dataset>
 Dimensions:  (time: 86, lon: 144, lat: 96)
 Coordinates:
   * time     (time) int64 2015 2016 2017 2018 2019 ... 2096 2097 2098 2099 2100
   * lon      (lon) float64 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5
   * lat      (lat) float64 -90.0 -88.11 -86.21 -84.32 ... 84.32 86.21 88.11 90.0
 Data variables:
     tas      (time, lat, lon) float64 227.7 227.6 227.5 ... 261.9 261.9 261.9,
 'ssp370': <xarray.Dataset>
 Dimensions:  (time: 86, lon: 144, lat: 96)
 Coordinates:
   * time     (time) int64 2015 2016 2017 2018 2019 ... 2096 20

In [11]:
# write each scenario's ensemble mean to its own netCDF file in the current
# working directory: all hurs files first, then all tas files
for scenario in scenarios:
    hurs_scenario = hurs_data[scenario]
    hurs_scenario.to_netcdf(f'hurs_{scenario}.nc')
for scenario in scenarios:
    tas_scenario = tas_data[scenario]
    tas_scenario.to_netcdf(f'tas_{scenario}.nc')

In [12]:
# tas data is in Kelvin
# hurs data is in % (may be over 100%)