# CMIP6 Compute NBWF/P

**The following steps are included in this notebook:**

1. Load netCDF files
2. Compute NBWF/P

3. Save and replace netCDF files

In [1]:
# ========== Packages ==========
import xarray as xr
import pandas as pd
import numpy as np
import dask
import os
import glob
from scipy.stats import pearsonr

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# ========= Helper function to open the dataset ========
def open_dataset(filename):
    """
    Open a single file with xarray.

    Args:
        filename (string): Path of the file to open.

    Returns:
        Dataset returned by ``xr.open_dataset`` for ``filename``.
    """
    return xr.open_dataset(filename)

In [3]:
# Define a helper function to open and merge datasets
def open_and_merge_datasets(folder, model, experiment_id, variables):
    """
    Open the regridded per-variable files of one model and merge them.

    For every entry of ``variables`` the file
    ``../../data/CMIP6/{experiment_id}/{folder}/{var}/CMIP.{model}.{experiment_id}.{var}_regridded.nc``
    is looked up. Variables without a matching file are reported and skipped.

    Args:
        folder (string): Sub-folder of the experiment directory that holds the data.
        model (string): Model name as used in the file names.
        experiment_id (string): Experiment name as used in the directory and file names.
        variables (list): Variable names; each variable has its own sub-directory.

    Returns:
        ds: Result of ``xr.merge`` over the datasets of all files that were found.
    """
    filepaths = []
    for var in variables:
        path = f'../../data/CMIP6/{experiment_id}/{folder}/{var}'
        matches = glob.glob(os.path.join(path, f'CMIP.{model}.{experiment_id}.{var}_regridded.nc'))
        if matches:
            filepaths.append(matches[0])
        else:
            # Name the missing variable and model; the empty glob result alone carries no information.
            print(f"No file found for variable '{var}' in model '{model}'.")

    datasets = [xr.open_dataset(filepath) for filepath in filepaths]
    ds = xr.merge(datasets)
    return ds

In [4]:
def _resolve_output_path(savepath, filename):
    """
    Create ``savepath`` if needed and return the path ``filename`` should be written to.

    If the target file already exists the user is asked whether it may be deleted.
    Any answer other than "y" redirects the output to ``temp_file.nc`` in the same directory.
    """
    nc_out = os.path.join(savepath, filename)
    os.makedirs(savepath, exist_ok=True)
    if os.path.exists(nc_out):
        inp = input(f"Delete old file {filename} (y/n):")
        if inp.lower() in ["y"]:
            os.remove(nc_out)
            print(f"File  with path: {nc_out} removed")
        else:
            filename = "temp_file.nc"
            nc_out = os.path.join(savepath, filename)
            print(f"Filename change to {filename}")
    return nc_out


def save_file(save_file, folder, save_var=True):
    """
    Save files as netCDF.

    Args:
        save_file (dict or dataset): Dictionary of xarray datasets or a single dataset.
            Every dataset must carry the attributes ``experiment_id`` and ``source_id``,
            which are used to build the output path.
        folder (string): Name of folder data is saved in.
        save_var (boolean): If True, data is saved separately for each variable.
            If False, one file is saved per dataset with all variables.

    Returns:
        nc_out: Path of the last file that was written, or None if nothing was written.
    """
    if isinstance(save_file, dict):
        datasets = save_file
    elif isinstance(save_file, xr.Dataset):
        # A Dataset is itself a mapping of variables, so a single one has to be
        # wrapped before it can be handled like a {name: dataset} dictionary.
        datasets = {'dataset': save_file}
    else:
        datasets = save_file

    # Stays None when there is nothing to save.
    nc_out = None

    if save_var:
        # Work on the datasets passed in, not on a notebook-level variable.
        for ds in datasets.values():
            coordinates_to_keep = {'time', 'lat', 'lon'}
            if any('depth' in ds[name].dims for name in ds.variables):
                coordinates_to_keep.add('depth')

            for var in ds:
                # Create a new dataset with only the desired variable
                ds_var = ds[[var]]

                # Set the desired coordinates
                coords_to_set = set(ds_var.variables).intersection(coordinates_to_keep)
                ds_var = ds_var.set_coords(list(coords_to_set))

                savepath = f'../../data/CMIP6/{ds_var.experiment_id}/{folder}/{var}/'
                filename = f'CMIP.{ds_var.source_id}.{ds_var.experiment_id}.{var}_regridded.nc'
                nc_out = _resolve_output_path(savepath, filename)

                # Save to netcdf file
                with dask.config.set(scheduler='threads'):
                    ds_var.to_netcdf(nc_out)
                    print(f"File with path: {nc_out} saved")

    else:
        for ds_in in datasets.values():
            filename = f'CMIP.{ds_in.source_id}.{ds_in.experiment_id}_regridded.nc'
            savepath = f'../../data/CMIP6/{ds_in.experiment_id}/{folder}'
            nc_out = _resolve_output_path(savepath, filename)

            # Save to netcdf file
            with dask.config.set(scheduler='threads'):
                ds_in.to_netcdf(nc_out)

    return nc_out

### 1. Load netCDF files

In [5]:
# Sub-folder of the experiment directory that is read from and written to
folder = 'preprocessed'

In [31]:
# ========= Define variables, experiment, models and folder ==============
variable = ['mrro', 'tran', 'pr']
experiment_id = 'historical'
source_id = ['TaiESM1', 'BCC-CSM2-MR',  'CanESM5', 'CNRM-CM6-1', 'CNRM-ESM2-1', 'IPSL-CM6A-LR', 'UKESM1-0-LL', 'MPI-ESM1-2-LR', 'CESM2-WACCM', 'NorESM2-MM', 'Ensemble mean', 'Ensemble median']
folder = 'preprocessed'

# ========= Default Dask scheduler for subsequent Dask computations ==========
dask.config.set(scheduler='processes')

# Build {model: merged dataset}. The files are opened one model after the other:
# the loader is called directly inside the comprehension, so there is no lazy
# task for Dask to schedule and wrapping the dictionary in dask.compute adds nothing.
ds_dict = {model: open_and_merge_datasets(folder, model, experiment_id, variable) for model in source_id}

In [19]:
# ========= Inspect the dictionary: model names, then the first dataset =======
print(list(ds_dict))
ds_dict[list(ds_dict)[0]]

['TaiESM1', 'BCC-CSM2-MR', 'CanESM5', 'CNRM-CM6-1', 'CNRM-ESM2-1', 'IPSL-CM6A-LR', 'UKESM1-0-LL', 'MPI-ESM1-2-LR', 'CESM2-WACCM', 'NorESM2-MM']


### 2. Compute NBWF/P

In [42]:
def compute_nbwfp(ds_dict):
    """
    Replace every dataset in ``ds_dict`` by one that only holds ``nbwfp``.

    ``nbwfp`` is computed per dataset as ``(mrro - tran) / pr``. The three input
    variables are dropped from the result. The datasets passed in are left
    untouched; only the dictionary entries are replaced.

    Args:
        ds_dict (dict): Dictionary of xarray datasets, each holding the
            variables ``mrro``, ``tran`` and ``pr``.

    Returns:
        ds_dict: The same dictionary object, now mapping each key to the new dataset.
    """
    for name, ds in ds_dict.items():
        # NOTE(review): cells with pr == 0 are not masked, so the division is
        # expected to give inf/NaN there — confirm this is intended.
        nbwfp = (ds['mrro'] - ds['tran']) / ds['pr']
        # assign() returns a new dataset instead of adding the variable to the input
        ds_dict[name] = ds.assign(nbwfp=nbwfp).drop_vars(['mrro', 'tran', 'pr'])

    return ds_dict

In [None]:
# Overwrites ds_dict: afterwards every dataset only holds 'nbwfp'. Re-running this cell
# without reloading the data fails, because 'mrro', 'tran' and 'pr' have been dropped.
ds_dict = compute_nbwfp(ds_dict)

In [None]:
# ========= Inspect the result: model names, then a map of one time step =======
print(list(ds_dict))
# Third model in the dictionary, sixth time step
ds_dict[list(ds_dict)[2]]['nbwfp'].isel(time=5).plot()

### 3. Save files

In [23]:
# Writes one file per variable and model (save_var defaults to True) and asks
# interactively before replacing an existing file. nc_out is the last path written.
nc_out = save_file(ds_dict, folder=folder)

File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.TaiESM1.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.BCC-CSM2-MR.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.CanESM5.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.CNRM-CM6-1.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.CNRM-ESM2-1.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.IPSL-CM6A-LR.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.UKESM1-0-LL.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.MPI-ESM1-2-LR.historical.EI_regridded.nc saved
File with path: ../../data/CMIP6/historical/preprocessed/EI/CMIP.CESM2-WACCM.historical.EI_regridded.nc saved
File with path: 

In [None]:
# Sanity check: re-open the file at nc_out (the path returned by save_file) and display it
xr.open_dataset(nc_out)