# CMIP6 Interpolating MRSOL and TSL depth

This step is only needed if the two variables (mrsol and tsl) are provided on different depth layers.

**The following steps are included in this notebook:**

1. Load netCDF files
2. Interpolate to common depth layers
3. Save and replace netcdf files

In [1]:
# ========== Packages ==========
import xarray as xr
import pandas as pd
import numpy as np
import dask
import os

### Functions

In [2]:
def open_dataset(filename):
    """
    Open a netCDF file as an xarray dataset.

    Args:
        filename (string): Path of the file to open.

    Returns:
        ds: Dataset returned by xr.open_dataset.
    """
    return xr.open_dataset(filename)

In [3]:
def _resolve_output_path(savepath, filename):
    """
    Return the path to write to, asking the user before replacing an existing file.

    Args:
        savepath (string): Directory the file is written to (created if missing).
        filename (string): Preferred file name inside savepath.

    Returns:
        nc_out: Path the caller should write to. If a file with the preferred
            name exists and the user declines to delete it, this is
            'temp_file.nc' inside savepath instead.
    """
    nc_out = os.path.join(savepath, filename)
    os.makedirs(savepath, exist_ok=True)
    if os.path.exists(nc_out):
        inp = input(f"Delete old file {filename} (y/n):")
        # strip() so that an answer typed with surrounding blanks still counts
        if inp.strip().lower() == "y":
            os.remove(nc_out)
            print(f"File  with path: {nc_out} removed")
        else:
            filename = "temp_file.nc"
            nc_out = os.path.join(savepath, filename)
            print(f"Filename change to {filename}")
    return nc_out


def save_file(save_file, folder, save_var=True):
    """
    Save files as netCDF.

    Output location is derived from the `experiment_id` and `source_id`
    attributes of each dataset:
    '../../data/CMIP6/<experiment_id>/<folder>/[<variable>/]'.
    If the target file already exists, the user is asked interactively
    whether it should be deleted; otherwise 'temp_file.nc' is written.

    Args:
        save_file (dict or dataset): Dictionary of xarray datasets or a single dataset.
        folder (string): Name of folder data is saved in.
        save_var (boolean): If True, data is saved separately for each variable. If False, one file is saved with all variables.

    Returns:
        nc_out: Path of the last file written, or None if nothing was written
            (e.g. an empty dictionary was passed).
    """

    # A single dataset is wrapped so that both input kinds share one code path.
    # Detected by attribute rather than by type so a plain dict is never wrapped.
    if hasattr(save_file, "data_vars"):
        save_file = {"dataset": save_file}

    # Defined up front so an empty input returns None instead of raising
    nc_out = None

    if save_var:
        for ds in save_file.values():
            # Same for every variable of this dataset, so evaluated only once
            has_depth = any('depth' in ds[name].dims for name in ds.variables)

            for var in ds:
                dimensions_to_keep = {'time', 'lat', 'lon'}
                coordinates_to_keep = {'time', 'lat', 'lon'}
                if has_depth:
                    dimensions_to_keep.add('depth')
                    coordinates_to_keep.add('depth')

                # Create a new dataset with only the desired variable
                ds_var = ds[[var]]

                # Keep only the desired dimensions (full slices, i.e. all values)
                ds_var = ds_var.isel({dim: slice(None) for dim in dimensions_to_keep.intersection(ds_var.dims)})

                # Set the desired coordinates
                coords_to_set = set(ds_var.variables).intersection(coordinates_to_keep)
                ds_var = ds_var.set_coords(list(coords_to_set))

                savepath = f'../../data/CMIP6/{ds_var.experiment_id}/{folder}/{var}/'
                filename = f'CMIP.{ds_var.source_id}.{ds_var.experiment_id}.{var}.nc'
                nc_out = _resolve_output_path(savepath, filename)

                # Save to netcdf file
                with dask.config.set(scheduler='threads'):
                    ds_var.to_netcdf(nc_out)
                    print(f"File with path: {nc_out} saved")

    else:
        for ds_in in save_file.values():
            filename = f'CMIP.{ds_in.source_id}.{ds_in.experiment_id}.nc'
            savepath = f'../../data/CMIP6/{ds_in.experiment_id}/{folder}'
            nc_out = _resolve_output_path(savepath, filename)

            # Save to netcdf file
            with dask.config.set(scheduler='threads'):
                ds_in.to_netcdf(nc_out)
                print(f"File with path: {nc_out} saved")

    return nc_out

### 1. Load netCDF files

In [4]:
# LOAD TSL
# ========= Define experiment, model, variable and path ==============
# These names are notebook-level state: the MRSOL cell reuses `folder` and
# `experiment_id`, and overwrites `variable`, `source_id` and `savepath`.
folder='preprocessed'
variable='tsl'
experiment_id = 'ssp370'
# Other model names kept from the original inline note (presumably the models
# this notebook is re-run for, one at a time -- TODO confirm; the meaning of
# the second, shorter list is not documented):
#   'SAM0-UNICON', 'AWI-ESM-1-1-LR', 'BCC-CSM2-MR', 'BCC-ESM1', 'CanESM5',
#   'CESM2-FV2', 'CESM2-WACCM-FV2', 'CESM2-WACCM', 'CESM2', 'CNRM-CM6-1-HR',
#   'CNRM-CM6-1', 'CNRM-ESM2-1', 'IPSL-CM6A-LR', 'NorESM2-MM', 'TaiESM1',
#   'UKESM1-0-LL', 'SAM0-UNICON'
#   ['BCC-CSM2-MR', 'CESM2', 'CNRM-CM6-1-HR', 'NorESM2-MM', 'SAM0-UNICON', 'TaiESM1']
source_id = 'IPSL-CM6A-LR'
savepath = f'../../data/CMIP6/{experiment_id}/{folder}/{variable}'

# ========= Load dataset =======
# File name pattern: CMIP.<source_id>.<experiment_id>.<variable>.nc
ds_tsl = open_dataset(os.path.join(savepath, f'CMIP.{source_id}.{experiment_id}.{variable}.nc'))

In [8]:
# LOAD MRSOL
# ========= Define model, variable and path ==============
# `folder` and `experiment_id` are not set here; they are taken from the
# LOAD TSL cell, which therefore has to be run first.
variable='mrsol'
# Other model names kept from the original inline note (presumably the models
# this notebook is re-run for, one at a time -- TODO confirm; the meaning of
# the second, shorter list is not documented):
#   'SAM0-UNICON', 'AWI-ESM-1-1-LR', 'BCC-CSM2-MR', 'BCC-ESM1', 'CanESM5',
#   'CESM2-FV2', 'CESM2-WACCM-FV2', 'CESM2-WACCM', 'CESM2', 'CNRM-CM6-1-HR',
#   'CNRM-CM6-1', 'CNRM-ESM2-1', 'IPSL-CM6A-LR', 'NorESM2-MM', 'TaiESM1',
#   'UKESM1-0-LL', 'SAM0-UNICON'
#   ['BCC-CSM2-MR', 'CESM2', 'CNRM-CM6-1-HR', 'NorESM2-MM', 'SAM0-UNICON', 'TaiESM1']
source_id = 'IPSL-CM6A-LR'
savepath = f'../../data/CMIP6/{experiment_id}/{folder}/{variable}'

# ========= Load dataset =======
# File name pattern: CMIP.<source_id>.<experiment_id>.<variable>.nc
ds_mrsol = open_dataset(os.path.join(savepath, f'CMIP.{source_id}.{experiment_id}.{variable}.nc'))

In [6]:
# ========= Have a look into the tsl data =======
# Bare expression: the notebook renders the dataset summary as cell output.
ds_tsl

In [9]:
# ========= Have a look into the mrsol data =======
# Bare expression: the notebook renders the dataset summary as cell output.
ds_mrsol

### 2. Interpolate to common depth layers

I chose the mrsol depth layers as the reference because they extend to 2 m, which is the depth of interest in my analysis.

In [10]:
# Get the mrsol depth coordinate; it is the target grid for the tsl interpolation
mrsol_depth_layers = ds_mrsol.depth

In [11]:
# Interpolate tsl to mrsol depth layers
# No interpolation method is passed, so xarray's default is used.
# NOTE(review): target depths outside the tsl depth range are presumably
# filled with NaN rather than extrapolated -- confirm against the xarray docs
# and check the shallowest/deepest layer of the result.
tsl_interp = ds_tsl.interp(depth=mrsol_depth_layers)

In [12]:
# Record the interpolation in the dataset's 'log' attribute.
# A new entry is put in front of an existing log, separated by ' // '.
if 'log' not in tsl_interp.attrs:
    tsl_interp.attrs['log'] = 'Interpolation of tsl depth to match the mrsol depth layers.'
else:
    log_old = tsl_interp.attrs['log']
    tsl_interp.attrs['log'] = f'Interpolation of tsl depth to match the mrsol depth layers. // {log_old}'

In [13]:
# ========= Have a look into interpolated tsl data =======
# Bare expression: the notebook renders the dataset summary as cell output.
tsl_interp

In [14]:
# save_file iterates over a dictionary of datasets, so wrap the single
# interpolated dataset in one, keyed by its model name.
ds_dict = {tsl_interp.source_id: tsl_interp}

In [15]:
# Display the depth coordinate of the dataset that is about to be saved
ds_dict[tsl_interp.source_id].depth

### 3. Save and replace netcdf files

In [16]:
# Same folder name the input files were loaded from, so save_file targets the
# path of the original tsl file and asks before replacing it.
folder = 'preprocessed'

In [17]:
# =========== Store file and remove any former one ==========
# save_var is left at its default (True): one file per data variable.
# The call prompts on stdin if the target file already exists; nc_out holds
# the path returned by save_file.
nc_out = save_file(ds_dict, folder=folder)

Delete old file CMIP.IPSL-CM6A-LR.ssp370.tsl.nc (y/n): y


File  with path: ../../data/CMIP6/ssp370/preprocessed/tsl/CMIP.IPSL-CM6A-LR.ssp370.tsl.nc removed
File with path: ../../data/CMIP6/ssp370/preprocessed/tsl/CMIP.IPSL-CM6A-LR.ssp370.tsl.nc saved


In [50]:
# =========== Check stored file ==============
# Re-open the file at the returned path; the notebook renders it as cell output.
xr.open_dataset(nc_out)