# Pre-processing workflow for MPAS-Ocean model output of basal melt rate

This notebook removes the linear trend (if present), seasonal signal and draft dependence of basal melt from the MPAS-Ocean SORRMv2.1 ocean model run. What remains at the end is the variability component. It adds the seasonal and variability components and saves this as a forcing file.


In [1]:
import sys
import os
os.environ['USE_PYGEOS'] = '0'
import gc
from pathlib import Path

import cartopy.crs as ccrs
import cartopy
import matplotlib.pyplot as plt
import geopandas as gpd

import numpy as np
import xarray as xr
from xeofs.xarray import EOF
import rioxarray

from shapely.geometry import mapping
from sklearn.linear_model import LinearRegression
from aislens import config, dataprep, generator, geospatial, plot, utils

In [None]:
# Helper functions

def detrend_dim(data, dim, deg):
    # Store the original mean
    #original_mean = data.mean(dim=dim)
    # detrend along a single dimension
    p = data.polyfit(dim=dim, deg=deg)
    fit = xr.polyval(data[dim], p.polyfit_coefficients)
    detrended = data - fit
    # Add back the original mean
    #detrended += original_mean
    return detrended

def clip_data(total_data, basin):
    """
    Clip the map to a specific domain
    data: input data (xarray DataArray)
    domain: domain name (string), as defined in the ice shelf geometry file (icems)
    """
    clipped_data = total_data.rio.clip(icems.loc[[basin],'geometry'].apply(mapping),icems.crs)
    #clipped_data = clipped_data.dropna('time',how='all')
    #clipped_data = clipped_data.dropna('y',how='all')
    #clipped_data = clipped_data.dropna('x',how='all')
    #clipped_data = clipped_data.drop("month")
    return clipped_data

def find_ice_shelf_index(ice_shelf_name):
    return icems[icems['name']==ice_shelf_name].index[0]

def deseasonalize(data):
    # Group data by month
    data_month = data.groupby("Time.month")
    # Calculate climatological mean for each month
    data_clm = data_month.mean("Time")
    # Calculate deseasonalized anomalies
    data_anm = data_month - data_clm
    # Add back the original mean
    original_mean = data.mean("Time")
    data_anm += original_mean
    return data_anm

def dedraft(data, draft):
    data_tm = data.mean(dim='Time')
    draft_tm = draft.mean(dim='Time')
    data_stack = data_tm.stack(z=('x', 'y'))
    draft_stack = draft_tm.stack(z=('x', 'y'))
    data_stack_noNaN = data_stack.fillna(0)
    draft_stack_noNaN = draft_stack.fillna(0)
    data_stack_noNaN_vals = data_stack_noNaN.values.reshape(-1,1)
    draft_stack_noNaN_vals = draft_stack_noNaN.values.reshape(-1,1)
    reg = LinearRegression().fit(draft_stack_noNaN_vals, data_stack_noNaN_vals)
    data_pred_stack_noNaN_vals = reg.predict(draft_stack_noNaN_vals).reshape(-1)
    data_pred_stack_noNaN = data_stack_noNaN.copy(data=data_pred_stack_noNaN_vals)
    data_pred_stack = data_pred_stack_noNaN.where(~data_stack.isnull(), np.nan)
    data_pred = data_pred_stack.unstack('z').transpose()
    #data_dedraft = data - data_pred
    return data_pred #reg.coef_, reg.intercept_, data_pred, data_dedraft

# Define a function to write_crs for the xarray dataset, with the crs input parameter defaulting to a string "epsg:3031"
def write_crs(ds, crs='epsg:3031'):
    ds.rio.write_crs(crs, inplace=True)
    return ds


In [None]:
# Detrend the data

# Method 1: Detrend the time series of spatial mean melt rate using a linear trend that is unique at each spatial point
SORRMv21_flux_detrend_perpixel = detrend_dim(SORRMv21_flux, 'Time', 1).compute()
#SORRMv21_flux_detrend_perpixel_ts = SORRMv21_flux_detrend_perpixel.mean(dim=['x', 'y']).compute()
print("Data detrended")

# Remove the seasonal cycle
# Deseasonalize
SORRMv21_flux_detrend_perpixel_deseasonalize = deseasonalize(SORRMv21_flux_detrend_perpixel).compute()
#SORRMv21_flux_detrend_perpixel_deseasonalize_ts = SORRMv21_flux_detrend_perpixel_deseasonalize.mean(dim=['x', 'y']).compute()
print("Data deseasonalized")

# Remove the draft dependence

print('Removing draft dependence...')
iceShelfRegions = range(33,133)

# write_crs for the data to be clipped
SORRMv21_flux_detrend_perpixel_deseasonalize = write_crs(SORRMv21_flux_detrend_perpixel_deseasonalize)
SORRMv21_draft = write_crs(SORRMv21_draft)

for i in iceShelfRegions:
    print('extracting data for catchment {}'.format(icems.name.values[i]))
    mlt = clip_data(SORRMv21_flux_detrend_perpixel_deseasonalize, i)
    h = clip_data(SORRMv21_draft, i)
    mlt_tm = mlt.mean(dim='Time')
    h_tm = h.mean(dim='Time')
    print('calculating linear regression for catchment {}'.format(icems.name.values[i]))
    mlt_pred = dedraft(mlt, h)

    mlt_pred.name = 'draftDepenBasalMeltPred'
    mlt_pred.attrs['long_name'] = 'Predicted flux of mass through the ocean surface based on draft dependence coefficients. Positive into ocean.'
    mlt_pred.attrs['units'] = 'kg m^-2 s^-1'

    mlt_pred.to_netcdf(main_dir / DIR_interim / 'draft_dependence/sorrm/{}_draftPred.nc'.format(icems.name.values[i]))
    print('{} file saved'.format(icems.name.values[i]))

    del mlt, h, mlt_tm, h_tm, mlt_pred
    print('deleted interim variables')
    gc.collect()
print('draft dependence removed, predicted flux files saved for individual ice shelves')

# Merge draft dependence parameters for all ice shelves into a single xarray dataset

iceShelfRegions = range(33,133)
ds = xr.Dataset()
for i in iceShelfRegions:
    ds = xr.merge([ds, xr.open_dataset(main_dir / DIR_interim / 'draft_dependence/sorrm/{}_draftPred.nc'.format(icems.name.values[i]))])
ds.to_netcdf(main_dir / DIR_interim / 'draft_dependence/sorrm/SORRMv21_draftDependencePred.nc')

print('merged draft dependence parameters for all ice shelves into a single xarray dataset')


# Load the draft dependence prediction
# ds = xr.open_dataset(main_dir / DIR_interim / 'draft_dependence/sorrm/SORRMv21_draftDependencePred.nc')
# ds = ds.draftDepenBasalMeltPred

# Remove draft dependence from the data
SORRMv21_flux_detrend_perpixel_deseasonalize_dedraft = SORRMv21_flux_detrend_perpixel_deseasonalize - ds#['draftDepenBasalMeltPred']
#SORRMv21_flux_detrend_perpixel_deseasonalize_dedraft_ts = SORRMv21_flux_detrend_perpixel_deseasonalize_dedraft.mean(dim=['x', 'y']).compute()

# Save the preprocessed data
SORRMv21_variability = SORRMv21_flux_detrend_perpixel_deseasonalize_dedraft

# Rename name attribute for the variable
#SORRMv21_variability.attrs['name'] = 'landIceFreshwaterFluxVariability'
SORRMv21_variability.to_netcdf(main_dir / DIR_processed / 'draft_dependence/sorrm/SORRMv21_variability.nc')
print('Preprocessed data saved')

In [2]:
# Load configuration paths
satobs_path = config.FILE_PAOLO23_SATOBS
model_path = config.FILE_MPASO_MODEL
iceshelf_masks_path = config.FILE_ICESHELFMASKS
draft_dependence_output_path = config.FILE_DRAFT_DEPENDENCE
variability_output_path = config.FILE_VARIABILITY

AttributeError: module 'aislens.config' has no attribute 'FILE_PAOLO23_SATOBS'

In [17]:
# Load datasets
print("Loading datasets...")
model_data = xr.open_dataset(model_path)
iceshelf_masks = geospatial.read_ice_shelves_mask(iceshelf_masks_path)

Loading datasets...


FileNotFoundError: [Errno 2] No such file or directory: b'/Users/smurugan9/research/aislens/AISLENS/data/external/Regridded_SORRMv2.1.ISMF.FULL.nc'

In [None]:
# Detrend the data
print("Detrending data...")
detrended_data = dataprep.detrend_dim(
    model_data["timeMonthly_avg_landIceFreshwaterFlux"],
    dim=config.TIME_DIM,
    deg=1
)

In [None]:
# Deseasonalize the data
print("Deseasonalizing data...")
deseasonalized_data = dataprep.deseasonalize(detrended_data)


In [None]:
# Write CRS for geospatial operations
print("Writing CRS...")
deseasonalized_data = utils.write_crs(deseasonalized_data, crs=config.config.CRS_TARGET)
model_draft = utils.write_crs(model_data["timeMonthly_avg_ssh"], crs=config.config.CRS_TARGET)

In [None]:
# Remove draft dependence
print("Removing draft dependence...")
ice_shelf_regions = range(33, 133)
draft_dependence_results = []

for i in ice_shelf_regions:
    print(f"Processing ice shelf region {i}...")
    melt = geospatial.clip_data(deseasonalized_data, i, iceshelf_masks)
    draft = geospatial.clip_data(model_draft, i, iceshelf_masks)

    # Calculate draft dependence
    draft_dependence = dataprep.dedraft(melt, draft)
    draft_dependence.name = "draftDepenBasalMeltPred"
    draft_dependence.attrs["long_name"] = "Predicted flux of mass through the ocean surface based on draft dependence coefficients. Positive into ocean."
    draft_dependence.attrs["units"] = "kg m^-2 s^-1"

    # Save draft dependence for the region
    output_file = config.config.DIR_ICESHELF_DEDRAFT / f"{iceshelf_masks.name.values[i]}_draftPred.nc"
    utils.ensure_dir_exists(output_file)
    draft_dependence.to_netcdf(output_file)
    draft_dependence_results.append(draft_dependence)

    print(f"Draft dependence saved for region {i}.")
    del melt, draft, draft_dependence
    gc.collect()

# Merge draft dependence parameters into a single dataset
print("Merging draft dependence parameters...")
merged_draft_dependence = xr.merge(draft_dependence_results)
merged_draft_dependence.to_netcdf(draft_dependence_output_path)
print("Merged draft dependence parameters saved.")

# Remove draft dependence from the data
print("Removing draft dependence from the data...")
dedrafted_data = deseasonalized_data - merged_draft_dependence["draftDepenBasalMeltPred"]

In [None]:
# Save variability data
print("Saving variability data...")
dedrafted_data.name = "landIceFreshwaterFluxVariability"
utils.ensure_dir_exists(variability_output_path)
dedrafted_data.to_netcdf(variability_output_path)
print("Variability data saved.")