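This notebook compiles a dataset of m_star values and features based on GOTM SMC output. Currently (Oct. 8th, 2025), the features include the mean boundary layer depth, the constant zonal wind stress, the constant heat flux and the local Coriolis frequency. The mean boundary layer depth is averaged over inertial periods of SMC simulations with a total length of 10 days; the first inertial period is discarded from the mean calculation. The mean m_star values are computed in the same fashion. In the saved dataset, the boundary layer depth h, the Coriolis frequency f and the wind stress tx are combined into a single feature, x = h * f / u_star with u_star = sqrt(tx / rho0), which is stored alongside the heat flux and the target m_star.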

In [1]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import os
import json

import torch

In [2]:
# Directory that holds the precomputed means and the case definitions.
data_directory = '../m_star_dataset'

# Resolve the three input paths up front so the file names are visible in one place.
mld_path = os.path.join(data_directory, 'ePBL_paper_mean_mld.nc')
m_star_path = os.path.join(data_directory, 'ePBL_paper_mean_m_star.nc')
cases_path = os.path.join(data_directory, 'ePBL_paper_training_set_cases.json')

# netCDF datasets; the `mean_mld` / `mean_m_star` variables are read from these later.
mld_data = xr.open_dataset(mld_path)
m_star_data = xr.open_dataset(m_star_path)

# JSON case table; each value is later indexed with 'lat', 'tx' and 'heat_flux'.
with open(cases_path, 'r') as file:
    gotm_case_dict = json.load(file)


In [5]:
def compute_f(latitude: float):
    """Return the Coriolis frequency 2 * Omega * sin(latitude).

    Args:
        latitude: Latitude in degrees (converted to radians internally).

    Returns:
        The Coriolis frequency in rad/s; zero at the equator and negative
        for negative latitudes.
    """
    # Rotation rate for one revolution per 24-hour day, in rad/s.
    # NOTE(review): this uses the 24 h solar day; the sidereal day
    # (~86164 s) is the conventional choice for Earth's rotation rate.
    # Confirm which value the GOTM runs used.
    rotation_rate = 2 * np.pi / 24 / 60 / 60
    latitude_rad = np.pi * latitude / 180
    return 2 * rotation_rate * np.sin(latitude_rad)

In [13]:
# Reference density used to convert wind stress into a friction velocity.
# NOTE(review): presumably seawater density in kg m^-3, matching the GOTM
# configuration; confirm.
rho0 = 1027

# Per-case quantities, one entry per case in gotm_case_dict iteration order.
coriolis_freqs = np.array([compute_f(case['lat']) for case in gotm_case_dict.values()])
# Friction velocity u* = sqrt(|tx| / rho0). The magnitude is taken because a
# negative float raised to the power 0.5 evaluates to a complex number in
# Python, which would silently make this array (and everything derived from
# it) complex. Results are unchanged for non-negative tx.
u_stars = np.array([(abs(case['tx']) / rho0) ** 0.5 for case in gotm_case_dict.values()])
heat_fluxes = np.array([case['heat_flux'] for case in gotm_case_dict.values()])

# Precomputed means read from the netCDF files.
# NOTE(review): assumes these arrays are ordered like gotm_case_dict so the
# element-wise arithmetic below pairs each mean with its own case; confirm.
mlds = mld_data.mean_mld.values
m_stars = m_star_data.mean_m_star.values

# Combined feature: mean boundary layer depth * Coriolis frequency / u*.
xs = mlds * coriolis_freqs / u_stars

In [28]:
# Stack the two features and the target along a new leading axis, in the
# order [xs, heat_fluxes, m_stars], and convert the result to a tensor.
dataset_rows = [xs, heat_fluxes, m_stars]
dataset = torch.tensor(np.array(dataset_rows))

# Write the tensor next to the input data.
dataset_path = os.path.join(data_directory, "ePBL_dataset_two_features.pt")
torch.save(dataset, dataset_path)