In [6]:
import os
import h5py
import torch
import pickle
import logging
import warnings
import numpy as np

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# ---- Paths (edit if needed) ----
FILEDIR  = '/global/cfs/cdirs/m4334/sferrett/monsoon-sr/data/processed'
SAVEDIR  = '/global/cfs/cdirs/m4334/sferrett/monsoon-sr/data/results'
MODELDIR = '/global/cfs/cdirs/m4334/sferrett/monsoon-sr/models/nn'
H5_FILE  = 'ml_data_subset.h5'  # the same file used at train time

# ---- Experiments (must match training) ----
CONFIGS = [
    {'name':'exp_1','inputvars':['bl'],'description':'Experiment 1'},
    {'name':'exp_2','inputvars':['cape','subsat'],'description':'Experiment 2'},
    {'name':'exp_3','inputvars':['capeprofile'],'description':'Experiment 3'},
    {'name':'exp_4','inputvars':['subsatprofile'],'description':'Experiment 4'},
    {'name':'exp_5','inputvars':['capeprofile','subsatprofile'],'description':'Experiment 5'},
    {'name':'exp_6','inputvars':['t','q'],'description':'Experiment 6'},
]

# ---- Target normalization flags (must match training) ----
NORMTARGET  = False
LOG1PTARGET = False

In [7]:
PROFILE_VARS = ['t','q','capeprofile','subsatprofile']

def _stack_with_masks(h5file, split, inputvars):
    blocks = []
    for var in inputvars:
        X = h5file[f'{var}_{split}'][:]
        if var in PROFILE_VARS and X.shape[1] > 1:
            mask = np.isfinite(X).astype(np.float32)
            X    = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)
            X    = X * mask
            blocks.append(np.concatenate([X, mask], axis=1))
        else:
            X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0).astype(np.float32)
            blocks.append(X)
    return torch.tensor(np.concatenate(blocks, axis=1), dtype=torch.float32)

def load_splits(inputvars, filename, filedir=FILEDIR):
    """Return Xtrain, Xvalid, Xtest, ytrain, yvalid, ytest, normparams."""
    filepath = os.path.join(filedir, filename)
    with h5py.File(filepath, 'r') as f:
        Xtrain = _stack_with_masks(f, 'train', inputvars)
        Xvalid = _stack_with_masks(f, 'valid', inputvars)
        Xtest  = _stack_with_masks(f, 'test',  inputvars)

        # keep y as numpy until normalization is applied
        ytrain = f['pr_train'][:].astype(np.float32).squeeze()
        yvalid = f['pr_valid'][:].astype(np.float32).squeeze()
        ytest  = f['pr_test'][:].astype(np.float32).squeeze()

    normparams = None
    if NORMTARGET:
        if LOG1PTARGET:
            ytrain = np.log1p(ytrain)
            yvalid = np.log1p(yvalid)
            ytest  = np.log1p(ytest)
        ymean = ytrain.mean()
        ystd  = ytrain.std() + 1e-8
        ytrain = (ytrain - ymean) / ystd
        yvalid = (yvalid - ymean) / ystd
        ytest  = (ytest  - ymean) / ystd
        normparams = {'mean': ymean, 'std': ystd, 'log1p': LOG1PTARGET}

    # convert to tensors once
    ytrain = torch.tensor(ytrain, dtype=torch.float32)
    yvalid = torch.tensor(yvalid, dtype=torch.float32)
    ytest  = torch.tensor(ytest,  dtype=torch.float32)

    return Xtrain, Xvalid, Xtest, ytrain, yvalid, ytest, normparams

In [9]:
# def invert_normalization(y_norm, normparams):
#     if isinstance(y_norm, torch.Tensor):
#         y_norm = y_norm.cpu().numpy()
#     if normparams is None:
#         return y_norm
#     y = y_norm * normparams['std'] + normparams['mean']
#     if normparams.get('log1p', False):
#         y = np.expm1(y)
#     return y

In [10]:
class NNMODEL:
    def __init__(self, inputsize):
        self.inputsize = inputsize
        self.device    = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model     = torch.nn.Sequential(
            torch.nn.Linear(self.inputsize,256), torch.nn.BatchNorm1d(256), torch.nn.GELU(),
            torch.nn.Linear(256,128),            torch.nn.BatchNorm1d(128), torch.nn.GELU(),
            torch.nn.Linear(128,64),             torch.nn.BatchNorm1d(64),  torch.nn.GELU(),
            torch.nn.Linear(64,32),              torch.nn.BatchNorm1d(32),  torch.nn.GELU(),
            torch.nn.Linear(32,1)
        ).to(self.device)

    def predict(self, X):
        self.model.eval()
        with torch.no_grad():
            yhat = self.model(X.to(self.device)).squeeze()
        return yhat.cpu().numpy()

In [12]:
exp = next(c for c in CONFIGS if c['name'] == 'exp_1')  # change to 'exp_2', etc.
inputvars = exp['inputvars']

# Load test split + normalization params
Xtrain, Xvalid, Xtest, ytrain, yvalid, ytest, normparams = load_splits(inputvars, H5_FILE)

# Build the model skeleton with correct input width
model = NNMODEL(inputsize=Xtest.shape[1])

# Load the saved weights
model_path = os.path.join(MODELDIR, f"{exp['name']}_best.pth")
state_dict = torch.load(model_path, map_location=model.device)
model.model.load_state_dict(state_dict)
print(f"Loaded weights from: {model_path}")

Loaded weights from: /global/cfs/cdirs/m4334/sferrett/monsoon-sr/models/nn/exp_1_best.pth


  state_dict = torch.load(model_path, map_location=model.device)


In [None]:
X = Xvalid+Xtrain

In [17]:
# Predict (normalized space) and invert back to physical units
y_pred = model.predict(Xvalid)
# y_pred = invert_normalization(y_pred_norm, normparams)
# y_true = invert_normalization(ytest, normparams)
y_true = ytest

OutOfMemoryError: CUDA out of memory. Tried to allocate 6.19 GiB. GPU 0 has a total capacity of 39.38 GiB of which 3.63 GiB is free. Including non-PyTorch memory, this process has 35.74 GiB memory in use. Of the allocated memory 35.23 GiB is allocated by PyTorch, and 17.96 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Metrics
r2   = r2_score(y_true, y_pred)
rmse = mean_squared_error(y_true, y_pred, squared=False)
mae  = mean_absolute_error(y_true, y_pred)
print(exp['name'], '→ R²:', r2, 'RMSE:', rmse, 'MAE:', mae)