## Imports

In [None]:
import xarray as xr
import pathlib
import numpy as np

## root directory of the data files; file_pattern() globs inside the
## per-variable subdirectory DATA_FP / name. The path is relative, so it
## is resolved against the current working directory.
DATA_FP = pathlib.Path("../data/mpi")

## Functions

In [None]:
def file_pattern(name, sim, n):
    """
    Glob the NetCDF files of one ensemble member. Args:
        - name: variable subdirectory under DATA_FP, "ts" or "ssh"
        - sim: simulation type, one of "historical" or "ssp585"
        - n: ensemble member ID, ranging from 1 to 50

    Returns the iterator produced by Path.glob (not a list), yielding
    every path under DATA_FP / name that matches "*{sim}*r{n}i*.nc".
    """
    ## directory holding all files for the requested variable
    variable_dir = DATA_FP / name

    ## the trailing "i" after the member ID keeps e.g. r1 from matching r10
    glob_expr = f"*{sim}*r{n}i*.nc"

    return variable_dir.glob(glob_expr)


def load_member(name, sim, n):
    """open all files of one ensemble member as a single dataset. Args:
    - name: "ts" or "ssh"
    - sim: simulation type, one of "historical" or "ssp585"
    - n: ensemble member ID, ranging from 1 to 50

    Returns the result of xr.open_mfdataset on the files matched by
    file_pattern(name, sim, n).
    """
    ## collect the paths for this member, then hand them to xarray
    member_files = file_pattern(name, sim, n)
    return xr.open_mfdataset(member_files)


def spatial_avg(data):
    """average over latitude and longitude, weighting by cos of latitude

    `data` must expose a `latitude` coordinate (converted from degrees to
    radians before taking the cosine) and support `.weighted(...)`; the
    mean is taken over the "latitude" and "longitude" dimensions.
    """

    ## cos(latitude) weights, with latitude converted from degrees
    lat_radians = np.deg2rad(data.latitude)
    area_weights = np.cos(lat_radians)

    ## weighted mean over both horizontal dimensions
    weighted_view = data.weighted(area_weights)
    return weighted_view.mean(["latitude", "longitude"])


class eof:
    """SVD-based EOF decomposition of a gridded field.

    The constructor flattens the "latitude"/"longitude" dimensions into a
    single "posn" dimension, drops every grid point that contains a NaN,
    and runs a reduced SVD on the remaining values.

    Attributes set by __init__ (exactly what np.linalg.svd returns with
    full_matrices=False):
        u:  left singular vectors
        s:  singular values
        vt: right singular vectors (transposed)

    NOTE(review): presumably `data` has dims (time, latitude, longitude),
    which would make `u` the temporal part and `vt` the spatial patterns
    over the retained grid points -- confirm against callers.
    NOTE(review): the time mean is not removed here; confirm whether
    callers are expected to pass anomalies.
    """

    def __init__(self, data):
        """Compute the SVD of `data` after removing NaN grid points.

        Args:
            - data: array with "time", "latitude" and "longitude"
              dimensions (an xarray DataArray in this notebook)
        """

        ## stack latitude and longitude into one dimension
        data_stacked = data.stack(posn=["latitude", "longitude"])

        ## flag grid points that are NaN at ANY time step. Checking only
        ## the first time step would let a point that is valid at t=0 but
        ## missing later slip through and poison the SVD with NaNs.
        nan_mask = data_stacked.isnull().any(dim="time")

        ## keep only the grid points that are valid at every time step
        data_nonan = data_stacked.isel(posn=~nan_mask.values)

        ## compute reduced SVD (full_matrices=False)
        self.u, self.s, self.vt = np.linalg.svd(data_nonan.values, full_matrices=False)

Preprocessing steps
1. Compute spatial patterns
2. Compress data
3. Check compression (compare the reconstruction of a randomly chosen ensemble member to the actual data)
4. Remove ensemble mean (external forcing)
5. Check ho

#### compute EOFs

In [None]:
## load ensemble member 1 (historical run followed by ssp585), join the
## two runs along the time dimension, and pull the result into memory
data = xr.concat(
    [load_member("ts", sim, 1) for sim in ("historical", "ssp585")],
    dim="time",
).load()

## build the EOF object from the "sst" variable
e = eof(data["sst"])