In [108]:
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import os
import cftime
import pandas as pd
from datetime import datetime
import matplotlib.colors as mcolors
from scipy.stats import linregress
from eofs.xarray import Eof
from eofs.examples import example_data_path

#My Functions
import importlib
import trend_projection_functions_new_method
importlib.reload(trend_projection_functions_new_method)
from trend_projection_functions_new_method import get_time_bounds
from trend_projection_functions_new_method import get_models_for_experiment
from trend_projection_functions_new_method import CVDP_EM_crop_NA_sector#
from trend_projection_functions_new_method import open_cropNA_unitshPA
from trend_projection_functions_new_method import calculate_spatial_ensemble_mean
from trend_projection_functions_new_method import calculate_seasonal_spatial_ensemble_mean_djf
from trend_projection_functions_new_method import calculate_linear_trend_spat_pattern
from trend_projection_functions_new_method import calculate_regression_map
from trend_projection_functions_new_method import project_onto_regression

In [109]:
def weighted_inner(EOF1, EOF2, weights=None):
    """
    Inner product and cosine similarity of two spatial patterns.

    The patterns are multiplied point-wise and summed over the 'lat' and
    'lon' dimensions. With the default ``weights=None`` no weighting is
    applied inside this function, so the result is a plain (Euclidean)
    inner product; it is only a *weighted* inner product if the weighting
    is already contained in the inputs or is passed via ``weights``.

    Parameters
    ----------
    EOF1, EOF2 : xarray.DataArray
        Patterns with 'lat' and 'lon' dimensions. The reductions must
        collapse to scalars because the results are converted with
        ``float``.
    weights : xarray.DataArray, optional
        Point-wise weights that broadcast against the patterns. When given,
        every product is multiplied by ``weights`` before summing, i.e.
        ``<a, b> = sum(weights * a * b)``, and the norms use the same
        weighted product.

    Returns
    -------
    inner : float
        The (optionally weighted) inner product of the two patterns.
    cos_sim : float
        ``inner`` divided by the product of the two pattern norms. A pattern
        with zero norm is not guarded against.
    """
    spatial_dims = ('lat', 'lon')

    def _dot(a, b):
        # Single definition of the inner product so that the numerator and
        # both norms are always computed in the same (weighted) space.
        product = a * b
        if weights is not None:
            product = product * weights
        return product.sum(dim=spatial_dims)

    inner = _dot(EOF1, EOF2)
    norm1 = np.sqrt(_dot(EOF1, EOF1))
    norm2 = np.sqrt(_dot(EOF2, EOF2))
    cos_sim = inner / (norm1 * norm2)
    return float(inner), float(cos_sim)

# --- Load the two EOF patterns ---
# Both modes are stored in the same file, so open it once and select the two
# modes from that single handle instead of opening the file twice.
EOF_PATTERN_FILE = '/gws/nopw/j04/extant/users/slbennie/regression_patterns/concatenating/psl_mon_historical_HadGEM3-GC31-LL_DJF_EOF_pattern_concat_1850-20153.nc'
eof_patterns = xr.open_dataset(EOF_PATTERN_FILE)
EOF_NAO = eof_patterns.sel(mode=0)['eofs']  # mode 0 is used as the NAO pattern
EOF_EA = eof_patterns.sel(mode=1)['eofs']   # mode 1 is used as the EA pattern

# --- Orthogonality check on the patterns as stored ---
# Values near zero (machine precision) mean the two patterns are orthogonal.
inner, cos_sim = weighted_inner(EOF_NAO, EOF_EA)
print(inner, cos_sim)

# --- Load one ensemble member's trend field ---
home = '/gws/nopw/j04/extant/users/slbennie/'
folder_path = f'{home}trend_calc_LESFMIP/linear_regression/NAO/historical/HadGEM3-GC31-LL/'
# os.listdir returns entries in arbitrary order; sort so that ens_files[0]
# names the same file on every run.
ens_files = sorted(
    os.path.join(folder_path, filename)
    for filename in os.listdir(folder_path)
    if 'HadGEM3-GC31-LL' in filename and '1850-2015' in filename
)
if not ens_files:
    # Fail with a message that names the folder rather than a bare IndexError.
    raise FileNotFoundError(f'No HadGEM3-GC31-LL 1850-2015 trend files found in {folder_path}')

# Multiplier applied to the loaded trend field.
# NOTE(review): presumably the number of years in 1850-2015, turning a
# per-year slope into a total change over the period - confirm the units
# returned by open_cropNA_unitshPA.
TREND_SPAN_YEARS = 165
trend_raw = open_cropNA_unitshPA(ens_files[0])#, 1850,2014)
trend_raw = trend_raw * TREND_SPAN_YEARS

-2.7755575615628914e-17 -2.775557561562892e-17


In [113]:
import xarray as xr
import numpy as np

# Least-squares projection of the trend field onto the NAO and EA patterns,
# with the patterns first interpolated onto the trend grid.

# trend_raw may be a DataArray or a container holding a 'slope' variable.
if isinstance(trend_raw, xr.DataArray):
    trend = trend_raw
else:
    print('here')  # marks that the 'slope' variable had to be extracted
    trend = trend_raw['slope']

# --- Step 1: Align grids ---
# Put both patterns on the lat/lon points of the trend field.
EOF_NAO_interp = EOF_NAO.interp(lat=trend['lat'], lon=trend['lon'])
EOF_EA_interp  = EOF_EA.interp(lat=trend['lat'], lon=trend['lon'])

# --- Step 2: Apply consistent mask ---
# Points where the trend is not finite are set to zero in all three fields,
# so they contribute nothing to any of the sums below.
valid_mask = np.isfinite(trend)
trend_masked = trend.where(valid_mask).fillna(0)
EOF_NAO_masked = EOF_NAO_interp.where(valid_mask).fillna(0)
EOF_EA_masked  = EOF_EA_interp.where(valid_mask).fillna(0)

# --- Step 3: Apply solver weights ---
# sqrt(cos(lat)) is applied to the trend only; the patterns are used as loaded.
# NOTE(review): this assumes the stored EOFs are already expressed in the
# weighted space of the EOF solver - confirm against how the file was written.
w = np.sqrt(np.cos(np.radians(trend['lat'])))
w2d = xr.broadcast(w, trend)[0]  # latitude weights expanded to the trend's shape
Y  = (trend_masked * w2d).stack(spatial=('lat','lon')).values
E1 = (EOF_NAO_masked).stack(spatial=('lat','lon')).values
E2 = (EOF_EA_masked ).stack(spatial=('lat','lon')).values

# --- Optional: check orthogonality ---
# inner / cos_sim   : patterns after interpolation and masking
# inner2 / cos_sim2 : patterns exactly as loaded
inner = np.dot(E1, E2)
cos_sim = inner / (np.linalg.norm(E1) * np.linalg.norm(E2))
inner2, cos_sim2 = weighted_inner(EOF_NAO, EOF_EA)

print("Weighted inner product:", inner, inner2)
# Fixed: the second value used to be inner2, so the cosine similarity of the
# as-loaded patterns was never actually shown.
print("Weighted cosine similarity:", cos_sim, cos_sim2)
# Interpolating and masking changes the orthogonality of the patterns
# slightly; an alternative is to do the regridding/masking before the EOFs
# are computed.

# --- Step 4: Stack EOFs into matrix ---
E = np.stack([E1, E2], axis=1)  # N x 2

# --- Step 5: Solve weighted least squares ---
# Coefficients that best approximate Y as c[0]*E1 + c[1]*E2.
c = np.linalg.lstsq(E, Y, rcond=None)[0]  # [c_NAO, c_EA]

# --- Step 6: Reconstruct projections ---
Y_hat_NAO = c[0] * E1
Y_hat_EA  = c[1] * E2
Y_hat_total = Y_hat_NAO + Y_hat_EA
residual = Y - Y_hat_total

# --- Step 7: Variance decomposition ---
# Each mode's share is its projection onto the full fit, so the two shares
# add up to |Y_hat_total|^2 / SST even if E1 and E2 are not exactly
# orthogonal; together with the residual share this sums to 1.
SST = np.sum(Y**2)
R2_NAO = np.dot(Y_hat_total, Y_hat_NAO) / SST
R2_EA  = np.dot(Y_hat_total, Y_hat_EA) / SST
R2_res = np.sum(residual**2) / SST

print("R² NAO       :", R2_NAO)
print("R² EA        :", R2_EA)
print("R² residual  :", R2_res)
print("Check sum    :", R2_NAO + R2_EA + R2_res)  # should be 1


here
Weighted inner product: -1.2996873542669984e-17 -2.7755575615628914e-17
Weighted cosine similarity: -1.2996873542669989e-17 -2.7755575615628914e-17
R² NAO       : 0.6018582763618643
R² EA        : 0.13585626603985326
R² residual  : 0.26228545759828203
Check sum    : 0.9999999999999996


In [111]:
# Least-squares projection of the trend field onto the NAO and EA patterns,
# using the patterns on their own grid (no interpolation step).

# --- Load the two EOF patterns ---
# Both modes are stored in the same file, so open it once.
EOF_PATTERN_FILE = '/gws/nopw/j04/extant/users/slbennie/regression_patterns/concatenating/psl_mon_historical_HadGEM3-GC31-LL_DJF_EOF_pattern_concat_1850-20153.nc'
eof_patterns = xr.open_dataset(EOF_PATTERN_FILE)
EOF_NAO = eof_patterns.sel(mode=0)['eofs']
EOF_EA = eof_patterns.sel(mode=1)['eofs']

# --- Load one ensemble member's trend field ---
home = '/gws/nopw/j04/extant/users/slbennie/'
folder_path = f'{home}trend_calc_LESFMIP/linear_regression/NAO/historical/HadGEM3-GC31-LL/'
# os.listdir returns entries in arbitrary order; sort so that ens_files[0]
# names the same file on every run.
ens_files = sorted(
    os.path.join(folder_path, filename)
    for filename in os.listdir(folder_path)
    if 'HadGEM3-GC31-LL' in filename and '1850-2015' in filename
)
if not ens_files:
    # Fail with a message that names the folder rather than a bare IndexError.
    raise FileNotFoundError(f'No HadGEM3-GC31-LL 1850-2015 trend files found in {folder_path}')

# NOTE(review): 165 is presumably the number of years in 1850-2015, turning a
# per-year slope into a total change - confirm the units of the loaded field.
TREND_SPAN_YEARS = 165
trend_raw = open_cropNA_unitshPA(ens_files[0])#, 1850,2014)
trend_raw = trend_raw * TREND_SPAN_YEARS

# trend_raw may be a DataArray or a container holding a 'slope' variable.
if isinstance(trend_raw, xr.DataArray):
    trend = trend_raw
else:
    print('here')  # marks that the 'slope' variable had to be extracted
    trend = trend_raw['slope']

# --- Solver weight ---
# sqrt(cos(lat)), expanded to the lat x lon shape of the trend.
w = np.sqrt(np.cos(np.radians(trend['lat'])))
w2d = xr.broadcast(w, trend)[0]

# --- Apply consistent mask: NaNs in trend are masked in EOFs ---
# NOTE(review): no interpolation is done here, so this assumes the EOF
# patterns and the trend share the same lat/lon coordinates - confirm.
valid = np.isfinite(trend)
trend_masked = trend.where(valid).fillna(0)
EOF_NAO_masked = EOF_NAO.where(valid).fillna(0)
EOF_EA_masked  = EOF_EA.where(valid).fillna(0)

# --- Flatten and apply weights ---
# The weights are applied to the trend only; the patterns are used as loaded.
Y  = (trend_masked * w2d).stack(spatial=('lat','lon')).values
E1 = (EOF_NAO_masked).stack(spatial=('lat','lon')).values
E2 = (EOF_EA_masked).stack(spatial=('lat','lon')).values
E  = np.stack([E1, E2], axis=1)  # N x 2

# --- Solve weighted least squares ---
# Finds the coefficients that best approximate the trend field Y as a linear
# combination of the supplied EOFs: Y ~ c[0]*E1 + c[1]*E2.
c = np.linalg.lstsq(E, Y, rcond=None)[0]  # coefficients: [c_NAO, c_EA]

# --- Reconstruct and residual ---
Y_hat_NAO = c[0] * E1
Y_hat_EA  = c[1] * E2
Y_hat_total = Y_hat_NAO + Y_hat_EA  # full reconstructed field
Y_hat = E @ c                       # same reconstruction written as a matrix product
residual = Y - Y_hat_total

# Total sum of squares of the weighted trend, and the share left unexplained.
SST = np.sum(Y**2)
R2_res = np.sum(residual**2) / SST
R2_total = 1 - R2_res  # fraction explained by both patterns together

# --- Variance decomposition 1: sum of squares of each mode's own field ---
# Equivalent to c[k]**2 * np.sum(E_k**2) / SST. The shares only add up to
# R2_total when E1 and E2 are orthogonal.
R2_NAO_sumsq = np.sum(Y_hat_NAO**2) / SST
R2_EA_sumsq  = np.sum(Y_hat_EA**2) / SST

print("R² NAO       :", R2_NAO_sumsq)
print("R² EA        :", R2_EA_sumsq)
print("R² residual  :", R2_res)
print("Check sum    :", R2_NAO_sumsq + R2_EA_sumsq + R2_res)  # should be 1

# --- Variance decomposition 2: projection of the full fit onto each mode ---
# These shares always add up to |Y_hat_total|^2 / SST, so the check sum is 1
# even if the patterns are not exactly orthogonal. These are the values left
# in R2_NAO / R2_EA after the cell has run.
R2_NAO = np.dot(Y_hat_total, Y_hat_NAO) / SST
R2_EA  = np.dot(Y_hat_total, Y_hat_EA) / SST

print("R² NAO       :", R2_NAO)
print("R² EA        :", R2_EA)
print("R² residual  :", R2_res)
print("Check sum    :", R2_NAO + R2_EA + R2_res)  # should be 1


here
R² NAO       : 0.6018582763618644
R² EA        : 0.13585626603985326
R² residual  : 0.26228545759828203
Check sum    : 0.9999999999999997
R² NAO       : 0.6018582763618643
R² EA        : 0.13585626603985326
R² residual  : 0.26228545759828203
Check sum    : 0.9999999999999996
