In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --- Configuration -----------------------------------------------------------
# Experiment identifier; it selects which scores file is opened below.
# Other ids kept from the original note (presumably alternative experiments):
#   RGBS_p00, RGBSLANSHEGLA_p00_cleaned, t7Hiwh6A (2000+ boxes),
#   kzHjKthT (200+ boxes), ME0X3ynk, aE4pm07D, 8FU3FYuL,
#   8asQEA6P (best so far), n8djAKPf
exp_name = "t5mzQxzi"

# Directory that holds the per-experiment score files. Defined once so the
# notebook can be pointed at another location by editing a single line.
scores_dir = "/lustre_scratch/shaerdan/scores"

# The training scores follow the same naming scheme; enable when needed:
# train_scores_path = f"{scores_dir}/train_scores_{exp_name}.nc"
test_scores_path = f"{scores_dir}/test_scores_{exp_name}.nc"

# --- Load --------------------------------------------------------------------
ds_test_toy = xr.open_dataset(test_scores_path)

# Leave the dataset as the last expression of the cell so Jupyter renders
# xarray's rich repr instead of the plain-text dump produced by print().
ds_test_toy

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt

# Requires `ds_test_toy` to have been loaded by an earlier cell.

# Take index 0 along the second axis of each variable, leaving 3-D arrays.
# NOTE(review): the second axis is presumably a channel/band axis and the
# remaining axes are (box, y, x) -- confirm against the dataset.
predictions = ds_test_toy.hires_estimate[:, 0, :, :]
ground_truth = ds_test_toy.ST_slices[:, 0, :, :]

# Fail fast on empty data. `size` is the product of all axis lengths, so this
# catches an empty axis anywhere (not only the second one), which would
# otherwise surface much later as empty or NaN metrics.
if predictions.size == 0 or ground_truth.size == 0:
    raise ValueError("Dataset has zero-sized dimensions.")

# Rename the model-output dimensions to "y"/"x" so that predictions can be
# combined with the ground truth by dimension name.
# NOTE(review): assumes ground_truth already uses "y"/"x" -- confirm.
predictions_renamed = predictions.rename({"model_output_y": "y", "model_output_x": "x"})

# Function to compute RMSE
def compute_rmse(pred, gt, dims=("y", "x")):
    """Root-mean-square error between ``pred`` and ``gt``, reduced over ``dims``.

    Parameters
    ----------
    pred, gt : xarray.DataArray
        Arrays to compare. They are subtracted element-wise, so both must be
        compatible along the dimensions named in ``dims``.
    dims : str or sequence of str, default ("y", "x")
        Dimension name(s) to average over. The default reproduces the
        original behaviour of reducing over the two spatial dimensions.

    Returns
    -------
    xarray.DataArray
        RMSE with ``dims`` reduced away; all other dimensions are kept.
    """
    squared_error = (pred - gt) ** 2
    mean_squared_error = squared_error.mean(dim=dims)
    return np.sqrt(mean_squared_error)

# Vectorized Pearson correlation function using xarray
def compute_pearson_corr(pred, gt, dims=("y", "x")):
    """Pearson correlation between ``pred`` and ``gt``, computed over ``dims``.

    Parameters
    ----------
    pred, gt : xarray.DataArray
        Fields to correlate; both must contain the dimensions in ``dims``.
    dims : str or sequence of str, default ("y", "x")
        Dimension name(s) the correlation is computed over. The default
        reproduces the original behaviour (the two spatial dimensions).

    Returns
    -------
    xarray.DataArray
        Correlation coefficient with ``dims`` reduced away. The result is
        NaN where either field has zero variance (0 / 0) or where no
        position is valid in both fields.
    """
    # xarray reductions skip NaN by default. Without a shared mask, a pixel
    # missing in only one field would be excluded from that field's mean and
    # variance but still counted in the other's, so the three sums below
    # would be taken over different pixel sets and the ratio would not be a
    # correlation. Restrict both fields to jointly valid positions first.
    valid = pred.notnull() & gt.notnull()
    pred = pred.where(valid)
    gt = gt.where(valid)

    # Anomalies about the mean over `dims`; reducing over the named
    # dimensions directly gives the same result as stacking them first.
    pred_anom = pred - pred.mean(dim=dims)
    gt_anom = gt - gt.mean(dim=dims)

    cross_sum = (pred_anom * gt_anom).sum(dim=dims)
    pred_sum_sq = (pred_anom ** 2).sum(dim=dims)
    gt_sum_sq = (gt_anom ** 2).sum(dim=dims)

    return cross_sum / np.sqrt(pred_sum_sq * gt_sum_sq)

# Per-box timestamps, split into the calendar components used for grouping.
time_index = pd.to_datetime(ds_test_toy.box_time.values)
years, months = time_index.year, time_index.month

# Result containers, laid out as {year: {month: per-box metric values}}.
yearly_monthly_rmse = {}
yearly_monthly_pearson_corr = {}

# The set of months present in the data does not depend on the year.
distinct_months = np.unique(months)

# Walk every (year, month) pair and evaluate both metrics on its boxes.
for year in np.unique(years):
    rmse_by_month = yearly_monthly_rmse[year] = {}
    corr_by_month = yearly_monthly_pearson_corr[year] = {}
    print(f"processing year {year}")

    in_year = years == year
    for month in distinct_months:
        mask = in_year & (months == month)
        if not mask.any():
            # No boxes fall in this month of this year.
            continue
        print(f"processing month {month}")

        # Boolean selection along the "box" dimension.
        pred_month = predictions_renamed.isel(box=mask)
        gt_month = ground_truth.isel(box=mask)

        rmse_by_month[month] = compute_rmse(pred_month, gt_month)
        corr_by_month[month] = compute_pearson_corr(pred_month, gt_month)

print("processing finished")


In [None]:
def plot_monthly_boxplots(metric_by_year, metric_label):
    """Show one box-plot figure per year with one box per month.

    Parameters
    ----------
    metric_by_year : dict
        Nested mapping ``{year: {month: values}}`` where ``values`` is
        array-like (anything ``np.asarray`` accepts).
    metric_label : str
        Metric name used in the figure title and as the y-axis label.
    """
    for year, metric_by_month in metric_by_year.items():
        if not metric_by_month:
            # Nothing recorded for this year; skip rather than draw an empty axes.
            continue

        month_labels = [str(month) for month in metric_by_month]

        samples = []
        for values in metric_by_month.values():
            flat = np.asarray(values, dtype=float).ravel()
            # A single NaN/inf makes the quartiles NaN and the whole box
            # disappears, so keep only finite samples.
            samples.append(flat[np.isfinite(flat)])

        fig, ax = plt.subplots(figsize=(15, 7))
        ax.boxplot(samples)
        # Label the ticks afterwards instead of passing `labels=` to boxplot:
        # that keyword was renamed to `tick_labels` in Matplotlib 3.9, so
        # neither spelling works on every version.
        ax.set_xticklabels(month_labels, rotation=45)
        ax.set_title(f"Box Plot of {metric_label} over Testing Data by Month for Year {year}")
        ax.set_ylabel(metric_label)
        ax.set_xlabel("Month")
        plt.show()


# Plot RMSE for each year
plot_monthly_boxplots(yearly_monthly_rmse, "RMSE")

# Plot Pearson Correlation for each year
plot_monthly_boxplots(yearly_monthly_pearson_corr, "Pearson Correlation")