This notebook calculates the mean & standard deviation MMD for all model in all tasks. It uses pre-generated datasets from each generative model, in which should be placed in a specific place in OS.

In [3]:
import os
import numpy as np
from collections import defaultdict
from sbibm import get_task
from sbibm.metrics.mmd import mmd
import torch

nb_dir = os.getcwd()

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'future'

In [10]:
def calculate_mmd(model:str, task_name:str):    
    task = get_task(task_name)
    reference_samples = task.get_reference_posterior_samples(num_observation=1)

    # Folder containing .npz files
    if model == "dep":
      datasets_path = os.path.join(nb_dir, "cVAE", "Runs - Dependent_cVAE", task_name)
    elif model == "indep":
      datasets_path = os.path.join(nb_dir, "cVAE", "Runs - Independent_cVAE", task_name)
    elif model == "NSF":
      datasets_path = os.path.join(nb_dir, "Flow Based Methods", "Runs - NSF", task_name)
    elif model == "MAF":
      datasets_path = os.path.join(nb_dir, "Flow Based Methods", "Runs - MAF", task_name)
    else:
      print("Model not found")
      return

    # Store MMDs grouped by budget
    mmd_by_budget = defaultdict(list)

    for fname in sorted(os.listdir(datasets_path)):
      file_path = os.path.join(datasets_path, fname)

      # Extract budget from filename (e.g., 'budget_5k')
      try:
          budget = fname.split("budget_")[1].replace(".npz", "")
      except IndexError:
          print(f"Could not extract budget from {fname}, skipping...")
          continue

      # Load generated samples
      data = np.load(file_path)
      if "thetas" in data:
          generated = data["thetas"]
      else:
          print(f"'thetas' key not found in {fname}, skipping...")
          continue

      # Compute MMD and store
      # Convert to torch tensors
      ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)
      gen_tensor = torch.tensor(generated, dtype=torch.float32)

      # Compute MMD
      score = mmd(ref_tensor, gen_tensor)
      
      if score < np.inf:
        mmd_by_budget[budget].append(score)

    # Print mean and std MMDs per budget
    print(f"\nTask: {task_name}\nMMD Summary (Mean ± Std):")
    for budget in sorted(mmd_by_budget.keys(), key=lambda b: int(b.replace("k", ""))):
        scores = np.array(mmd_by_budget[budget])
        mean_score = scores.mean()
        std_score = scores.std()
        print(f"Budget {budget}: MMD = {mean_score:.3f} ± {std_score:.3f} [Nr. Scores used: {len(scores)}]")

In [11]:
tasks = [
"gaussian_linear",
"gaussian_linear_uniform",
"slcp",
"slcp_distractors",
"bernoulli_glm",
"bernoulli_glm_raw",
"gaussian_mixture",
"two_moons",
"sir",
"lotka_volterra"
 ]

# Dependent cVAE

In [13]:
for task_name in tasks:
    calculate_mmd("dep", task_name)

  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.016 ± 0.010 [Nr. Scores used: 5]
Budget 10k: MMD = 0.005 ± 0.003 [Nr. Scores used: 5]
Budget 20k: MMD = 0.090 ± 0.068 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear_uniform
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.031 ± 0.013 [Nr. Scores used: 5]
Budget 10k: MMD = 0.027 ± 0.020 [Nr. Scores used: 5]
Budget 20k: MMD = 0.014 ± 0.008 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.076 ± 0.037 [Nr. Scores used: 5]
Budget 10k: MMD = 0.044 ± 0.042 [Nr. Scores used: 5]
Budget 20k: MMD = 0.075 ± 0.061 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp_distractors
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.154 ± 0.080 [Nr. Scores used: 5]
Budget 10k: MMD = 0.072 ± 0.071 [Nr. Scores used: 5]
Budget 20k: MMD = 0.030 ± 0.011 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.121 ± 0.058 [Nr. Scores used: 5]
Budget 10k: MMD = 0.099 ± 0.050 [Nr. Scores used: 5]
Budget 20k: MMD = 0.050 ± 0.022 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm_raw
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.102 ± 0.081 [Nr. Scores used: 5]
Budget 10k: MMD = 0.070 ± 0.021 [Nr. Scores used: 5]
Budget 20k: MMD = 0.047 ± 0.013 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_mixture
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.116 ± 0.037 [Nr. Scores used: 5]
Budget 10k: MMD = 0.122 ± 0.045 [Nr. Scores used: 5]
Budget 20k: MMD = 0.251 ± 0.207 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: two_moons
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.011 ± 0.008 [Nr. Scores used: 5]
Budget 10k: MMD = 0.010 ± 0.007 [Nr. Scores used: 5]
Budget 20k: MMD = 0.006 ± 0.005 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: sir
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.300 ± 0.077 [Nr. Scores used: 5]
Budget 10k: MMD = 0.389 ± 0.203 [Nr. Scores used: 5]
Budget 20k: MMD = 0.540 ± 0.116 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: lotka_volterra
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.545 ± 0.007 [Nr. Scores used: 5]
Budget 10k: MMD = 0.534 ± 0.017 [Nr. Scores used: 5]
Budget 20k: MMD = 0.518 ± 0.012 [Nr. Scores used: 5]


# Independent cVAE

In [12]:
for task_name in tasks:
    calculate_mmd("indep", task_name)

  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.030 ± 0.024 [Nr. Scores used: 5]
Budget 10k: MMD = 0.017 ± 0.016 [Nr. Scores used: 5]
Budget 20k: MMD = 0.042 ± 0.060 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear_uniform
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.067 ± 0.037 [Nr. Scores used: 5]
Budget 10k: MMD = 0.085 ± 0.043 [Nr. Scores used: 5]
Budget 20k: MMD = 0.171 ± 0.095 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.070 ± 0.025 [Nr. Scores used: 5]
Budget 10k: MMD = 0.055 ± 0.038 [Nr. Scores used: 5]
Budget 20k: MMD = 0.088 ± 0.047 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp_distractors
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.129 ± 0.054 [Nr. Scores used: 5]
Budget 10k: MMD = 0.144 ± 0.072 [Nr. Scores used: 5]
Budget 20k: MMD = 0.098 ± 0.061 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.159 ± 0.030 [Nr. Scores used: 5]
Budget 10k: MMD = 0.199 ± 0.021 [Nr. Scores used: 5]
Budget 20k: MMD = 0.180 ± 0.035 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm_raw
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.179 ± 0.092 [Nr. Scores used: 5]
Budget 10k: MMD = 0.188 ± 0.060 [Nr. Scores used: 5]
Budget 20k: MMD = 0.164 ± 0.041 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_mixture
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.211 ± 0.159 [Nr. Scores used: 5]
Budget 10k: MMD = 0.086 ± 0.051 [Nr. Scores used: 5]
Budget 20k: MMD = 0.198 ± 0.144 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: two_moons
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.011 ± 0.006 [Nr. Scores used: 5]
Budget 10k: MMD = 0.008 ± 0.008 [Nr. Scores used: 5]
Budget 20k: MMD = 0.011 ± 0.007 [Nr. Scores used: 5]


  warn("JULIA_SYSIMAGE_DIFFEQTORCH not set")
  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: sir
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.359 ± 0.109 [Nr. Scores used: 5]
Budget 10k: MMD = 0.458 ± 0.139 [Nr. Scores used: 5]
Budget 20k: MMD = 0.463 ± 0.241 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: lotka_volterra
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.552 ± 0.002 [Nr. Scores used: 5]
Budget 10k: MMD = 0.555 ± 0.004 [Nr. Scores used: 5]
Budget 20k: MMD = 0.548 ± 0.005 [Nr. Scores used: 5]


# Masked Autoregressive Flows (MAF)

In [14]:
for task_name in tasks:
    calculate_mmd("MAF", task_name)

  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.006 ± 0.002 [Nr. Scores used: 5]
Budget 10k: MMD = 0.006 ± 0.004 [Nr. Scores used: 5]
Budget 20k: MMD = 0.002 ± 0.001 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear_uniform
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.013 ± 0.005 [Nr. Scores used: 5]
Budget 10k: MMD = 0.008 ± 0.004 [Nr. Scores used: 5]
Budget 20k: MMD = 0.005 ± 0.002 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.062 ± 0.017 [Nr. Scores used: 5]
Budget 10k: MMD = 0.014 ± 0.003 [Nr. Scores used: 5]
Budget 20k: MMD = 0.061 ± 0.074 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp_distractors
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.271 ± 0.238 [Nr. Scores used: 4]
Budget 10k: MMD = 0.208 ± 0.254 [Nr. Scores used: 4]
Budget 20k: MMD = 0.170 ± 0.253 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.109 ± 0.037 [Nr. Scores used: 5]
Budget 10k: MMD = 0.075 ± 0.040 [Nr. Scores used: 5]
Budget 20k: MMD = 0.060 ± 0.027 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm_raw
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.109 ± 0.022 [Nr. Scores used: 5]
Budget 10k: MMD = 0.080 ± 0.042 [Nr. Scores used: 5]
Budget 20k: MMD = 0.089 ± 0.047 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_mixture
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.171 ± 0.024 [Nr. Scores used: 5]
Budget 10k: MMD = 0.127 ± 0.022 [Nr. Scores used: 5]
Budget 20k: MMD = 0.105 ± 0.020 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: two_moons
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.002 ± 0.001 [Nr. Scores used: 5]
Budget 10k: MMD = 0.004 ± 0.006 [Nr. Scores used: 5]
Budget 20k: MMD = 0.000 ± 0.000 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: sir
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.328 ± 0.183 [Nr. Scores used: 5]
Budget 10k: MMD = 0.212 ± 0.081 [Nr. Scores used: 5]
Budget 20k: MMD = 0.418 ± 0.146 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: lotka_volterra
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.548 ± 0.004 [Nr. Scores used: 5]
Budget 10k: MMD = 0.537 ± 0.010 [Nr. Scores used: 5]
Budget 20k: MMD = 0.499 ± 0.029 [Nr. Scores used: 5]


# Neural Spline Flows

In [15]:
for task_name in tasks:
    calculate_mmd("NSF", task_name)

  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.018 ± 0.008 [Nr. Scores used: 5]
Budget 10k: MMD = 0.088 ± 0.098 [Nr. Scores used: 5]
Budget 20k: MMD = 0.013 ± 0.008 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_linear_uniform
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.048 ± 0.057 [Nr. Scores used: 5]
Budget 10k: MMD = 0.069 ± 0.071 [Nr. Scores used: 5]
Budget 20k: MMD = 0.054 ± 0.077 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.041 ± 0.014 [Nr. Scores used: 5]
Budget 10k: MMD = 0.025 ± 0.010 [Nr. Scores used: 5]
Budget 20k: MMD = 0.023 ± 0.008 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: slcp_distractors
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.090 ± 0.040 [Nr. Scores used: 5]
Budget 10k: MMD = 0.089 ± 0.033 [Nr. Scores used: 5]
Budget 20k: MMD = 0.085 ± 0.038 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.062 ± 0.019 [Nr. Scores used: 5]
Budget 10k: MMD = 0.054 ± 0.025 [Nr. Scores used: 5]
Budget 20k: MMD = 0.164 ± 0.238 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: bernoulli_glm_raw
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.217 ± 0.207 [Nr. Scores used: 5]
Budget 10k: MMD = 0.162 ± 0.171 [Nr. Scores used: 5]
Budget 20k: MMD = 0.154 ± 0.136 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: gaussian_mixture
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.058 ± 0.032 [Nr. Scores used: 5]
Budget 10k: MMD = 0.127 ± 0.214 [Nr. Scores used: 5]
Budget 20k: MMD = 0.014 ± 0.009 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: two_moons
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.002 ± 0.005 [Nr. Scores used: 5]
Budget 10k: MMD = 0.002 ± 0.004 [Nr. Scores used: 5]
Budget 20k: MMD = 0.003 ± 0.003 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: sir
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.421 ± 0.168 [Nr. Scores used: 5]
Budget 10k: MMD = 0.328 ± 0.127 [Nr. Scores used: 5]
Budget 20k: MMD = 0.415 ± 0.182 [Nr. Scores used: 5]


  ref_tensor = torch.tensor(reference_samples, dtype=torch.float32)



Task: lotka_volterra
MMD Summary (Mean ± Std):
Budget 5k: MMD = 0.537 ± 0.010 [Nr. Scores used: 5]
Budget 10k: MMD = 0.512 ± 0.007 [Nr. Scores used: 5]
Budget 20k: MMD = 0.530 ± 0.023 [Nr. Scores used: 5]


In [5]:
dep = [0.016, 0.005, 0.090,0.031,0.027,0.014,0.076,0.044,0.075,0.154,0.072,0.030,0.121,0.099,0.050,0.102,0.070,0.047,0.116,0.122,0.251,0.011,0.010,0.006,0.300,0.389,0.540,0.545,0.534,0.518 ]

indep = [0.030,0.017,0.042,0.067,0.085,0.171,0.070,0.055,0.088,0.129,0.144,0.098,0.159,0.199,0.180,0.179,0.188,0.164,0.211,0.086,0.198,0.011,0.008,0.011,0.359,0.458,0.463,0.552,0.555,0.548]

MAF = [0.006,0.006,0.002,0.013,0.008,0.005,0.062,0.014,0.061,0.271,0.208,0.170,0.109,0.075,0.060,0.109,0.080,0.089,0.171,0.127,0.105,0.002,0.004,0.000,0.328,0.212,0.418,0.548,0.537,0.499]

NSF = [0.018,0.088,0.013,0.048,0.069,0.054,0.041,0.025,0.023,0.090,0.089,0.085,0.062,0.054,0.164,0.217,0.162,0.154,0.058,0.127,0.014,0.014,0.002,0.003,0.421,0.328,0.415,0.537,0.512,0.530]

print(f"dep: {np.mean(dep)}")
print(f"indep: {np.mean(indep)}")
print(f"MAF: {np.mean(MAF)}")
print(f"NSF: {np.mean(NSF)}")


dep: 0.14883333333333332
indep: 0.18416666666666667
MAF: 0.1433
NSF: 0.14723333333333333
