In [1]:
import os
import sys
import numpy as np
import time
import warnings

# Make the project's ``src`` directory importable from this notebook.
# NOTE(review): assumes the kernel's working directory is the notebooks folder
# (as the variable name suggests), so that ``../src`` is its sibling — confirm.
notebooks_path = os.path.abspath(os.curdir)
src_path = os.path.abspath(os.path.join(notebooks_path, os.pardir, "src"))
already_on_path = src_path in sys.path
if not already_on_path:
    # Front of the list, so this directory is searched before the other entries.
    sys.path.insert(0, src_path)
del already_on_path




In [2]:
# Import your custom functions (including the markovian solvers)
from optimal_code.utils import *
from optimal_code.optimal_solver import *
from optimal_code.optimal_solver_markov import *

from trees.build_trees_from_paths import *

# Import modules
from trees.multi_dimension.Multidimension_trees import *
from trees.multi_dimension.Multidimension_solver import *
from trees.multi_dimension.Multidimension_adapted_empirical_measure import *


In [3]:


from trees.tree_Node import *
from trees.treeAnalysis import *
from trees.treeVisualization import *
from trees.tree_AWD_utilities import *
from trees.build_trees_from_paths import build_tree_from_paths

from trees.awd_trees.Discrete_OT_Solver_algo import *
from trees.awd_trees.Gurobi_AOT import *
from trees.awd_trees.Nested_Dist_Algo import *
# Suppress warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [4]:
from adapted_empirical_measure.AEM_grid import *

In [5]:
from benchmark_value_gaussian.Comp_AWD2_Gaussian import *

### Generate Paths for d=1

In [9]:
# Normalization flag: when True, each factor is divided by sqrt(trace(F F^T)),
# i.e. the resulting covariance has unit trace.
normalize = False

# Seeded, cell-local generator so a fresh "Restart & Run All" reproduces the
# same samples without touching NumPy's global random state.
seed_d1 = 0
rng_d1 = np.random.default_rng(seed_d1)

# Define factor matrices (lower triangular); covariances are A = L L^T, B = M M^T.
L0 = np.array([[1, 0, 0, 0], [1, 2, 0, 0], [1, 2, 3, 0], [1, 2, 3, 4]])
A0 = L0 @ L0.T
L = L0 / np.sqrt(np.trace(A0)) if normalize else L0
A = L @ L.T

M0 = np.array([[1, 0, 0, 0], [2, 1, 0, 0], [3, 2, 1, 0], [4, 3, 2, 1]])
B0 = M0 @ M0.T
M = M0 / np.sqrt(np.trace(B0)) if normalize else M0
B = M @ M.T

# Parameters
d = 1
T = 4
dim = d * T
n_sample_plot = 100

# The matrix products below require (dim, dim) factors; fail early with a clear message.
assert L.shape == (dim, dim) and M.shape == (dim, dim), "factor matrices must be (d*T, d*T)"

# Generate all noise samples at once (two consecutive draws from the same
# generator, so the X and Y samples use different noise).
noise1 = rng_d1.normal(size=(n_sample_plot, dim))
noise2 = rng_d1.normal(size=(n_sample_plot, dim))

# Apply transformations: each row becomes a Gaussian vector with covariance A (resp. B),
# reshaped to (n_sample, T, d).
# Despite the "_increments" name, these values are used directly as the path
# values below (no cumulative sum is taken).
X_increments = (noise1 @ L.T).reshape(n_sample_plot, T, d)
Y_increments = (noise2 @ M.T).reshape(n_sample_plot, T, d)

# Prepend zeros along the time axis so every path starts at 0: shape (n_sample, T + 1, d).
X_paths = np.concatenate([np.zeros((n_sample_plot, 1, d)), X_increments], axis=1)
Y_paths = np.concatenate([np.zeros((n_sample_plot, 1, d)), Y_increments], axis=1)

### Compute Nested Distance (Multi-Dimensional Framework for $\mathbb{R}^{1\cdot T}$)

In [None]:
# Adapt empirical measures: run both path samples through the multi-dimensional
# uniform-grid helper, which returns the adapted paths and their weights.
adapted_X, adapted_weights_X = multidim_uniform_empirical_grid_measure(X_paths, use_weights=True)
adapted_Y, adapted_weights_Y = multidim_uniform_empirical_grid_measure(Y_paths, use_weights=True)

# Build trees from the adapted paths and weights
adapted_tree_1 = multidim_build_tree_from_paths(adapted_X, adapted_weights_X)
adapted_tree_2 = multidim_build_tree_from_paths(adapted_Y, adapted_weights_Y)

# Compute nested distance; the depth is read from the first tree only.
max_depth = multidim_get_depth(adapted_tree_1)
# Time only the solver call. perf_counter() is a monotonic high-resolution
# clock, so the interval is not affected by wall-clock adjustments
# (start_time / end_time are therefore not epoch timestamps).
start_time = time.perf_counter()
distance_pot = multidim_compute_nested_distance(adapted_tree_1, adapted_tree_2, max_depth, power=2)
end_time = time.perf_counter()

print("Nested distance multi dim:", distance_pot)
print("Computation time: {:.4f} seconds".format(end_time - start_time))

### Compute Nested Distance (Original Code for $\mathbb{R}^{T}$)

In [None]:
# Adapt empirical measures with the original one-dimensional code path.
# Dropping the trailing axis only works while d == 1: np.squeeze(axis=-1)
# raises if that axis has length other than 1 (e.g. after the d = 2 cells ran).
X, Y = np.squeeze(X_paths, axis=-1), np.squeeze(Y_paths, axis=-1)
adapted_X, adapted_weights_X = uniform_empirical_grid_measure(X, use_weights=True)
adapted_Y, adapted_weights_Y = uniform_empirical_grid_measure(Y, use_weights=True)

# Build trees
adapted_tree_1 = build_tree_from_paths(adapted_X, adapted_weights_X)
adapted_tree_2 = build_tree_from_paths(adapted_Y, adapted_weights_Y)

# Compute nested distance; the depth is read from the first tree only.
max_depth = get_depth(adapted_tree_1)
# Time only the solver call with the monotonic high-resolution clock, so the
# measured interval is not affected by wall-clock adjustments.
start_time = time.perf_counter()
distance_pot = compute_nested_distance_parallel(adapted_tree_1, adapted_tree_2, max_depth, power=2)
elapsed_time_pot = time.perf_counter() - start_time

print("Nested distance single dim:", distance_pot)
print("Computation time: {:.4f} seconds".format(elapsed_time_pot))

### Theoretical Nested Distance

In [None]:
# Theoretical benchmark: both samples are generated from zero-mean noise,
# so the two mean vectors are zero; A and B are the covariances built above.
a = np.zeros(dim)
b = np.zeros(dim)
distance_aw2 = adapted_wasserstein_squared_multidim(a, A, b, B, d, T)

print("Adapted Wasserstein Squared Distance for custom Gaussian process:", distance_aw2)

## For d = 2

In [None]:
# Parameters
d = 2
T = 2
dim = d * T
n_sample_plot = 100

# L and M are reused from the d=1 cell; the products below need them to be
# (dim, dim), so fail early with a clear message if that cell used another size.
assert L.shape == (dim, dim) and M.shape == (dim, dim), "factor matrices must be (d*T, d*T)"

# Seeded, cell-local generator so a fresh "Restart & Run All" reproduces the
# same d=2 samples without touching NumPy's global random state.
seed_d2 = 1
rng_d2 = np.random.default_rng(seed_d2)

# Generate all noise samples at once (two consecutive draws, so X and Y use
# different noise).
noise1 = rng_d2.normal(size=(n_sample_plot, dim))
noise2 = rng_d2.normal(size=(n_sample_plot, dim))

# Apply transformations and reshape to (n_sample, T, d); with this reshape the
# flat index is time-major (entry t*d + k is coordinate k at time step t).
# Despite the "_increments" name, these values are used directly as path values.
X_increments = (noise1 @ L.T).reshape(n_sample_plot, T, d)
Y_increments = (noise2 @ M.T).reshape(n_sample_plot, T, d)

# Prepend zeros along the time axis so every path starts at 0: shape (n_sample, T + 1, d).
X_paths = np.concatenate([np.zeros((n_sample_plot, 1, d)), X_increments], axis=1)
Y_paths = np.concatenate([np.zeros((n_sample_plot, 1, d)), Y_increments], axis=1)

In [None]:
# Adapt empirical measures: run both d=2 path samples through the uniform-grid
# helper, which returns the adapted paths and their weights.
adapted_X, adapted_weights_X = multidim_uniform_empirical_grid_measure(X_paths, use_weights=True)
adapted_Y, adapted_weights_Y = multidim_uniform_empirical_grid_measure(Y_paths, use_weights=True)

# Build trees
adapted_tree_1 = multidim_build_tree_from_paths(adapted_X, adapted_weights_X)
adapted_tree_2 = multidim_build_tree_from_paths(adapted_Y, adapted_weights_Y)

# Compute nested distance; the depth is read from the first tree only.
max_depth = multidim_get_depth(adapted_tree_1)
# Time only the solver call with the monotonic high-resolution clock, so the
# measured interval is not affected by wall-clock adjustments.
start_time = time.perf_counter()
distance_pot = multidim_nested_optimal_transport_loop_parallel(adapted_tree_1, adapted_tree_2, max_depth, power=2, n_processes=1)
end_time = time.perf_counter()

print("Nested distance multi dim:", distance_pot)
print("Computation time: {:.4f} seconds".format(end_time - start_time))

In [None]:
# Adapt empirical measures, this time with the k-means based helper instead of
# the uniform grid; it returns the adapted paths and their weights.
# NOTE(review): if the k-means helper draws random initialisations, it should be
# seeded for reproducibility — confirm against its implementation.
adapted_X, adapted_weights_X = multidim_empirical_k_means_measure_new(X_paths, use_weights=True)
adapted_Y, adapted_weights_Y = multidim_empirical_k_means_measure_new(Y_paths, use_weights=True)

# Build trees
adapted_tree_1 = multidim_build_tree_from_paths(adapted_X, adapted_weights_X)
adapted_tree_2 = multidim_build_tree_from_paths(adapted_Y, adapted_weights_Y)

# Compute nested distance; the depth is read from the first tree only.
max_depth = multidim_get_depth(adapted_tree_1)
# Time only the solver call with the monotonic high-resolution clock, so the
# measured interval is not affected by wall-clock adjustments.
start_time = time.perf_counter()
distance_pot = multidim_nested_optimal_transport_loop_parallel(adapted_tree_1, adapted_tree_2, max_depth, power=2, n_processes=1)
end_time = time.perf_counter()

print("Nested distance multi dim:", distance_pot)
print("Computation time: {:.4f} seconds".format(end_time - start_time))

In [None]:
# Theoretical benchmark for d = 2: zero mean vectors of length dim = d * T,
# with the covariances A and B currently in the namespace.
a = np.zeros(dim)
b = np.zeros(dim)
distance_aw2 = adapted_wasserstein_squared_multidim(a, A, b, B, d, T)

print("Adapted Wasserstein Squared Distance for custom Gaussian process:", distance_aw2)

# Comparing my implementation vs. the other implementation

In [6]:
from optimal_code.utils_multidim import *
from optimal_code.optimal_solver_multidim import *

In [1]:
# Parameters
d = 2
T = 2
dim = d * T
n_sample_plot = 150

# L and M are the fixed lower-triangular factor matrices defined in the
# "Generate Paths for d=1" cell (they are not random), and np comes from the
# first import cell — both cells must have been run before this one.
# NOTE(review): this global seed may be redundant given the explicit ``seed=``
# arguments below — confirm against Lmatrix2paths_flat.
np.random.seed(42)

# Generate noise and compute paths; A and B are overwritten with the second
# value returned by the helper.
# NOTE(review): both calls pass seed=1. If the helper seeds its noise from this
# value, X and Y are driven by the same noise — confirm that this is intended.
X, A = Lmatrix2paths_flat(L, n_sample_plot, d, T, seed=1, verbose=True)
Y, B = Lmatrix2paths_flat(M, n_sample_plot, d, T, seed=1, verbose=True)

# Report the arrays generated in this cell. (X_paths / Y_paths are leftovers
# from the earlier cells and do not describe X / Y.)
print("X shape:", X.shape)
print("Y shape:", Y.shape)

NameError: name 'np' is not defined

In [None]:
# Theoretical benchmark for zero mean vectors and the covariances A and B.
a = np.zeros(dim)
b = np.zeros(dim)
distance_aw2 = adapted_wasserstein_squared_multidim(a, A, b, B, d, T)

print("Adapted Wasserstein Squared Distance for custom Gaussian process:", distance_aw2)

# Grid width passed to the path projection below.
fixed_grid = 0.1

# Project both path samples onto the fixed grid.
adaptedX = path2adaptedpath_multidim(X, delta_n=fixed_grid)
adaptedY = path2adaptedpath_multidim(Y, delta_n=fixed_grid)

# Flatten every path into rows of d-dimensional points, sample by sample.
# Per the original note, adaptedX is (T+1, n_sample, d), so swapping the first
# two axes gives (n_sample, T+1, d) before flattening to (-1, d).
points_X = np.transpose(adaptedX, (1, 0, 2)).reshape(-1, d)
points_Y = np.transpose(adaptedY, (1, 0, 2)).reshape(-1, d)

# q2v: the distinct grid points of both samples (unique rows); row i is the
# point carrying quantization index i.
q2v = np.unique(np.vstack((points_X, points_Y)), axis=0)
# Rows are turned into tuples so they are hashable; v2q maps point -> index.
q2v_list = list(map(tuple, q2v))
v2q = {point: index for index, point in enumerate(q2v_list)}

# Replace every grid point by its index, giving one row of indices per sample path.
qX = np.array([
    [v2q[tuple(point)] for point in path]
    for path in np.transpose(adaptedX, (1, 0, 2))
])
qY = np.array([
    [v2q[tuple(point)] for point in path]
    for path in np.transpose(adaptedY, (1, 0, 2))
])

# Hand the transposed index arrays to the sorting helper
# (lexicographic sort, per the original note).
qX = sort_qpath_multidim(qX.T)
qY = sort_qpath_multidim(qY.T)

# Build the conditional distributions and their list representations.
mu_x = qpath2mu_x_multidim(qX)
nu_y = qpath2mu_x_multidim(qY)
mu_x_c, mu_x_cn, mu_x_v, mu_x_w, mu_x_cumn = list_repr_mu_x_multidim(mu_x, q2v)
nu_y_c, nu_y_cn, nu_y_v, nu_y_w, nu_y_cumn = list_repr_mu_x_multidim(nu_y, q2v)

# Numerical adapted Wasserstein squared distance; only the solver call is timed.
t_start = time.perf_counter()
AW_2square = nested2_parallel_multidim(
    mu_x_cn, mu_x_v, mu_x_w, mu_x_cumn,
    nu_y_cn, nu_y_v, nu_y_w, nu_y_cumn,
    n_processes=4, power=2,
)
elapsed = time.perf_counter() - t_start

Adapted Wasserstein Squared Distance for custom Gaussian process: 22.02336710106509


100%|██████████| 114/114 [00:00<00:00, 141.05it/s]
100%|██████████| 115/115 [00:00<00:00, 141.68it/s]
100%|██████████| 115/115 [00:00<00:00, 134.70it/s]
100%|██████████| 114/114 [00:00<00:00, 127.96it/s]
100%|██████████| 1/1 [00:00<00:00, 26.34it/s]


In [None]:
# Report the numerical estimate with a label, together with the solver time
# measured (as `elapsed`) in the previous cell, like the other timing cells do.
print("Numerical adapted Wasserstein squared distance:", AW_2square)
print("Computation time: {:.4f} seconds".format(elapsed))

8.172473333333329


In [None]:
# Adapt empirical measures on the same fixed grid as the quantized computation above.
# NOTE(review): the earlier cells pass arrays laid out as (n_sample, T+1, d) to this
# helper, while the quantization cell's note describes adaptedX as (T+1, n_sample, d).
# If X / Y share that layout they need their first two axes swapped here — confirm.
adapted_X, adapted_weights_X = multidim_uniform_empirical_grid_measure(X, delta_n=fixed_grid, use_weights=True)
adapted_Y, adapted_weights_Y = multidim_uniform_empirical_grid_measure(Y, delta_n=fixed_grid, use_weights=True)

# Build trees
adapted_tree_1 = multidim_build_tree_from_paths(adapted_X, adapted_weights_X)
adapted_tree_2 = multidim_build_tree_from_paths(adapted_Y, adapted_weights_Y)

# Compute nested distance; the depth is read from the first tree only.
# WARNING: this is expensive. In the recorded run the progress bar showed
# 191902 depth-1 pairs at roughly 40 pairs/s (over an hour remaining) and the
# worker processes ended with tracebacks before finishing.
max_depth = multidim_get_depth(adapted_tree_1)
# Time only the solver call with the monotonic high-resolution clock, so the
# measured interval is not affected by wall-clock adjustments.
start_time = time.perf_counter()
distance_pot = multidim_nested_optimal_transport_loop_parallel(adapted_tree_1, adapted_tree_2, max_depth, power=2, n_processes=4)
end_time = time.perf_counter()

print("Nested distance multi dim:", distance_pot)
print("Computation time: {:.4f} seconds".format(end_time - start_time))

Depth 1:  11%|█         | 20209/191902 [08:24<1:10:00, 40.87pair/s]Process SpawnProcess-24:
Process SpawnProcess-23:
Process SpawnProcess-22:
Process SpawnProcess-25:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/multiprocessing/queues.py", line 102, in get
    with self._rlock:
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/multiprocessing/synchronize