In [1]:
import os
import sys
import numpy as np
import time
import warnings
import random

# Make the project's ``src`` directory (a sibling of the notebook's working
# directory) importable, so the project-local imports below resolve.
notebooks_path = os.path.abspath(os.getcwd())
src_path = os.path.abspath(os.path.join(notebooks_path, "../src"))
src_already_listed = src_path in sys.path
if not src_already_listed:
    # Prepend so the local sources take precedence over same-named packages.
    sys.path.insert(0, src_path)

# Import modules
from Multi_dimension.Multidimension_trees import *
from Multi_dimension.Multidimension_solver import *
from Multi_dimension.Multidimension_adapted_empirical_measure import *

from Measure_sampling.Gen_Path_and_AdaptedTrees import generate_adapted_tree
from trees.Tree_Node import *
from trees.TreeAnalysis import *
from trees.TreeVisualization import *
from trees.Save_Load_trees import *
from trees.Tree_AWD_utilities import *
from trees.Build_trees_from_paths import build_tree_from_paths

from adapted_empirical_measure.AEM_grid import *
from adapted_empirical_measure.AEM_kMeans import *
from benchmark_value_gaussian.Comp_AWD2_Gaussian import *
from awd_trees.Gurobi_AOT import *
from awd_trees.Nested_Dist_Algo import compute_nested_distance, compute_nested_distance_parallel

# Suppress warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Import custom modules from src
from utils_solver import Lmatrix2paths, adapted_empirical_measure, adapted_wasserstein_squared, quantization, nested, plot_V

In [2]:
# Pin both the stdlib and the NumPy global generators so reruns are repeatable.
verbose = False
random.seed(0)
np.random.seed(0)

# Seed forwarded to the path sampler. It is drawn from the NumPy generator
# that was just seeded with 0, so the value is itself fixed from run to run.
random_seed = np.random.randint(100)
print("Random seed for this run:", random_seed)

Random seed for this run: 44


## Generating paths (same random seed for both measures)

In [3]:
# Number of sample paths drawn for each measure
n_sample = 2000
normalize = False  # Not used explicitly here

# Lower-triangular 4x4 integer matrices handed to Lmatrix2paths
# (its output labels them "Cholesky").
L = np.array(
    [
        [1, 0, 0, 0],
        [1, 2, 0, 0],
        [1, 2, 3, 0],
        [1, 2, 3, 4],
    ]
)
M = np.array(
    [
        [1, 0, 0, 0],
        [2, 1, 0, 0],
        [3, 2, 1, 0],
        [4, 3, 2, 1],
    ]
)

# Measure "mu": sampled from L with the shared seed.
# NOTE(review): X is presumably the sampled paths and A the matching
# covariance-type matrix - confirm against Lmatrix2paths.
print("mu")
X, A = Lmatrix2paths(L, n_sample, seed=random_seed)

# Measure "nu": sampled from M with the *same* seed as "mu".
print("nu")
Y, B = Lmatrix2paths(M, n_sample, seed=random_seed)

mu
Cholesky:
[[1 0 0 0]
 [1 2 0 0]
 [1 2 3 0]
 [1 2 3 4]]
Covariance:
[[ 1  1  1  1]
 [ 1  5  5  5]
 [ 1  5 14 14]
 [ 1  5 14 30]]
nu
Cholesky:
[[1 0 0 0]
 [2 1 0 0]
 [3 2 1 0]
 [4 3 2 1]]
Covariance:
[[ 1  2  3  4]
 [ 2  5  8 11]
 [ 3  8 14 20]
 [ 4 11 20 30]]


## Theoretical distance (not particularly relevant here, since we are comparing speed rather than convergence)

### What matters here is that the three methods produce the same output, as they should each solve the discrete AOT problem exactly.

In [4]:
# Reference ("theoretical") value of AW_2^2, computed from the A and B
# returned by Lmatrix2paths for "mu" and "nu". It serves only as a benchmark
# for the numerical values printed by the later cells.
# NOTE(review): presumably a closed-form Gaussian formula - confirm in utils_solver.
dist_bench = adapted_wasserstein_squared(A, B)
print("Theoretical AW_2^2: ", dist_bench)

Theoretical AW_2^2:  30.0


## With your code

In [7]:
# Adapted empirical measures of the sampled paths, built with delta_n = 0.1.
# NOTE(review): whether adapted_empirical_measure performs a grid or a k-means
# projection is not visible from this notebook - confirm in utils_solver.
adaptedX = adapted_empirical_measure(X, delta_n = 0.1)
adaptedY = adapted_empirical_measure(Y, delta_n = 0.1)

# Quantize both measures together (non-Markovian setting); the returned
# lookup tables and conditional measures feed the nested solver below.
q2v, v2q, mu_x, nu_y, q2v_x, v2q_x, q2v_y, v2q_y = quantization(adaptedX, adaptedY, markovian=False)

# Time only the nested backward induction. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be distorted by
# system clock adjustments during this long-running call (unlike time.time()).
start_time = time.perf_counter()
AW_2square, V = nested(mu_x, nu_y, v2q_x, v2q_y, q2v, markovian=False)
elapsed_time_pot = time.perf_counter() - start_time

# Recompute the benchmark value here so the cell's report is self-contained.
dist_bench = adapted_wasserstein_squared(A, B)
print("Theoretical AW_2^2: ", dist_bench)
print("Numerical AW_2^2: ", AW_2square)
print("Elapsed time (Adapted OT): {:.4f} seconds".format(elapsed_time_pot))

Quantization ......
Number of distinct values in global quantization:  313
Number of condition subpaths of mu_x
Time 0: 1
Time 1: 64
Time 2: 1402
Time 3: 1993
Number of condition subpaths of nu_y
Time 0: 1
Time 1: 64
Time 2: 1078
Time 3: 1953
Nested backward induction .......


Timestep 3: 100%|██████████| 1993/1993 [20:55<00:00,  1.59it/s]
Timestep 2: 100%|██████████| 1402/1402 [08:49<00:00,  2.65it/s]
Timestep 1: 100%|██████████| 64/64 [00:02<00:00, 28.55it/s]
Timestep 0: 100%|██████████| 1/1 [00:00<00:00, 244.01it/s]


Theoretical AW_2^2:  30.0
Numerical AW_2^2:  28.437871180096256
Elapsed time (Adapted OT): 1787.1538 seconds


## With my code (non-parallel)

In [5]:
# Options shared by both grid projections: grid width 0.1, weighted output.
grid_kwargs = dict(delta_n=0.1, use_weights=True)

# Project the transposed sample arrays onto the grid; each call returns the
# adapted paths together with their weights.
adapted_X, adapted_weights_X = uniform_empirical_grid_measure(X.T, **grid_kwargs)
adapted_Y, adapted_weights_Y = uniform_empirical_grid_measure(Y.T, **grid_kwargs)

# Turn each weighted path set into a tree for the nested-distance solvers.
adapted_tree_1 = build_tree_from_paths(adapted_X, adapted_weights_X)
adapted_tree_2 = build_tree_from_paths(adapted_Y, adapted_weights_Y)

In [10]:
# Compute the nested (adapted optimal transport) distance between the two
# trees with the sequential solver, and measure its execution time.
max_depth = get_depth(adapted_tree_1)

# perf_counter() is a monotonic, high-resolution clock, so the benchmark
# interval is unaffected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
distance_pot = compute_nested_distance(
    adapted_tree_1,
    adapted_tree_2,
    max_depth,
    method="solver_lp_pot",
    return_matrix=False,
    lambda_reg=0,
    power=2,
)
elapsed_time_pot = time.perf_counter() - start_time

print("Numerical AW_2^2 (Adapted OT):", distance_pot)
print("Elapsed time (Adapted OT): {:.4f} seconds".format(elapsed_time_pot))

Depth 3: 100%|██████████| 1993/1993 [14:32<00:00,  2.28it/s]
Depth 2: 100%|██████████| 1402/1402 [05:15<00:00,  4.45it/s]
Depth 1: 100%|██████████| 64/64 [00:01<00:00, 55.04it/s]
Depth 0: 100%|██████████| 1/1 [00:00<00:00, 323.39it/s]

Numerical AW_2^2 (Adapted OT): 28.437871180096263
Elapsed time (Adapted OT): 1189.5874 seconds





## With my code (parallel)

In [6]:
# Compute the nested distance between the same two trees with the parallel
# solver, and measure its execution time.
max_depth_val = get_depth(adapted_tree_1)

# perf_counter() is a monotonic, high-resolution clock, so the benchmark
# interval is unaffected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
distance_pot = compute_nested_distance_parallel(adapted_tree_1, adapted_tree_2, max_depth_val, return_matrix=False, power=2)
elapsed_time_pot = time.perf_counter() - start_time

print("Nested distance Parellel:", distance_pot)
print("Computation time Parellel: {:.4f} seconds".format(elapsed_time_pot))

Depth: 3


Parallel Depth 3: 100%|██████████| 6/6 [01:29<00:00, 14.92s/it]


Depth: 2


Parallel Depth 2: 100%|██████████| 6/6 [00:46<00:00,  7.77s/it]


Depth: 1
Depth: 0
Nested distance Parellel: 28.43787118009626
Computation time Parellel: 144.0695 seconds
