In [1]:
%load_ext autoreload
%load_ext line_profiler
%autoreload 2
%matplotlib ipympl

import sys
import numpy as np
import time

sys.path.append('./src')
verbose = False

random_seed = np.random.randint(100)
random_seed

75

In [2]:
from src.utils_solver import Lmatrix2paths, adapted_empirical_measure, adapted_wasserstein_squared, quantization, nested, plot_V

n_sample = 100
normalize = False

print("mu")
L = np.array([[1, 0, 0], [2, 4, 0], [3, 2, 1]])
X,A = Lmatrix2paths(L, n_sample, seed = random_seed)

print("nu")
M = np.array([[1, 0, 0], [2, 3, 0], [3, 1, 2]])
Y,B = Lmatrix2paths(M, n_sample, seed = random_seed)

mu
Cholesky:
[[1 0 0]
 [2 4 0]
 [3 2 1]]
Covariance:
[[ 1  2  3]
 [ 2 20 14]
 [ 3 14 14]]
nu
Cholesky:
[[1 0 0]
 [2 3 0]
 [3 1 2]]
Covariance:
[[ 1  2  3]
 [ 2 13  9]
 [ 3  9 14]]


In [3]:
from adapted_empirical_measure.AEM_grid import uniform_empirical_grid_measure
from trees.Build_trees_from_paths import build_tree_from_paths
from trees.TreeAnalysis import get_depth
from awd_trees.Nested_Dist_Algo import compute_nested_distance


In [4]:
# Compute uniform adapted empirical grid measures with weights
adapted_X, adapted_weights_X = uniform_empirical_grid_measure(X.T, delta_n=0.1, use_weights=True)
adapted_Y, adapted_weights_Y = uniform_empirical_grid_measure(Y.T, delta_n=0.1, use_weights=True)

# Build trees from the adapted paths
adapted_tree_1 = build_tree_from_paths(adapted_X, adapted_weights_X)
adapted_tree_2 = build_tree_from_paths(adapted_Y, adapted_weights_Y)

# Compute the nested (adapted optimal transport) distance and measure execution time
max_depth = get_depth(adapted_tree_1)
start_time = time.perf_counter()
distance_pot = compute_nested_distance(
    adapted_tree_1,
    adapted_tree_2,
    max_depth,
    method="solver_lp_pot",
    return_matrix=False,
    lambda_reg=0,
    power=2,
)
end_time = time.perf_counter()

print("Numerical AW_2^2 (Adapted OT):", distance_pot)
print("Elapsed time (Adapted OT): {:.4f} seconds".format(end_time - start_time))

Depth 2: 100%|██████████| 99/99 [00:00<00:00, 273.65it/s]
Depth 1: 100%|██████████| 38/38 [00:00<00:00, 695.78it/s]
Depth 0: 100%|██████████| 1/1 [00:00<00:00, 1428.09it/s]

Numerical AW_2^2 (Adapted OT): 3.2470999999999988
Elapsed time (Adapted OT): 0.4301 seconds





In [5]:
adaptedX = adapted_empirical_measure(X, delta_n = 0.1)
adaptedY = adapted_empirical_measure(Y, delta_n = 0.1)

q2v, v2q, mu_x, nu_y, q2v_x, v2q_x, q2v_y, v2q_y = quantization(adaptedX, adaptedY, markovian=False)

start_time = time.perf_counter()
AW_2square, V = nested(mu_x, nu_y, v2q_x, v2q_y, q2v, markovian=False)
end_time = time.perf_counter()

dist_bench = adapted_wasserstein_squared(A, B)
print("Theoretical AW_2^2: ", dist_bench)
print("Numerical AW_2^2: ", AW_2square)
print("Elapsed time (Adapted OT): {:.4f} seconds".format(end_time - start_time))

Quantization ......
Number of distinct values in global quantization:  145
Number of condition subpaths of mu_x
Time 0: 1
Time 1: 38
Time 2: 99
Number of condition subpaths of nu_y
Time 0: 1
Time 1: 38
Time 2: 98
Nested backward induction .......


Timestep 2: 100%|██████████| 99/99 [00:00<00:00, 115.28it/s]
Timestep 1: 100%|██████████| 38/38 [00:00<00:00, 205.78it/s]
Timestep 0: 100%|██████████| 1/1 [00:00<00:00, 1270.62it/s]

Theoretical AW_2^2:  3.0
Numerical AW_2^2:  3.247100000000001
Elapsed time (Adapted OT): 1.0484 seconds





In [11]:
import time
import multiprocessing
import concurrent.futures
def do_sth(second):
    print(f"Sleep {second} second")
    time.sleep(second)
    return f"Done Sleeping {second} second..."

secs = [5, 4, 3, 2, 1]

start = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor() as executor:
    results = executor.map(do_sth, secs)
finish = time.perf_counter()
print(f"Finished in {round(finish - start,2)} seconds")

Finished in 0.07 seconds


Process SpawnProcess-11:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'do_sth' on <module '__main__' (built-in)>
Process SpawnProcess-12:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Library/Framework

In [None]:
from tqdm import tqdm
import ot
import concurrent

def nested_mp(mu_x, nu_y, v2q_x, v2q_y, q2v, markovian=False, verbose=True):
    T = len(mu_x)
    square_cost_matrix = (q2v[None, :] - q2v[None, :].T) ** 2

    V = [np.zeros([len(v2q_x[t]), len(v2q_y[t])]) for t in range(T)]
    if verbose:
        print("Nested backward induction .......")
    for t in range(T - 1, -1, -1):
        print(len(mu_x[t]), len(nu_y[t]))
        tqdm_bar = tqdm(mu_x[t].items()) if verbose else mu_x[t].items()

        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = executor.map(do_sth, secs)

        for k1, v1 in tqdm_bar:
            if verbose:
                tqdm_bar.set_description(f"Timestep {t}")
            for k2, v2 in nu_y[t].items():
                # list of probability of conditional distribution mu_x
                w1 = list(v1.values())
                w1 = np.array(w1) / sum(w1)
                # list of probability of conditional distribution nu_y
                w2 = list(v2.values())
                w2 = np.array(w2) / sum(w2)
                # list of quantized values of conditional distribution mu_x (nu_y)
                q1 = list(v1.keys())
                q2 = list(v2.keys())
                # square cost of the values indexed by quantized values: |q2v[q1] - q2v[q2]|^2
                cost = square_cost_matrix[np.ix_(q1, q2)]

                # At T-1: add V[T] = 0, otherwise add the V[t+1] already computed
                if t < T - 1:
                    if (
                        markovian
                    ):  # If markovian, for condition path (k1,q), only the last value q matters, and V[t+1] is indexed by the time re-quantization of q
                        q1s = [v2q_x[t + 1][q] for q in v1.keys()]
                        q2s = [v2q_y[t + 1][q] for q in v2.keys()]
                    else:  # If non-markovian, for condition path (k1,q), the V[t+1] is indexed by the time re-quantization of tuple (k1,q)
                        q1s = [v2q_x[t + 1][k1 + (q,)] for q in v1.keys()]
                        q2s = [v2q_y[t + 1][k2 + (q,)] for q in v2.keys()]
                    cost += V[t + 1][np.ix_(q1s, q2s)]

                # solve the OT problem with cost |x_t-y_t|^2 + V_{t+1}(x_{1:t},y_{1:t})
                V[t][v2q_x[t][k1], v2q_y[t][k2]] = ot.emd2(w1, w2, cost)

    AW_2square = V[0][0, 0]
    return AW_2square, V

In [8]:
AW_2square, V = nested_mp(mu_x, nu_y, v2q_x, v2q_y, q2v, markovian=False)

Nested backward induction .......
99 98


Timestep 2: 100%|██████████| 99/99 [00:00<00:00, 109.10it/s]


38 38


Timestep 1: 100%|██████████| 38/38 [00:00<00:00, 180.52it/s]


1 1


Timestep 0: 100%|██████████| 1/1 [00:00<00:00, 914.19it/s]
