#### NOT SURE THIS WORKS -> will check later Now for markov use the other code in utils_solver

In [12]:
import numpy as np
import ot  # POT library for optimal transport

# Helper: convert a value to a hashable type.
def to_hashable(x):
    if isinstance(x, np.ndarray):
        return tuple(x.tolist())
    return x

###############################################################################
# 1. MARKOV (RECOMBINING) TREE DATA STRUCTURE
###############################################################################

class MarkovTreeNode:
    """
    A node for a recombining (Markov) tree.
    
    Each node holds a state value and a dictionary of children.
    The keys of the dictionary are hashable versions of the child state.
    Each entry is a tuple (child_node, cumulative_weight) that will later be
    normalized to represent the transition probability.
    """
    def __init__(self, value):
        self.value = value
        self.children = {}  # key -> (child_node, cumulative_weight)

    def add_child(self, child_node, weight):
        key = to_hashable(child_node.value)
        if key in self.children:
            existing_node, cum_weight = self.children[key]
            self.children[key] = (existing_node, cum_weight + weight)
        else:
            self.children[key] = (child_node, weight)

def build_markov_tree_from_paths(sample_paths, weights):
    """
    Builds a recombining (Markov) tree from sample paths.
    
    For a Markov process the next state depends only on the current state.
    Therefore, if two paths share the same state at a given time step, they are merged.
    
    Parameters:
      sample_paths (list of lists): Each inner list is a path (sequence of states).
      weights (list or array): A weight associated with each sample path.
      
    Returns:
      MarkovTreeNode: The root node of the recombining tree.
    """
    if len(sample_paths) == 0:
        raise ValueError("No sample paths provided.")
    
    # Ensure all sample paths start at the same state.
    initial_state = sample_paths[0][0]
    for path in sample_paths:
        if to_hashable(path[0]) != to_hashable(initial_state):
            raise ValueError("All sample paths must have the same initial state.")
    
    # Create the root node.
    root = MarkovTreeNode(initial_state)
    
    # Insert each path into the tree.
    for path, weight in zip(sample_paths, weights):
        current_node = root
        for state in path[1:]:
            key = to_hashable(state)
            if key in current_node.children:
                child_node, cum_weight = current_node.children[key]
                current_node.children[key] = (child_node, cum_weight + weight)
            else:
                child_node = MarkovTreeNode(state)
                current_node.children[key] = (child_node, weight)
            current_node = child_node

    # Normalize the children weights to obtain transition probabilities.
    def normalize_transitions(node):
        total_weight = sum(prob for (_, prob) in node.children.values())
        if total_weight > 0:
            for key in node.children:
                child, cum_weight = node.children[key]
                node.children[key] = (child, cum_weight / total_weight)
                normalize_transitions(child)
    normalize_transitions(root)
    return root

###############################################################################
# 2. RECURSIVE ADAPTED WASSERSTEIN DISTANCE (NESTED) FOR MARKOV PROCESSES
###############################################################################

from tqdm import tqdm

def nested_transport_cost(
    node1, node2, current_depth, max_depth, power, method="lp", lambda_reg=1e-2, pbar=None
):
    """
    Recursively computes the nested (adapted Wasserstein) cost between two nodes.
    
    Parameters:
      node1, node2: MarkovTreeNode objects at the same depth.
      current_depth: Current depth (starting at 0).
      max_depth: Maximum depth (number of transitions) to consider.
      power: Exponent for the cost (e.g., 1 for absolute difference, 2 for squared difference).
      method: (unused here; placeholder if one wants to switch solvers).
      lambda_reg: Regularization parameter (unused in the LP solver version).
      pbar: Progress bar instance (used for tracking recursion depth).

    Returns:
      Total nested cost between node1 and node2.
    """
    # Initialize the progress bar only at the first call
    if current_depth == 0 and pbar is None:
        pbar = tqdm(total=max_depth, desc="Computing Nested Transport", unit="depth")

    # Base case: if maximum depth reached, no further cost.
    if current_depth == max_depth:
        return 0.0

    # Retrieve children lists: each element is (key, (child_node, prob))
    children1 = list(node1.children.items())
    children2 = list(node2.children.items())

    # If both nodes are terminal, return 0.
    if not children1 and not children2:
        return 0.0

    n1 = len(children1)
    n2 = len(children2)
    cost_matrix = np.zeros((n1, n2))

    # Compute the cost matrix between all pairs of children.
    for i, (_, (child1, prob1)) in enumerate(children1):
        for j, (_, (child2, prob2)) in enumerate(children2):
            # Immediate cost for scalar states (d=1)
            immediate_cost = abs(child1.value - child2.value) ** power
            # Add the nested cost (future steps)
            nested_cost = nested_transport_cost(child1, child2, current_depth + 1, max_depth, power, method, lambda_reg, pbar)
            cost_matrix[i, j] = immediate_cost + nested_cost

    # Update progress bar **only once per depth level**
    if current_depth == 0:
        pbar.update(1)

    # Get the children distributions (transition probabilities).
    p = np.array([prob for (_, (child, prob)) in children1])
    q = np.array([prob for (_, (child, prob)) in children2])

    # Solve the optimal transport problem between p and q.
    transport_plan = ot.lp.emd(p, q, cost_matrix)
    total_cost = np.sum(transport_plan * cost_matrix)

    # Close progress bar at the end of recursion
    if current_depth == 0 and pbar is not None:
        pbar.close()

    return total_cost


def compute_adapted_wasserstein_markov(tree1_root, tree2_root, max_depth, power=1, method="lp", lambda_reg=1e-2):
    """
    Computes the adapted Wasserstein (nested) distance between two Markov processes.
    
    The distance is computed recursively from the root nodes.
    
    Parameters:
      tree1_root, tree2_root: Root nodes of the two Markov trees.
      max_depth: Maximum depth (number of transitions) to consider.
      power: Exponent in the cost (1 for absolute difference, 2 for squared difference, etc.).
      method: Which OT solver to use (here we use "lp" for the linear programming solver).
      lambda_reg: Regularization parameter (not used for the LP solver).
    
    Returns:
      The adapted Wasserstein distance.
    """
    return nested_transport_cost(tree1_root, tree2_root, current_depth=0, max_depth=max_depth, power=power, method=method, lambda_reg=lambda_reg)



In [13]:
# Modified uniform_empirical_grid_measure: supports both 2D (d=1) and 3D (d>1) sample paths.
def uniform_empirical_grid_measure(data, delta_n=None, use_weights=False):
    # If data is 2D, each path is a 1D sequence (d = 1).
    if data.ndim == 2:
        num_path, t = data.shape
        if delta_n is None:
            delta_n = 1 / (num_path ** (1 / t))
        grid_func = lambda x: np.floor(x / delta_n + 0.5) * delta_n
        quantized_data = grid_func(data)
        quantized_data[:, 0] = data[:, 0]
        if not use_weights:
            return quantized_data
        else:
            unique_paths, indices, counts = np.unique(
                quantized_data, axis=0, return_inverse=True, return_counts=True
            )
            weights = counts / num_path
            return unique_paths, weights
    # If data is 3D, each path has shape (T+1, d).
    elif data.ndim == 3:
        num_path, t, d = data.shape
        if delta_n is None:
            # For multi-dimensional paths, one may adjust the exponent as needed.
            delta_n = 1 / (num_path ** (1 / t))
        grid_func = lambda x: np.floor(x / delta_n + 0.5) * delta_n
        quantized_data = grid_func(data)
        quantized_data[:, 0, :] = data[:, 0, :]
        if not use_weights:
            return quantized_data
        else:
            # Convert each sample path (2D array) into a hashable tuple of tuples.
            quantized_paths = [tuple(map(tuple, quantized_data[i])) for i in range(num_path)]
            # Preserve the original order.
            unique_paths_list = []
            counts_list = []
            for p in quantized_paths:
                if p in unique_paths_list:
                    counts_list[unique_paths_list.index(p)] += 1
                else:
                    unique_paths_list.append(p)
                    counts_list.append(1)
            weights = np.array(counts_list) / num_path
            # Convert unique paths back to numpy arrays.
            unique_paths = np.array([np.array(up) for up in unique_paths_list])
            return unique_paths, weights
    else:
        raise ValueError("Data must be either 2D (d=1) or 3D (d>1).")

In [14]:
# Set normalization flag (set to False to use L0 and M0 directly)
normalize = False

# Define the parameters for the 1D Markov process at 4 time steps
L0 = 1.0
L1 = 2.0
L2 = 1.5
L3 = 1.2

# Off-diagonals (transition coefficients)
A1 = 0.5
A2 = 0.3
A3 = 0.2

# Construct the 4x4 lower-triangular matrix with the Markovian (banded) structure:
L_markov = np.array([
    [L0, 0,   0,   0],
    [A1, L1,  0,   0],
    [0,  A2, L2,   0],
    [0,  0,  A3,  L3]
])
A0 = L_markov @ L_markov.T
L = L_markov / np.sqrt(np.trace(A0)) if normalize else L_markov
A = L @ L.T

# Define the parameters for the 1D Markov process at 4 time steps
M0 = 6.0
M1 = 7.0
M2 = 2.5
M3 = 3.2

# Off-diagonals (transition coefficients)
B1 = 0.7
B2 = 0.9
B3 = 0.1

# Construct the 4x4 lower-triangular matrix with the Markovian (banded) structure:
M_markov = np.array([
    [M0, 0,   0,   0],
    [B1, M1,  0,   0],
    [0,  B2, M2,   0],
    [0,  0,  B3,  M3]
])
M0 = M_markov @ M_markov.T
M = M_markov / np.sqrt(np.trace(M0)) if normalize else M_markov
B = M @ M.T



# Set dimension parameters: for d=2, T=3 (thus total dimension = 6)
d = 1
T = 4
dim = d * T  # 4

n_sample_plot = 500  # number of sample paths

X_paths = []
Y_paths = []
for _ in range(n_sample_plot):
    # Generate noise as a vector in R^{dim}
    noise1 = np.random.normal(size=(dim,))  # shape: (6,)
    noise2 = np.random.normal(size=(dim,))  # shape: (6,)
    # Obtain increments: these are vectors in R^{dim} (6,)
    X_increments = L @ noise1  # shape: (6,)
    Y_increments = M @ noise2  # shape: (6,)
    # Reshape into (T, d) = (3, 2)
    X_increments = X_increments.reshape((T, d))
    Y_increments = Y_increments.reshape((T, d))
    # (Optionally, if you still want to prepend a zero step, do it here.)
    X_sample = np.vstack([np.zeros((1, d)), X_increments])
    Y_sample = np.vstack([np.zeros((1, d)), Y_increments])

    X_paths.append(X_sample)
    Y_paths.append(Y_sample)
    
X_paths = np.array(X_paths)  # shape: (160, 3, 2)
Y_paths = np.array(Y_paths)  # shape: (160, 3, 2)

# Adapt the empirical measure using uniform grid quantization.
adapted_X, adapted_weights_X = uniform_empirical_grid_measure(X_paths, use_weights=True)
adapted_Y, adapted_weights_Y = uniform_empirical_grid_measure(Y_paths, use_weights=True)

# Build the recombining (Markov) trees.
tree1 = build_markov_tree_from_paths(adapted_X, adapted_weights_X)
tree2 = build_markov_tree_from_paths(adapted_Y, adapted_weights_Y)

# Set maximum depth: here, each path has 4 states (3 transitions).
max_depth = 6
power = 2  # using absolute difference as cost

# Compute the adapted Wasserstein distance between the two Markov processes.
adapted_distance = compute_adapted_wasserstein_markov(tree1, tree2, max_depth, power)
print("Adapted Wasserstein Distance (Markov Processes):", adapted_distance)

  cost_matrix[i, j] = immediate_cost + nested_cost
Computing Nested Transport:  17%|█▋        | 1/6 [00:55<04:38, 55.64s/depth]

Adapted Wasserstein Distance (Markov Processes): 73.18363159811074





In [15]:
import sys
import os

notebooks_path = os.path.abspath(os.getcwd()) 
src_path = os.path.abspath(os.path.join(notebooks_path, "../src"))

if src_path not in sys.path:
    sys.path.insert(0, src_path)

from benchmark_value_gaussian.Comp_AWD2_Gaussian import *


# Set dimension parameters: d = 2, T = 3 (thus total dimension = 6)
d = 1
T = 4
dim = d * T

# Define zero mean vectors for both processes in R^(d*T)
a = np.zeros(dim)
b = np.zeros(dim)

# Compute the adapted Wasserstein squared distance for the custom Gaussian process
distance_aw2 = adapted_wasserstein_squared(a, A, b, B, d, T)
print("Adapted Wasserstein Squared Distance for custom Gaussian process:", distance_aw2)

Adapted Wasserstein Squared Distance for custom Gaussian process: 55.41000000000001
