In [None]:
from cscg_actions_orig import *
from ged import *
from util import *

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import random
import os
import pickle

# Seed Python's built-in RNG so any use of the `random` module is repeatable.
# NOTE(review): NumPy's global RNG is not seeded in this cell — confirm whether the
# data-generation helpers seed it themselves.
random.seed(42)

In [None]:
# import numpy as np

def generate_custom_colors(num_unique_observations):
    """
    Return the first `num_unique_observations` rows of a fixed RGB palette.

    The palette holds nine integer RGB triples (0-255 values). When more rows are
    requested than the palette contains, the palette is repeated end-to-end until
    enough rows exist, so colours recur in the same order.

    Parameters
    ----------
    num_unique_observations : int
        Number of colour rows wanted.

    Returns
    -------
    numpy.ndarray
        Integer array with one RGB triple per row.
    """
    palette = np.array([
        [214, 214, 214],
        [85, 35, 157],
        [253, 252, 144],
        [114, 245, 144],
        [151, 38, 20],
        [239, 142, 192],
        [214, 134, 48],
        [140, 194, 250],
        [72, 160, 162],
    ])
    palette_size = len(palette)

    # Enough predefined colours: a plain slice is all that is needed.
    if num_unique_observations <= palette_size:
        return palette[:num_unique_observations]

    # Otherwise stack whole copies of the palette (the base copy plus enough
    # repeats to cover the shortfall) and trim to the requested length.
    shortfall = num_unique_observations - palette_size
    copies = shortfall // palette_size + 2
    return np.tile(palette, (copies, 1))[:num_unique_observations]


In [None]:
# Smaller 6x5 layout, kept for reference (faster to train). To use it, uncomment it
# and comment out the larger room below.
# room = np.array(
#     [
#         [1, 2, 3, 0, 3,],
#         [1, 1, 3, 2, 3,],
#         [1, 1, 2, 0, 1,],
#         [0, 2, 1, 1, 3,],
#         [3, 3, 1, 0, 1,],
#         [2, 1, 2, 3, 3,],
#     ]
# )

# Larger 6x8 layout currently in use; it takes longer to train on than the small one.
# Each entry is an integer label 0-3 (presumably the observation emitted at that cell).
room = np.array(
    [
        [1, 2, 3, 0, 3, 1, 1, 1],
        [1, 1, 3, 2, 3, 2, 3, 1],
        [1, 1, 2, 0, 1, 2, 1, 0],
        [0, 2, 1, 1, 3, 0, 0, 2],
        [3, 3, 1, 0, 1, 0, 3, 0],
        [2, 1, 2, 3, 3, 3, 2, 0],
    ]
)


# Plot the layout of the room, one colour per label.
# NOTE(review): `custom_colors` is not assigned anywhere in the visible notebook
# (generate_custom_colors only builds a local array); it presumably arrives through
# one of the star imports. Confirm, or define it explicitly before this cell.
cmap = colors.ListedColormap(custom_colors[-4:])
plt.matshow(room, cmap=cmap)
plt.title('Spatial environment (room)')
# savefig does not create missing directories, so make sure the target folder exists.
os.makedirs("figures", exist_ok=True)
plt.savefig("figures/rectangular_room_layout.pdf")

In [None]:
import numpy as np

# Ground-truth connectivity of the room: one node per grid cell, a directed edge to
# every in-bounds 4-neighbour, plus a self-loop on every boundary cell.
# The grid size is read from `room` so the matrix always matches the layout in use
# (it was previously hard-coded to 6x8).
rows, cols = room.shape
total_nodes = rows * cols
adjacency_matrix = np.zeros((total_nodes, total_nodes), dtype=int)


def node_index(row, col):
    """Convert 2D grid coordinates to the row-major 1D node index."""
    return row * cols + col


# Fill the adjacency matrix
for row in range(rows):
    for col in range(cols):
        index = node_index(row, col)

        # Self-connection for boundary nodes (presumably: a move into a wall
        # leaves the agent where it is).
        if row in (0, rows - 1) or col in (0, cols - 1):
            adjacency_matrix[index, index] = 1

        # Connect to the right, left, upper and lower neighbour when it exists.
        for d_row, d_col in ((0, 1), (0, -1), (-1, 0), (1, 0)):
            n_row, n_col = row + d_row, col + d_col
            if 0 <= n_row < rows and 0 <= n_col < cols:
                adjacency_matrix[index, node_index(n_row, n_col)] = 1

# Optionally print the matrix
print(adjacency_matrix)
gt_A = adjacency_matrix


In [None]:
# Benchmark sweep: for every (clone count, seed) pair, load the pickled model, score
# the graph it recovers against the ground-truth adjacency `gt_A`, and record how many
# nodes the recovered graph has.
#
# A model file that is missing or cannot be unpickled is recorded as NaN for that
# (clone count, seed) slot. Previously the loop fell through and silently re-scored
# the model left over from the previous iteration (or raised NameError on the very
# first one). NaN keeps the result lists rectangular; use nan-aware reductions
# (np.nanmean / np.nanstd) downstream if any slot is missing.
a, x, rc = datagen_structured_obs_room(room, length=50000)
results_nclones = []     # kept for compatibility; never filled in this cell
results_totalclone = []  # [nclone][seed] -> node count of the recovered graph
results_ged = []         # [nclone][seed] -> value returned by graph_edit_distance_nx_norm
nclones = np.arange(10, 251, 60)
seeds = np.arange(0, 100, 1)
currmodel = 'spatial'
n_orig_clones = 70
folderpath = 'models'
for nclone in nclones:
    seed_results_totalclone = []
    seed_results_ged = []
    for seed in seeds:
        filename = 'model_spatial_benchmark_nclone_' + str(nclone) + '_seed_' + str(seed) + '.pkl'
        fullpath = os.path.join(folderpath, filename)

        # Start every iteration without a model so a failed load can never reuse a stale one.
        chmm = None
        if os.path.isfile(fullpath):
            try:
                print(filename)
                # NOTE: pickle.load can execute arbitrary code; only load model
                # files produced by a trusted training run.
                with open(fullpath, 'rb') as file:
                    chmm = pickle.load(file)
            except (pickle.UnpicklingError, EOFError) as e:
                print("failed")

        if chmm is None:
            print("no usable model, recording NaN:", fullpath)
            seed_results_totalclone.append(np.nan)
            seed_results_ged.append(np.nan)
            continue

        chmm.pseudocount = 2e-3

        # One image per clone count; every seed overwrites the same file.
        temp_output_file = f"rectangular_room_graph_large_benchmark_num_clones_{nclone}.png"

        ged = graph_edit_distance_nx_norm(chmm, x, a, gt_A, output_file=temp_output_file, cmap=cmap)
        print(ged)

        graph, v, g = plot_graph(chmm, x, a, output_file=temp_output_file, cmap=cmap)

        # len(v) is used as the clone count for this model.
        n_clones = len(v)
        seed_results_totalclone.append(n_clones)
        seed_results_ged.append(ged)
    results_totalclone.append(seed_results_totalclone)
    results_ged.append(seed_results_ged)
            
        

In [None]:
import pandas as pd
# Stack the per-clone-count result lists into a 2-D array:
# one row per entry of `nclones`, one column per seed.
data_array = np.array(results_totalclone)
print("Original shape:", data_array.shape)

# Flip the axes so each row is a seed and each column a clone count.
reshaped_array = data_array.T
print("Reshaped array shape:", reshaped_array.shape)

# Write the table to CSV; index=False leaves out the row-index column.
csv_filename = 'spatial_results_benchmark.csv'
df = pd.DataFrame(reshaped_array)
df.to_csv(csv_filename, index=False)


In [None]:
# Persist the per-clone-count, per-seed scores gathered in `results_ged`.
np.save('ged_norm_spatial_benchmark.npy', results_ged)

In [None]:
# NOTE(review): this repeats the np.save call of the preceding cell verbatim and
# rewrites the same file — likely a leftover duplicate cell.
np.save('ged_norm_spatial_benchmark.npy', results_ged)

In [None]:
# Mean recovered node count per clone setting (axis=1 averages over the seeds in each row).
np.mean(results_totalclone,axis=1)

In [None]:
# Spread of the recovered node count per clone setting (standard deviation over seeds).
np.std(results_totalclone,axis=1)

## Comparing the graph edit distance with the ground-truth graph

In [None]:
# import numpy as np
import igraph as ig


def grid_to_directed_igraph(grid):
    """
    Build a directed igraph.Graph for the 4-neighbour lattice of a 2D array.

    Only the shape of `grid` is used; its values are ignored. Cells are numbered
    row-major (row * n_cols + col), and each pair of horizontally or vertically
    adjacent cells is joined by an edge in both directions. No self-loops are added.

    Parameters
    ----------
    grid : numpy.ndarray
        Two-dimensional array whose shape defines the lattice.

    Returns
    -------
    igraph.Graph
        Directed graph with one vertex per cell.
    """
    n_rows, n_cols = grid.shape
    n_cells = n_rows * n_cols
    links = np.zeros((n_cells, n_cells), dtype=int)

    for cell in range(n_cells):
        r, c = divmod(cell, n_cols)

        # Linking east and south symmetrically also covers west and north,
        # because each neighbouring pair is visited once from its upper/left cell.
        if c + 1 < n_cols:
            east = cell + 1
            links[cell, east] = 1
            links[east, cell] = 1

        if r + 1 < n_rows:
            south = cell + n_cols
            links[cell, south] = 1
            links[south, cell] = 1

    # Hand igraph a boolean adjacency matrix as nested lists.
    return ig.Graph.Adjacency((links > 0).tolist(), mode=ig.ADJ_DIRECTED)

# Room layout used to build the ground-truth graph. It has the same 6x8 values as
# the `room` defined earlier in the notebook and rebinds that name.
room = np.array([
    [1, 2, 3, 0, 3, 1, 1, 1],
    [1, 1, 3, 2, 3, 2, 3, 1],
    [1, 1, 2, 0, 1, 2, 1, 0],
    [0, 2, 1, 1, 3, 0, 0, 2],
    [3, 3, 1, 0, 1, 0, 3, 0],
    [2, 1, 2, 3, 3, 3, 2, 0],
])

# Ground-truth lattice graph; later cells compare learned graphs against it.
directed_igraph = grid_to_directed_igraph(room)
print("Directed Graph Representation with igraph:")
print(directed_igraph)

In [None]:
import igraph as ig
from queue import PriorityQueue

def heuristic(graph1, graph2):
    """
    Size-based gap between two graphs.

    Returns the absolute difference in vertex count plus the absolute difference
    in edge count. Both arguments only need `vs` and `es` attributes that support
    len(). The result is 0 whenever the two graphs have equal sizes, regardless of
    how they are wired.
    """
    size_gaps = (
        len(graph1.vs) - len(graph2.vs),
        len(graph1.es) - len(graph2.es),
    )
    return sum(abs(gap) for gap in size_gaps)

def graph_edit_distance_igraph(graph1, graph2, max_iterations=10000, max_vertices=20):
    """
    Roughly estimate an edit cost from `graph1` to `graph2` with a best-first search.

    This is a size-based approximation, not a true graph edit distance:

    * The search stops as soon as a candidate has the same number of vertices and
      edges as `graph2` (``heuristic(...) == 0``); the wiring is never compared.
    * Each expansion tries at most three unit-cost edits: append one vertex,
      delete vertex 0, and (only when the vertex counts already match) delete
      edge 0. Edges are never added, so a start graph with too few edges cannot
      reach the target and the call returns infinity.

    Parameters
    ----------
    graph1 : igraph.Graph
        Start graph. It is not modified; edits are applied to copies.
    graph2 : igraph.Graph
        Target graph whose vertex and edge counts define the goal.
    max_iterations : int, optional
        Maximum number of frontier expansions before giving up.
    max_vertices : int, optional
        Candidates with this many vertices or more are not grown further.
        Defaults to 20, the limit that used to be hard-coded.

    Returns
    -------
    int or float
        Number of edits applied to reach a graph of matching size, or
        ``float('inf')`` if none was found within the limits.
    """
    frontier = PriorityQueue()
    pending = {}  # graph_id -> candidate graph still waiting in the frontier
    next_id = 0

    def enqueue(graph, cost):
        # The unique id breaks priority ties, so graphs themselves are never compared.
        nonlocal next_id
        pending[next_id] = graph
        frontier.put((cost + heuristic(graph, graph2), cost, next_id))
        next_id += 1

    enqueue(graph1, 0)

    iterations = 0
    while not frontier.empty() and iterations < max_iterations:
        iterations += 1
        _, actual_cost, current_id = frontier.get()
        # Each id is queued exactly once, so the graph can be released on expansion
        # instead of being kept alive for the rest of the search.
        current_graph = pending.pop(current_id)

        # Check if we reached the target size
        if heuristic(current_graph, graph2) == 0:
            return actual_cost

        new_cost = actual_cost + 1
        n_vertices = len(current_graph.vs)

        # Grow by one vertex, up to the configured cap.
        if n_vertices < max_vertices:
            grown = current_graph.copy()
            grown.add_vertices(1)
            enqueue(grown, new_cost)

        # Shrink by removing the first vertex (one vertex edit per expansion).
        if n_vertices > 1:
            shrunk = current_graph.copy()
            shrunk.delete_vertices(0)
            enqueue(shrunk, new_cost)

        # Once vertex counts agree, try dropping the first edge (one edge edit per expansion).
        if n_vertices == len(graph2.vs) and len(current_graph.es) > 0:
            pruned = current_graph.copy()
            pruned.delete_edges(0)
            enqueue(pruned, new_cost)

    return float('inf')  # No matching-size graph found within the constraints

# Example usage would involve defining specific igraph instances to test this function.

# # Example usage with igraph.Graph
# room_graph = ig.Graph.Adjacency((room > 0).tolist())
# target_graph = ig.Graph.Adjacency((np.random.randint(0, 2, room.shape) > 0).tolist())

# ged = graph_edit_distance_igraph(room_graph, target_graph)
# print(f"Graph Edit Distance: {ged}")


In [None]:
def spectral_similarity(graph1, graph2):
    """
    Euclidean distance between the Laplacian spectra of two graphs.

    For each graph the Laplacian returned by ``.laplacian()`` is converted to an
    array, its eigenvalues are computed and sorted in ascending order, and the
    shorter spectrum is zero-padded at the end to the length of the longer one.
    Despite the name, the result is a distance: 0 means the spectra coincide.

    NOTE(review): the zeros are appended after the ascending sort, so they are
    compared against the other graph's largest eigenvalues — confirm this
    alignment is intended.
    """
    spectra = [
        np.sort(np.linalg.eigvals(np.array(graph.laplacian())))
        for graph in (graph1, graph2)
    ]

    # Bring both spectra to a common length by right-padding with zeros.
    width = max(len(spectrum) for spectrum in spectra)
    padded_1, padded_2 = (
        np.pad(spectrum, (0, width - len(spectrum)), 'constant')
        for spectrum in spectra
    )

    return np.linalg.norm(padded_1 - padded_2)

In [None]:
# Scratch check: preview the values 0, 10, ..., 90 (the same expression is assigned to `seeds` in the next cell).
np.arange(0,100,10)

In [None]:
# Alpha sweep: for every (alpha, seed) pair, load the pickled model, rebuild its graph
# with plot_graph, and measure the spectral distance to the ground-truth lattice graph
# `directed_igraph`.
#
# A model file that is missing or cannot be unpickled is recorded as NaN for that
# (alpha, seed) slot. Previously the loop fell through and silently re-scored the
# model left over from the previous iteration (or raised NameError on the very first
# one). Use nan-aware reductions downstream if any slot is missing.
nclone = 1  # only used to label the output image below
a, x, rc = datagen_structured_obs_room(room, length=50000)
geds = []  # [alpha][seed] -> spectral distance
alphas = np.arange(0.1, 1, 0.1)
seeds = np.arange(0, 100, 10)  # modified to save time
currmodel = 'spatial'
n_orig_clones = 70
folderpath = 'models'
for alpha in alphas:
    seed_geds = []
    for seed in seeds:
        # NOTE(review): str(alpha) on np.arange floats can produce names such as
        # "0.30000000000000004" — confirm this matches how the models were saved.
        filename = 'model_' + currmodel + '_alpha_' + str(alpha) + '_seed_' + str(seed) + '.pkl'
        fullpath = os.path.join(folderpath, filename)

        # Start every iteration without a model so a failed load can never reuse a stale one.
        chmm = None
        if os.path.isfile(fullpath):
            try:
                print(filename)
                # NOTE: pickle.load can execute arbitrary code; only load model
                # files produced by a trusted training run.
                with open(fullpath, 'rb') as file:
                    chmm = pickle.load(file)
            except (pickle.UnpicklingError, EOFError) as e:
                print("failed")

        if chmm is None:
            print("no usable model, recording NaN:", fullpath)
            seed_geds.append(np.nan)
            continue

        chmm.pseudocount = 0.01

        # The same image file is overwritten on every iteration.
        temp_output_file = f"rectangular_room_graph_large_num_clones_{nclone}.png"

        graph, v, g = plot_graph(chmm, x, a, output_file=temp_output_file, cmap=cmap)
        score = spectral_similarity(g, directed_igraph)
        seed_geds.append(score)
        print(score)
    geds.append(seed_geds)

In [None]:
# `geds` is indexed [alpha][seed], so axis=0 averages across alphas and yields one value per seed.
# NOTE(review): the benchmark summary earlier uses axis=1 (per setting, across seeds) — confirm axis=0 is intended here.
np.mean(geds,axis=0)


In [None]:
# Standard deviation along the same axis as the mean in the preceding cell (across alphas, one value per seed).
# NOTE(review): confirm axis=0 rather than axis=1 is intended.
np.std(geds,axis=0)

In [None]:
# Spot check on `g`, the graph left over from the last iteration of the alpha sweep loop.
spectral_similarity(g, directed_igraph)