In [9]:
from helper import *
from sklearn.metrics import pairwise_distances # for computing manhattan distance of padded vectors
from scipy.cluster.hierarchy import linkage, dendrogram  # implementing hieararchical clustering and plotting it
from matplotlib import pyplot as plt
%matplotlib widget

#### List all the trajectories

In [10]:
# Each trajectory is a (phi, chi, replica) triple of strings; the loading
# loop reads the three fields in exactly that order.
trajectories = [
    ('0.52', '2.4', '1'),
    ('0.52', '2.4', '3'),
]

# Entries before this index are sliced off each loaded graph sequence.
start_slice = 20

#### Run Priority BFS

In [3]:
vectors = []
for trajectory in trajectories:
    # every entry is a (phi, chi, replica) triple
    phi, chi, replica = trajectory

    # load the graphs for this trajectory, skipping the first `start_slice` entries
    graphs = get_graph_from_file(phi, chi, replica)[start_slice:]

    # traverse the graphs and collect the resulting vectors
    vectors.extend(generate_vectors(graphs))

> 80 points loaded from file BR0.52-CHI2.4-R1.file.
> 80 points loaded from file BR0.52-CHI2.4-R3.file.


#### Define a distance function
- performs layerwise padding
- computes the L1 (Manhattan) distance of the 2 padded vectors

In [4]:
def layerwise_distance(vector1, vector2):
    """
    Distance between two vectors after layerwise padding.

    args:
        vector1 (ndarray): first vector
        vector2 (ndarray): second vector
    return:
        distance: scalar Manhattan (L1) distance between the 2 vectors
            after they are padded layerwise
    """

    # Drop the zeros at the back of each vector before re-padding them layerwise.
    # NOTE(review): only *trailing* zeros are removed here, while the vectors
    # given to linkage are produced by `front_pad` -- confirm which end
    # `front_pad` actually pads, otherwise the padding is never stripped.
    trimmed = [np.trim_zeros(vec, 'b') for vec in (vector1, vector2)]

    # helper-provided layerwise padding of the two trimmed vectors
    padded = generate_padded_vectors(trimmed)

    # pairwise_distances yields the full distance matrix; entry [0][1] is the
    # distance between the first and the second vector
    distance_matrix = pairwise_distances(padded, metric='manhattan')
    return distance_matrix[0][1]
    

`linkage` only accepts vectors of the same length,

so I am front-padding all the vectors to the max dimension (experimentally)

and observing the results.

In [5]:
# length of the longest vector; handed to front_pad as the target dimension
max_dimension = max(len(vector) for vector in vectors)

# one front-padded copy per input vector, in the original order
front_padded_vectors = [front_pad(vector, max_dimension) for vector in vectors]

In [6]:
# Hierarchical clustering driven by the custom layerwise distance.
# 'single' is scipy's default linkage method; it is only spelled out here.
clusters = linkage(
    front_padded_vectors,
    method='single',
    metric=layerwise_distance,
)

In [11]:
# draw the full dendrogram for the computed linkage on an explicit axes
fig, ax = plt.subplots(figsize=(13, 10))
ax.set_title('Hierarchical Clustering Dendrogram')
ax.set_xlabel('sample index')
ax.set_ylabel('distance')
dendrogram(
    Z=clusters,
    ax=ax,
    leaf_rotation=90.,  # rotates the x axis labels
    leaf_font_size=8.,  # font size for the x axis labels
)
plt.show()



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …