In [None]:
!pip install giotto-tda

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import DBSCAN
import gtda.mapper as mp
import random
from sklearn.cluster import KMeans
import networkx as nx
import igraph as ig

In [None]:
#Original Code
# Build a three-point toy data set on a single vertical line and draw its Mapper graph.
x = np.random.uniform(0, 100)
y_ranges = [(0, 10), (0, 10), (1000, 1450)]  # (low, high) bounds, sampled in this order
data_y = [np.random.uniform(low, high) for low, high in y_ranges]
data_x = [x] * len(data_y)  # every point shares the same x-coordinate

for coords in (data_y, data_x):
    print(coords)
plt.scatter(data_x, data_y, color='blue', marker='o')

# One (x, y) row per point.
data = np.array([data_x, data_y]).T
for item in (data, data.shape):
    print(item)

# Mapper pipeline: filter on column 1 of `data` (the y values), cover the
# filter range with 3 intervals overlapping by 5%, and put all points of an
# interval into a single KMeans cluster.
pipe = mp.make_mapper_pipeline(
    filter_func=mp.Projection(columns=[1]),
    cover=mp.CubicalCover(n_intervals=3, overlap_frac=0.05),
    clusterer=KMeans(n_clusters=1),
)

# Colour the nodes by the y values and render the static graph.
y_values = data[:, 1]
fig = mp.plot_static_mapper_graph(pipe, data, color_data=y_values)
fig.show(config={'scrollZoom': False})


In [None]:
#Lone Tester, doesn't go through every interval
# Repeats a small Mapper experiment `num_iterations` times on freshly sampled
# points and records how many connected components each Mapper graph has.
num_iterations = 25
connected_components_counts = []  # component count of every run, in run order
# count_k = number of runs whose Mapper graph had exactly k connected components.
# Runs with more than three components are not tallied in these counters.
count_0 = 0
count_1 = 0
count_2 = 0
count_3 = 0

# The Mapper configuration is the same for every run, so it is built once
# outside the loop; fit_transform below refits it on each new sample.
filter_func = mp.Projection(columns=[1])  # Define filter function (column 1 of data = y values)
cover = mp.CubicalCover(n_intervals=3, overlap_frac=0.5)  # Define cover
clusterer = KMeans(n_clusters=1)  # Define clusterer

# Initialise pipeline
pipe = mp.make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer
)

for _ in range(num_iterations):
    # Six points sharing one x-coordinate: two low (y < 10), one high
    # (y >= 1000) and three anywhere in between.
    x = np.random.uniform(0, 100)
    y1 = np.random.uniform(0, 10)
    y2 = np.random.uniform(0, 10)
    y3 = np.random.uniform(1000, 1450)
    y4 = np.random.uniform(0, 1000)
    y5 = np.random.uniform(0, 1000)
    y6 = np.random.uniform(0, 1000)

    data_y = [y1, y2, y3, y4, y5, y6]
    data_x = [x, x, x, x, x, x]
    data = np.vstack((data_x, data_y)).T  # one (x, y) row per point

    graph = pipe.fit_transform(data)

    # Count the components BEFORE tallying. Previously the tally ran first and
    # read `num_connected_components` from the previous iteration (or raised
    # NameError on a fresh kernel), so every counter was shifted by one run.
    num_connected_components = len(graph.clusters())
    connected_components_counts.append(num_connected_components)

    if num_connected_components == 0:
        count_0 += 1
    elif num_connected_components == 1:
        count_1 += 1
    elif num_connected_components == 2:
        count_2 += 1
    elif num_connected_components == 3:
        count_3 += 1

average_connected_components = np.mean(connected_components_counts)
print(f"Average number of connected components over {num_iterations} runs: {average_connected_components}")





In [None]:
#Diagnostics for Individual Graphs
# Uses `pipe`, `data` and the counters left in the kernel by an earlier cell.
y_values = data[:, 1]
fig = mp.plot_static_mapper_graph(pipe, data, color_data=y_values)
fig.show(config={'scrollZoom': False})

# Summary of the experiment, printed one line at a time.
summary_lines = (
    f"Average number of connected components over {num_iterations} runs: {average_connected_components}",
    f"Number of Mapper Graphs with 0 connected components: {count_0}",
    f"Number of Mapper Graphs with 1 connected components: {count_1}",
    f"Number of Mapper Graphs with 2 connected components: {count_2}",
    f"Number of Mapper Graphs with 3 connected components: {count_3}",
)
for line in summary_lines:
    print(line)

In [None]:
#Overlap Fraction vs. Interval Code/Graph
# For each overlap fraction, run the Mapper experiment `num_iterations` times
# and record the mean number of connected components, then print and plot it.
num_iterations = 100
overlap_fractions = np.arange(0.01, 0.96, 0.05)
average_connected_components = []  # (overlap fraction, mean component count) pairs

# (low, high) bounds of the six y values, in the order they are sampled.
y_ranges = [(0, 10), (0, 10), (1000, 1450), (0, 1450), (0, 1450), (0, 1450)]

for overlap_frac in overlap_fractions:
    connected_components_counts = []
    for _ in range(num_iterations):
        # Six points on one vertical line; x is drawn first, then the y values.
        x = np.random.uniform(0, 100)
        data_y = [np.random.uniform(low, high) for low, high in y_ranges]
        data_x = [x] * len(data_y)
        data = np.array([data_x, data_y]).T  # one (x, y) row per point

        # Mapper pipeline: filter on column 1 of `data`, 6 cover intervals
        # with the current overlap, one KMeans cluster per interval.
        pipe = mp.make_mapper_pipeline(
            filter_func=mp.Projection(columns=[1]),
            cover=mp.CubicalCover(n_intervals=6, overlap_frac=overlap_frac),
            clusterer=KMeans(n_clusters=1),
        )

        graph = pipe.fit_transform(data)
        num_connected_components = len(graph.clusters())
        connected_components_counts.append(num_connected_components)

    mean_for_overlap = np.mean(connected_components_counts)
    average_connected_components.append((overlap_frac, mean_for_overlap))


# Text report, one line per overlap fraction.
for overlap_frac, average in average_connected_components:
    print(f"Average number of connected components for overlap fraction {overlap_frac:.2f}: {average}")


# Line plot of the same pairs.
overlap_values = [pair[0] for pair in average_connected_components]
averages = [pair[1] for pair in average_connected_components]
plt.plot(overlap_values, averages, marker='o')
plt.xlabel('Overlap Fraction')
plt.ylabel('Average Number of Connected Components')
plt.title('Average Number of Connected Components vs. Overlap Fraction')
plt.grid()
plt.show()


In [None]:
#Distance Matrix Calculations
# Same Mapper experiment as the "Lone Tester" cell, but additionally stores the
# pairwise distance matrix of every sampled point set.
# This import was commented out, which left `distance_matrix` undefined below.
from scipy.spatial import distance_matrix

num_iterations = 25
connected_components_counts = []  # component count of every run, in run order
# One pairwise-distance matrix per run. This list was appended to without ever
# being created, which raised NameError on a fresh kernel.
distance_matrices = []
# Placeholders kept from the original cell; only dm0 is ever written below.
# NOTE(review): dm1..dm6 look like planned per-count stores - confirm or remove.
dm0 = []
dm1 = []
dm2 = []
dm3 = []
dm4 = []
dm5 = []
dm6 = []
# count_k = number of runs whose Mapper graph had exactly k connected components.
count_0 = 0
count_1 = 0
count_2 = 0
count_3 = 0

# The Mapper configuration is the same for every run, so it is built once
# outside the loop; fit_transform below refits it on each new sample.
filter_func = mp.Projection(columns=[1])  # Define filter function (column 1 of data = y values)
cover = mp.CubicalCover(n_intervals=3, overlap_frac=0.5)  # Define cover
clusterer = KMeans(n_clusters=1)  # Define clusterer

# Initialise pipeline
pipe = mp.make_mapper_pipeline(
    filter_func=filter_func,
    cover=cover,
    clusterer=clusterer
)

for _ in range(num_iterations):
    # Six points sharing one x-coordinate: two low (y < 10), one high
    # (y >= 1000) and three anywhere in between.
    x = np.random.uniform(0, 100)
    y1 = np.random.uniform(0, 10)
    y2 = np.random.uniform(0, 10)
    y3 = np.random.uniform(1000, 1450)
    y4 = np.random.uniform(0, 1000)
    y5 = np.random.uniform(0, 1000)
    y6 = np.random.uniform(0, 1000)

    data_y = [y1, y2, y3, y4, y5, y6]
    data_x = [x, x, x, x, x, x]
    data = np.vstack((data_x, data_y)).T  # one (x, y) row per point

    # Distances between every pair of sampled points (6 x 6 for six points).
    dist_matrix = distance_matrix(data, data)
    distance_matrices.append(dist_matrix)

    graph = pipe.fit_transform(data)

    # Count the components BEFORE tallying. Previously the tally ran first and
    # read `num_connected_components` from the previous iteration (or from
    # another cell), so every counter was shifted by one run.
    num_connected_components = len(graph.clusters())
    connected_components_counts.append(num_connected_components)

    if num_connected_components == 0:
        count_0 += 1
        dm0 = dist_matrix  # reuse the matrix computed above instead of recomputing it
    elif num_connected_components == 1:
        count_1 += 1
    elif num_connected_components == 2:
        count_2 += 1
    elif num_connected_components == 3:
        count_3 += 1

average_connected_components = np.mean(connected_components_counts)
print(f"Average number of connected components over {num_iterations} runs: {average_connected_components}")


In [None]:
# Report cell: relies on `distance_matrices`, `connected_components_counts`,
# `num_iterations`, `pipe` and `data` left in the kernel by an earlier cell.
print("Distance matrices for each iteration:")
for i, dist_matrix in enumerate(distance_matrices):
    # Matrix first, then the sum of all of its entries.
    print(f"Distance Matrix for Iteration {i + 1}:\n{dist_matrix}\n")
    print(np.sum(dist_matrix))

# Mean component count over all recorded runs.
average_connected_components = np.mean(connected_components_counts)
print(f"Average number of connected components over {num_iterations} runs: {average_connected_components}")

# Draw the Mapper graph for the current `data`, coloured by column 1.
y_values = data[:, 1]
fig = mp.plot_static_mapper_graph(pipe, data, color_data=y_values)
fig.show(config={'scrollZoom': False})