In [None]:
def simulate_agents(graph, num_agents, num_traversals, seed):
    """Run random agent traversals over ``graph`` and tabulate the outcomes.

    For each of ``num_traversals`` rounds, ``num_agents`` agents are drawn
    with a random start node, a random end node and a random color
    preference ('red' or 'green'). A draw whose start and end nodes carry
    different ``'color'`` attributes is skipped, so the result may hold fewer
    than ``num_agents * num_traversals`` rows.

    Args:
        graph: Graph exposing ``nodes()`` and ``nodes[n]['color']``.
        num_agents: Number of agents drawn per round.
        num_traversals: Number of rounds.
        seed: Label copied into the ``'Seed'`` column. It is NOT used to seed
            the random number generator here; the draws come from the global
            ``random`` state as it stands when the function is called.

    Returns:
        pandas.DataFrame with one row per completed traversal. The ``'Agent'``
        column holds the agent's index within its round.

    Raises:
        IndexError: If ``graph`` has no nodes and at least one agent is drawn
            (``random.choice`` rejects an empty sequence).
    """
    # The node list is built once instead of twice per agent.
    # NOTE(review): assumes Agent.traverse_graph does not add or remove
    # nodes while the simulation runs — confirm.
    nodes = list(graph.nodes())
    color_options = ['red', 'green']

    agent_data = []
    for _round in range(num_traversals):
        for agent_index in range(num_agents):
            # Draw order (start, end, preference) is kept fixed so the
            # sequence of random numbers consumed stays the same.
            start_node = random.choice(nodes)
            end_node = random.choice(nodes)
            color_preference = random.choice(color_options)

            # Only traverse between nodes of the same color.
            if graph.nodes[start_node]['color'] != graph.nodes[end_node]['color']:
                continue

            agent = Agent(start_node, end_node, color_preference)
            path, preferred_color_count = agent.traverse_graph(graph)
            agent_data.append({
                'Seed': seed,
                'Agent': agent_index,
                'Start Node': start_node,
                'End Node': end_node,
                'Color Preference': color_preference,
                'Path Length': len(path),
                'Preferred Color Count': preferred_color_count,
                'Path': path,
                'Visited Shapes': agent.visited_shapes,
                'Visited Shininess': agent.visited_shininess,
            })
    return pd.DataFrame(agent_data)

# Example usage: simulate agents on 50 graphs, one graph per seed.
num_nodes = 10
colors = ['red', 'green']
edge_probability = 0.9
results = []

for seed in range(50):
    # seed and seed + 1000 are passed as the generator's last two arguments.
    G = generate_colored_graph(
        num_nodes, colors, edge_probability, seed, seed + 1000
    )
    plot_colored_graph(G)
    # 5 agents per round, 10 rounds of traversals.
    df_agents = simulate_agents(G, num_agents=5, num_traversals=10, seed=seed)
    results.append(df_agents)

# Stack the per-seed frames into one DataFrame with a fresh index.
full_results = pd.concat(results, ignore_index=True)
print(full_results.head())


In [None]:
#  full_results is already loaded and available
def process_clustering(df):
    # Convert paths to sets of nodes for Jaccard distance calculation
    df['Path_Set'] = df['Path'].apply(lambda x: set(x))
    
    # Define the function to calculate the Jaccard distance
    def jaccard_distance(set1, set2):
        if len(set1.union(set2)) == 0:
            return 1  # Return 1 if both sets are empty, implying maximum distance
        return 1 - len(set1.intersection(set2)) / len(set1.union(set2))

    results = []
    linkage_methods = ['single', 'complete', 'average', 'ward']
    n_clusters = 2

    # Generate the distance matrix
    for seed, group in df.groupby('Seed'):
        n = len(group)
        distance_matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                distance_matrix[i, j] = jaccard_distance(group.iloc[i]['Path_Set'], group.iloc[j]['Path_Set'])
                distance_matrix[j, i] = distance_matrix[i, j]

        # Convert the distance matrix to a format suitable for linkage
        distance_matrix_condensed = squareform(distance_matrix)

        for method in linkage_methods:
            Z = linkage(distance_matrix_condensed, method=method)
            clusters = fcluster(Z, t=n_clusters, criterion='maxclust')
            group['Cluster'] = clusters

            # Print dendrogram for visual analysis
#             plt.figure(figsize=(10, 6))
#             plt.title(f'Seed {seed} - {method} Clustering Dendrogram')
#             dendrogram(Z)
#             plt.show()

            # Create summary for each cluster
            group['Colors_String'] = group['Mapped Colors'].apply(lambda x: ', '.join(map(str, x)))
            color_counts_per_cluster = group.groupby('Cluster')['Colors_String'].value_counts().unstack(fill_value=0)

            results.append({
                'Seed': seed,
                'Linkage Method': method,
                'Color Distribution': color_counts_per_cluster,
                'Number of Clusters': n_clusters
            })

    # Create a DataFrame from the results for easier comparison and analysis
    result_df = pd.DataFrame(results)
    return result_df

    # Display results
#     for result in results:
#         print(f"Results for Seed {result['Seed']} using {result['Linkage Method']}:")
#         print(result['Color Distribution'])
        

# Cluster the combined results; a copy is passed so full_results stays untouched.
result_df = process_clustering(full_results.copy())
