In [3]:
from sklearn.metrics import silhouette_score
import pandas as pd
import numpy as np

def generate_colored_graph(num_nodes, colors, edge_probability, node_color_seed, edge_seed):
    G = nx.Graph()
    random.seed(node_color_seed)
    edge_random = random.Random(edge_seed)
    color_weights = {color: edge_random.uniform(0.5, 1.5) for color in colors}
    shapes = ['circle', 'square', 'triangle']
    shininess = ['shiny', 'not_shiny']

    for i in range(1, num_nodes):
        weight = color_weights[colors[i % len(colors)]]
        color = colors[i % len(colors)]
        shape = random.choice(shapes)
        shiny = random.choice(shininess)
        G.add_node(i, weight=weight, color=color, shape=shape, shiny=shiny)

    for i in range(1, num_nodes):
        for j in range(i + 1, num_nodes):
            if edge_random.random() < edge_probability:
                if G.nodes[i]['color'] == G.nodes[j]['color']:
                    G.add_edge(i, j, weight=edge_random.uniform(0.5, 1.5))
                else:
                    G.add_edge(i, j, weight=edge_random.uniform(0.1, 0.5))

    return G

def plot_colored_graph(G):
    shape_to_marker = {'circle': 'o', 'square': 's', 'triangle': '^'}
    node_shapes = {'circle': 'o', 'square': 's', 'triangle': '^'}
    node_colors = [G.nodes[i]['color'] for i in G.nodes()]
    pos = nx.spring_layout(G, seed=42)
    node_markers = [node_shapes.get(G.nodes[i]['shape'], 'o') for i in G.nodes()]

    for node, shape, color in zip(G.nodes, node_markers, node_colors):
        nx.draw_networkx_nodes(G, pos, nodelist=[node], node_size=300, node_shape=shape, node_color=color)

    nx.draw_networkx_edges(G, pos)
    nx.draw_networkx_labels(G, pos)
    plt.axis('off')
    plt.show()

class Agent:
    def __init__(self, start_node, end_node, color_preference):
        self.current_node = start_node
        self.color_preference = color_preference
        self.visited_nodes = set()
        self.visited_shapes = []
        self.visited_shininess = []
        self.start_node = start_node
        self.end_node = end_node

    def find_next_node(self, graph):
        neighboring_nodes = list(graph.neighbors(self.current_node))
        unvisited_neighbors = [node for node in neighboring_nodes if node not in self.visited_nodes]
        neighbors_with_preference = [node for node in unvisited_neighbors if graph.nodes[node]['color'] == self.color_preference]

        if neighbors_with_preference:
            next_node = random.choice(neighbors_with_preference)
        else:
            next_node = None

        return next_node

    def traverse_graph(self, graph):
        path = [self.current_node]

        while self.current_node != self.end_node:
            next_node = self.find_next_node(graph)
            if next_node is None:
                break

            self.visited_nodes.add(next_node)
            path.append(next_node)
            self.visited_shapes.append(graph.nodes[next_node]['shape'])
            self.visited_shininess.append(graph.nodes[next_node]['shiny'])
            self.current_node = next_node

        return path

def simulate_agents(graph, num_agents, num_traversals):
    agent_data = []

    for traversal in range(num_traversals):
        success_count = [0] * num_agents

        for agent_id in range(num_agents):
            start_node = random.choice(list(graph.nodes()))
            end_node = random.choice(list(graph.nodes()))
            color_preference = random.choice(colors)
            agent = Agent(start_node, end_node, color_preference)
            start_time = time.time()
            path = agent.traverse_graph(graph)
            end_time = time.time()
            time_taken = end_time - start_time
            reached_destination = agent.current_node == agent.end_node
            if reached_destination:
                success_count[agent_id] += 1

            average_speed = len(path) / time_taken if time_taken > 0 else None
            preferred_color_count = sum(1 for node in path if graph.nodes[node]['color'] == color_preference)
            distinct_nodes_visited = len(set(path))
            path_complexity = sum(1 for i in range(1, len(path)) if path[i] != path[i - 1])
            visited_shapes = agent.visited_shapes
            visited_shininess = agent.visited_shininess

            agent_trajectory = {
                'Agent': agent_id,
                'Start Node': start_node,
                'End Node': end_node,
                'Color Preference': color_preference,
                'Path': path,
                'Length': len(path),
                'Time Taken': time_taken,
                'reached_destination': reached_destination,
                'Preferred_Color_Count': preferred_color_count,
                'Distinct_Nodes_Visited': distinct_nodes_visited,
                'Path_Complexity': path_complexity,
                'Visited_Shapes': visited_shapes,
                'Visited_Shininess': visited_shininess
            }

            agent_data.append(agent_trajectory)

    df = pd.DataFrame(agent_data)
    return df

def hierarchical_clustering(distance_matrix, linkage_method='complete', n_clusters=3):
    Z = linkage(distance_matrix, method=linkage_method)
    clusters = fcluster(Z, n_clusters, criterion='maxclust')
    return Z, clusters

def jaccard_distance(set1, set2):
    if len(set1.union(set2)) == 0:
        return 0
    return 1 - len(set1.intersection(set2)) / len(set1.union(set2))

def evaluate_clustering_methods(num_seeds, graph_params, simulation_params):
    results = []

    for seed in range(num_seeds):
        G = generate_colored_graph(*graph_params, node_color_seed=seed, edge_seed=seed)
        df = simulate_agents(G, *simulation_params)
        filtered_df = df[df['reached_destination'] == True]
        filtered_df = filtered_df[filtered_df['Start Node'] != filtered_df['End Node']]
        filtered_df.drop("reached_destination", axis=1, inplace=True)
        filtered_df.reset_index(drop=True, inplace=True)
        
        color_mapping = {node: G.nodes[node]['color'] for node in G.nodes()}
        filtered_df['Mapped_Colors'] = filtered_df['Path'].apply(lambda path: [color_mapping[node] for node in path])
        filtered_df['Path_Set'] = filtered_df['Path'].apply(lambda x: set(x))
        
        n = len(filtered_df)
        distance_matrix = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                distance_matrix[i, j] = jaccard_distance(filtered_df.iloc[i]['Path_Set'], filtered_df.iloc[j]['Path_Set'])
                distance_matrix[j, i] = distance_matrix[i, j]

        distance_matrix_condensed = squareform(distance_matrix)
        linkage_methods = ['single', 'complete', 'average', 'ward']
        n_clusters = 2

        for linkage_method in linkage_methods:
            Z, clusters = hierarchical_clustering(distance_matrix_condensed, linkage_method, n_clusters)
            filtered_df['Cluster'] = clusters
            silhouette_avg = silhouette_score(distance_matrix, clusters, metric='precomputed')
            results.append({
                'Linkage Method': linkage_method,
                'Seed': seed,
                'Silhouette Score': silhouette_avg
            })

    results_df = pd.DataFrame(results)
    best_method = results_df.groupby('Linkage Method')['Silhouette Score'].mean().idxmax()
    best_score = results_df.groupby('Linkage Method')['Silhouette Score'].mean().max()

    return best_method, best_score, results_df



In [9]:
import networkx as nx
import matplotlib.pyplot as plt
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from sklearn.mixture import GaussianMixture
import seaborn as sns
from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import adjusted_mutual_info_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans
from scipy.spatial.distance import squareform
import time
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
from scipy.spatial.distance import squareform
from sklearn.metrics import silhouette_score


In [10]:
# Parameters for the graph generation
num_nodes = 10
colors = ['red', 'green']
edge_probability = 0.9
graph_params = (num_nodes, colors, edge_probability)

# Parameters for the simulation
num_agents = 5
num_traversals = 20
simulation_params = (num_agents, num_traversals)

# Evaluate clustering methods
best_method, best_score, results_df = evaluate_clustering_methods(50, graph_params, simulation_params)

print(f"Best Clustering Method: {best_method} with Silhouette Score: {best_score}")


ValueError: Number of labels is 1. Valid values are 2 to n_samples - 1 (inclusive)