In [30]:
import io
import math
import os
import random
from collections import Counter

import networkx as nx
import numpy as np
import scipy.io # <-- Import SciPy for Matrix Market format

# --- CONFIGURATION: ASSUMED LOCAL DATASET FILE (MTX) ---
FILENAME_MTX = "soc-dolphins.mtx"
FILENAME_GML = "dolphins.gml"  # Fallback dataset, read only when the .mtx file is absent

# --- 1. LOAD GRAPH FROM MTX ---
# Produces `G`, an undirected NetworkX graph, from the Matrix Market file when it
# exists, otherwise from the GML fallback file.
print(f"Loading graph from local file ({FILENAME_MTX} - Matrix Market format)...")
if not os.path.exists(FILENAME_MTX):
    # Fallback path: the .mtx file is missing, so read the GML file instead.
    print("NOTE: Trying to load directly from NetworkX as a fallback since .mtx file is missing.")
    G = nx.read_gml(FILENAME_GML)
    print("Loaded via GML fallback.")

else:
    try:
        # Read the sparse matrix from the .mtx file
        adj_matrix_sparse = scipy.io.mmread(FILENAME_MTX)

        # Convert the sparse matrix to a NetworkX graph
        G_full = nx.from_scipy_sparse_array(adj_matrix_sparse)

        # Ensure it is treated as an undirected graph
        G = nx.Graph(G_full)

    except Exception as e:
        # Raise instead of calling exit(): inside a notebook exit() requests a
        # kernel shutdown and throws away the traceback, whereas a chained
        # exception stops "Run All" at this cell and keeps the root cause visible.
        raise RuntimeError(
            f"FATAL ERROR: Could not read {FILENAME_MTX} or convert to graph. Details: {e}"
        ) from e

# --- 2. CREATE INDEX MAPPING ---
# Ensure the node IDs are sequential from 0 to N-1 for array processing:
# position i of any per-node array refers to node index_to_node[i].
node_list = sorted(G.nodes())
node_to_index = {node: i for i, node in enumerate(node_list)}
index_to_node = {i: node for node, i in node_to_index.items()}
NUM_NODES = G.number_of_nodes()
DIMENSION = NUM_NODES  # One array entry per graph node

# --- 3. HYPERPARAMETERS (Adjusted for a small graph) ---
NUM_PARTICLES = 50     # P_n: Total population size
MAX_ITERATIONS = 50    # N_max: Maximum iterations
W_MAX = 0.9
W_MIN = 0.5
C1 = 1.3
C2 = 1.5
V_MAX = 6.0
A_i = 0                # Lower bound for cluster IDs
B_i = NUM_NODES - 1    # Upper bound for cluster IDs
NUM_CLUSTERS = 5       # Target number of communities (K)
P_c = 0.2              # Ratio of crossover (20% of population)

print(f"Graph Loaded: Dolphin Network has {NUM_NODES} nodes.")

Loading graph from local file (soc-dolphins.mtx - Matrix Market format)...
Graph Loaded: Dolphin Network has 62 nodes.


In [31]:
def calculate_modularity(G, community_assignment_array):
    """Calculates the Modularity (Q) of a community assignment.

    Args:
        G: NetworkX graph to score.
        community_assignment_array: Indexable sequence with at least
            ``DIMENSION`` entries; entry ``i`` is the community label assigned
            to node ``index_to_node[i]``. Labels are used as dict keys, so they
            only need to be hashable.

    Returns:
        The value of ``nx.community.modularity`` for the induced partition.
        ``0.0`` when no communities could be formed or when networkx raises
        ``ZeroDivisionError``; ``-1.0`` when networkx rejects the grouping
        with a ``NetworkXError`` (e.g. it is not a valid partition of ``G``).

    Notes:
        Reads the module-level ``DIMENSION`` and ``index_to_node``.
    """
    # Group node labels by community ID; groups appear in order of first use.
    communities = {}
    for i in range(DIMENSION):
        communities.setdefault(community_assignment_array[i], set()).add(index_to_node[i])

    partition = list(communities.values())

    # Every set is created on its first insertion, so no set can be empty;
    # the only degenerate case is having no communities at all.
    if not partition:
        return 0.0

    try:
        return nx.community.modularity(G, partition)
    except nx.NetworkXError:
        # NotAPartition is a NetworkXError subclass defined in
        # networkx.algorithms.community.quality; it is not an attribute of
        # nx.exception, so naming it there raised AttributeError while Python
        # was matching handlers and masked every handler in this block.
        return -1.0
    except ZeroDivisionError:
        return 0.0

In [32]:
# --- STARTING REFINEMENT (Louvain Local Search) ---
import community.community_louvain as community_louvain

# Re-key the PSO global-best vector by node label: {node: community_id}.
# NOTE(review): gbest_position / gbest_fitness are not defined in the cells
# shown here; presumably a PSO cell sets them -- confirm it runs first.
pso_partition_dict = {index_to_node[i]: gbest_position[i] for i in range(DIMENSION)}

print("\n--- STARTING REFINEMENT (Louvain Local Search) ---")

# Louvain modularity optimisation, seeded with the PSO partition.
refined_partition_dict = community_louvain.best_partition(
    G,
    partition=pso_partition_dict,
    resolution=1.0,
    weight='weight'
)

# best_partition returns {node: community_id}, whereas nx.community.modularity
# expects a list of node sets, so regroup the nodes by community ID.
refined_communities_sets = {}
for node, community_id in refined_partition_dict.items():
    refined_communities_sets.setdefault(community_id, set()).add(node)

partition_for_modularity = list(refined_communities_sets.values())

# --- ANALYZE REFINED RESULT ---

# Score the refined partition, then compare it with the PSO fitness.
refined_modularity = nx.community.modularity(G, partition_for_modularity)
gbest_modularity = gbest_fitness # Renaming for clarity

print("\n--- FINAL OPTIMIZATION RESULTS ---")
print(f"1. PSO Final Modularity: {gbest_modularity:.4f}")
print(f"2. REFINED Final Modularity (Louvain Post-processing): {refined_modularity:.4f}")

# Keep whichever partition scored strictly higher; ties go to the PSO result.
if refined_modularity > gbest_modularity:
    print("✨ Refinement IMPROVED the result!")
    final_modularity, final_assignments = refined_modularity, refined_partition_dict
else:
    print("⚠️ Refinement did NOT improve the result; using PSO's best.")
    final_modularity, final_assignments = gbest_modularity, pso_partition_dict

# Community sizes for the winning assignment.
final_community_counts = Counter(final_assignments.values())

# --- FINAL COMMUNITY MEMBERSHIP OUTPUT ---
# {community_id: [nodes...]}, in order of first appearance.
final_communities = {}
for node, community_id in final_assignments.items():
    final_communities.setdefault(community_id, []).append(node)

print(f"\nFinal Best Modularity Score: {final_modularity:.4f}")
print(f"Total Communities: {len(final_communities)}")
print("Top 5 largest communities (size):", final_community_counts.most_common(5))

print("\n--- COMMUNITY MEMBERSHIP EXAMPLES ---")
for cid, members in final_communities.items():
    # Show at most ten members; mark truncated lists with an ellipsis entry.
    if len(members) > 10:
        member_list = members[:10] + ['...']
    else:
        member_list = members
    print(f"Community {cid} (Size: {len(members)}): {member_list}")


--- STARTING REFINEMENT (Louvain Local Search) ---

--- FINAL OPTIMIZATION RESULTS ---
1. PSO Final Modularity: 0.0533
2. REFINED Final Modularity (Louvain Post-processing): 0.3899
✨ Refinement IMPROVED the result!

Final Best Modularity Score: 0.3899
Total Communities: 2
Top 5 largest communities (size): [(np.int64(0), 39), (np.int64(1), 23)]

--- COMMUNITY MEMBERSHIP EXAMPLES ---
Community 0 (Size: 39): [0, 2, 3, 4, 8, 10, 11, 12, 14, 15, '...']
Community 1 (Size: 23): [1, 5, 6, 7, 9, 13, 17, 19, 22, 25, '...']
