In [37]:
import networkx as nx
import numpy as np
import random
import math
from collections import Counter
import io
import community.community_louvain as community_louvain # Needed for refinement

# --- 1. LOAD NETWORKX GRAPH ---
print("Loading College Football Network Graph...")
# The College Football network is a classic graph available in many repos.
# The file is expected under the relative path "football.gml".
FILENAME_GML = "football.gml"
try:
    G = nx.read_gml(FILENAME_GML)
except FileNotFoundError as err:
    # Re-raise with an actionable message; `from err` keeps the original
    # OS-level error explicitly chained in the traceback.
    raise FileNotFoundError(f"Error: The file '{FILENAME_GML}' was not found. Please download the College Football network file.") from err

# --- 2. CREATE INDEX MAPPING ---
# Map every node to a sequential index 0..N-1 (sorted node order) so that
# array position i always refers to the same node.
node_list = sorted(G.nodes())
node_to_index = {node: i for i, node in enumerate(node_list)}
index_to_node = {i: node for node, i in node_to_index.items()}
NUM_TEAMS = G.number_of_nodes()
DIMENSION = NUM_TEAMS  # one array entry per node

# --- 3. HYPERPARAMETERS (Adjusted for conference detection) ---
NUM_PARTICLES = 50     # P_n: Total population size
MAX_ITERATIONS = 50    # N_max: Maximum iterations

# PSO parameters (standard)
W_MAX = 0.9            # inertia weight at the first iteration
W_MIN = 0.4            # lower end of the linearly decreasing inertia weight
C1 = 2.0               # cognitive (personal-best) coefficient
C2 = 2.0               # social (global-best) coefficient
V_MAX = 6.0            # velocity components are clipped to [-V_MAX, V_MAX]

# OBL/Crossover parameters
A_i = 0                # lower index bound (not referenced in the visible cells)
B_i = NUM_TEAMS - 1    # upper index bound (not referenced in the visible cells)
NUM_CLUSTERS = 12      # Target number of communities (K) - Close to the actual number of conferences
P_c = 0.2              # Ratio of crossover (20% of population)

# --- 4. REPRODUCIBILITY ---
# The optimisation draws from both `random` and `np.random`; seed both so a
# Restart & Run All reproduces the same trajectory.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

print(f"Graph Loaded: College Football Network has {NUM_TEAMS} teams.")

Loading College Football Network Graph...
Graph Loaded: College Football Network has 115 teams.


In [38]:
def calculate_modularity(G, community_assignment_array):
    """Return the modularity Q of the partition encoded by an assignment array.

    Entry ``i`` of ``community_assignment_array`` is read as the community
    label of node ``index_to_node[i]``; the first ``DIMENSION`` entries are used.

    Returns:
        The value from ``nx.community.modularity``; ``0.0`` when no non-empty
        community exists or a ``ZeroDivisionError`` occurs; ``-1.0`` as a
        penalty when NetworkX reports the grouping is not a partition of ``G``.
    """
    # Node -> label lookup, filled in index order.
    labels_by_node = {
        index_to_node[idx]: community_assignment_array[idx]
        for idx in range(DIMENSION)
    }

    # Bucket nodes by label; bucket order follows first appearance of a label.
    members_by_label = {}
    for node_name, label in labels_by_node.items():
        members_by_label.setdefault(label, set()).add(node_name)

    groups = list(members_by_label.values())

    # Nothing to score if there is no group with at least one member.
    if not any(groups):
        return 0.0

    try:
        return nx.community.modularity(G, groups)
    except nx.exception.NotAPartition:
        # Penalty for invalid partition
        return -1.0
    except ZeroDivisionError:
        return 0.0

In [40]:
# --- MAIN HYBRID PSO LOOP ---
# Relies on `swarm`, `gbest_fitness`, `gbest_position` and `Particle` being
# defined by an earlier cell (not shown here).
print(f"\n--- Starting Hybrid PSO for College Football Detection (K={NUM_CLUSTERS}) ---")

# Number of top-ranked particles used as crossover parents. It does not change
# between iterations, so compute it once; an odd count is bumped up so that
# parents can be paired.
num_crossover = int(NUM_PARTICLES * P_c)
if num_crossover % 2 != 0:
    num_crossover = max(2, num_crossover + 1)

# `iteration` (not `iter`) so the builtin iter() is not shadowed in the kernel.
for iteration in range(MAX_ITERATIONS):
    # Inertia weight decreases linearly from W_MAX towards W_MIN.
    w = W_MAX - (W_MAX - W_MIN) * iteration / MAX_ITERATIONS

    # ----------------------------------------------------
    # PHASE 1: STANDARD PSO UPDATES (Movement)
    # ----------------------------------------------------
    for particle in swarm:
        # Update Pbest and Gbest before movement
        if particle.current_fitness > particle.pbest_fitness:
            particle.pbest_fitness = particle.current_fitness
            particle.pbest_position = particle.position.copy()

        if particle.pbest_fitness > gbest_fitness:
            gbest_fitness = particle.pbest_fitness
            gbest_position = particle.pbest_position.copy()

        # PSO Velocity and Position Updates
        r1 = np.random.rand(DIMENSION)
        r2 = np.random.rand(DIMENSION)

        cognitive_term = C1 * r1 * (particle.pbest_position - particle.position)
        social_term = C2 * r2 * (gbest_position - particle.position)

        new_velocity = w * particle.velocity + cognitive_term + social_term
        new_velocity = np.clip(new_velocity, -V_MAX, V_MAX)
        particle.velocity = new_velocity

        # Discretization: the sigmoid of each velocity component is the
        # probability that this node copies a label from gbest/pbest.
        for i in range(DIMENSION):
            p_switch = 1.0 / (1.0 + math.exp(-particle.velocity[i]))

            if random.random() < p_switch:
                pbest_assignment = particle.pbest_position[i]
                gbest_assignment = gbest_position[i]

                # Fair coin between the global-best and personal-best label.
                if random.random() < 0.5:
                    particle.position[i] = gbest_assignment
                else:
                    particle.position[i] = pbest_assignment

            # Keep the label inside [0, NUM_CLUSTERS).
            particle.position[i] = int(particle.position[i] % NUM_CLUSTERS)

        particle.current_fitness = calculate_modularity(G, particle.position)

    # ----------------------------------------------------
    # PHASE 2: CUSTOM CROSSOVER OPERATION (Elitist)
    # ----------------------------------------------------

    # Rank by current fitness, best first.
    swarm.sort(key=lambda p: p.current_fitness, reverse=True)

    crossover_parents = swarm[:num_crossover]
    # Pair by the number of parents actually selected, so a swarm smaller
    # than `num_crossover` cannot cause an out-of-range index.
    num_parents = len(crossover_parents)
    new_offspring_swarm = []

    # Paired Crossover: best with worst of the elite, second best with second worst, ...
    for j in range(num_parents // 2):
        parent_A = crossover_parents[j]
        parent_B = crossover_parents[num_parents - 1 - j]

        # Single-point crossover; both children get at least one entry from each parent.
        split_point = random.randint(1, DIMENSION - 1)

        offspring_1_pos = np.concatenate((parent_A.position[:split_point], parent_B.position[split_point:]))
        offspring_2_pos = np.concatenate((parent_B.position[:split_point], parent_A.position[split_point:]))

        offspring_1 = Particle(DIMENSION, NUM_CLUSTERS, initial_position=offspring_1_pos)
        offspring_1.current_fitness = calculate_modularity(G, offspring_1_pos)

        offspring_2 = Particle(DIMENSION, NUM_CLUSTERS, initial_position=offspring_2_pos)
        offspring_2.current_fitness = calculate_modularity(G, offspring_2_pos)

        new_offspring_swarm.extend([offspring_1, offspring_2])

    # 3. Elitist Selection (Combine Parents + Offspring and select the best N)
    if new_offspring_swarm:
        combined_population = swarm + new_offspring_swarm
        combined_population.sort(key=lambda p: p.current_fitness, reverse=True)
        swarm = combined_population[:NUM_PARTICLES]

    # Final Gbest check from the newly selected swarm
    current_gbest = max(swarm, key=lambda p: p.current_fitness)
    if current_gbest.current_fitness > gbest_fitness:
        gbest_fitness = current_gbest.current_fitness
        gbest_position = current_gbest.position.copy()

    print(f"Iteration {iteration+1}/{MAX_ITERATIONS}: Global Best Modularity = {gbest_fitness:.4f}")

# --- FINAL RESULT AND REFINEMENT ---

# Best PSO partition as a {node: community_id} mapping, built in index order.
pso_partition_dict = dict(
    (index_to_node[k], gbest_position[k]) for k in range(DIMENSION)
)

print("\n--- STARTING REFINEMENT (Louvain Local Search) ---")

# Louvain optimisation, warm-started from the PSO partition.
refined_partition_dict = community_louvain.best_partition(
    G, partition=pso_partition_dict, resolution=1.0, weight='weight'
)

# Regroup {node: label} into label -> set(nodes), the list-of-sets shape
# that the NetworkX modularity function takes.
refined_communities_sets = {}
for member, label in refined_partition_dict.items():
    refined_communities_sets.setdefault(label, set()).add(member)
partition_for_modularity = [*refined_communities_sets.values()]

# Score the refined partition and keep the PSO score for comparison.
refined_modularity = nx.community.modularity(G, partition_for_modularity)
gbest_modularity = gbest_fitness

print("\n--- FINAL OPTIMIZATION RESULTS ---")
print(f"1. PSO Final Modularity: {gbest_modularity:.4f}")
print(f"2. REFINED Final Modularity (Louvain Post-processing): {refined_modularity:.4f}")

# Keep whichever partition scores strictly higher; ties go to the PSO result.
refinement_helped = refined_modularity > gbest_modularity
if refinement_helped:
    print("✨ Refinement IMPROVED the result!")
    final_modularity, final_assignments = refined_modularity, refined_partition_dict
else:
    print("⚠️ Refinement did NOT improve the result; using PSO's best.")
    final_modularity, final_assignments = gbest_modularity, pso_partition_dict
final_community_counts = Counter(final_assignments.values())

# --- FINAL COMMUNITY MEMBERSHIP OUTPUT ---
# label -> list of member nodes, in the order nodes appear in the assignment.
final_communities = {}
for member, label in final_assignments.items():
    final_communities.setdefault(label, []).append(member)

print(f"\nFinal Best Modularity Score: {final_modularity:.4f}")
print(f"Total Communities: {len(final_communities)}")
print("Top 5 largest communities (size):", final_community_counts.most_common(5))

print("\n--- COMMUNITY MEMBERSHIP EXAMPLES ---")
for cid, members in final_communities.items():
    # Show at most ten members, with an ellipsis marker when truncated.
    if len(members) > 10:
        member_list = members[:10] + ['...']
    else:
        member_list = members
    print(f"Community {cid} (Size: {len(members)}): {member_list}")


--- Starting Hybrid PSO for College Football Detection (K=12) ---
Iteration 1/50: Global Best Modularity = 0.0286
Iteration 2/50: Global Best Modularity = 0.0330
Iteration 3/50: Global Best Modularity = 0.0381
Iteration 4/50: Global Best Modularity = 0.0526
Iteration 5/50: Global Best Modularity = 0.0634
Iteration 6/50: Global Best Modularity = 0.0746
Iteration 7/50: Global Best Modularity = 0.0886
Iteration 8/50: Global Best Modularity = 0.0909
Iteration 9/50: Global Best Modularity = 0.1000
Iteration 10/50: Global Best Modularity = 0.1010
Iteration 11/50: Global Best Modularity = 0.1036
Iteration 12/50: Global Best Modularity = 0.1060
Iteration 13/50: Global Best Modularity = 0.1095
Iteration 14/50: Global Best Modularity = 0.1095
Iteration 15/50: Global Best Modularity = 0.1109
Iteration 16/50: Global Best Modularity = 0.1109
Iteration 17/50: Global Best Modularity = 0.1109
Iteration 18/50: Global Best Modularity = 0.1109
Iteration 19/50: Global Best Modularity = 0.1109
Iteration 2