In [22]:
import networkx as nx
import pickle
import numpy as np

from pathlib import Path
import pandas as pd
import os

import sys
sys.path.append("../src")
from data_component import DataComponent
from graph_utils import *


In [24]:
# Read graph and opinions
data = DataComponent(real_data="Brexit")
opinions = data.get_opinions()
G_brexit = data.get_graph()

Loading follow graph ..
Reverse edge directionality! 
 BEFORE: u->v: u follows v 
 NOW u<-v: propagation goes from v to u  )
Graph loaded  ✅
|V|=7_589 |E|=532_459 node types={'int'}


In [25]:
# Remove edges to obtain a "reduced" version of the raw graph (no edges between nodes with opposing stance)
edges_to_remove = []
gaps = []
edges = G_brexit.edges()
for edge in edges:
    i, j = edge
    if i == j:
        continue
    gaps.append(abs(opinions[i]-opinions[j]))
    if (not (opinions[i] >= 0.5 and opinions[j] >= 0.5)) and (not (opinions[i] < 0.5 and opinions[j] < 0.5)):
        edges_to_remove.append(edge)
        
G_brexit_reduced = G_brexit.copy()
G_brexit_reduced.remove_edges_from(edges_to_remove)
print("Proportion of edges removed:", len(edges_to_remove)/len(edges))


Proportion of edges removed: 0.28203110474233695


In [26]:
# Function definition
import networkx as nx
import community as community_louvain
import numpy as np
from collections import defaultdict
    

def analyze_communities(G, G_raw, opinions, position_type, min_community_size=100):
    # Compute the best partition using Louvain method on graph G
    partition = community_louvain.best_partition(nx.Graph(G), random_state=42)  
    
    # Initialize dictionaries to store results
    community_nodes_count = defaultdict(int)
    community_opinion_avg = {}

    # Calculate the number of nodes in each community and their average opinion
    for node, comm in partition.items():
        community_nodes_count[comm] += 1
        if comm not in community_opinion_avg:
            community_opinion_avg[comm] = []
        community_opinion_avg[comm].append(opinions[node])

    # Calculate average opinions and filter communities
    large_communities = {comm: nodes for comm, nodes in community_nodes_count.items() if nodes >= min_community_size}
    community_avg_opinions = {comm: np.mean(community_opinion_avg[comm]) for comm in large_communities.keys()}

    # Identify the community with the highest or lowest average opinion based on metric
    target_comm = None
    if position_type == "positive-central" or position_type == "positive-high-degree" or position_type == "positive-lowest-degree":
        target_comm = max(community_avg_opinions, key=community_avg_opinions.get)
    elif position_type == "negative-central" or position_type == "negative-high-degree" or position_type == "negative-lowest-degree":
        target_comm = min(community_avg_opinions, key=community_avg_opinions.get)

    # Get the nodes in the target community
    nodes_in_target_comm = [node for node, community in partition.items() if community == target_comm]

    if position_type in ["positive-central", "negative-central"]:
        # Calculate centrality in G_raw
        centrality = nx.degree_centrality(G_raw)
    elif position_type in ["positive-high-degree", "negative-high-degree", "positive-lowest-degree", "negative-lowest-degree"]:
        # Calculate degree in G_raw
        centrality = dict(G_raw.degree(nodes_in_target_comm))

    # Identify the node with the highest or lowest centrality/degree
    target_node = None
    if position_type == "positive-lowest-degree" or position_type == "negative-lowest-degree":
        target_node = min((node for node in nodes_in_target_comm if centrality[node] > 1), key=lambda node: centrality[node], default=None)
    else:
        target_node = max(nodes_in_target_comm, key=lambda node: centrality[node])
    
    return target_node, large_communities, community_avg_opinions, partition
    

---

# Demo: choose a position type and observe the outcome

In [28]:
# Choose:
# "positive-central": i.e., MOST central in  the most positive community
# "negative-central": i.e., MOST central in  the most negative community
# "positive-high-degree": i.e., MAX out-degree in the most positive community
# "negative-high-degree": i.e., MAX out-degree in the most negative community
# "positive-lowest-degree": i.e., out-degree > 1
# "negative-lowest-degree": i.e., out-degree > 1

position_llm_node_dict = {}

position_types = ["positive-central", "negative-central", "positive-high-degree", "negative-high-degree", "positive-lowest-degree", "negative-lowest-degree"]

for position_type in position_types:
    nodeid, communities, community_avg_opinions, partition = analyze_communities(G_brexit_reduced, G_brexit, opinions, position_type=position_type)
    print(f"Position: {position_type}\nNode: {nodeid}\nCommunities: {communities}\nAvg opinions: {community_avg_opinions}")
    position_llm_node_dict[position_type] = nodeid

Position: positive-central
Node: 3638
Communities: {0: 3125, 3: 2894, 4: 1343}
Avg opinions: {0: 0.8565699737015398, 3: 0.09390009890120354, 4: 0.7998372334670752}
Position: negative-central
Node: 4426
Communities: {0: 3125, 3: 2894, 4: 1343}
Avg opinions: {0: 0.8565699737015398, 3: 0.09390009890120354, 4: 0.7998372334670752}
Position: positive-high-degree
Node: 3638
Communities: {0: 3125, 3: 2894, 4: 1343}
Avg opinions: {0: 0.8565699737015398, 3: 0.09390009890120354, 4: 0.7998372334670752}
Position: negative-high-degree
Node: 4426
Communities: {0: 3125, 3: 2894, 4: 1343}
Avg opinions: {0: 0.8565699737015398, 3: 0.09390009890120354, 4: 0.7998372334670752}
Position: positive-lowest-degree
Node: 7269
Communities: {0: 3125, 3: 2894, 4: 1343}
Avg opinions: {0: 0.8565699737015398, 3: 0.09390009890120354, 4: 0.7998372334670752}
Position: negative-lowest-degree
Node: 14
Communities: {0: 3125, 3: 2894, 4: 1343}
Avg opinions: {0: 0.8565699737015398, 3: 0.09390009890120354, 4: 0.7998372334670752

In [34]:
position_dict_path = sys.path[-1] + "/../data/processed/brexit_position_dict.pkl"

In [35]:
position_dict_path

'../src/../data/processed/brexit_position_dict.pkl'

In [36]:
with open(position_dict_path, "wb") as f:
    pickle.dump(position_llm_node_dict, f)

In [13]:
nodeid

4426

In [14]:
communities

{2: 3121, 1: 2894, 3: 1337}

In [15]:
community_avg_opinions

{2: 0.8560817583522307, 1: 0.09390009890120354, 3: 0.799562257202405}

In [14]:
partition[nodeid] == 0

True