# Election Simulation Analysis

This notebook analyzes the results of the election simulation.


In [50]:
from utils import get_data, calculate_similarity

# data = get_data("all_data")
# data = get_data("same_seed")
data = get_data("different_seed")

In [51]:
sim_data = data[1]  # data used to analayze

In [52]:
def get_voter_agents(data):
    people = {}
    for person in data["people_agents"]:
        people[person["name"]] = person

    return people


def get_voter_names(data):
    people = get_voter_agents(data)
    return [person["name"] for person in people.values()]


def get_candidate_agents(data):
    candidates = {}
    for candidate in data["candidate_agents"]:
        candidates[candidate["name"]] = candidate

    return candidates


def get_candidate_names(data, include_abstain=False):
    candidates = get_candidate_agents(data)
    candidate_names = [candidate["name"] for candidate in candidates.values()]
    if include_abstain:
        return candidate_names + ["ABSTAIN"]
    else:
        return candidate_names


def get_all_agents(data):
    people = get_voter_agents(data)
    candidates = get_candidate_agents(data)

    agents = {**people, **candidates}

    return agents

In [53]:
from typing import List
import pandas as pd

# all data is from one simulation

id_searching_name = []


def initialize_id_searching_name(data):
    ids = {}

    def _recursive_comments(item):
        ids[item["id"]] = item["name"]
        for replies in item.get("replies", []):
            _recursive_comments(replies)

    for posts in data["platform"]["platform"]:
        ids[posts["id"]] = posts["name"]
        _recursive_comments(posts)

    return ids


def id_get_name(id_, ids_search_pool):
    if id_ in ids_search_pool:
        return ids_search_pool[id_]
    else:
        return None


def get_all_actions_by_model(
    data,
    ids_search_pool,
    include: List[str] = ["people", "candidate"],
):  # people is just voters
    all_actions = []

    missed_ids = set()

    for type_ in include:
        for agent in data[f"{type_}_agents"]:
            for actions in agent["all_posting_actions"]:
                hour = actions["hour"]
                day = actions["day"]
                for action in actions["actions"]:
                    if action["action"] == "POST":
                        continue

                    action_on_id = id_get_name(action["id"], ids_search_pool)

                    if None == action_on_id:
                        missed_ids.add(action["id"])
                        continue

                    all_actions.append(
                        {
                            "from": agent["name"],
                            "to": action_on_id,
                            "action": action["action"],
                            "hour": hour,
                            "day": day,
                        }
                    )

    print(f"Missed IDs: {missed_ids}")  # normal as models sometimes produced wrong ids

    return all_actions

In [54]:
for sim_data in data:
    id_searching_name.append(initialize_id_searching_name(sim_data))

    actions = get_all_actions_by_model(sim_data, id_searching_name[-1])

    # Convert to DataFrame for easier manipulation
    actions_df = pd.DataFrame(actions)

    # Display the DataFrame
    print(actions_df.head())

Missed IDs: {'d81e7a068', '6f11b9048b', '68ef5c46', '9a594d6c72', 'ac4f95e20d5', 'f891c48dad', '9c648f094', 'be76f66d', '67088dda0', '6a39154c95', '21c945f7d1', 'a3f11d8d9d', '21d945f7d1', '27f32eddce', '8e690331ba', 'b7a80c1995', '73e8b4fd5b'}
            from             to action  hour  day
0  Mark Campbell   Jason Morgan  REPLY    10    1
1  Mark Campbell  Ashley Guzman  REPLY    10    1
2  Mark Campbell   Jimmy Morris  REPLY    12    1
3  Mark Campbell   Sean Ferrell  REPLY    12    1
4  Mark Campbell   Jason Morgan  REPLY    12    1
Missed IDs: {'Christine Riley', 'Annette Johnson', '91c1e8e850', 'cc83a8cd3a', '1d151bdeb', 'cab710faf', 'fb56b6cd4b', '04d095bf7f', '6b0667f035', 'd45d87be04', '42214dda57', 'b7e816f54d', '38d4de0228', 'David Graves', 'a95089b4e8', '6a37d76792', '8312a31a22', '9d36c9dde2', 'Gregory Richard', 'fd31c10d2c', '9d886b81af', '99c05a05f6', 'fecc6c2b9f1'}
        from               to action  hour  day
0  Terry Cox       Shawn Park  REPLY    14    1
1  Terry

In [55]:
import networkx as nx
import plotly.graph_objects as go
import plotly.express as px
from collections import defaultdict, Counter
import matplotlib.cm as cm
import matplotlib.colors as mcolors


def create_interaction_graph(sim_data, sim_index=0):
    """Create a network graph showing interactions between agents."""

    all_agents = get_all_agents(sim_data)
    voters = get_voter_agents(sim_data)

    ids_search_pool = initialize_id_searching_name(sim_data)

    actions = get_all_actions_by_model(sim_data, ids_search_pool)

    G = nx.DiGraph()

    for agent_name, agent_data in all_agents.items():
        agent_type = "voter" if agent_name in voters else "candidate"
        G.add_node(agent_name, type=agent_type, data=agent_data)

    interaction_counts = defaultdict(int)

    for action in actions:
        from_agent = action["from"]
        to_agent = action["to"]

        if from_agent in all_agents and to_agent in all_agents:
            interaction_counts[(from_agent, to_agent)] += 1

    # Add edges to graph with weights
    for (from_agent, to_agent), count in interaction_counts.items():
        if G.has_edge(from_agent, to_agent):
            G[from_agent][to_agent]["weight"] += count
        else:
            G.add_edge(from_agent, to_agent, weight=count)

    # Calculate node sizes based on incoming interactions
    incoming_counts = defaultdict(int)
    outgoing_counts = defaultdict(int)

    for from_agent, to_agent in interaction_counts.keys():
        incoming_counts[to_agent] += interaction_counts[(from_agent, to_agent)]
        outgoing_counts[from_agent] += interaction_counts[(from_agent, to_agent)]

    for node in G.nodes():
        G.nodes[node]["incoming_count"] = incoming_counts.get(node, 0)
        G.nodes[node]["outgoing_count"] = outgoing_counts.get(node, 0)

    return G, interaction_counts


G, interaction_counts = create_interaction_graph(sim_data)
print(f"Graph created with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")
print(f"Total interactions: {sum(interaction_counts.values())}")

Missed IDs: {'64cc605a0', '3e82e817e5', '29a227adfd', 'c8a4156299', '6a327e614d', '19c421c56c', 'c12b1b18fc', '150bb75a5a', '82e478af88', '01876cbd', 'ebbbe82c8a', 'f1c323c41x', 'ca00c33cde', '18f7f1dd5f', 'a559ec975', '5f966f566f', 'f1ed44aca9'}
Graph created with 18 nodes and 299 edges
Total interactions: 6287


In [56]:
import networkx as nx
import plotly.graph_objects as go
import plotly.express as px
from collections import defaultdict, Counter
import matplotlib.cm as cm
import matplotlib.colors as mcolors


def create_network_graph(sim_data, interaction_filter=None):
    """
    Create network graph with optional interaction filtering.

    Args:
        sim_data: Simulation data
        interaction_filter: 'REPLY', 'LIKE', or None for all interactions
    """
    all_agents = get_all_agents(sim_data)
    voters = get_voter_agents(sim_data)
    ids_search_pool = initialize_id_searching_name(sim_data)
    actions = get_all_actions_by_model(sim_data, ids_search_pool)

    # Filter actions if specified
    if interaction_filter:
        actions = [
            action for action in actions if action["action"] == interaction_filter
        ]

    # Create directed graph
    G = nx.DiGraph()
    for agent_name, agent_data in all_agents.items():
        agent_type = "voter" if agent_name in voters else "candidate"
        G.add_node(agent_name, type=agent_type, data=agent_data)

    # Count interactions and add edges
    interaction_counts = defaultdict(int)
    for action in actions:
        from_agent = action["from"]
        to_agent = action["to"]
        if from_agent in all_agents and to_agent in all_agents:
            interaction_counts[(from_agent, to_agent)] += 1

    # Add edges with weights
    for (from_agent, to_agent), count in interaction_counts.items():
        G.add_edge(from_agent, to_agent, weight=count)

    # Calculate interaction counts for node sizing
    incoming_counts = defaultdict(int)
    outgoing_counts = defaultdict(int)
    for (from_agent, to_agent), count in interaction_counts.items():
        incoming_counts[to_agent] += count
        outgoing_counts[from_agent] += count

    # Add counts to node attributes
    for node in G.nodes():
        G.nodes[node]["incoming_count"] = incoming_counts.get(node, 0)
        G.nodes[node]["outgoing_count"] = outgoing_counts.get(node, 0)

    return G, interaction_counts


def get_master_layout(sim_data, seed=42):
    """Create a consistent layout based on all agents, to be used across all graphs."""
    all_agents = get_all_agents(sim_data)

    # Create a graph with all agents for consistent positioning
    master_graph = nx.Graph()
    for agent_name in all_agents.keys():
        master_graph.add_node(agent_name)

    # Generate layout that will be consistent across all visualizations
    return nx.spring_layout(master_graph, k=3, iterations=100, seed=seed)


def create_edge_traces(G, pos, sim_data):
    """Create edge traces with similarity-based coloring."""
    color_palette = [
        "#67001f",
        "#b2182b",
        "#d6604d",
        "#f4a582",
        "#fddbc7",
        "#d1e5f0",
        "#92c5de",
        "#4393c3",
        "#2166ac",
        "#053061",
    ]

    all_agents = get_all_agents(sim_data)
    edge_traces = []
    similarities = []

    # Calculate similarities for color scaling
    for edge in G.edges(data=True):
        from_agent, to_agent, edge_attr = edge
        from_profile = all_agents.get(from_agent, {})
        to_profile = all_agents.get(to_agent, {})
        similarity = calculate_similarity(from_profile, to_profile)
        similarities.append(similarity)

    # Normalize similarities for color mapping
    min_sim = min(similarities) if similarities else 0
    max_sim = max(similarities) if similarities else 1
    sim_range = max_sim - min_sim if max_sim > min_sim else 1

    # Create edge traces
    for i, (edge, similarity) in enumerate(zip(G.edges(data=True), similarities)):
        from_agent, to_agent, edge_attr = edge
        weight = edge_attr["weight"]

        x0, y0 = pos[from_agent]
        x1, y1 = pos[to_agent]

        # Map similarity to color
        normalized_sim = (similarity - min_sim) / sim_range if sim_range > 0 else 0.5
        color_index = int(normalized_sim * (len(color_palette) - 1))
        color_index = max(0, min(len(color_palette) - 1, color_index))

        hex_color = color_palette[color_index].lstrip("#")
        r, g, b = tuple(int(hex_color[i : i + 2], 16) for i in (0, 2, 4))
        edge_color = f"rgba({r}, {g}, {b}, 0.7)"

        # Scale line width based on interaction count
        line_width = max(1, min(10, weight / 3))

        edge_traces.append(
            go.Scatter(
                x=[x0, x1, None],
                y=[y0, y1, None],
                mode="lines",
                line=dict(width=line_width, color=edge_color),
                hovertemplate=f"<b>{from_agent} → {to_agent}</b><br>"
                f"Interactions: {weight}<br>"
                f"Similarity: {similarity:.3f}<extra></extra>",
                showlegend=False,
            )
        )

    return edge_traces, (min_sim, max_sim)


def create_node_traces(G, pos, all_agents):
    """Create node traces with size based on incoming interactions."""
    incoming_counts = [G.nodes[node].get("incoming_count", 0) for node in G.nodes()]
    min_incoming = min(incoming_counts) if incoming_counts else 0
    max_incoming = max(incoming_counts) if incoming_counts else 0

    node_traces = []

    # Show all agents in the same positions, even if they don't have interactions in this specific graph
    for agent_name in all_agents.keys():
        x, y = pos[agent_name]

        # Get interaction counts if the node exists in this graph, otherwise 0
        if agent_name in G.nodes():
            incoming_count = G.nodes[agent_name].get("incoming_count", 0)
            outgoing_count = G.nodes[agent_name].get("outgoing_count", 0)
            node_type = G.nodes[agent_name].get("type", "unknown")
        else:
            incoming_count = 0
            outgoing_count = 0
            # Determine type from all_agents data
            node_type = (
                "voter" if agent_name in get_voter_agents(sim_data) else "candidate"
            )

        # Calculate node size based on incoming interactions
        if max_incoming > min_incoming and max_incoming > 0:
            size_range = 80 - 15
            normalized_incoming = (incoming_count - min_incoming) / (
                max_incoming - min_incoming
            )
            size = 15 + (normalized_incoming * size_range)
        else:
            size = (
                30 if incoming_count > 0 else 15
            )  # Smaller size for nodes with no interactions

        # Format display name for long names
        display_name = (
            agent_name.replace(" ", "\n") if len(agent_name) > 12 else agent_name
        )
        node_color = "#2166ac" if node_type == "voter" else "#b2182b"

        # Make nodes with no interactions more transparent
        opacity = 0.25

        node_traces.append(
            go.Scatter(
                x=[x],
                y=[y],
                mode="markers+text",
                marker=dict(
                    size=size,
                    color=node_color,
                    line=dict(width=2, color="white"),
                    opacity=opacity,
                ),
                text=display_name,
                textposition="middle center",
                textfont=dict(
                    size=max(8, min(12, size / 10)),
                    color="black",
                    family="Arial Black",
                ),
                showlegend=False,
                hovertemplate=f"<b>{agent_name}</b><br>"
                f"Type: {node_type}<br>"
                f"Incoming: {incoming_count}<br>"
                f"Outgoing: {outgoing_count}<extra></extra>",
            )
        )

    return node_traces


def create_network_visualization(G, sim_data, title, master_pos):
    """Create interactive network visualization using consistent layout."""
    all_agents = get_all_agents(sim_data)

    edge_traces, (min_sim, max_sim) = create_edge_traces(G, master_pos, sim_data)
    node_traces = create_node_traces(G, master_pos, all_agents)

    fig = go.Figure(data=edge_traces + node_traces)

    # Add color scale for edge similarity
    colorscale = [
        [i / 9, color]
        for i, color in enumerate(
            [
                "#67001f",
                "#b2182b",
                "#d6604d",
                "#f4a582",
                "#fddbc7",
                "#d1e5f0",
                "#92c5de",
                "#4393c3",
                "#2166ac",
                "#053061",
            ]
        )
    ]

    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(
                size=0,
                color=[min_sim, max_sim],
                colorscale=colorscale,
                showscale=True,
                colorbar=dict(
                    title=dict(text="Edge Similarity", side="right"),
                    thickness=15,
                    len=0.7,
                    x=1.02,
                ),
            ),
            showlegend=False,
            hoverinfo="skip",
        )
    )

    # Add legend items
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(size=14, color="#2166ac"),
            name="Voters",
            showlegend=True,
            textfont=dict(size=18),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=[None],
            y=[None],
            mode="markers",
            marker=dict(size=14, color="#b2182b"),
            name="Candidates",
            showlegend=True,
            textfont=dict(size=18),
        )
    )

    # Calculate total interactions for title
    total_interactions = sum(data["weight"] for _, _, data in G.edges(data=True))

    fig.update_layout(
        title=dict(
            text=f"{title} ({total_interactions} interactions)",
            x=0.5,
            font=dict(size=18),
        ),
        showlegend=True,
        hovermode="closest",
        margin=dict(b=20, l=5, r=50, t=40),
        annotations=[
            dict(
                text="Node size = incoming interactions<br>"
                "Edge width = interaction count<br>"
                "Edge color = similarity (dark red=low, dark blue=high)<br>",
                showarrow=False,
                xref="paper",
                yref="paper",
                x=0.005,
                y=-0.002,
                xanchor="left",
                yanchor="bottom",
                font=dict(size=14),
            )
        ],
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor="white",
        width=1600,
        height=800,
    )

    return fig


def analyze_simulation_networks(sim_data, sim_index=0):
    """Create three separate network visualizations: REPLY only, LIKE only, and COMBINED."""
    print(f"\n{'='*60}")
    print(f"SIMULATION {sim_index+1} - NETWORK ANALYSIS")
    print(f"{'='*60}")

    # Create master layout that will be consistent across all graphs
    master_pos = get_master_layout(sim_data, seed=42)
    print(f"Using consistent layout for {len(master_pos)} agents")

    network_types = [
        ("REPLY", "Reply Network"),
        ("LIKE", "Like Network"),
        (None, "Combined Network"),
    ]

    results = {}

    for filter_type, network_name in network_types:
        print(f"\n{network_name}:")

        # Create network graph
        G, interaction_counts = create_network_graph(sim_data, filter_type)

        # Calculate network statistics
        total_interactions = sum(interaction_counts.values())
        stats = {
            "nodes": G.number_of_nodes(),
            "edges": G.number_of_edges(),
            "total_interactions": total_interactions,
            "density": nx.density(G),
            "is_connected": nx.is_weakly_connected(G),
        }

        # Print statistics
        print(f"  Nodes: {stats['nodes']}, Edges: {stats['edges']}")
        print(f"  Total interactions: {stats['total_interactions']}")
        print(f"  Network density: {stats['density']:.3f}")
        print(f"  Is connected: {stats['is_connected']}")

        # Create and show visualization using consistent layout
        title = f"Simulation {sim_index+1} - {network_name}"
        fig = create_network_visualization(G, sim_data, title, master_pos)
        fig.show()

        # Store results
        results[filter_type or "COMBINED"] = {
            "graph": G,
            "stats": stats,
            "interactions": interaction_counts,
        }

    return results


# Create network visualizations
print("Creating three separate network visualizations:")
print("1. REPLY interactions only")
print("2. LIKE interactions only")
print("3. COMBINED interactions (both replies and likes)")

sim_results = analyze_simulation_networks(sim_data, 0)

Creating three separate network visualizations:
1. REPLY interactions only
2. LIKE interactions only
3. COMBINED interactions (both replies and likes)

SIMULATION 1 - NETWORK ANALYSIS
Using consistent layout for 18 agents

Reply Network:
Missed IDs: {'64cc605a0', '3e82e817e5', '29a227adfd', 'c8a4156299', '6a327e614d', '19c421c56c', 'c12b1b18fc', '150bb75a5a', '82e478af88', '01876cbd', 'ebbbe82c8a', 'f1c323c41x', 'ca00c33cde', '18f7f1dd5f', 'a559ec975', '5f966f566f', 'f1ed44aca9'}
  Nodes: 18, Edges: 254
  Total interactions: 3234
  Network density: 0.830
  Is connected: True



Like Network:
Missed IDs: {'64cc605a0', '3e82e817e5', '29a227adfd', 'c8a4156299', '6a327e614d', '19c421c56c', 'c12b1b18fc', '150bb75a5a', '82e478af88', '01876cbd', 'ebbbe82c8a', 'f1c323c41x', 'ca00c33cde', '18f7f1dd5f', 'a559ec975', '5f966f566f', 'f1ed44aca9'}
  Nodes: 18, Edges: 204
  Total interactions: 3053
  Network density: 0.667
  Is connected: True



Combined Network:
Missed IDs: {'64cc605a0', '3e82e817e5', '29a227adfd', 'c8a4156299', '6a327e614d', '19c421c56c', 'c12b1b18fc', '150bb75a5a', '82e478af88', '01876cbd', 'ebbbe82c8a', 'f1c323c41x', 'ca00c33cde', '18f7f1dd5f', 'a559ec975', '5f966f566f', 'f1ed44aca9'}
  Nodes: 18, Edges: 299
  Total interactions: 6287
  Network density: 0.977
  Is connected: True
