In [9]:
# Load libraries; functions to load graphs and calculate feedback loops

import csv

import networkx as nx
import pandas as pd

def load_graph(node_file, edge_file):
    """
    Build a weighted directed graph from a node-list CSV and an edge-list CSV.

    The node file is read for the columns ``Node.ID`` and ``Node.Description``;
    the edge file for ``Source.Node.ID``, ``Sink.Node.ID`` and ``Value``.
    All three ID columns are parsed as strings.

    :param node_file: Path to the node list CSV file
    :param edge_file: Path to the edge list CSV file
    :return: A directed graph (DiGraph) whose nodes carry a ``description``
             attribute and whose edges carry a ``weight`` attribute taken
             from the ``Value`` column
    """
    node_table = pd.read_csv(node_file, dtype={"Node.ID": str})
    edge_table = pd.read_csv(
        edge_file,
        dtype={"Source.Node.ID": str, "Sink.Node.ID": str},
    )

    graph = nx.DiGraph()

    # One dict per CSV row, keyed by column name
    for node_record in node_table.to_dict("records"):
        graph.add_node(
            node_record["Node.ID"],
            description=node_record["Node.Description"],
        )

    for edge_record in edge_table.to_dict("records"):
        graph.add_edge(
            edge_record["Source.Node.ID"],
            edge_record["Sink.Node.ID"],
            weight=edge_record["Value"],
        )

    return graph

def find_unique_loops(G):
    """
    Finds unique loops (cycles) in a directed graph and determines their polarity.

    Each cycle returned by ``nx.simple_cycles`` is identified by a
    rotation-invariant key (the node sequence rotated so that its smallest
    node comes first). The same loop reported from a different starting node
    is therefore counted once, while two loops that visit the same nodes in a
    different order (e.g. A -> B -> C -> A versus A -> C -> B -> A) stay
    distinct.

    Polarity is the sign of the product of the ``weight`` attributes along the
    loop: a positive product is "Reinforcing", anything else is "Balancing".
    Only the sign matters, so weights do not have to be exactly +1 / -1.

    :param G: A directed graph (DiGraph) whose edges have a ``weight`` attribute
    :return: A Pandas DataFrame with columns "Loop #" (R1, R2, ... / B1, B2, ...),
             "Edges" (comma-separated "source -> target (weight)" entries) and
             "Polarity" ("Reinforcing" or "Balancing")
    """
    seen = set()
    loop_counts = {"Reinforcing": 0, "Balancing": 0}
    loop_data = []

    for cycle in nx.simple_cycles(G):
        cycle = list(cycle)

        # Rotate (never reorder) so the key keeps the direction of travel
        pivot = cycle.index(min(cycle))
        cycle_key = tuple(cycle[pivot:] + cycle[:pivot])
        if cycle_key in seen:
            continue
        seen.add(cycle_key)

        # Walk the loop edge by edge, wrapping from the last node to the first
        polarity = 1
        edges = []
        for source, target in zip(cycle, cycle[1:] + cycle[:1]):
            weight = G[source][target]['weight']
            polarity *= weight
            edges.append(f"{source} -> {target} ({weight})")

        # Sign test rather than "== 1" so non-unit weights are classified correctly
        loop_type = "Reinforcing" if polarity > 0 else "Balancing"

        # Reinforcing and balancing loops are numbered independently
        loop_counts[loop_type] += 1
        loop_id = f"{loop_type[0]}{loop_counts[loop_type]}"

        loop_data.append([loop_id, ", ".join(edges), loop_type])

    return pd.DataFrame(loop_data, columns=["Loop #", "Edges", "Polarity"])


In [3]:
# Function to compare node lists. Not used in workflow. Differences of variable name, spelling not important.

def compare_node_lists(ground_truth_file, generated_file):
    """
    Compares the Generated nodes.csv against Ground Truth nodes.csv using precision, recall, and F1-score.

    Nodes are matched on their ``Node.Description`` text after trimming
    surrounding whitespace and lower-casing. Rows whose description is missing
    or blank are ignored, so they neither match each other nor count as errors.

    :param ground_truth_file: Path to the ground truth node list CSV file
    :param generated_file: Path to the generated node list CSV file
    :return: Precision, Recall, and F1-score (each 0.0 when its denominator is zero)
    """
    def _description_set(path):
        """Return the set of normalised, non-empty descriptions found in ``path``."""
        table = pd.read_csv(path, dtype={"Node.ID": str})
        labels = (
            table["Node.Description"]
            .dropna()        # missing cells would otherwise enter the set as NaN
            .astype(str)     # keeps .str usable if the column was parsed as numeric
            .str.strip()
            .str.lower()
        )
        return set(labels[labels != ""])

    ground_truth_nodes = _description_set(ground_truth_file)
    generated_nodes = _description_set(generated_file)

    # Compute true positives, false positives, and false negatives
    true_positives = len(ground_truth_nodes & generated_nodes)
    false_positives = len(generated_nodes - ground_truth_nodes)
    false_negatives = len(ground_truth_nodes - generated_nodes)

    # Compute precision, recall, and F1-score, guarding every division by zero
    predicted = true_positives + false_positives
    actual = true_positives + false_negatives
    precision = true_positives / predicted if predicted > 0 else 0.0
    recall = true_positives / actual if actual > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

    return precision, recall, f1

In [5]:
# Function to compare two edge lists

def compare_edge_lists(ground_truth_file, generated_file):
    """
    Compares the Generated edges.csv against Ground Truth edges.csv using precision, recall, and F1-score.

    An edge is the triple (``Source.Node.ID``, ``Sink.Node.ID``, ``Value``).
    The three columns are selected by name, so column order and any additional
    columns in either file do not affect the comparison. Whitespace around the
    node IDs is ignored. Duplicate rows count once.

    :param ground_truth_file: Path to the ground truth edge list CSV file
    :param generated_file: Path to the generated edge list CSV file
    :return: Precision, Recall, and F1-score (each 0.0 when its denominator is zero)
    :raises KeyError: If one of the three edge columns is missing from a file
    """
    def _edge_set(path):
        """Return the set of (source, sink, value) triples found in ``path``."""
        table = pd.read_csv(
            path,
            dtype={"Source.Node.ID": str, "Sink.Node.ID": str, "Value": int},
        )
        # IDs are read as text; strip padding such as the space in "01, 02, 1"
        sources = table["Source.Node.ID"].str.strip()
        sinks = table["Sink.Node.ID"].str.strip()
        return set(zip(sources, sinks, table["Value"]))

    ground_truth_edges = _edge_set(ground_truth_file)
    generated_edges = _edge_set(generated_file)

    # Compute true positives, false positives, and false negatives
    true_positives = len(ground_truth_edges & generated_edges)
    false_positives = len(generated_edges - ground_truth_edges)
    false_negatives = len(ground_truth_edges - generated_edges)

    # Compute precision, recall, and F1-score, guarding every division by zero
    predicted = true_positives + false_positives
    actual = true_positives + false_negatives
    precision = true_positives / predicted if predicted > 0 else 0.0
    recall = true_positives / actual if actual > 0 else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0

    return precision, recall, f1

In [15]:
# Function to calculate betweenness centrality

import networkx as nx

def calculate_betweenness_centrality(node_file, edge_file, weight_penalty_for_negative_link,
                                     output_file="Betweenness_Centrality.csv"):
    """
    Calculate betweenness centrality for a network with the option to penalize negative links.

    Both input files are parsed with the ``csv`` module, so quoted fields that
    contain commas (common in node descriptions) are read correctly and blank
    lines are skipped. Columns are located by header name, falling back to
    fixed positions when a header is absent.

    Parameters:
    node_file (str): Path to the nodes CSV file. Columns ``Node.ID`` and
        ``Node.Description`` (fallback: positions 0 and 1).
    edge_file (str): Path to the edges CSV file. Columns ``Source.Node.ID``,
        ``Sink.Node.ID`` and ``Value`` (fallback: positions 0, 1 and 2).
    weight_penalty_for_negative_link (float): Weight to replace negative link values
    output_file (str): Path of the CSV report to write. Defaults to
        "Betweenness_Centrality.csv" in the current working directory.

    Returns:
    networkx.Graph: The constructed graph with all attributes
    dict: Betweenness centrality values for each node

    Raises:
    ValueError: If an edge ``Value`` cannot be converted to float.
    IndexError: If a data row is too short to contain a required ID or value.
    """
    def _read_rows(path):
        """Return (stripped header cells, non-blank data rows) for a CSV file."""
        with open(path, "r", newline="") as f:
            reader = csv.reader(f)
            headers = [cell.strip() for cell in next(reader, [])]
            rows = [row for row in reader if any(cell.strip() for cell in row)]
        return headers, rows

    # NOTE(review): nx.Graph is undirected, so A->B and B->A rows collapse into a
    # single edge whose weight comes from the later row — confirm this is intended.
    G = nx.Graph()

    # Read nodes file
    headers, rows = _read_rows(node_file)
    node_id_idx = headers.index('Node.ID') if 'Node.ID' in headers else 0
    node_desc_idx = headers.index('Node.Description') if 'Node.Description' in headers else 1
    for data in rows:
        node_id = data[node_id_idx].strip()
        description = data[node_desc_idx] if len(data) > node_desc_idx else "No Description"
        G.add_node(node_id, description=description)

    # Read edges file
    headers, rows = _read_rows(edge_file)
    source_idx = headers.index('Source.Node.ID') if 'Source.Node.ID' in headers else 0
    target_idx = headers.index('Sink.Node.ID') if 'Sink.Node.ID' in headers else 1
    weight_idx = headers.index('Value') if 'Value' in headers else 2
    for data in rows:
        source = data[source_idx].strip()
        target = data[target_idx].strip()

        # Negative links get the caller-supplied penalty instead of their own value
        raw_weight = float(data[weight_idx])
        weight = weight_penalty_for_negative_link if raw_weight < 0 else raw_weight

        G.add_edge(source, target, weight=weight)

    print(f"Number of nodes: {G.number_of_nodes()}")
    print(f"Number of edges: {G.number_of_edges()}")

    # NOTE(review): networkx uses the `weight` attribute as a path length here, so a
    # larger penalty should make negative links less attractive to shortest paths —
    # confirm that is the intended reading of the penalty.
    centrality = nx.betweenness_centrality(G, weight="weight")

    # Export results; csv.writer quotes descriptions that contain commas or quotes
    with open(output_file, "w", newline="") as f:
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["Node.ID", "Node.Description", "Between.Centrality"])
        for node in G.nodes():
            description = G.nodes[node].get('description', 'No Description')
            betweenness = centrality.get(node, 0)
            writer.writerow([node, description, betweenness])

    print(f"Exported {output_file}")

    return G, centrality

# Example usage:
# G, centrality = calculate_betweenness_centrality("nodes.csv", "edges.csv", 10)

In [11]:
# Code snippet to run accuracy results. Save the files as follows first then just run this
# ground_truth_file = "edges_ground_truth.csv"
# For the three LLMs, use :-
# "edges_ChatGPT.csv"
# "edges_claude.csv"
# "edges_deepseek.csv"


# Every generated edge list is scored against the same ground-truth file
ground_truth_file = "edges_ground_truth.csv"

comparison_runs = (
    ("\nEdge List Comparison ChatGPT:", "edges_ChatGPT.csv"),
    ("\nEdge List Comparison ClaudeAI:", "edges_claude.csv"),
    ("\nEdge List Comparison DeepSeek:", "edges_deepseek.csv"),
)

for heading, generated_file in comparison_runs:
    print(heading)
    precision, recall, f1 = compare_edge_lists(ground_truth_file, generated_file)

    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-score: {f1:.2f}")




Edge List Comparison ChatGPT:
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Edge List Comparison ClaudeAI:
Precision: 1.00
Recall: 1.00
F1-score: 1.00

Edge List Comparison DeepSeek:
Precision: 1.00
Recall: 0.97
F1-score: 0.98


In [45]:
# Code snippet to enumerate all the loops for the three LLMs
# node file = "nodes_ground_truth.csv"
# For the three LLMs, use :-
# "edges_ChatGPT.csv"
# "edges_claude.csv"
# "edges_deepseek.csv"

# All three runs share the ground-truth node list. File names follow the casing
# documented above and used by the other cells, so the cell also works on
# case-sensitive filesystems.
node_file = "nodes_ground_truth.csv"

loop_runs = (
    ("edges_ChatGPT.csv", "loops_ChatGPT.csv"),
    ("edges_claude.csv", "loops_Claude.csv"),
    ("edges_deepseek.csv", "loops_deepseek.csv"),
)

for edge_file, loops_file in loop_runs:
    G = load_graph(node_file, edge_file)
    # Recompute the loops for every graph; previously the Claude run skipped this
    # step and wrote the ChatGPT loops to loops_Claude.csv.
    loop_df = find_unique_loops(G)
    loop_df.to_csv(loops_file, index=False)


In [19]:
# Code to call network analysis/get betweenness
G, centrality = calculate_betweenness_centrality(
    node_file="nodes_ground_truth.csv",
    edge_file="edges_ground_truth.csv",
    weight_penalty_for_negative_link=10,  # weight substituted for every negative-valued link
)

Number of nodes: 17
Number of edges: 29
Exported Betweenness_Centrality.csv
