In [None]:
import neo4j
import time
from neo4j import GraphDatabase, basic_auth
from easydict import EasyDict as edict

### Import Graph Data (Constraint Graph)
Which authors worked together on a paper? --> Create a connection between each pair of co-authors.

In [None]:
# Connection settings for the Neo4j instance on localhost.
URI = "neo4j://127.0.0.1:7687"
# NOTE(review): credentials are hard-coded here; consider reading them from
# the environment before sharing this notebook.
AUTH = ("neo4j", "12345678")

# Open a short-lived driver only to confirm the server is reachable.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()

In [None]:
# Input files. Plain string literals: there is nothing to interpolate, so no
# f-prefix is needed.
AUTHORS_PATH = "datasets\\temp\\authors_20260106-170423.txt"
CONNECTIONS_PATH = "datasets\\temp\\author_connections_20260106-170518.txt"

# One author name per line. Names are stripped so they compare equal to the
# (stripped) endpoints parsed from the connections file below; otherwise a
# name with stray whitespace would never be matched when edges are created.
with open(AUTHORS_PATH) as f:
    authors = [line.strip() for line in f if line.strip()]

# One pair per line, presumably written as "(name_a, name_b)" -- the
# surrounding parentheses are removed and the line is split on commas.
connections = []
with open(CONNECTIONS_PATH, 'r') as file:
    for line_number, line in enumerate(file, start=1):
        clean_line = line.strip().lstrip('(').rstrip(')')

        # Blank lines carry no pair; skip them instead of failing on parts[1].
        if not clean_line:
            continue

        parts = clean_line.split(',')
        if len(parts) < 2:
            raise ValueError(
                f"{CONNECTIONS_PATH}, line {line_number}: expected two "
                f"comma-separated names, got {line.strip()!r}"
            )

        # Only the first two fields are used, as before.
        entry = (parts[0].strip(), parts[1].strip())
        connections.append(entry)

In [None]:
def clear_database(driver, database):
    """Remove every node, together with its relationships, from ``database``.

    Args:
        driver: Object exposing ``execute_query`` (a Neo4j driver in this
            notebook).
        database: Name of the target database, forwarded as ``database_``.
    """
    wipe_everything = """
        MATCH (n)
        DETACH DELETE n
    """
    driver.execute_query(wipe_everything, database_=database)
    print("Database cleared.")

def setup_database(driver, database):
    """Ensure the uniqueness constraint on ``Author.name`` exists.

    Args:
        driver: Object exposing ``execute_query`` (a Neo4j driver in this
            notebook).
        database: Name of the target database, forwarded as ``database_``.
    """
    # The constraint statement has to run in a transaction of its own.
    unique_author_name = """
        CREATE CONSTRAINT author_name_unique IF NOT EXISTS
        FOR (a:Author) REQUIRE a.name IS UNIQUE
    """
    driver.execute_query(unique_author_name, database_=database)
    print("Constraint verified/created.")

def import_data(driver, authors_list, connections_list, database):
    """Load authors and their co-author links into ``database``.

    Two queries are issued in order: the first merges one ``Author`` node per
    name, the second merges a ``CO_AUTHOR`` relationship for every pair whose
    two endpoints both exist as ``Author`` nodes.

    Args:
        driver: Object exposing ``execute_query`` (a Neo4j driver in this
            notebook).
        authors_list: Author names, sent as the ``$names`` parameter.
        connections_list: Two-element pairs of author names, sent as the
            ``$pairs`` parameter.
        database: Name of the target database, forwarded as ``database_``.
    """
    create_authors = """
        UNWIND $names AS name
        MERGE (:Author {name: name})
    """
    link_coauthors = """
        UNWIND $pairs AS pair
        MATCH (a:Author {name: pair[0]})
        MATCH (b:Author {name: pair[1]})
        MERGE (a)-[:CO_AUTHOR]->(b)
    """

    # Nodes first: the relationship query only MATCHes existing authors.
    driver.execute_query(create_authors, database_=database, names=authors_list)
    driver.execute_query(link_coauthors, database_=database, pairs=connections_list)

# Rebuild the constraint graph in the "test" database: wipe it, make sure the
# uniqueness constraint exists, then load the authors and their connections.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    clear_database(driver, "test")
    setup_database(driver, "test")
    import_data(driver, authors, connections, "test")

### Create Instance Graph
- Need a parameter that determines how many labels/connections to exchange (fraud size)

In [None]:
# Hyperparameters
hypp = edict()
# Number of swaps requested from swap_random_authors when the instance graph
# is built.
hypp.fraud_number = 10

In [None]:
# Empty the graph before importing so that re-running the cell does not
# create duplicates.
def clear_database(driver, database):
    """Delete all nodes and their relationships in ``database``.

    Args:
        driver: Object exposing ``execute_query`` (a Neo4j driver in this
            notebook).
        database: Name of the target database, forwarded as ``database_``.
    """
    delete_all_cypher = """
        MATCH (n)
        DETACH DELETE n
    """
    driver.execute_query(delete_all_cypher, database_=database)
    print("Database cleared.")


def swap_random_authors(driver, database, number_of_swaps):
    """Swap the names of randomly chosen, pairwise-disjoint author pairs.

    Up to ``2 * number_of_swaps`` distinct ``Author`` nodes are drawn at
    random and paired off, so every selected node takes part in exactly one
    swap. Both nodes of each pair exchange their ``name`` property and
    receive the additional ``Fraudulent`` label. If the database holds fewer
    than ``2 * number_of_swaps`` authors, fewer swaps are performed.

    Sampling single nodes (instead of ordering the full cartesian product of
    author pairs) keeps the query from growing quadratically with the number
    of authors, and guarantees that no author appears in two pairs -- which
    would otherwise overwrite a name twice and duplicate or lose names.

    Args:
        driver: Object exposing ``session(database=...)`` (a Neo4j driver in
            this notebook).
        database: Name of the database to modify.
        number_of_swaps: Maximum number of pairs to swap; must be >= 0.

    Returns:
        A list of ``(name1, name2)`` tuples, one per swapped pair, holding
        the names the two nodes carry after the swap.

    Raises:
        ValueError: If ``number_of_swaps`` is negative.
    """
    if number_of_swaps < 0:
        raise ValueError(
            f"number_of_swaps must be non-negative, got {number_of_swaps}"
        )

    query = """
    // 1. Draw the required number of distinct authors in random order
    MATCH (a:Author)
    WITH a ORDER BY rand()
    LIMIT $sample_size

    // 2. Pair consecutive picks (0,1), (2,3), ... so no author is used twice;
    //    an odd leftover node stays untouched
    WITH collect(a) AS picked
    UNWIND range(0, size(picked) / 2 - 1) AS i
    WITH picked[2 * i] AS a1, picked[2 * i + 1] AS a2

    // 3. Store their names in temporary variables
    WITH a1, a2, a1.name AS oldName1, a2.name AS oldName2

    // 4. Perform the swap
    SET a1.name = oldName2
    SET a2.name = oldName1

    // 5. Label them as fraudulent/swapped so we can find them
    SET a1:Fraudulent, a2:Fraudulent

    RETURN a1.name, a2.name
    """

    with driver.session(database=database) as session:
        result = session.run(query, sample_size=2 * number_of_swaps)
        # Materialize inside the session: records are only readable while
        # the session is open.
        swapped = [(record[0], record[1]) for record in result]

    for name1, name2 in swapped:
        print(f"Swapped identities: {name1} <-> {name2}")
    return swapped

# Build the instance graph in the "test-instance-graph" database: wipe it,
# load the same authors and connections, then swap author names.
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    clear_database(driver, "test-instance-graph")
    # setup_database(driver, "test-instance-graph") is left out on purpose:
    # the uniqueness constraint is probably not wanted on the instance graph,
    # because duplicate names could exist in it.
    import_data(driver, authors, connections, "test-instance-graph")
    swap_random_authors(driver, "test-instance-graph", hypp.fraud_number)