In [1]:
from neo4j import GraphDatabase, basic_auth
from easydict import EasyDict as edict
from dotenv import load_dotenv
import os

In [None]:
env_path = "env.txt"
load_dotenv(dotenv_path=env_path, override=True)
def _strip_quotes(v):
    if v is None:
        return None
    return v.strip().strip('"').strip("'")
URI = _strip_quotes(os.getenv("NEO4J_URI"))
USERNAME = _strip_quotes(os.getenv("NEO4J_USERNAME"))
PASSWORD = _strip_quotes(os.getenv("NEO4J_PASSWORD"))
AUTH = (USERNAME, PASSWORD)
ITERATION_NUMBER = int(os.getenv("ITERATION_NUMBER") or 30)
CONSTRAINT_DB = _strip_quotes(os.getenv("NEO4J_CONSTRAINT_DB"))
INSTANCE_DB   = _strip_quotes(os.getenv("NEO4J_INSTANCE_DB"))
RESULT_DB     = _strip_quotes(os.getenv("NEO4J_RESULT_DB"))

In [3]:
AUTHORS_PATH = f"datasets\\temp\\authors_20260106-170423.txt"

with open(AUTHORS_PATH) as f:
    authors = f.read()

all_possible_authors = [line for line in authors.split("\n") if line.strip()]

## Greedy
- Choose vertex relabeling that eliminates the most violations (over all vertices and labels number of original violations - remaining after relabeling)
    - +normalization


In [None]:
def get_violation_set_of_vertex(driver, instance_db, constraint_db, vertex): # for version 1
    query_1 = f"""
    MATCH (a {{name: "{vertex}"}})-[:CO_AUTHOR]-(b)
    RETURN b.name AS name
    """
    constraint_co_authors = [record["name"] for record in driver.execute_query(query_1, database_=constraint_db).records]
    instance_co_authors = [record["name"] for record in driver.execute_query(query_1, database_=instance_db).records]
    # print(f"real co-authors of {vertex}: {constraint_co_authors}")
    # print(f"instance co-authors of {vertex}: {instance_co_authors}")
    violation_set = list(set(instance_co_authors) - set(constraint_co_authors))
    # print(f"violation set of {vertex}: {violation_set}")
    return violation_set

def get_potential_violations(driver, instance_db, constraint_db, vertex_in_instance, test_label):
    """
    Calculates how many violations a vertex WOULD have if its name was changed to test_label.
    """
    # 1. Get the current neighbors of this specific vertex in the instance graph
    query_neighbors = f"""
    MATCH (a {{name: "{vertex_in_instance}"}})-[:CO_AUTHOR]-(b)
    RETURN b.name AS name
    """

    neighbors = [record["name"] for record in driver.execute_query(query_neighbors, database_=instance_db).records]
    
    query_constraint = f"""
    MATCH (a {{name: "{test_label}"}})-[:CO_AUTHOR]-(b)
    RETURN b.name AS name
    """
    allowed_co_authors = [record["name"] for record in driver.execute_query(query_constraint, database_=constraint_db).records]
    
    potential_violations = [n for n in neighbors if n not in allowed_co_authors and n != test_label]
    
    return len(potential_violations)

def choose_best_relabeling(driver, instance_db, constraint_db, all_possible_labels):
    best_score = -float('inf')
    best_action = None # Will store (vertex_name, new_label)

    # In Greedy V2, iterate over ALL vertices and ALL labels
    for author in all_possible_labels:
        # Get current violation count: |T(v, lambda(v))|
        v_set = get_violation_set_of_vertex(driver, instance_db, constraint_db, author)
        print(f"Current violations for {author}: {v_set}")
        current_v_count = len(v_set)
        
        if current_v_count == 0:
            continue

        for potential_label in all_possible_labels:
            if potential_label == author:
                continue
                
            # Calculate cost delta_l
            cost = relabeling_cost(author, potential_label)
            if cost == 0: cost = 1 # Avoid division by zero
            
            # Calculate potential violations: |T(v, lambda'(v))|
            new_v_count = get_potential_violations(driver, instance_db, constraint_db, author, potential_label)
            
            # Normalizing violation elimination gain by the relabeling cost
            if current_v_count > new_v_count:
                score = (current_v_count - new_v_count) / cost
                
                if score > best_score:
                    best_score = score
                    best_action = (author, potential_label)
                    
    return best_action, best_score


def relabeling_cost(vertex, new_label):
    if new_label == vertex:
        return 0
    else:
        return 1

In [None]:
def greedy_v2(driver, instance_db, constraint_db):
    """
    Greedily selects the vertex relabeling that eliminates the most 
    violations normalized by cost.
    """
    iteration = 0
    while iteration < ITERATION_NUMBER:
        print(f"--- Greedy V2 Iteration {iteration + 1} ---")
        
        # Step 1: Find the globally optimal repair action
        best_action, score = choose_best_relabeling(driver, instance_db, constraint_db, all_possible_authors)
        
        if not best_action or score <= 0:
            print("No more beneficial repairs found.")
            break
            
        target_vertex, new_label = best_action
        
        # Step 2: Apply the repair
        update_query = f"""
        MATCH (a {{name: "{target_vertex}"}})
        SET a.name = "{new_label}"
        """
        driver.execute_query(update_query, database_=instance_db)
        print(f"Repaired {target_vertex} -> {new_label} with efficiency score {score:.2f}")
        
        iteration += 1

    return "Repair complete."

In [6]:
def get_all_violations(driver, instance_db, constraint_db):
    """
    Returns a dictionary mapping each vertex to its violation set.
    """
    violations_dict = {}
    for author in all_possible_authors:
        violation_set = get_violation_set_of_vertex(driver, instance_db, constraint_db, author)
        if violation_set:
            violations_dict[author] = violation_set
    print(violations_dict, len(violations_dict))
    return violations_dict

## Contraction

In [None]:
# create super node and vertex class
class SuperNode:
    def __init__(self, label, host):
        self.label = label
        self.host = host
        self.guests = []
        self.stored_cost = 0
    
    
    def get_host(self):
        return self.host

    @property
    def id(self):
        return self.host.id
    
    def get_stored_cost(self):
        return self.stored_cost
    
    def set_stored_cost(self, new_cost):
        self.stored_cost = new_cost
    
    def get_guests(self):
        return self.guests
    
    def set_host(self, new_host):
        self.host = new_host
    
    def set_guests(self, new_guests):
        self.guests = new_guests
    
    def set_label(self, new_label):
        self.label = new_label
    
    def get_label(self):
        return self.label
    
    def get_all_vertices(self):
        """Formula: V(R) = {h(R)} U (Union of V(Ri) for all Ri in guests)"""
        verts = [self.host]
        for guest in self.guests:
            verts.extend(guest.get_all_vertices())
        # Return unique vertices only
        return list(set(verts))
    
    def get_cost(self, candidate_label):
        """
        Implements Formula 7 from the paper.
        """
        # 1. Calculate the total cost of relabeling all vertices in this node
        current_relabel_cost = 0
        for v in self.get_all_vertices():
            if v.get_label() != candidate_label:
                # Using the default count cost (delta_l = 1 for non-identical)
                current_relabel_cost += 1

        # 2. Sum up the costs of all previous internal contractions (guest nodes)
        previous_guests_costs = sum(guest.stored_cost for guest in self.get_guests())

        # 3. Formula 7
        final_cost = current_relabel_cost - previous_guests_costs
        
        return final_cost

class Vertex:
    def __init__(self, id, label):
        self.id = id
        self.neighbors = []
        self.label = label

    def get_neighbors(self):
        return self.neighbors
    
    def add_neighbor(self, neighbor):
        self.neighbors.append(neighbor)

    def get_label(self):
        return self.label
    
    def set_label(self, new_label):
        self.label = new_label


In [None]:
def get_violation_set_of_vertex_contract(driver, instance_db, constraint_db, vertex):
    query = """
    MATCH (a {name: $name})-[:CO_AUTHOR]-(b)
    RETURN b.name AS neighbor_name
    """
    
    constraint_results = driver.execute_query(query, name=vertex.label, database_=constraint_db).records
    instance_results = driver.execute_query(query, name=vertex.label, database_=instance_db).records
    
    constraint_co_authors = [r["neighbor_name"] for r in constraint_results]
    instance_co_authors = [r["neighbor_name"] for r in instance_results]
    
    violation_names = list(set(instance_co_authors) - set(constraint_co_authors))
    
    return violation_names

def create_all_vertices(driver, instance_db):
    vertices = {}
    query = f"""
    MATCH (a)
    OPTIONAL MATCH (a)-[:CO_AUTHOR]-(b)
    RETURN elementId(a) AS v_id, a.name AS name, elementId(b) AS neighbor_id
    """

    results = driver.execute_query(query, database_=instance_db).records
    
    for record in results:
        v_id = record["v_id"]
        label = record["name"]
        neighbor_id = record["neighbor_id"]
        
        if v_id not in vertices:
            vertices[v_id] = Vertex(v_id, label)
        
        if neighbor_id:
            vertices[v_id].add_neighbor(neighbor_id)
            
    return vertices

# for each vertex, create its super node
def create_super_nodes(vertices):
    super_nodes = {}
    for vertex_name, vertex in vertices.items():
        host = vertex
        super_node = SuperNode(vertex.label, host)
        super_nodes[vertex_name] = super_node
    return super_nodes

def get_node_pair_most_violations(driver, instance_db, constraint_db, super_nodes):
    best_pair = (None, None)
    max_violations = -1
    # 1. iterate over vertices in super_nodes
    for i in range(len(super_nodes)):
        for j in range(i + 1, len(super_nodes)):
            R1, R2 = super_nodes[i], super_nodes[j]
            current_pair_violations = 0

            for v in R1.get_all_vertices():
                violation_names = get_violation_set_of_vertex_contract(driver, instance_db, constraint_db, v)
                print(f"Violation set for vertex {v.id}: {violation_names}")
                # Get the labels of all vertices in R2
                r2_labels = [vert.label for vert in R2.get_all_vertices()]
                
                for v_name in violation_names:
                    if v_name in r2_labels:
                        current_pair_violations += 1
            
            print(f"Current pair ({R1.label}, {R2.label}) has {current_pair_violations} violations.")
            
            if current_pair_violations > max_violations:
                max_violations = current_pair_violations
                best_pair = (R1, R2)
                
    return best_pair


def get_all_neighbors_in_instance(driver, instance_db, vertex, all_vertices_dict):
    query = """
    MATCH (a) WHERE elementId(a) = $v_id
    MATCH (a)-[:CO_AUTHOR]-(b)
    RETURN elementId(b) AS neighbor_id
    """
    results = driver.execute_query(query, v_id=vertex.id, database_=instance_db).records
    
    # Map the neighbor IDs back to the actual Vertex objects
    neighbor_objects = [all_vertices_dict[r["neighbor_id"]] for r in results]
    return neighbor_objects

def check_satisfaction(driver, constraint_db, l1, l2):
    if l1 == l2:
        return True

    # 1. Check the constraint graph for an edge between l1 and l2
    query = """
    MATCH (a {name: $l1})
    MATCH (b {name: $l2})
    RETURN EXISTS((a)-[:CO_AUTHOR]-(b)) AS is_satisfied
    """
    
    result = driver.execute_query(
        query, 
        l1=l1, 
        l2=l2, 
        database_=constraint_db
    )
    
    if not result.records:
        return False
        
    return result.records[0]["is_satisfied"]

def get_candidate_label(driver, instance_db, constraint_db, R_to_repair, R_target, all_vertices_dict):
    # h(R1) is the host vertex of the target node
    host_label = R_target.get_host().get_label()
    
    # 1. Get all possible candidate labels L' from constraint_db 
    query_candidates = f"""
    MATCH (l1 {{name: "{host_label}"}})-[:CO_AUTHOR]-(l2)
    RETURN DISTINCT l2.name AS label
    """
    possible_labels = [r["label"] for r in driver.execute_query(query_candidates, database_=constraint_db).records]
    
    best_label = None
    max_gain = -float('inf')

    # 2. Evaluate each candidate l' based on Formula 6
    for l_prime in possible_labels:
        # |T(R2)|: Violations before repair
        # |T(R2, l')|: Violations if all vertices in R_to_repair are relabeled to l_prime
        
        current_violations_count = 0
        new_violations_count = 0
        
        for v in R_to_repair.get_all_vertices():
            # Get neighbors in the instance graph
            neighbors = get_all_neighbors_in_instance(driver, instance_db, v, all_vertices_dict)
            
            for neighbor in neighbors:
                print(type(neighbor))
                # Check if neighbor's label is incompatible with l_prime
                if not check_satisfaction(driver, constraint_db, l_prime, neighbor.get_label()):
                    new_violations_count += 1
            
            # Count original violations for this vertex to calculate gain
            current_violations_count += len(get_violation_set_of_vertex_contract(driver, instance_db, constraint_db, v))

        # Gain = |T(R2)| - |T(R2, l')|
        gain = current_violations_count - new_violations_count
        
        if gain > max_gain:
            max_gain = gain
            best_label = l_prime
            
    return best_label

In [None]:
def count_total_inter_node_violations(driver, instance_db, constraint_db, super_nodes_list):
    total_violations = 0
    processed_edges = set()

    # 1. Iterate through every active SuperNode
    for i, R1 in enumerate(super_nodes_list):
        # Create a set of IDs for vertices in R1 for internal check
        r1_ids = {v.id for v in R1.get_all_vertices()}
        
        # 2. Compare with every other active SuperNode
        for j in range(i + 1, len(super_nodes_list)):
            R2 = super_nodes_list[j]
            r2_ids = {v.id for v in R2.get_all_vertices()}
            
            # 3. Check every vertex in R1
            for v in R1.get_all_vertices():
                # Get the violation names for this vertex
                violation_names = get_violation_set_of_vertex_contract(driver, instance_db, constraint_db, v)
                
                # 4. Check if the violating neighbor's label belongs to anyone in R2
                for v_name in violation_names:
                    for v_target in R2.get_all_vertices():
                        if v_target.label == v_name:
                            # edge between R1 and R2 is a violation
                            total_violations += 1
                            
    return total_violations

In [10]:
def contract(driver, instance_db, constraint_db, vertices, super_nodes):
        
    r_1, r_2 = get_node_pair_most_violations(driver, instance_db, constraint_db, list(super_nodes.values()))
    l_1 = get_candidate_label(driver, instance_db, constraint_db, r_1, r_2, vertices)
    l_2 = get_candidate_label(driver, instance_db, constraint_db, r_2, r_1, vertices)
    print(f"Chosen pair: R1 label={r_1.get_label()}, R2 label={r_2.get_label()}")
    print(f"Candidate labels: l1={l_1}, l2={l_2}")

    cost_r1 = r_1.get_cost(l_1)
    cost_r2 = r_2.get_cost(l_2)
    
    if cost_r2 > cost_r1:
        r_1, r_2 = r_2, r_1  # Swap
        chosen_label = l_1
        applied_cost = cost_r1
    else:
        chosen_label = l_2
        applied_cost = cost_r2
    
    r_2.set_stored_cost(applied_cost)
    
    for v in r_2.get_all_vertices():
        v.set_label(chosen_label)
        
    r_1.get_guests().append(r_2)
    # Remove r_2 from super_nodes
    if r_2.host.id in super_nodes:
        del super_nodes[r_2.host.id]

    return vertices, super_nodes

## AlterGC

In [None]:
def create_supernode_graph(driver, instance_db):
    # Step 1: Create all vertices from the instance graph
    vertices = create_all_vertices(driver, instance_db)
    # Step 2: Create super nodes for each vertex
    super_nodes = create_super_nodes(vertices)

    return vertices, super_nodes

def alter_gc(driver, instance_db, constraint_db, numm_iterations, vertices, super_nodes):
    iteration = 0
    while (count_total_inter_node_violations(driver, instance_db, constraint_db, list(super_nodes.values()))) > 0:
        
        # --- Phase 1: CONTRACT ---
        vertices, super_nodes = contract(driver, instance_db, constraint_db, vertices, super_nodes)
        
        # --- Phase 2: GREEDY ---
        best_node = None
        best_label = None
        best_score = 0
        
        # Iterate over active SuperNodes
        for sn_id, sn in super_nodes.items():
            current_sn_violations = count_violations_for_supernode(driver, constraint_db, sn, vertices)
            
            if current_sn_violations == 0: continue
            
            possible_labels = get_candidate_labels_only(driver, constraint_db, sn.get_host().label)
            
            for l_prime in possible_labels:
                cost = sn.get_cost(l_prime)
                if cost <= 0: cost = 1
                
                new_sn_violations = count_potential_violations_for_sn(driver, constraint_db, sn, l_prime, vertices)
                
                score = (current_sn_violations - new_sn_violations) / cost
                if score > best_score:
                    best_score = score
                    best_node = sn
                    best_label = l_prime
        
        # Apply the best greedy relabeling found across all SuperNodes
        if best_node and best_score > 0:
            print(f"Greedy Relabel: SuperNode {best_node.get_host().id} -> {best_label} (Score: {best_score:.2f})")
            for v in best_node.get_all_vertices():
                v.set_label(best_label)
        iteration += 1
    print(f"Alter GC completed in {iteration} iterations.")
    return vertices, super_nodes

def count_violations_for_supernode(driver, constraint_db, sn, all_vertices):
    v_count = 0
    for v in sn.get_all_vertices():
        for neighbor_id in v.get_neighbors():
            neighbor = all_vertices[neighbor_id]
            if not check_satisfaction(driver, constraint_db, v.label, neighbor.label):
                v_count += 1
    return v_count

def count_potential_violations_for_sn(driver, constraint_db, sn, test_label, all_vertices):
    v_count = 0
    for v in sn.get_all_vertices():
        for neighbor_id in v.get_neighbors():
            neighbor = all_vertices[neighbor_id]
            if not check_satisfaction(driver, constraint_db, test_label, neighbor.label):
                v_count += 1
    return v_count

def get_candidate_labels_only(driver, constraint_db, current_label):
    query = """
    MATCH (a {name: $name})-[:CO_AUTHOR]-(b)
    RETURN DISTINCT b.name AS candidate_label
    """
    
    results = driver.execute_query(
        query, 
        name=current_label, 
        database_=constraint_db
    ).records
    
    # Extract the names from the records
    candidates = [r["candidate_label"] for r in results]
    
    if current_label not in candidates:
        candidates.append(current_label)
        
    return candidates

In [12]:
# def apply_repairs_to_db(driver, instance_db, vertices):
#     print("Syncing repairs to Neo4j...")
#     query = "MATCH (a) WHERE elementId(a) = $v_id SET a.name = $new_name"
#     with driver.session(database=instance_db) as session:
#         for v in vertices.values():
#             session.run(query, v_id=v.id, new_name=v.label)

In [13]:
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()
    vertices, super_nodes = create_supernode_graph(driver, INSTANCE_DB)
    origin_num_violations = count_total_inter_node_violations(driver, INSTANCE_DB, CONSTRAINT_DB, list(super_nodes.values()))
    vertices, super_nodes = alter_gc(driver, INSTANCE_DB, CONSTRAINT_DB, ITERATION_NUMBER, vertices, super_nodes)
    num_violations = count_total_inter_node_violations(driver, INSTANCE_DB, CONSTRAINT_DB, list(super_nodes.values()))


Violation set for vertex 4:3d0f4514-a80a-4e80-adce-6bb803ec9148:47: ['j. p. callan', 'e. shamir', 't. dietterich']
Current pair (j. jackson, h. s. seung) has 0 violations.
Violation set for vertex 4:3d0f4514-a80a-4e80-adce-6bb803ec9148:47: ['j. p. callan', 'e. shamir', 't. dietterich']
Current pair (j. jackson, a. y. ng) has 0 violations.
Violation set for vertex 4:3d0f4514-a80a-4e80-adce-6bb803ec9148:47: ['j. p. callan', 'e. shamir', 't. dietterich']
Current pair (j. jackson, a. ehrenfeucht) has 0 violations.
Violation set for vertex 4:3d0f4514-a80a-4e80-adce-6bb803ec9148:47: ['j. p. callan', 'e. shamir', 't. dietterich']
Current pair (j. jackson, d. haussler) has 0 violations.
Violation set for vertex 4:3d0f4514-a80a-4e80-adce-6bb803ec9148:47: ['j. p. callan', 'e. shamir', 't. dietterich']
Current pair (j. jackson, d. lewis) has 0 violations.
Violation set for vertex 4:3d0f4514-a80a-4e80-adce-6bb803ec9148:47: ['j. p. callan', 'e. shamir', 't. dietterich']
Current pair (j. jackson, d.

In [14]:
print(origin_num_violations)
print(num_violations)

34
0


In [15]:
for supernode in super_nodes.values():
    print(f"SuperNode Label: {supernode.get_label()}")
    print(f" Host: {supernode.get_host().label}")
    print(f" Guests: {[guest.get_host().label for guest in supernode.get_guests()]}")
    print(f" All Vertices: {[v.label for v in supernode.get_all_vertices()]}")
    print(f" All VerticesID: {[v.id for v in supernode.get_all_vertices()]}")
    print(f" Stored Cost: {supernode.get_stored_cost()}")
    print("-----")
print("---------------------------------")


SuperNode Label: j. jackson
 Host: j. jackson
 Guests: ['m. j. kearns', 'm. j. kearns', 'm. j. kearns', 'm. j. kearns']
 All Vertices: ['m. j. kearns', 'j. jackson', 'm. j. kearns', 'm. j. kearns', 'm. j. kearns', 'm. j. kearns']
 All VerticesID: ['4:3d0f4514-a80a-4e80-adce-6bb803ec9148:52', '4:3d0f4514-a80a-4e80-adce-6bb803ec9148:47', '4:3d0f4514-a80a-4e80-adce-6bb803ec9148:62', '4:3d0f4514-a80a-4e80-adce-6bb803ec9148:85', '4:3d0f4514-a80a-4e80-adce-6bb803ec9148:81', '4:3d0f4514-a80a-4e80-adce-6bb803ec9148:61']
 Stored Cost: 0
-----
SuperNode Label: h. s. seung
 Host: h. s. seung
 Guests: ['m. j. kearns']
 All Vertices: ['h. s. seung', 'm. j. kearns']
 All VerticesID: ['4:3d0f4514-a80a-4e80-adce-6bb803ec9148:48', '4:3d0f4514-a80a-4e80-adce-6bb803ec9148:64']
 Stored Cost: 0
-----
SuperNode Label: a. y. ng
 Host: a. y. ng
 Guests: []
 All Vertices: ['a. y. ng']
 All VerticesID: ['4:3d0f4514-a80a-4e80-adce-6bb803ec9148:49']
 Stored Cost: 0
-----
SuperNode Label: a. ehrenfeucht
 Host: m. 

In [16]:
def apply_final_repairs_to_neo4j(driver, instance_db, vertices):
    """
    Takes the final 'repaired' labels from the Python objects 
    and updates the actual Neo4j database.
    """
    print(f"Applying repairs to {len(vertices)} nodes in Neo4j...")
    
    # We use elementId to ensure we update the exact physical node
    query = """
    MATCH (v) 
    WHERE elementId(v) = $v_id
    SET v.name = $new_label
    """
    
    with driver.session(database=instance_db) as session:
        # Use a transaction for better performance
        session.execute_write(lambda tx: [
            tx.run(query, v_id=v.id, new_label=v.label) 
            for v in vertices.values()
        ])
    print("Neo4j Instance Graph is now repaired.")

In [None]:
def clear_database(driver, database):
    driver.execute_query("MATCH (n) DETACH DELETE n", database_=database)
    print(f"Database '{database}' cleared.")


def duplicate_database(driver, source_db, target_db):
    drop_query = f"DROP DATABASE `{target_db}` IF EXISTS WAIT"
    create_query = f"CREATE DATABASE `{target_db}` AS SEED `{source_db}` WAIT"
    
    with driver.session(database="system") as session:
        print(f"Dropping existing `{target_db}` if it exists...")
        session.run(drop_query)
        
        print(f"Cloning `{source_db}` into `{target_db}`...")
        session.run(create_query)
        print("Clone complete.")

def manual_copy(driver, source_db, target_db):
    # 1. Get everything from source
    with driver.session(database=source_db) as session:
        nodes = session.run("MATCH (n) RETURN n, labels(n) as labels, elementId(n) as id").data()
        rels = session.run("MATCH (a)-[r]->(b) RETURN elementId(a) as start, type(r) as type, elementId(b) as end").data()

    # 2. Write everything to target
    with driver.session(database=target_db) as session:
        # Clear target
        session.run("MATCH (n) DETACH DELETE n")
        
        # Recreate nodes
        for node in nodes:
            labels = ":".join(node['labels'])
            session.run(f"CREATE (n:{labels}) SET n = $props, n.original_id = $oid", 
                        props=node['n'], oid=node['id'])
        
        session.run("""
        UNWIND $rels_list AS rel
        MATCH (oa {original_id: rel.start})
        MATCH (ob {original_id: rel.end})
        CREATE (oa)-[:CO_AUTHOR]->(ob)
        """, rels_list=rels)

In [19]:
with GraphDatabase.driver(URI, auth=AUTH) as driver:
    driver.verify_connectivity()
    clear_database(driver, RESULT_DB)
    manual_copy(driver, INSTANCE_DB, RESULT_DB)
    apply_final_repairs_to_neo4j(driver, RESULT_DB, vertices)

Database 'cora-result' cleared.
Applying repairs to 47 nodes in Neo4j...
Neo4j Instance Graph is now repaired.
