In [63]:
import os

import pandas as pd

In [1]:
from py2neo import Graph, Node, Relationship

In [74]:
from py2neo.bulk import create_relationships, merge_relationships

In [2]:
from py2neo.matching import *

In [5]:
# Connection settings. Environment variables take precedence so that real
# credentials never have to be committed with the notebook; the fallbacks are
# the local development values this notebook was originally written with.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "localadmin")

# `g` is kept for backward compatibility. The cells further down refer to the
# connection as `graph` and to a node matcher as `nodes`, so both are defined
# here to make the notebook runnable from a fresh kernel.
g = Graph(uri, user=user, password=password)
graph = g
nodes = graph.nodes  # node matcher bound to this graph

## Consolidating nodes via batched relationship redirection

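The APOC merge-nodes procedure does everything in a single operation, so it runs out of memory on this graph. Instead, the simple routines below rewire relationships from the node being merged to the node being kept in batches until none are left, at which point the emptied node can be removed.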

In [50]:
# First id: the Act being merged away (it has fewer relationships, so start there).
# Second id: the Act that is kept (it has more relationships).
to_merge_act_id, merge_into_act_id = 4219, 4942

In [51]:
# Look up the Act node that is about to be merged away and print it as a sanity check.
act_lookup = nodes.match("Act", act_id=to_merge_act_id)
first_act = act_lookup.first()
print(first_act)

(_35063:Act {act_id: 4219.0, act_sum: 'CODE OF CRIMINAL PROCEDURE, 1973', total_count: 321363.0})


In [58]:
def extract_batch_cases(act_id=to_merge_act_id, batch_size=10):
    """Return up to ``batch_size`` distinct case ids that point at an Act.

    Only ``Case`` start nodes are considered, which keeps this function
    consistent with the relationship counts and the delete step used elsewhere
    in this notebook. Each case id appears at most once, even when a case has
    several (duplicated) relationships into the same Act.

    Parameters
    ----------
    act_id : int
        ``act_id`` property of the Act whose incoming relationships are sampled.
    batch_size : int
        Maximum number of case ids to return.

    Returns
    -------
    list
        ``case_id`` values of the matching ``Case`` nodes (empty when the Act
        has no incoming relationships from cases).
    """
    # Query parameters are used instead of string interpolation, and only the
    # id property is returned rather than whole relationship objects.
    records = graph.run(
        """
        MATCH (c:Case)-[]->(a:Act {act_id: $act_id})
        RETURN DISTINCT c.case_id AS case_id
        LIMIT $batch_size
        """,
        act_id=act_id,
        batch_size=batch_size,
    ).data()
    return [record["case_id"] for record in records]

In [61]:
# Preview one default-sized batch of case ids.
sample_case_ids = extract_batch_cases()
print(sample_case_ids)

['17-36-01-206400003412018', '17-13-01-206400011902018', '17-21-01-206400000422018', '17-13-21-206400038412018', '17-23-01-206400015502018', '17-35-01-206400023842018', '17-27-17-206400002192018', '17-19-18-206400001192018', '17-11-01-206400002322018', '17-19-19-206400007442018']


In [70]:
# Used this to check whether multiple sections were responsible for the multiple
# relationships in the graph, but they appear to be plain duplicates instead.
# act_case_df = pd.read_csv('../data/acts_sections.csv')
# act_case_df.head()
# act_case_df[act_case_df.ddl_case_id == '17-36-01-206400003412018']

In [100]:
def count_rels(act_id):
    """Count the relationships that run from ``Case`` nodes into one Act.

    Parameters
    ----------
    act_id : int
        ``act_id`` property of the Act node to inspect.

    Returns
    -------
    int
        Number of ``(:Case)-[r]->(:Act)`` relationships ending at that Act.
    """
    # The act id is passed as a query parameter rather than interpolated into the text.
    return graph.run(
        "MATCH (c:Case)-[r]->(a:Act {act_id: $act_id}) RETURN count(r)",
        act_id=act_id,
    ).evaluate()

In [107]:
def merge_and_delete_batch(batch_size=10):
    """Rewire one batch of cases from the Act being merged to the Act being kept.

    For up to ``batch_size`` cases that point at ``to_merge_act_id``, a new
    ``USES_ACT`` relationship to ``merge_into_act_id`` is created, and then every
    relationship from those cases to ``to_merge_act_id`` is deleted. Relationship
    counts are printed before and after the batch.

    Parameters
    ----------
    batch_size : int
        Maximum number of cases to rewire in this call.

    Returns
    -------
    tuple
        ``(left_to_merge, at_destination)``: relationship counts into the source
        Act and into the destination Act after the batch.
    """
    print("Initiating batch of rewires, rels into to merge: ", count_rels(to_merge_act_id), " and dest: ", count_rels(merge_into_act_id))
    # Pass the act id explicitly so extraction and deletion always target the
    # same Act, then drop repeated or missing ids so that each case receives
    # exactly one new relationship.
    extracted = extract_batch_cases(act_id=to_merge_act_id, batch_size=batch_size)
    cases_to_rewire = [case_id for case_id in dict.fromkeys(extracted) if case_id is not None]
    if cases_to_rewire:
        # Wire the cases to the destination Act.
        # NOTE(review): the middle element of each triple is stored as relationship
        # properties, so every new relationship carries a `type` property - confirm
        # that this is intended.
        data = [(case_id, {"type": "USES_ACT"}, merge_into_act_id) for case_id in cases_to_rewire]
        create_relationships(graph.auto(), data, "USES_ACT",
                             start_node_key=("Case", "case_id"),
                             end_node_key=("Act", "act_id"))
        # Delete every relationship those cases still have to the source Act.
        graph.run(
            """
            MATCH (c:Case)-[r]->(a:Act {act_id: $act_id})
            WHERE c.case_id IN $cases
            DELETE r
            """,
            act_id=to_merge_act_id,
            cases=cases_to_rewire,
        ).evaluate()
    # Report the state after the batch (unchanged when there was nothing to rewire).
    left_to_merge = count_rels(to_merge_act_id)
    at_destination = count_rels(merge_into_act_id)
    print("Completed batch, rels into to merge: ", left_to_merge, " and dest: ", at_destination)
    return left_to_merge, at_destination

In [108]:
# Keep rewiring batches until no Case relationships point at the source Act.
to_merge_rels = count_rels(to_merge_act_id)
while to_merge_rels > 0:
    remaining_before = to_merge_rels
    to_merge_rels, _ = merge_and_delete_batch(batch_size=100)
    # A batch that removes nothing would repeat forever, so stop loudly instead.
    if to_merge_rels >= remaining_before:
        raise RuntimeError(
            f"No progress rewiring act {to_merge_act_id}: "
            f"{to_merge_rels} relationships still remain"
        )

Initiating batch of rewires, rels into to merge:  300484  and dest:  521006
Completed batch, rels into to merge:  299584  and dest:  521106
Initiating batch of rewires, rels into to merge:  299584  and dest:  521106
Completed batch, rels into to merge:  298684  and dest:  521206
Initiating batch of rewires, rels into to merge:  298684  and dest:  521206
Completed batch, rels into to merge:  297756  and dest:  521306
Initiating batch of rewires, rels into to merge:  297756  and dest:  521306
Completed batch, rels into to merge:  296776  and dest:  521406
Initiating batch of rewires, rels into to merge:  296776  and dest:  521406
Completed batch, rels into to merge:  295776  and dest:  521506
Initiating batch of rewires, rels into to merge:  295776  and dest:  521506
Completed batch, rels into to merge:  294776  and dest:  521606
Initiating batch of rewires, rels into to merge:  294776  and dest:  521606
Completed batch, rels into to merge:  293776  and dest:  521706
Initiating batch of 

Completed batch, rels into to merge:  240630  and dest:  526906
Initiating batch of rewires, rels into to merge:  240630  and dest:  526906
Completed batch, rels into to merge:  239590  and dest:  527006
Initiating batch of rewires, rels into to merge:  239590  and dest:  527006
Completed batch, rels into to merge:  238557  and dest:  527106
Initiating batch of rewires, rels into to merge:  238557  and dest:  527106
Completed batch, rels into to merge:  237507  and dest:  527206
Initiating batch of rewires, rels into to merge:  237507  and dest:  527206
Completed batch, rels into to merge:  236469  and dest:  527306
Initiating batch of rewires, rels into to merge:  236469  and dest:  527306
Completed batch, rels into to merge:  235434  and dest:  527406
Initiating batch of rewires, rels into to merge:  235434  and dest:  527406
Completed batch, rels into to merge:  234400  and dest:  527506
Initiating batch of rewires, rels into to merge:  234400  and dest:  527506
Completed batch, rel

Completed batch, rels into to merge:  183356  and dest:  532806
Initiating batch of rewires, rels into to merge:  183356  and dest:  532806
Completed batch, rels into to merge:  182514  and dest:  532906
Initiating batch of rewires, rels into to merge:  182514  and dest:  532906
Completed batch, rels into to merge:  181675  and dest:  533006
Initiating batch of rewires, rels into to merge:  181675  and dest:  533006
Completed batch, rels into to merge:  180829  and dest:  533106
Initiating batch of rewires, rels into to merge:  180829  and dest:  533106
Completed batch, rels into to merge:  179990  and dest:  533206
Initiating batch of rewires, rels into to merge:  179990  and dest:  533206
Completed batch, rels into to merge:  179150  and dest:  533306
Initiating batch of rewires, rels into to merge:  179150  and dest:  533306
Completed batch, rels into to merge:  178311  and dest:  533406
Initiating batch of rewires, rels into to merge:  178311  and dest:  533406
Completed batch, rel

Completed batch, rels into to merge:  136825  and dest:  538706
Initiating batch of rewires, rels into to merge:  136825  and dest:  538706
Completed batch, rels into to merge:  136087  and dest:  538806
Initiating batch of rewires, rels into to merge:  136087  and dest:  538806
Completed batch, rels into to merge:  135354  and dest:  538906
Initiating batch of rewires, rels into to merge:  135354  and dest:  538906
Completed batch, rels into to merge:  134614  and dest:  539006
Initiating batch of rewires, rels into to merge:  134614  and dest:  539006
Completed batch, rels into to merge:  133872  and dest:  539106
Initiating batch of rewires, rels into to merge:  133872  and dest:  539106
Completed batch, rels into to merge:  133133  and dest:  539206
Initiating batch of rewires, rels into to merge:  133133  and dest:  539206
Completed batch, rels into to merge:  132388  and dest:  539306
Initiating batch of rewires, rels into to merge:  132388  and dest:  539306
Completed batch, rel

Completed batch, rels into to merge:  99435  and dest:  544606
Initiating batch of rewires, rels into to merge:  99435  and dest:  544606
Completed batch, rels into to merge:  98894  and dest:  544706
Initiating batch of rewires, rels into to merge:  98894  and dest:  544706
Completed batch, rels into to merge:  98358  and dest:  544806
Initiating batch of rewires, rels into to merge:  98358  and dest:  544806
Completed batch, rels into to merge:  97816  and dest:  544906
Initiating batch of rewires, rels into to merge:  97816  and dest:  544906
Completed batch, rels into to merge:  97281  and dest:  545006
Initiating batch of rewires, rels into to merge:  97281  and dest:  545006
Completed batch, rels into to merge:  96741  and dest:  545106
Initiating batch of rewires, rels into to merge:  96741  and dest:  545106
Completed batch, rels into to merge:  96201  and dest:  545206
Initiating batch of rewires, rels into to merge:  96201  and dest:  545206
Completed batch, rels into to merg

Initiating batch of rewires, rels into to merge:  70769  and dest:  550506
Completed batch, rels into to merge:  70327  and dest:  550606
Initiating batch of rewires, rels into to merge:  70327  and dest:  550606
Completed batch, rels into to merge:  69885  and dest:  550706
Initiating batch of rewires, rels into to merge:  69885  and dest:  550706
Completed batch, rels into to merge:  69445  and dest:  550806
Initiating batch of rewires, rels into to merge:  69445  and dest:  550806
Completed batch, rels into to merge:  69012  and dest:  550906
Initiating batch of rewires, rels into to merge:  69012  and dest:  550906
Completed batch, rels into to merge:  68585  and dest:  551006
Initiating batch of rewires, rels into to merge:  68585  and dest:  551006
Completed batch, rels into to merge:  68144  and dest:  551106
Initiating batch of rewires, rels into to merge:  68144  and dest:  551106
Completed batch, rels into to merge:  67706  and dest:  551206
Initiating batch of rewires, rels 

Completed batch, rels into to merge:  49304  and dest:  556506
Initiating batch of rewires, rels into to merge:  49304  and dest:  556506
Completed batch, rels into to merge:  49068  and dest:  556606
Initiating batch of rewires, rels into to merge:  49068  and dest:  556606
Completed batch, rels into to merge:  48836  and dest:  556706
Initiating batch of rewires, rels into to merge:  48836  and dest:  556706
Completed batch, rels into to merge:  48588  and dest:  556806
Initiating batch of rewires, rels into to merge:  48588  and dest:  556806
Completed batch, rels into to merge:  48345  and dest:  556906
Initiating batch of rewires, rels into to merge:  48345  and dest:  556906
Completed batch, rels into to merge:  48109  and dest:  557006
Initiating batch of rewires, rels into to merge:  48109  and dest:  557006
Completed batch, rels into to merge:  47874  and dest:  557106
Initiating batch of rewires, rels into to merge:  47874  and dest:  557106
Completed batch, rels into to merg

Initiating batch of rewires, rels into to merge:  31485  and dest:  562406
Completed batch, rels into to merge:  31149  and dest:  562506
Initiating batch of rewires, rels into to merge:  31149  and dest:  562506
Completed batch, rels into to merge:  30802  and dest:  562606
Initiating batch of rewires, rels into to merge:  30802  and dest:  562606
Completed batch, rels into to merge:  30463  and dest:  562706
Initiating batch of rewires, rels into to merge:  30463  and dest:  562706
Completed batch, rels into to merge:  30264  and dest:  562806
Initiating batch of rewires, rels into to merge:  30264  and dest:  562806
Completed batch, rels into to merge:  30026  and dest:  562906
Initiating batch of rewires, rels into to merge:  30026  and dest:  562906
Completed batch, rels into to merge:  29791  and dest:  563006
Initiating batch of rewires, rels into to merge:  29791  and dest:  563006
Completed batch, rels into to merge:  29547  and dest:  563106
Initiating batch of rewires, rels 

Completed batch, rels into to merge:  16789  and dest:  568406
Initiating batch of rewires, rels into to merge:  16789  and dest:  568406
Completed batch, rels into to merge:  16542  and dest:  568506
Initiating batch of rewires, rels into to merge:  16542  and dest:  568506
Completed batch, rels into to merge:  16308  and dest:  568606
Initiating batch of rewires, rels into to merge:  16308  and dest:  568606
Completed batch, rels into to merge:  16066  and dest:  568706
Initiating batch of rewires, rels into to merge:  16066  and dest:  568706
Completed batch, rels into to merge:  15830  and dest:  568806
Initiating batch of rewires, rels into to merge:  15830  and dest:  568806
Completed batch, rels into to merge:  15600  and dest:  568906
Initiating batch of rewires, rels into to merge:  15600  and dest:  568906
Completed batch, rels into to merge:  15362  and dest:  569006
Initiating batch of rewires, rels into to merge:  15362  and dest:  569006
Completed batch, rels into to merg

Completed batch, rels into to merge:  2384  and dest:  574406
Initiating batch of rewires, rels into to merge:  2384  and dest:  574406
Completed batch, rels into to merge:  2142  and dest:  574506
Initiating batch of rewires, rels into to merge:  2142  and dest:  574506
Completed batch, rels into to merge:  1903  and dest:  574606
Initiating batch of rewires, rels into to merge:  1903  and dest:  574606
Completed batch, rels into to merge:  1661  and dest:  574706
Initiating batch of rewires, rels into to merge:  1661  and dest:  574706
Completed batch, rels into to merge:  1534  and dest:  574806
Initiating batch of rewires, rels into to merge:  1534  and dest:  574806
Completed batch, rels into to merge:  1396  and dest:  574906
Initiating batch of rewires, rels into to merge:  1396  and dest:  574906
Completed batch, rels into to merge:  1252  and dest:  575006
Initiating batch of rewires, rels into to merge:  1252  and dest:  575006
Completed batch, rels into to merge:  1110  and 

In [110]:
# Remove the source Act node; intended to run only after its relationships have been rewired.
delete_act_query = f"match (a:Act{{act_id:{to_merge_act_id}}}) delete a"
graph.run(delete_act_query).evaluate()