Skip to content

Commit

Permalink
Optimize performance of detach delete (apache#1276)
Browse files Browse the repository at this point in the history
Previously, all edge tables were scanned once for each vertex to be
deleted in order to process its connected edges. Now, this work is
postponed until all vertices have been deleted. As a result, the
connected edges are processed in a single scan of the edge tables,
regardless of the number of deleted vertices.
  • Loading branch information
rafsun42 committed Oct 17, 2023
1 parent 3e12ec6 commit bf807ba
Show file tree
Hide file tree
Showing 4 changed files with 236 additions and 55 deletions.
115 changes: 111 additions & 4 deletions regress/expected/cypher_delete.out
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (:v)-[:e]->(:v)$$) AS (a agtype);

--Should Fail
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n1 RETURN n1$$) AS (a agtype);
ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE n2 RETURN n2$$) AS (a agtype);
ERROR: Cannot delete vertex n2, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->(n2) DELETE e RETURN e$$) AS (a agtype);
a
------------------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -188,7 +188,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v) CREATE (n)-[:e]-
(0 rows)

SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[]->() DELETE n1 RETURN n1$$) AS (a agtype);
ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
--Cleanup
SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype);
a
Expand Down Expand Up @@ -234,7 +234,7 @@ SELECT * FROM cypher('cypher_delete', $$CREATE (n:v)-[:e]->(:v)$$) AS (a agtype)
(0 rows)

SELECT * FROM cypher('cypher_delete', $$MATCH(n1)-[e]->() DELETE n1, e RETURN n1$$) AS (a agtype);
ERROR: Cannot delete vertex n1, because it still has edges attached. To delete this vertex, you must first delete the attached edges.
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
--Cleanup
SELECT * FROM cypher('cypher_delete', $$MATCH(n) DETACH DELETE n RETURN n$$) AS (a agtype);
a
Expand Down Expand Up @@ -651,6 +651,113 @@ SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (resu
--------
(0 rows)

--
-- Detach Delete
--
SELECT create_graph('detach_delete');
NOTICE: graph "detach_delete" has been created
create_graph
--------------

(1 row)

SELECT * FROM cypher('detach_delete',
$$
CREATE (x:Label3{name:'x', delete: true}),
(y:Label3{name:'y', delete: true}),
(a:Label1{name:'a', delete: true}),
(b:Label5{name:'b'}),
(c:Label5{name:'c'}),
(d:Label5{name:'d'}),
(m:Label7{name:'m', delete: true}),
(n:Label2{name:'n'}),
(p:Label2{name:'p'}),
(q:Label2{name:'q'}),
(a)-[:rel1{name:'ab'}]->(b),
(c)-[:rel2{name:'cd'}]->(d),
(n)-[:rel3{name:'nm'}]->(m),
(a)-[:rel4{name:'am'}]->(m),
(p)-[:rel5{name:'pq'}]->(q)
$$
) as (a agtype);
a
---
(0 rows)

-- no vertices or edges are deleted (error is expected)
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype);
ERROR: Cannot delete a vertex that has edge(s). Delete the edge(s) first, or try DETACH DELETE.
SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
vname
-------
"x"
"y"
"a"
"b"
"c"
"d"
"m"
"n"
"p"
"q"
(10 rows)

SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);
ename
-------
"ab"
"cd"
"nm"
"am"
"pq"
(5 rows)

-- x, y, a, m, ab, nm, am are deleted
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype);
a
---
1
1
(2 rows)

SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
vname
-------
"b"
"c"
"d"
"n"
"p"
"q"
(6 rows)

SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);
ename
-------
"cd"
"pq"
(2 rows)

SELECT drop_graph('detach_delete', true);
NOTICE: drop cascades to 12 other objects
DETAIL: drop cascades to table detach_delete._ag_label_vertex
drop cascades to table detach_delete._ag_label_edge
drop cascades to table detach_delete."Label3"
drop cascades to table detach_delete."Label1"
drop cascades to table detach_delete."Label5"
drop cascades to table detach_delete."Label7"
drop cascades to table detach_delete."Label2"
drop cascades to table detach_delete.rel1
drop cascades to table detach_delete.rel2
drop cascades to table detach_delete.rel3
drop cascades to table detach_delete.rel4
drop cascades to table detach_delete.rel5
NOTICE: graph "detach_delete" has been dropped
drop_graph
------------

(1 row)

--
-- Clean up
--
Expand Down
37 changes: 37 additions & 0 deletions regress/sql/cypher_delete.sql
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,43 @@ END;

SELECT * FROM cypher('cypher_delete', $$MATCH (u:vertices) RETURN u $$) AS (result agtype);

--
-- Detach Delete
--

SELECT create_graph('detach_delete');
SELECT * FROM cypher('detach_delete',
$$
CREATE (x:Label3{name:'x', delete: true}),
(y:Label3{name:'y', delete: true}),
(a:Label1{name:'a', delete: true}),
(b:Label5{name:'b'}),
(c:Label5{name:'c'}),
(d:Label5{name:'d'}),
(m:Label7{name:'m', delete: true}),
(n:Label2{name:'n'}),
(p:Label2{name:'p'}),
(q:Label2{name:'q'}),
(a)-[:rel1{name:'ab'}]->(b),
(c)-[:rel2{name:'cd'}]->(d),
(n)-[:rel3{name:'nm'}]->(m),
(a)-[:rel4{name:'am'}]->(m),
(p)-[:rel5{name:'pq'}]->(q)
$$
) as (a agtype);

-- no vertices or edges are deleted (error is expected)
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DELETE x, y, z RETURN 1 $$) as (a agtype);
SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);

-- x, y, a, m, ab, nm, am are deleted
SELECT * FROM cypher('detach_delete', $$ MATCH (x:Label1), (y:Label3), (z:Label7) DETACH DELETE x, y, z RETURN 1 $$) as (a agtype);
SELECT * FROM cypher('detach_delete', $$ MATCH (v) RETURN v.name $$) as (vname agtype);
SELECT * FROM cypher('detach_delete', $$ MATCH ()-[e]->() RETURN e.name $$) as (ename agtype);

SELECT drop_graph('detach_delete', true);

--
-- Clean up
--
Expand Down
118 changes: 67 additions & 51 deletions src/backend/executor/cypher_delete.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "parser/parsetree.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
#include "common/hashfn.h"

#include "catalog/ag_label.h"
#include "executor/cypher_executor.h"
Expand All @@ -48,9 +49,7 @@ static void rescan_cypher_delete(CustomScanState *node);

static void process_delete_list(CustomScanState *node);

static void find_connected_edges(CustomScanState *node, char *graph_name,
List *labels, char *var_name, graphid id,
bool detach_delete);
static void check_for_connected_edges(CustomScanState *node);
static agtype_value *extract_entity(CustomScanState *node,
TupleTableSlot *scanTupleSlot,
int entity_position);
Expand Down Expand Up @@ -83,6 +82,7 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate,
cypher_delete_custom_scan_state *css =
(cypher_delete_custom_scan_state *)node;
Plan *subplan;
HASHCTL hashctl;

Assert(list_length(css->cs->custom_plans) == 1);

Expand Down Expand Up @@ -112,6 +112,16 @@ static void begin_cypher_delete(CustomScanState *node, EState *estate,
*/
css->edge_labels = get_all_edge_labels_per_graph(estate, css->delete_data->graph_oid);

/* init vertex_id_htab */
MemSet(&hashctl, 0, sizeof(hashctl));
hashctl.keysize = sizeof(graphid);
hashctl.entrysize =
sizeof(graphid); // entries are not used, but entrysize must >= keysize
hashctl.hash = tag_hash;
css->vertex_id_htab = hash_create(DELETE_VERTEX_HTAB_NAME,
DELETE_VERTEX_HTAB_SIZE, &hashctl,
HASH_ELEM | HASH_FUNCTION);

/*
* Postgres does not assign the es_output_cid in queries that do
* not write to disk, ie: SELECT commands. We need the command id
Expand Down Expand Up @@ -194,6 +204,10 @@ static TupleTableSlot *exec_cypher_delete(CustomScanState *node)
*/
static void end_cypher_delete(CustomScanState *node)
{
    cypher_delete_custom_scan_state *delete_state =
        (cypher_delete_custom_scan_state *)node;

    /*
     * Deleting a vertex only records its ID in vertex_id_htab. The edge
     * tables are examined here, once, after the delete list has been
     * processed: connected edges are either removed (DETACH DELETE) or
     * cause an error.
     */
    check_for_connected_edges(node);

    /* The vertex ID hashtable is not needed past this point. */
    hash_destroy(delete_state->vertex_id_htab);

    /* Shut down the child plan node. */
    ExecEndNode(node->ss.ps.lefttree);
}

Expand Down Expand Up @@ -443,15 +457,15 @@ static void process_delete_list(CustomScanState *node)
}

/*
* For vertices, we need to check if the vertex is connected to any
* edges, * if there are, we need to delete them or throw an error,
* depending on if the query specified the DETACH option.
* For vertices, we insert the vertex ID in the hashtable
* vertex_id_htab. This hashtable is used later to process
* connected edges.
*/
if (original_entity_value->type == AGTV_VERTEX)
{
find_connected_edges(node, css->delete_data->graph_name,
css->edge_labels, item->var_name,
id->val.int_value, css->delete_data->detach);
bool found;
hash_search(css->vertex_id_htab, (void *)&(id->val.int_value),
HASH_ENTER, &found);
}

/* At this point, we are ready to delete the node/vertex. */
Expand All @@ -464,85 +478,87 @@ static void process_delete_list(CustomScanState *node)
}

/*
* Find the edges connected to the given node. If there is any edges either
* delete them or throw an error, depending on the detach delete option.
* Scans the edge tables and checks if the deleted vertices are connected to
* any edge(s). For DETACH DELETE, the connected edges are deleted. Otherwise,
* an error is thrown.
*/
static void find_connected_edges(CustomScanState *node, char *graph_name,
List *labels, char *var_name, graphid id,
bool detach_delete)
static void check_for_connected_edges(CustomScanState *node)
{
ListCell *lc;
cypher_delete_custom_scan_state *css =
(cypher_delete_custom_scan_state *)node;
EState *estate = css->css.ss.ps.state;
ListCell *lc;
char *graph_name = css->delete_data->graph_name;

Increment_Estate_CommandId(estate);

/*
* We need to scan through all the edges to see if this vertex has
* any edges attached to it.
*
* XXX: If we implement an on-disc graph storage system. Such as
* an adjacency matrix, the performance of this check can be massively
* improved. However, right now we have to scan every edge to see if
* one has this vertex as a start or end vertex.
*/
foreach(lc, labels)
/* scans each label from css->edge_labels */
foreach (lc, css->edge_labels)
{
char *label_name = lfirst(lc);
ResultRelInfo *resultRelInfo;
TableScanDesc scan_desc;
HeapTuple tuple;
TupleTableSlot *slot;

resultRelInfo = create_entity_result_rel_info(estate,
graph_name, label_name);

resultRelInfo = create_entity_result_rel_info(estate, graph_name,
label_name);
scan_desc = table_beginscan(resultRelInfo->ri_RelationDesc,
estate->es_snapshot, 0, NULL);

slot = ExecInitExtraTupleSlot(
estate, RelationGetDescr(resultRelInfo->ri_RelationDesc),
&TTSOpsHeapTuple);

// scan the table
while(true)
/* for each row */
while (true)
{
graphid startid, endid;
graphid startid;
graphid endid;
bool isNull;
bool found_startid = false;
bool found_endid = false;

tuple = heap_getnext(scan_desc, ForwardScanDirection);

// no more tuples to process, break and scan the next label.
/* no more tuples to process, break and scan the next label. */
if (!HeapTupleIsValid(tuple))
{
break;
}

ExecStoreHeapTuple(tuple, slot, false);

startid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_start_id, &isNull));
endid = GRAPHID_GET_DATUM(slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull));
startid = GRAPHID_GET_DATUM(slot_getattr(
slot, Anum_ag_label_edge_table_start_id, &isNull));
endid = GRAPHID_GET_DATUM(
slot_getattr(slot, Anum_ag_label_edge_table_end_id, &isNull));

hash_search(css->vertex_id_htab, (void *)&startid, HASH_FIND,
&found_startid);

if (id == startid || id == endid)
if (!found_startid)
{
/*
* We have found an edge that uses the vertex. Either delete the
* edge or throw an error. Depending on whether the DETACH
* option was specified in the query.
*/
if (detach_delete)
hash_search(css->vertex_id_htab, (void *)&endid, HASH_FIND,
&found_endid);
}

if (found_startid || found_endid)
{
if (css->delete_data->detach)
{
delete_entity(estate, resultRelInfo, tuple);
}
else
ereport(ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg("Cannot delete vertex %s, because it still has edges attached. "
"To delete this vertex, you must first delete the attached edges.",
var_name)));
{
ereport(
ERROR,
(errcode(ERRCODE_INTERNAL_ERROR),
errmsg(
"Cannot delete a vertex that has edge(s). "
"Delete the edge(s) first, or try DETACH DELETE.")));
}
}
}

table_endscan(scan_desc);
destroy_entity_result_rel_info(resultRelInfo);
}

Decrement_Estate_CommandId(estate);
}
Loading

0 comments on commit bf807ba

Please sign in to comment.