New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Neo4j export #91
Neo4j export #91
Changes from all commits
b9f2081
8b6714c
c0d9558
80dc844
7abcbd7
234f057
9f7225d
e9cf1c5
faabd7e
6f0df20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
// Backend operations for Neo4j export. | ||
package com.lynxanalytics.biggraph.graph_operations | ||
import com.lynxanalytics.biggraph.graph_api._ | ||
import com.lynxanalytics.biggraph.graph_util.Timestamp | ||
import org.apache.spark | ||
|
||
// Updates properties of already existing Neo4j nodes or relationships from a
// LynxKite table. (The implementation is in the case class of the same name.)
object ExportAttributesToNeo4j extends OpFromJson {
  class Input extends MagicInputSignature {
    // The table whose columns are written to Neo4j as properties.
    val t = table
  }
  class Output(implicit instance: MetaGraphOperationInstance) extends MagicOutput(instance) {
    // A human-readable status message produced when the export has finished.
    val exportResult = scalar[String]
  }
  // Deserializes the operation parameters saved by ExportAttributesToNeo4j.toJson.
  def fromJson(j: JsValue) = ExportAttributesToNeo4j(
    (j \ "url").as[String], (j \ "username").as[String], (j \ "password").as[String],
    (j \ "labels").as[String], (j \ "keys").as[Seq[String]], (j \ "version").as[Long],
    (j \ "nodesOrRelationships").as[String])
}
|
||
// Makes it easy to send a DataFrame to a specified Neo4j instance. | ||
// Makes it easy to send a DataFrame to a specified Neo4j instance.
case class Neo4jConnectionParameters(url: String, username: String, password: String) {
  // Writes "df" to Neo4j: the connector runs "query" once per row, with the row
  // bound as "event". Uses basic authentication against the configured URL.
  def send(df: spark.sql.DataFrame, query: String) {
    val connectorOptions = Map(
      "authentication.type" -> "basic",
      "authentication.basic.username" -> username,
      "authentication.basic.password" -> password,
      "url" -> url,
      "query" -> query)
    df.write
      .format("org.neo4j.spark.DataSource")
      .options(connectorOptions)
      .save()
  }
}
|
||
// Updates properties of already existing Neo4j nodes or relationships.
// "keys" name the columns used to find the matching entity in Neo4j;
// all columns of the table are then copied onto the matched entity.
// "labels" is an optional Cypher label/type expression (e.g. ":Person")
// appended to the match pattern. "nodesOrRelationships" selects whether
// nodes or relationships are updated. "version" exists only to make
// repeated exports distinct operations.
case class ExportAttributesToNeo4j(
    url: String, username: String, password: String, labels: String, keys: Seq[String],
    version: Long, nodesOrRelationships: String)
  extends SparkOperation[ExportAttributesToNeo4j.Input, ExportAttributesToNeo4j.Output] {
  val neo = Neo4jConnectionParameters(url, username, password)
  @transient override lazy val inputs = new ExportAttributesToNeo4j.Input()
  def outputMeta(instance: MetaGraphOperationInstance) = new ExportAttributesToNeo4j.Output()(instance)
  override def toJson = Json.obj(
    "url" -> url, "username" -> username, "password" -> password, "labels" -> labels,
    "keys" -> keys, "version" -> version, "nodesOrRelationships" -> nodesOrRelationships)
  def execute(
    inputDatas: DataSet,
    o: ExportAttributesToNeo4j.Output,
    output: OutputBuilder,
    rc: RuntimeContext): Unit = {
    implicit val ds = inputDatas
    // Drop rows where any key is null: they could never match an entity in Neo4j.
    val df = keys.foldLeft(inputs.t.df)((df, key) => df.filter(df(key).isNotNull))
    // Build the Cypher property-match fragment, e.g. "`name`: event.`name`".
    val keyMatch = keys.map(k => s"`$k`: event.`$k`").mkString(", ")
    val query = nodesOrRelationships match {
      case "nodes" => s"MATCH (n$labels {$keyMatch}) SET n += event"
      case "relationships" => s"MATCH ()-[r$labels {$keyMatch}]-() SET r += event"
      // Fail with a clear message instead of an opaque MatchError.
      case other => throw new AssertionError(
        s"nodesOrRelationships must be 'nodes' or 'relationships', got: $other")
    }
    neo.send(df, query)
    val exportResult = "Export done."
    output(o.exportResult, exportResult)
  }
}
|
||
// Exports a whole graph (vertex and edge tables) into Neo4j.
// (The implementation is in the case class of the same name.)
object ExportGraphToNeo4j extends OpFromJson {
  class Input extends MagicInputSignature {
    // Vertex attribute table.
    val vs = table
    // Edge attribute table.
    val es = table
  }
  class Output(implicit instance: MetaGraphOperationInstance) extends MagicOutput(instance) {
    // A human-readable status message produced when the export has finished.
    val exportResult = scalar[String]
  }
  // Deserializes the operation parameters saved by ExportGraphToNeo4j.toJson.
  def fromJson(j: JsValue) = ExportGraphToNeo4j(
    (j \ "url").as[String], (j \ "username").as[String], (j \ "password").as[String],
    (j \ "nodeLabelsColumn").as[String], (j \ "relationshipTypeColumn").as[String],
    (j \ "version").as[Long])
  // Column names that carry LynxKite-internal IDs through the export.
  // The "!" prefix makes collisions with user attribute names unlikely.
  val VID = "!LynxKite ID"
  val SRCDST = "!LynxKite endpoint IDs"
  val SRCID = "!Source LynxKite ID"
  val DSTID = "!Destination LynxKite ID"
}
|
||
// Exports a whole graph to Neo4j, creating new nodes and relationships.
// If "nodeLabelsColumn" / "relationshipTypeColumn" are non-empty, the labels
// and relationship types are taken from those columns of the input tables.
// "version" exists only to make repeated exports distinct operations.
case class ExportGraphToNeo4j(
    url: String, username: String, password: String, nodeLabelsColumn: String,
    relationshipTypeColumn: String, version: Long)
  extends SparkOperation[ExportGraphToNeo4j.Input, ExportGraphToNeo4j.Output] {
  import ExportGraphToNeo4j._
  val neo = Neo4jConnectionParameters(url, username, password)
  @transient override lazy val inputs = new ExportGraphToNeo4j.Input()
  def outputMeta(instance: MetaGraphOperationInstance) = new ExportGraphToNeo4j.Output()(instance)
  override def toJson = Json.obj(
    "url" -> url, "username" -> username, "password" -> password,
    "nodeLabelsColumn" -> nodeLabelsColumn, "relationshipTypeColumn" -> relationshipTypeColumn,
    "version" -> version)
  def execute(
    inputDatas: DataSet,
    o: ExportGraphToNeo4j.Output,
    output: OutputBuilder,
    rc: RuntimeContext): Unit = {
    implicit val ds = inputDatas
    val F = spark.sql.functions
    // Prefix the internal IDs with the timestamp so different exports don't collide.
    // Also save the timestamp so the created entities can be easily cleaned up.
    val timestamp = F.lit(Timestamp.human)
    val vs = inputs.vs.df
      .withColumn(VID, F.concat(timestamp, F.lit(" "), F.col(VID)))
      .withColumn("!LynxKite export timestamp", timestamp)
    // The endpoint IDs arrive in a single struct column (SRCDST); split it into
    // separate source/destination columns, with the same timestamp prefix as VID,
    // so edges can be matched to the nodes created above.
    val es = inputs.es.df
      .withColumn(SRCID, F.concat(timestamp, F.lit(" "), F.col(SRCDST + "._1")))
      .withColumn(DSTID, F.concat(timestamp, F.lit(" "), F.col(SRCDST + "._2")))
      .drop(SRCDST)
      .withColumn("!LynxKite export timestamp", timestamp)

    if (nodeLabelsColumn.isEmpty) {
      // No label column: create plain nodes with all columns as properties.
      neo.send(vs, s"""
      CREATE (n)
      SET n += event
      """)
    } else {
      // Labels cannot be parameterized in plain Cypher, so APOC is used to create
      // the node with labels taken from the (comma-separated) label column.
      // See https://neo4j.com/labs/apoc/4.1/overview/apoc.create/apoc.create.node/.
      neo.send(vs, s"""
      CALL apoc.create.node(split(event.`$nodeLabelsColumn`, ','), event) YIELD node
      RETURN 1
      """)
    }
    if (relationshipTypeColumn.isEmpty) {
      // No type column: connect the previously created endpoints with a generic
      // EDGE relationship carrying all columns as properties.
      neo.send(es, s"""
      MATCH (src {`$VID`: event.`$SRCID`}), (dst {`$VID`: event.`$DSTID`})
      CREATE (src)-[r:EDGE]->(dst)
      SET r += event
      """)
    } else {
      // Like labels, relationship types cannot be parameterized either; APOC
      // creates the relationship with the type taken from the type column.
      neo.send(es, s"""
      MATCH (src {`$VID`: event.`$SRCID`}), (dst {`$VID`: event.`$DSTID`})
      CALL apoc.create.relationship(src, event.`$relationshipTypeColumn`, event, dst) YIELD rel
      RETURN 1
      """)
    }
    val exportResult = "Export done."
    output(o.exportResult, exportResult)
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package com.lynxanalytics.biggraph.frontend_operations | ||
|
||
import com.lynxanalytics.biggraph.controllers.DirectoryEntry | ||
import com.lynxanalytics.biggraph.graph_api.Edge | ||
import com.lynxanalytics.biggraph.graph_api.Scripting._ | ||
import com.lynxanalytics.biggraph.graph_api.GraphTestUtils._ | ||
|
||
// Neo4j Docker container for the tests below. The upstream class takes itself
// as a type parameter (the Java fluent-builder pattern); this subclass closes
// the recursion so the "withX" methods return a usable concrete type in Scala.
class Neo4jContainer
  extends org.testcontainers.containers.Neo4jContainer[Neo4jContainer]("neo4j:4.0.8-enterprise")
|
||
// Round-trip tests: export to a containerized Neo4j, then import back and
// verify attributes, labels, and relationship types.
class Neo4jExportImportTest extends OperationsTestBase {
  // Auth is disabled so the tests need no password; the enterprise image
  // requires explicit license acceptance, and APOC is needed by the export.
  val server = new Neo4jContainer()
    .withoutAuthentication
    .withEnv("NEO4J_ACCEPT_LICENSE_AGREEMENT", "yes")
    .withEnv("NEO4JLABS_PLUGINS", "[\"apoc\"]")

  // Exports the example graph to the test container and waits for completion.
  def exportExampleGraph() = {
    val res = box("Create example graph")
      .box("Export graph to Neo4j", Map("url" -> server.getBoltUrl)).exportResult
    dataManager.get(res)
  }

  test("full graph export and import") {
    server.start()
    exportExampleGraph()
    val p = importBox("Import from Neo4j", Map("url" -> server.getBoltUrl)).project
    // The import adds Neo4j-internal attributes ("<id>", "<labels>", ...) next
    // to the original example graph attributes and the export bookkeeping ones.
    assert(p.vertexAttributes.toMap.keySet == Set(
      "!LynxKite ID", "!LynxKite export timestamp", "<id>", "<labels>",
      "age", "gender", "id", "income", "location", "name"))
    assert(p.edgeAttributes.toMap.keySet == Set(
      "<rel_id>", "<rel_type>", "<source_id>", "<target_id>", "comment", "weight"))
    assert(get(p.vertexAttributes("name")).values.toSet == Set("Adam", "Bob", "Eve", "Isolated Joe"))
    assert(get(p.edgeAttributes("weight")).values.toSet == Set(1.0, 2.0, 3.0, 4.0))
    server.stop()
  }

  test("attribute export") {
    server.start()
    exportExampleGraph()
    // Compute fresh attributes and push them onto the already exported graph,
    // keyed by "name" (vertices) and "comment" (edges).
    val g = box("Create example graph").box("Compute PageRank").box("Compute dispersion")
    dataManager.get(g.box(
      "Export vertex attributes to Neo4j",
      Map("url" -> server.getBoltUrl, "keys" -> "name", "to_export" -> "page_rank")).exportResult)
    dataManager.get(g.box(
      "Export edge attributes to Neo4j",
      Map("url" -> server.getBoltUrl, "keys" -> "comment", "to_export" -> "dispersion")).exportResult)
    val p = importBox("Import from Neo4j", Map("url" -> server.getBoltUrl)).project
    // The re-imported attribute values must match the locally computed ones.
    assert(get(p.vertexAttributes("page_rank")).values.toSet ==
      get(g.project.vertexAttributes("page_rank")).values.toSet)
    assert(get(p.edgeAttributes("dispersion")).values.toSet ==
      get(g.project.edgeAttributes("dispersion")).values.toSet)
    server.stop()
  }

  test("export with labels and types") {
    server.start()
    val res = box("Create example graph").box("Export graph to Neo4j", Map(
      "url" -> server.getBoltUrl,
      "node_labels" -> "gender", "relationship_type" -> "comment")).exportResult
    dataManager.get(res)
    val p = importBox("Import from Neo4j", Map("url" -> server.getBoltUrl)).project
    // Neo4j returns the labels of each node as an array; flatten before comparing.
    assert(
      get(p.vertexAttributes("<labels>")).values.toList
        .flatMap(_.asInstanceOf[scala.collection.mutable.WrappedArray[String]]).sorted
      == Seq("Female", "Male", "Male", "Male"))
    assert(get(p.edgeAttributes("<rel_type>")).values.toSet
      == Set("Adam loves Eve", "Bob envies Adam", "Bob loves Eve", "Eve loves Adam"))
    server.stop()
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Mention that the value of this attribute can be a comma separated list of label values if user wants multiple labels. Would be more elegant to allow vectors here, but probably now is not the time.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I expect you usually just use one label. Described the commas in the help.