Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neo4j export #91

Merged
merged 10 commits into from Oct 2, 2020
Expand Up @@ -160,11 +160,11 @@ class ExportOperations(env: SparkFreeEnvironment) extends OperationRegistry {
Param("url", "Neo4j connection", defaultValue = "bolt://localhost:7687"),
Param("username", "Neo4j username", defaultValue = "neo4j"),
Param("password", "Neo4j password", defaultValue = "neo4j"),
NonNegInt("version", "Snapshot revision ID", default = 1))
NonNegInt("version", "Export repetition ID", default = 1))
lazy val project = projectInput("graph")
val nodesOrRelationships: String
override def enabled = FEStatus.enabled
override def trigger(wc: WorkspaceController, gdc: GraphDrawingController) = {
override def trigger(wc: WorkspaceController, gdc: GraphDrawingController): scala.concurrent.Future[Unit] = {
gdc.getComputeBoxResult(List(exportResult.gUID))
}
override def getOutputs(): Map[BoxOutput, BoxOutputState] = {
Expand All @@ -191,7 +191,7 @@ class ExportOperations(env: SparkFreeEnvironment) extends OperationRegistry {
import Operation.Implicits._
params ++= List(
Param("labels", "Node labels", defaultValue = ""),
Choice("keys", "Attribute to use as key",
Choice("keys", "Attribute(s) to use as key",
// Cannot join on internal ID ("<id>") and stuff like that.
options = project.vertexAttrList.filter(!_.id.startsWith("<")), multipleChoice = true),
Choice("to_export", "Exported attributes", options = project.vertexAttrList, multipleChoice = true))
Expand All @@ -203,7 +203,7 @@ class ExportOperations(env: SparkFreeEnvironment) extends OperationRegistry {
import Operation.Implicits._
params ++= List(
Param("labels", "Relationship labels", defaultValue = ""),
Choice("keys", "Attribute to use as key",
Choice("keys", "Attribute(s) to use as key",
// Cannot join on internal ID ("<id>") and stuff like that.
options = project.edgeAttrList.filter(!_.id.startsWith("<")), multipleChoice = true),
Choice("to_export", "Exported attributes", options = project.edgeAttrList, multipleChoice = true))
Expand All @@ -218,14 +218,14 @@ class ExportOperations(env: SparkFreeEnvironment) extends OperationRegistry {
Param("url", "Neo4j connection", defaultValue = "bolt://localhost:7687"),
Param("username", "Neo4j username", defaultValue = "neo4j"),
Param("password", "Neo4j password", defaultValue = "neo4j"),
NonNegInt("version", "Snapshot revision ID", default = 1),
NonNegInt("version", "Export repetition ID", default = 1),
Choice("node_labels", "Attribute with node labels",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mention that the value of this attribute can be a comma separated list of label values if user wants multiple labels. Would be more elegant to allow vectors here, but probably now is not the time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mention that the value of this attribute can be a comma separated list of label values if user wants multiple labels. Would be more elegant to allow vectors here, but probably now is not the time.

I expect you usually just use one label. Described the commas in the help.

options = List(FEOption("", "")) ++ project.vertexAttrList[String]),
Choice("relationship_type", "Attribute with relationship type",
options = List(FEOption("", "")) ++ project.edgeAttrList[String]))
lazy val project = projectInput("graph")
override def enabled = FEStatus.enabled
override def trigger(wc: WorkspaceController, gdc: GraphDrawingController) = {
override def trigger(wc: WorkspaceController, gdc: GraphDrawingController): scala.concurrent.Future[Unit] = {
gdc.getComputeBoxResult(List(exportResult.gUID))
}
override def getOutputs(): Map[BoxOutput, BoxOutputState] = {
Expand Down
26 changes: 15 additions & 11 deletions app/com/lynxanalytics/biggraph/graph_operations/ExportToNeo4j.scala
@@ -1,9 +1,8 @@
// Backend operations for Neo4j export.
package com.lynxanalytics.biggraph.graph_operations

import com.lynxanalytics.biggraph.graph_api._
import com.lynxanalytics.biggraph.graph_util.HadoopFile
import com.lynxanalytics.biggraph.graph_util.Timestamp
import org.apache.spark
import org.apache.spark.sql.SaveMode

object ExportAttributesToNeo4j extends OpFromJson {
class Input extends MagicInputSignature {
Expand All @@ -18,7 +17,8 @@ object ExportAttributesToNeo4j extends OpFromJson {
(j \ "nodesOrRelationships").as[String])
}

case class Neo4jConnection(url: String, username: String, password: String) {
// Makes it easy to send a DataFrame to a specified Neo4j instance.
case class Neo4jConnectionParameters(url: String, username: String, password: String) {
def send(df: spark.sql.DataFrame, query: String) {
df.write
.format("org.neo4j.spark.DataSource")
Expand All @@ -35,7 +35,7 @@ case class ExportAttributesToNeo4j(
url: String, username: String, password: String, labels: String, keys: Seq[String],
version: Long, nodesOrRelationships: String)
extends SparkOperation[ExportAttributesToNeo4j.Input, ExportAttributesToNeo4j.Output] {
val neo = Neo4jConnection(url, username, password)
val neo = Neo4jConnectionParameters(url, username, password)
@transient override lazy val inputs = new ExportAttributesToNeo4j.Input()
def outputMeta(instance: MetaGraphOperationInstance) = new ExportAttributesToNeo4j.Output()(instance)
override def toJson = Json.obj(
Expand Down Expand Up @@ -83,7 +83,7 @@ case class ExportGraphToNeo4j(
relationshipTypeColumn: String, version: Long)
extends SparkOperation[ExportGraphToNeo4j.Input, ExportGraphToNeo4j.Output] {
import ExportGraphToNeo4j._
val neo = Neo4jConnection(url, username, password)
val neo = Neo4jConnectionParameters(url, username, password)
@transient override lazy val inputs = new ExportGraphToNeo4j.Input()
def outputMeta(instance: MetaGraphOperationInstance) = new ExportGraphToNeo4j.Output()(instance)
override def toJson = Json.obj(
Expand All @@ -97,14 +97,18 @@ case class ExportGraphToNeo4j(
rc: RuntimeContext): Unit = {
implicit val ds = inputDatas
val F = spark.sql.functions
// Prefix the internal IDs with this operation's GUID so different exports don't collide.
val guid = F.lit(this.gUID.toString + "-")
// Prefix the internal IDs with the timestamp so different exports don't collide.
// Also save the timestamp so the created entities can be easily cleaned up.
val timestamp = F.lit(Timestamp.human)
val vs = inputs.vs.df
.withColumn(VID, F.concat(guid, F.col(VID)))
.withColumn(VID, F.concat(timestamp, F.lit(" "), F.col(VID)))
.withColumn("!LynxKite export timestamp", timestamp)
val es = inputs.es.df
.withColumn(SRCID, F.concat(guid, F.col(SRCDST + "._1")))
.withColumn(DSTID, F.concat(guid, F.col(SRCDST + "._2")))
.withColumn(SRCID, F.concat(timestamp, F.lit(" "), F.col(SRCDST + "._1")))
.withColumn(DSTID, F.concat(timestamp, F.lit(" "), F.col(SRCDST + "._2")))
.drop(SRCDST)
.withColumn("!LynxKite export timestamp", timestamp)

if (nodeLabelsColumn.isEmpty) {
neo.send(vs, s"""
CREATE (n)
Expand Down
38 changes: 38 additions & 0 deletions web/app/help/operations/export-edge-attributes-to-neo4j.asciidoc
@@ -0,0 +1,38 @@
### Export edge attributes to Neo4j

Exports edge attributes from a graph in LynxKite to a
corresponding graph in https://neo4j.com/[Neo4j].

The relationships in Neo4j are identified by a key property (or properties).
You must have a corresponding edge attribute in LynxKite by the same name.
This will be used to find the right relationship to update in Neo4j.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to explicitly spell out what happens if multiple edges match a given key value combination
a) in Neo4j
b) in LynxKite

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to explicitly spell out what happens if multiple edges match a given key value combination
a) in Neo4j
b) in LynxKite

Done. I've also added the full Cypher query to the docs. I imagine this may be of interest to advanced Neo4j users and it's not too scary.

The properties of the Neo4j relationships will be updated with the exported edge attributes.

====

[p-url]#Neo4j connection#::
The Neo4j connection string of the form `bolt://localhost:7687`.

[p-username]#Neo4j username#::
Username for the connection.

[p-password]#Neo4j password#::
Password for the connection. It will be saved in the workspace and visible to anyone with
access to the workspace.

[p-version]#Export repetition ID#::
LynxKite only re-computes outputs if parameters or inputs have changed.
This is true for exports too. If you want to repeat a previous export, you can increase this
export repetition ID parameter.

[p-labels]#Relationship type#::
Makes it possible to restrict the export to one relationship type in Neo4j.
This is useful to make sure no other relationship type is accidentally affected.
The format is as in Cypher: `:TYPE`. Leave empty to allow updating any node.

[p-keys]#Attribute(s) to use as key#::
Select the attribute (or attributes) to identify the Neo4j relationships by.
The attribute name must match the property name in Neo4j.

====
37 changes: 37 additions & 0 deletions web/app/help/operations/export-graph-to-neo4j.asciidoc
@@ -0,0 +1,37 @@
### Export graph to Neo4j

Exports a graph from LynxKite to https://neo4j.com/[Neo4j].
The whole graph will be copied to Neo4j with all attributes.
No existing data is modified in Neo4j.

A `!LynxKite export timestamp` property is added to each new
node and relationship in Neo4j. This helps clean up the export if needed.

====

[p-url]#Neo4j connection#::
The Neo4j connection string of the form `bolt://localhost:7687`.

[p-username]#Neo4j username#::
Username for the connection.

[p-password]#Neo4j password#::
Password for the connection. It will be saved in the workspace and visible to anyone with
access to the workspace.

[p-version]#Export repetition ID#::
LynxKite only re-computes outputs if parameters or inputs have changed.
This is true for exports too. If you want to repeat a previous export, you can increase this
export repetition ID parameter.

[p-node_labels]#Node labels#::
A string vertex attribute that is a comma-separated list of labels to apply to the newly
created nodes. Optional. You must have https://neo4j.com/developer/neo4j-apoc/[Neo4j APOC]
installed on the Neo4j instance to use this.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If omitted, the nodes will have no label, right? Is this acceptable in Neo4J then?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If omitted, the nodes will have no label, right? Is this acceptable in Neo4J then?

Surprisingly, yes.


[p-relationship_type]#Attribute with relationship type#::
A string edge attribute that specifies the relationship type for each newly created relationship.
Optional. You must have https://neo4j.com/developer/neo4j-apoc/[Neo4j APOC]
installed on the Neo4j instance to use this.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. So edges can have only one relationship type, but vertices can have multiple labels? Sorry for bothering you with basic neo4j questions..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. So edges can have only one relationship type, but vertices can have multiple labels? Sorry for bothering you with basic neo4j questions..

Yes. Edges must always have a type, and they can only have a single type. Nodes can have multiple labels or no labels. It's pretty clear that you didn't design this system. 😄


====
38 changes: 38 additions & 0 deletions web/app/help/operations/export-vertex-attributes-to-neo4j.asciidoc
@@ -0,0 +1,38 @@
### Export vertex attributes to Neo4j

Exports vertex attributes from a graph in LynxKite to a
corresponding graph in https://neo4j.com/[Neo4j].

The nodes in Neo4j are identified by a key property (or properties).
You must have a corresponding vertex attribute in LynxKite by the same name.
This will be used to find the right nodes to update in Neo4j.

The properties of the Neo4j nodes will be updated with the exported vertex attributes.

====

[p-url]#Neo4j connection#::
The Neo4j connection string of the form `bolt://localhost:7687`.

[p-username]#Neo4j username#::
Username for the connection.

[p-password]#Neo4j password#::
Password for the connection. It will be saved in the workspace and visible to anyone with
access to the workspace.

[p-version]#Export repetition ID#::
LynxKite only re-computes outputs if parameters or inputs have changed.
This is true for exports too. If you want to repeat a previous export, you can increase this
export repetition ID parameter.

[p-labels]#Node labels#::
Makes it possible to restrict the export to one label (or combination of labels) in Neo4j.
This is useful to make sure no other node type is accidentally affected.
The format is as in Cypher: `:Label1:Label2`. Leave empty to allow updating any node.

[p-keys]#Attribute(s) to use as key#::
Select the attribute (or attributes) to identify the Neo4j nodes by.
The attribute name must match the property name in Neo4j.

====
6 changes: 6 additions & 0 deletions web/app/help/operations/index.asciidoc
Expand Up @@ -283,6 +283,10 @@ include::discard-vertex-attributes.asciidoc[]

include::embed-vertices.asciidoc[]

include::export-edge-attributes-to-neo4j.asciidoc[]

include::export-graph-to-neo4j.asciidoc[]

include::export-to-avro.asciidoc[]

include::export-to-csv.asciidoc[]
Expand All @@ -299,6 +303,8 @@ include::export-to-orc.asciidoc[]

include::export-to-parquet.asciidoc[]

include::export-vertex-attributes-to-neo4j.asciidoc[]

include::expose-internal-edge-id.asciidoc[]

include::expose-internal-vertex-id.asciidoc[]
Expand Down