Skip to content

Commit

Permalink
graphson import/export samples
Browse files Browse the repository at this point in the history
  • Loading branch information
darrenat10gen committed Jan 14, 2015
1 parent 34e66dc commit 6567ffa
Show file tree
Hide file tree
Showing 19 changed files with 934 additions and 0 deletions.
8 changes: 8 additions & 0 deletions mongodb-graphson/.gitignore
@@ -0,0 +1,8 @@
target
.idea
.classpath
.project
.settings
.metadata
.DS_Store

56 changes: 56 additions & 0 deletions mongodb-graphson/README.md
@@ -0,0 +1,56 @@
mongodb-graphson
================

This project provides examples of importing/exporting GraphSON data files to/from MongoDB.

## Javascript / Mongo Shell

Simple transformations between GraphSON data files and vertex/edge collections in MongoDB can be implemented using basic JavaScript executed via the MongoDB shell utility.
For example, to load the provided test GraphSON file into a local instance of MongoDB, the following can be executed from the repository root directory :

$ mongo --eval fileName=\"javascript/test.txt\" ./javascript/load-graphson.js

To export the graph data stored in the local MongoDB instance, the export script can be executed as follows :

$ mongo --quiet ./javascript/export-graphson.js > output.json

## Java Import/Export Tool

While the MongoDB shell utility is ideal for small import/export tasks, larger datasets are typically better handled using a dedicated MongoDB client. This project contains a MongoDB Java application that can be used for streaming to/from large GraphSON files.

The prerequisites are Maven and a JDK for Java 6 or later. The tool can be built, tested and packaged as follows :

$ mvn package

This generates a self-contained JAR. To see the full usage, run it with the --help option :

$ java -jar ./target/mongodb-graphson-0.1.0-SNAPSHOT.jar --help

Usage: GraphSONTool [options] [command] [command options]
Options:
--dburi MongoDB URI for target database
Default: mongodb://localhost:27017/graph
--ec Name of collection for storing edge data
Default: edges
--vc Name of collection for storing vertex data
Default: vertices
--help Print this message

Commands:

export Export MongoDB database to GraphSON
Usage: export [options] Path of exported data file

import Import GraphSON files
Usage: import [options] The list of files to import
Options:
--drop Drop existing any existing data in graph collections
Default: false
--duplicates Mode used for handling duplicates in existing data. Must be ignore, update or fail
Default: ignore
The tool has commands for import/export and options for handling existing/duplicate data during import. For example, the following command can be used to import the test GraphSON file and update any vertex/edge objects that already exist with the new data in the file :

$ java -jar ./target/mongodb-graphson-0.1.0-SNAPSHOT.jar import --duplicates=update javascript/test.txt
Importing javascript/test.txt into mongodb://localhost:27017/graph...

24 changes: 24 additions & 0 deletions mongodb-graphson/javascript/export-graphson.js
@@ -0,0 +1,24 @@
// Exports the "edges" and "vertices" collections of the current database as a
// single GraphSON document written to stdout via the mongo shell's print().
var EDGES_LABEL = "edges";
var VERTICES_LABEL = "vertices";

/**
 * Prints every document produced by the cursor as one JSON line, separating
 * consecutive documents with a trailing comma.
 *
 * Printing is delayed by one document so that the final document can be
 * emitted without a trailing comma. An empty cursor prints nothing at all
 * (previously the literal "null" was emitted as an array element).
 *
 * @param cursor a mongo shell cursor, e.g. the result of db.edges.find()
 */
function printDocuments(cursor) {
    var previous = null;
    cursor.forEach(function (doc) {
        if (previous !== null) {
            print(" " + JSON.stringify(previous) + ",");
        }
        previous = doc;
    });
    if (previous !== null) {
        print(" " + JSON.stringify(previous));
    }
}

print("{\n \"" + EDGES_LABEL + "\" : [");
printDocuments(db.edges.find());
print("\n ],\n \"" + VERTICES_LABEL + "\" : [");
printDocuments(db.vertices.find());
print("\n ]\n}");

33 changes: 33 additions & 0 deletions mongodb-graphson/javascript/load-graphson.js
@@ -0,0 +1,33 @@
// Loads a GraphSON file into the "edges" and "vertices" collections of the
// current database. Expects the shell variable `fileName` to be set (e.g. via
// --eval); if the optional boolean `recreate` is true, both collections are
// dropped and the edge index is rebuilt before loading.
var EDGES_LABEL = "edges";
var VERTICES_LABEL = "vertices";
var SOURCE_LABEL = "_outV";
var DEST_LABEL = "_inV";

// Open file and parse it as JSON
var fileIn = JSON.parse(cat(fileName));

if (fileIn != null) {

    // If recreating, drop collections and add edge index
    if (typeof recreate == "boolean" && recreate == true) {
        print("Recreating graph collections...");
        db.vertices.drop();
        db.edges.drop();

        // Build the key spec with bracket notation: an object literal such as
        // {SOURCE_LABEL : 1} would index a field literally named
        // "SOURCE_LABEL" rather than the "_outV"/"_inV" fields of the edges.
        var edgeIndex = {};
        edgeIndex[SOURCE_LABEL] = 1;
        edgeIndex[DEST_LABEL] = 1;
        db.edges.ensureIndex(edgeIndex);
    }

    // A file may legitimately omit one of the two sections; treat a missing
    // section as empty instead of failing on `.length` of undefined.
    var edges = fileIn[EDGES_LABEL] || [];
    var vertices = fileIn[VERTICES_LABEL] || [];

    // Iterate over edges adding to the edge collection
    print("Inserting " + edges.length + " edges...");
    edges.forEach(function (edge) {
        db.edges.insert(edge);
    });

    // Iterate over vertices adding to the vertex collection
    print("Inserting " + vertices.length + " vertices...");
    vertices.forEach(function (vertex) {
        db.vertices.insert(vertex);
    });
}



20 changes: 20 additions & 0 deletions mongodb-graphson/javascript/test.txt
@@ -0,0 +1,20 @@
{
"edges" : [
{"_id":1,"weight":0.2,"_type":"edge","_outV":1,"_inV":2,"_label":"emailed"},
{"_id":2,"weight":1,"_type":"edge","_outV":1,"_inV":4,"_label":"called"},
{"_id":3,"weight":0.6,"_type":"edge","_outV":2,"_inV":3,"_label":"called"},
{"_id":4,"weight":0.2,"_type":"edge","_outV":4,"_inV":5,"_label":"met"},
{"_id":5,"weight":0.8,"_type":"edge","_outV":4,"_inV":2,"_label":"texted"},
{"_id":6,"weight":0.9,"_type":"edge","_outV":6,"_inV":3,"_label":"worksfor"}

],
"vertices" : [
{"_id":1,"name":"bob","age":34,"_type":"person"},
{"_id":2,"name":"alice","age":33,"_type":"person"},
{"_id":3,"name":"charlie","age":65,"_type":"person"},
{"_id":4,"name":"carlos","age":44,"_type":"person"},
{"_id":5,"name":"carol","age":37,"_type":"person"},
{"_id":6,"name":"erin","age":22,"_type":"person"}

]
}
79 changes: 79 additions & 0 deletions mongodb-graphson/pom.xml
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the GraphSON import/export tool; `mvn package` produces a runnable shaded JAR. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.mongodb</groupId>
<artifactId>mongodb-graphson</artifactId>
<version>0.1.0-SNAPSHOT</version>

<dependencies>
<!-- Jackson streaming parser and tree model, used by GraphSONReader -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
<version>2.4.4</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.4.4</version>
</dependency>
<!-- MongoDB Java driver -->
<dependency>
<groupId>org.mongodb</groupId>
<artifactId>mongo-java-driver</artifactId>
<version>2.12.4</version>
</dependency>
<!-- Test-only dependency (scope "test"), not bundled into the shaded JAR -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.2</version>
<scope>test</scope>
</dependency>
<!-- NOTE(review): presumably provides the command-line option parsing of GraphSONTool; confirm against that class -->
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
<version>1.47</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Bundles the project classes and their dependencies into a single JAR during the package phase -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>1.6</version>
<configuration>
<createDependencyReducedPom>true</createDependencyReducedPom>
<filters>
<filter>
<artifact>*:*</artifact>
<!-- Leave out signature files of all bundled artifacts; presumably they would no longer match the repackaged JAR contents -->
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<!-- Sets the manifest main class so the JAR can be started with `java -jar` -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.mongodb.graph.GraphSONTool</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
@@ -0,0 +1,29 @@
package org.mongodb.graph;

/**
* Mode used for handling duplicate IDs during import.
* A duplicate occurs when the existing edge and vertex data already contains
* an element with the same _id value, or when the newly imported elements
* themselves contain duplicate ids.
*/
public enum DuplicateMode {

/**
* If a duplicate element is added, it is dropped and ignored;
* the original element remains in place and the import
* continues. This is the default mode.
*/
IGNORE,

/**
* If a duplicate is detected, it is not added to the graph
* and the import fails at that point.
*/
FAIL,

/**
* If a duplicate is detected, it replaces the current element
* that has the conflicting _id value.
*/
UPDATE

}
@@ -0,0 +1,9 @@
package org.mongodb.graph;

/**
 * Property names recognised at the top level of a GraphSON document.
 * Fields of an interface are implicitly {@code public static final}, so the
 * modifiers are not repeated on each constant.
 */
public interface GraphSON {

    /** Name of the property holding an embedded graph object. */
    String GRAPH = "graph";

    /** Name of the property holding the array of edge elements. */
    String EDGES = "edges";

    /** Name of the property holding the array of vertex elements. */
    String VERTICES = "vertices";

}
@@ -0,0 +1,70 @@
package org.mongodb.graph;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.MappingJsonFactory;

import java.io.IOException;
import java.io.InputStream;

public class GraphSONReader {
private static final JsonFactory jsonFactory = new MappingJsonFactory();
private final JSONGraphListener listener;

public GraphSONReader(final JSONGraphListener listener) {
this.listener = listener;
}

public void readGraph(final InputStream graphStream) throws IOException {

final JsonParser jp = jsonFactory.createParser(graphStream);
JsonToken current = jp.nextToken();
processGraphDocument(jp, current);
jp.close();

}

private void processGraphDocument(final JsonParser jp, JsonToken current) throws IOException{
if (current != JsonToken.START_OBJECT) {
throw new IOException("GraphSON error : root should be object");
}
while (jp.nextToken() != JsonToken.END_OBJECT) {
String fieldName = jp.getCurrentName();
// move from field name to field value
current = jp.nextToken();
if (fieldName.equals(GraphSON.VERTICES)) {
if (current == JsonToken.START_ARRAY) {
// For each of the records in the array
while (jp.nextToken() != JsonToken.END_ARRAY) {
JsonNode node = jp.readValueAsTree();
String vertexJson = node.toString();
this.listener.addVertex(vertexJson);
}
} else {
System.err.println("GraphSON error : ignoring vertices, not an array");
jp.skipChildren();
}
} else if(fieldName.equals(GraphSON.EDGES)) {
if (current == JsonToken.START_ARRAY) {
// For each of the records in the array
while (jp.nextToken() != JsonToken.END_ARRAY) {
JsonNode node = jp.readValueAsTree();
String edgeJson = node.toString();
this.listener.addEdge(edgeJson);
}
} else {
System.err.println("GraphSON error : ignoring edges, not an array");
jp.skipChildren();
}
} else if(fieldName.equals(GraphSON.GRAPH)) {
// embedded graph element, recursively process
processGraphDocument(jp, current);
} else {
System.err.println("Skipping unrecognized property : " + fieldName);
jp.skipChildren();
}
}
}
}

0 comments on commit 6567ffa

Please sign in to comment.