Skip to content

Commit

Permalink
Merge pull request #67 from kazumatsudo/feature/add_analysis_method
Browse files Browse the repository at this point in the history
add: UsingSpecificKeyList method
  • Loading branch information
kazumatsudo committed Jan 14, 2024
2 parents d5d8161 + 2a674a4 commit abe22cc
Show file tree
Hide file tree
Showing 18 changed files with 613 additions and 64 deletions.
49 changes: 47 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,61 @@ generate RDB (MySQL) DDL and INSERT sentence from GraphDB (Tinkerpop).
```shell
docker compose up -d
```
2. execute the script by sbt
2. select analysis method
choose the analysis method, i.e. the algorithm used to search Vertices/Edges.
- "by_exhaustive_search" (default)
- "using_specific_key_list"
please see the "How to change analysis method" section below if you want to change the method.
```shell
% ANALYSIS_METHOD="by_exhaustive_search"
```
3. execute the script by sbt
```shell
sbt run
```
3. generate SQL files
4. generate SQL files
- sql/ddl_edge.sql
- sql/ddl_vertex.sql
- sql/insert_edge.sql
- sql/insert_vertex.sql

### How to change analysis method

#### by_exhaustive_search

##### overview

analyze all Vertices and Edges.

- pros
- no advance preparation required
- cons
- inefficient (a full search is executed as many times as there are vertices and edges)

##### how to choose

no advance preparation required because it's selected by default.

#### using_specific_key_list

##### overview

analyze specific vertices searched by keys

- pros
- faster than `by_exhaustive_search` (able to search by index)
- cons
- requires preparing the search conditions in advance

##### how to choose

1. set environment variable "ANALYSIS_METHOD" to "using_specific_key_list"
```shell
% ANALYSIS_METHOD="using_specific_key_list"
```
2. set target vertex label, property key, and its values in [using_key_list_file.json](https://github.com/kazumatsudo/GraphDB2RDB/blob/e163bdcfb7a50d5275eecfb722ac172214dd8a98/src/main/resources/using_key_list_file.json)
- json schema: [using_key_list_file_schema.json](https://github.com/kazumatsudo/GraphDB2RDB/blob/e163bdcfb7a50d5275eecfb722ac172214dd8a98/src/main/resources/using_key_list_file_schema.json)

## Settings

You can define the following settings as you like.
Expand Down
6 changes: 6 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ wartremoverErrors ++= Warts.allBut(
Wart.LeakingSealed,
Wart.NonUnitStatements,
Wart.Nothing,
Wart.OptionPartial,
Wart.Overloading,
Wart.PlatformDefault,
Wart.Product,
Wart.Serializable,
Wart.StringPlusAny,
Wart.SeqApply,
Wart.Throw
)

Expand All @@ -35,6 +38,9 @@ libraryDependencies ++= Seq(
"com.michaelpollmeier" %% "gremlin-scala" % "3.5.3.7",
"com.typesafe" % "config" % "1.4.3",
"com.typesafe.scala-logging" %% "scala-logging" % "3.9.5",
"io.circe" %% "circe-core" % "0.14.5",
"io.circe" %% "circe-generic" % "0.14.5",
"io.circe" %% "circe-parser" % "0.14.5",
"org.apache.tinkerpop" % "gremlin-driver" % "3.7.1",
"org.apache.tinkerpop" % "tinkergraph-gremlin" % "3.7.1",
"org.janusgraph" % "janusgraph-driver" % "1.1.0-20231130-164636.abdc113"
Expand Down
6 changes: 6 additions & 0 deletions src/main/resources/application.conf
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# analysis method
analysis_method="by_exhaustive_search"
analysis_method=${?ANALYSIS_METHOD}
analysis_method_using_specific_key_list_filepath="src/main/resources/using_key_list_file.json"
analysis_method_using_specific_key_list_filepath=${?ANALYSIS_METHOD_USING_SPECIFIC_KEY_LIST_FILEPATH}

# GraphDB
graphdb_remote_graph_properties="conf/remote-graph.properties"
graphdb_remote_graph_properties=${?GRAPHDB_REMOTE_GRAPH_PROPERTIES}
Expand Down
31 changes: 31 additions & 0 deletions src/main/resources/using_key_list_file.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"$schema": "using_key_list_file_schema.json",
"value": [
{
"label": "person",
"value": [
{
"key": "age",
"value": [27, 29, 32, 35]
},
{
"key": "name",
"value": ["josh", "marko", "peter", "vadas"]
}
]
},
{
"label": "software",
"value": [
{
"key": "lang",
"value": ["java"]
},
{
"key": "name",
"value": ["lop", "ripple"]
}
]
}
]
}
39 changes: 39 additions & 0 deletions src/main/resources/using_key_list_file_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "using_key_list_file",
"description": "",
"type": "object",
"properties": {
"value": {
"type": "array",
"items": {
"type": "object",
"properties": {
"label": {
"type": "string"
},
"value": {
"type": "array",
"items": {
"type": "object",
"properties": {
"key": {
"type": "string"
},
"value": {
"type": "array",
"items": {
"type": ["boolean", "number", "string"]
}
}
},
"required": ["key", "value"]
}
}
},
"required": ["label", "value"]
}
}
},
"required": ["value"]
}
48 changes: 43 additions & 5 deletions src/main/scala/Main.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import com.typesafe.config.ConfigFactory
import com.typesafe.scalalogging.StrictLogging
import org.apache.tinkerpop.gremlin.process.traversal.AnonymousTraversalSource.traversal
import usecase.ByExhaustiveSearch
import utils.FileUtility
import usecase.{ByExhaustiveSearch, UsingSpecificKeyList}
import utils.{FileUtility, JsonUtility}

import scala.util.Using
import scala.util.{Failure, Success, Using}

object Main extends StrictLogging {

Expand All @@ -20,7 +20,11 @@ object Main extends StrictLogging {
}

/** generate DDL and Insert sentence from GraphDB
* 1. generate vertex SQL 2. generate edge SQL
*
* process
* - select analysis method
* - execute analysis method
* - output SQL
* @param args
*/
def main(args: Array[String]): Unit = {
Expand All @@ -30,9 +34,43 @@ object Main extends StrictLogging {
config.getString("graphdb_remote_graph_properties")
)
) { g =>
val usecase = ByExhaustiveSearch(g)
/* select analysis method */
sealed trait UsecaseCommand
final case class UsecaseCommandByExhausiveSearch() extends UsecaseCommand
final case class UsecaseCommandUsingSpecificKeyList()
extends UsecaseCommand
val usecaseCommand = config.getString("analysis_method") match {
case "by_exhaustive_search" => UsecaseCommandByExhausiveSearch()
case "using_specific_key_list" => UsecaseCommandUsingSpecificKeyList()
case value =>
throw new IllegalArgumentException(
s"analysis method must be by_exhaustive_search or using_specific_key_list. current analysis method: $value"
)
}
val usecase = usecaseCommand match {
case UsecaseCommandByExhausiveSearch() => ByExhaustiveSearch(g)
case UsecaseCommandUsingSpecificKeyList() =>
{
for {
jsonString <- FileUtility.readJson(
config.getString(
"analysis_method_using_specific_key_list_filepath"
)
)
request <- JsonUtility.parseForUsingSpecificKeyListRequest(
jsonString
)
} yield UsingSpecificKeyList(g, request)
} match {
case Failure(exception) => throw new Exception(exception)
case Success(value) => value
}
}

/* execute analysis method */
val (vertexResult, edgeResult) = usecase.execute

/* output SQL */
vertexResult match {
case Some((ddl, dml)) =>
FileUtility.outputSql(
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/domain/graph/GraphVertex.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ case class GraphVertex(private val value: Vertex) {
private val columnNamePrefixLabel =
config.getString("column_name_prefix_label")

/** id of the wrapped Vertex, as returned by `value.id()` */
val id: AnyRef = value.id()

/** convert to Database Table Information
*
* @return
Expand All @@ -43,8 +45,6 @@ case class GraphVertex(private val value: Vertex) {
}

def toDml: RecordList = {
val id = value.id()

val propertyColumnList = value.valueMap.map { case (columnName, value) =>
(s"$columnNamePrefixProperty$columnName", value)
}
Expand Down
24 changes: 23 additions & 1 deletion src/main/scala/infrastructure/EdgeQuery.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package infrastructure

import com.typesafe.scalalogging.StrictLogging
import domain.graph.GraphEdge
import domain.graph.{GraphEdge, GraphVertex}
import gremlin.scala.GremlinScala
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource

Expand All @@ -17,6 +17,17 @@ final case class EdgeQuery(private val g: GraphTraversalSource)
*/
def countAll: Long = GremlinScala(g.E()).count().head()

/** Fetch the incoming Edges of a Vertex.
  *
  * @param vertex
  *   the Vertex (looked up by its id) whose incoming Edges are requested
  * @return
  *   the incoming Edges, each wrapped in a [[GraphEdge]]
  */
def getInEdgeList(vertex: GraphVertex): Seq[GraphEdge] = {
  val incomingEdges = GremlinScala(g.V(vertex.id)).inE().toList()
  incomingEdges.map(GraphEdge(_))
}

/** get Edges List
*
* @param start
Expand All @@ -41,4 +52,15 @@ final case class EdgeQuery(private val g: GraphTraversalSource)
throw e
}
}

/** Fetch the outgoing Edges of a Vertex.
  *
  * @param vertex
  *   the Vertex (looked up by its id) whose outgoing Edges are requested
  * @return
  *   the outgoing Edges, each wrapped in a [[GraphEdge]]
  */
def getOutEdgeList(vertex: GraphVertex): Seq[GraphEdge] = {
  val outgoingEdges = GremlinScala(g.V(vertex.id)).outE().toList()
  outgoingEdges.map(GraphEdge(_))
}
}
27 changes: 26 additions & 1 deletion src/main/scala/infrastructure/VertexQuery.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package infrastructure

import com.typesafe.scalalogging.StrictLogging
import domain.graph.GraphVertex
import gremlin.scala.GremlinScala
import gremlin.scala.{GremlinScala, Key}
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource

import scala.util.control.NonFatal
Expand Down Expand Up @@ -41,4 +41,29 @@ final case class VertexQuery(private val g: GraphTraversalSource)
throw e
}
}

/** Look up Vertices by label and a single property key/value pair.
  *
  * @param label
  *   vertex label; must not be empty
  * @param key
  *   vertex property key; must not be empty
  * @param value
  *   vertex property value to match
  * @return
  *   the Vertices returned by the `has(label, key, value)` traversal, each
  *   wrapped in a [[GraphVertex]]
  * @throws IllegalArgumentException
  *   if `label` or `key` is empty
  */
def getListByPropertyKey(
    label: String,
    key: String,
    value: Any
): Seq[GraphVertex] = {
  require(label.nonEmpty, "label must not be empty.")
  require(key.nonEmpty, "key must not be empty.")

  val allVertices = GremlinScala(g.V())
  val matchedVertices = allVertices.has(label, Key[Any](key), value).toList()
  matchedVertices.map(GraphVertex(_))
}
}
20 changes: 4 additions & 16 deletions src/main/scala/usecase/ByExhaustiveSearch.scala
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
package usecase

import com.typesafe.scalalogging.StrictLogging
import domain.table.ddl.TableList
import domain.table.dml.RecordList
import infrastructure.{EdgeQuery, VertexQuery}
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource

import scala.util.control.NonFatal

/** analyze all Vertices and Edges
*
* pros:
Expand All @@ -18,20 +15,11 @@ import scala.util.control.NonFatal
* @param g
* [[GraphTraversalSource]]
*/
final case class ByExhaustiveSearch(private val g: GraphTraversalSource)
extends StrictLogging {

private def executeWithExceptionHandling(
function: => (TableList, RecordList)
): Option[(TableList, RecordList)] = {
try {
Some(function)
} catch {
case NonFatal(_) => None
}
}
final case class ByExhaustiveSearch(
override protected val g: GraphTraversalSource
) extends UsecaseBase {

def execute
override def execute
: (Option[(TableList, RecordList)], Option[(TableList, RecordList)]) = {

// 1. generate vertex SQL
Expand Down

0 comments on commit abe22cc

Please sign in to comment.