Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing Issue #476: creating a single step example #697

Merged
merged 2 commits into from Feb 3, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
74 changes: 74 additions & 0 deletions examples/single-step-ingest/README.md
@@ -0,0 +1,74 @@
# Example Single Step Ingest
This example shows how to insert a document into the STAGING database and harmonize it in the same step by calling a custom REST endpoint named **run-ingest-harmonize**.

This example uses some of the same data as the online-store example

The sample data is located in the input/ folder
```
|-- input
|-- products
|-- xxx.xml
```

# TLDR; How do I run it?
1. Download the [latest quick-start war](https://github.com/marklogic-community/marklogic-data-hub/releases/download/v2.0.3/quick-start-2.0.3.war) into this folder.

1. Run the quick-start war `java -jar quick-start-2.0.3.war`

1. Open your web browser to [http://localhost:8080](http://localhost:8080).

1. Browse to this folder from the login screen.

1. Initialize the project (if necessary)

1. Login with your MarkLogic credentials

1. Install the Hub into MarkLogic (if necessary)

# Loading and Ingesting the Products data via the REST API
To load the shirt.xml file from the input/products directory by calling the custom REST extension:

```
curl --anyauth --user user:password -X PUT \
-T shirt.xml -i -H "Content-type: application/xml" \
'http://localhost:8010/v1/resources/run-ingest-harmonize?rs:uri=shirt.xml&rs:job-id=1234&rs:entity-name=Products&rs:ingest-flow-name=Load%20Products&rs:harmonize-flow-name=Harmonize%20Products'
```

The parameters are:
- **rs:uri** - the URI that the document in the STAGING database should be saved to
- **rs:job-id** - a job id; any string is valid
- **rs:entity-name** - the name of the entity the flow belongs to
- **rs:ingest-flow-name** - the name of the ingestion flow
- **rs:harmonize-flow-name** - the name of the harmonization flow

A successful response to the curl request looks like this:
```
<?xml version="1.0" encoding="UTF-8"?>
<response>
<ingestion>
<envelope xmlns="http://marklogic.com/entity-services">
<headers/>
<triples/>
<instance>
<product xmlns="">
<id>10</id>
<sku>380140431212</sku>
<title>Shirt</title>
<game_title>promising title</game_title>
<description>A shirt for promising title</description>
<price>10.0</price>
<game_id>1000174</game_id>
<game_SKU>182232002232</game_SKU>
</product>
</instance>
<attachments/>
</envelope>
</ingestion>
<harmonization>
<harmonizationSuccessful>true</harmonizationSuccessful>
<errorFound>false</errorFound>
</harmonization>
</response>
```

The envelope generated by the ingestion flow is returned in the `envelope` element inside the `ingestion` element. The `harmonization` element contains two boolean values: whether the harmonization was successful, and whether an error was found during the harmonization process. If an error is thrown during the harmonization process, the error string is displayed.
10 changes: 10 additions & 0 deletions examples/single-step-ingest/input/products/shirt.xml
@@ -0,0 +1,10 @@
<product>
<id>10</id>
<sku>380140431212</sku>
<title>Shirt</title>
<game_title>promising title</game_title>
<description>A shirt for promising title</description>
<price>10.0</price>
<game_id>1000174</game_id>
<game_SKU>182232002232</game_SKU>
</product>
10 changes: 10 additions & 0 deletions examples/single-step-ingest/input/products/sunglasses.xml
@@ -0,0 +1,10 @@
<product>
<id>9</id>
<sku>372801441675</sku>
<title>Sunglasses</title>
<game_title>liable greenhouse</game_title>
<description>sunglasses for liable greenhouse</description>
<price>15.0</price>
<game_id>1000194</game_id>
<game_SKU>105698185919</game_SKU>
</product>
@@ -0,0 +1,14 @@
{
"info" : {
"title" : "Products",
"version" : "0.0.1"
},
"definitions" : {
"Products" : {
"required" : [ ],
"rangeIndex" : [ ],
"wordLexicon" : [ ],
"properties" : { }
}
}
}
@@ -0,0 +1,6 @@
mainModule=main.xqy
collectorCodeFormat=xqy
mainCodeFormat=xqy
codeFormat=xqy
collectorModule=collector.xqy
dataFormat=xml
@@ -0,0 +1,20 @@
xquery version "1.0-ml";

module namespace plugin = "http://marklogic.com/data-hub/plugins";

declare option xdmp:mapping "false";

(:~
 : Collector plugin: produces the list of identifiers the flow will process.
 :
 : @param $options - a map containing options (sent from Java). The value
 :                   stored under "entity" is used as a collection name.
 :
 : @return - the URIs matched by a collection query on that name
 :)
declare function plugin:collect(
  $options as map:map) as xs:string*
{
  (: default behaviour: the collection to scan is named after the entity :)
  let $entity-collection := map:get($options, "entity")
  let $scope := cts:collection-query($entity-collection)
  return cts:uris((), (), $scope)
};

@@ -0,0 +1,48 @@
xquery version "1.0-ml";

module namespace plugin = "http://marklogic.com/data-hub/plugins";

declare namespace es = "http://marklogic.com/entity-services";

declare option xdmp:mapping "false";

(:~
 : Content plugin for the harmonize flow.
 :
 : Reads the document stored at $id, selects the part of it that carries the
 : instance data, and hands that part to plugin:extractInstanceProduct.
 :
 : @param $id      - the identifier returned by the collector (used as a document URI)
 : @param $options - a map containing options. Options are sent from Java
 :
 : @return - the value built by plugin:extractInstanceProduct
 :)
declare function plugin:create-content(
  $id as xs:string,
  $options as map:map) as item()?
{
  let $stored := fn:doc($id)
  let $wrapped := $stored/es:envelope
  let $payload :=
    (: namespaced entity-services envelope: take the children of es:instance :)
    if (fn:exists($wrapped)) then
      $wrapped/es:instance/node()
    (: un-namespaced envelope/instance: take the instance node itself :)
    else if (fn:exists($stored/envelope/instance)) then
      $stored/envelope/instance
    (: no envelope found: fall back to the document as loaded :)
    else
      $stored
  return plugin:extractInstanceProduct($payload)
};

(:~
 : Builds the Product instance object from a source node.
 :
 : @param $source - the node holding the product fields (sku or SKU, title, price)
 :
 : @return - a json:object with the keys "$attachments", "$type", "sku",
 :           "title" and "price", inserted in that order
 :)
declare private function plugin:extractInstanceProduct(
  $source as node()?) as item()?
{
  (: the two spellings of the sku element are concatenated, as in the original mapping :)
  let $sku-value := xs:string($source/sku || $source/SKU)
  let $title-value := xs:string($source/title)
  let $price-value := xs:decimal($source/price)

  let $product := json:object()
  (: the untouched source node is kept as the attachment :)
  let $_ := map:put($product, "$attachments", $source)
  let $_ := map:put($product, "$type", "Product")
  let $_ := map:put($product, "sku", $sku-value)
  let $_ := map:put($product, "title", $title-value)
  let $_ := map:put($product, "price", $price-value)
  return $product
};
@@ -0,0 +1,24 @@
xquery version "1.0-ml";

module namespace plugin = "http://marklogic.com/data-hub/plugins";

declare namespace es = "http://marklogic.com/entity-services";

declare option xdmp:mapping "false";

(:~
 : Headers plugin for the harmonize flow.
 :
 : This example adds no headers, so the function always yields the empty
 : sequence and none of its arguments are read.
 :
 : @param $id      - the identifier returned by the collector
 : @param $content - the output of the content plugin
 : @param $options - a map containing options. Options are sent from Java
 :
 : @return - zero or more header nodes (currently always none)
 :)
declare function plugin:create-headers(
  $id as xs:string,
  $content as item()?,
  $options as map:map) as node()*
{
  (: no headers for this example :)
  ()
};
@@ -0,0 +1,55 @@
xquery version "1.0-ml";

(: Your plugin must be in this namespace for the DHF to recognize it:)
module namespace plugin = "http://marklogic.com/data-hub/plugins";

(:
: This module exposes helper functions to make your life easier
: See documentation at:
: https://github.com/marklogic/marklogic-data-hub/wiki/dhf-lib
:)
import module namespace dhf = "http://marklogic.com/dhf"
at "/com.marklogic.hub/dhf.xqy";

(: include modules to construct various parts of the envelope :)
import module namespace content = "http://marklogic.com/data-hub/plugins" at "content.xqy";
import module namespace headers = "http://marklogic.com/data-hub/plugins" at "headers.xqy";
import module namespace triples = "http://marklogic.com/data-hub/plugins" at "triples.xqy";

(: include the writer module which persists your envelope into MarkLogic :)
import module namespace writer = "http://marklogic.com/data-hub/plugins" at "writer.xqy";

declare option xdmp:mapping "false";

(:~
 : Entry point of the harmonize flow.
 :
 : Runs the content, headers and triples plugins in that order (each through
 : dhf:run with its matching context), assembles the envelope with
 : dhf:make-envelope and passes it to the writer through dhf:run-writer.
 :
 : @param $id      - the identifier returned by the collector
 : @param $options - a map containing options. Options are sent from Java;
 :                   "dataFormat" is read here to build the envelope
 :
 :)
declare function plugin:main(
  $id as xs:string,
  $options as map:map)
{
  let $content :=
    dhf:run(dhf:content-context(), function() {
      content:create-content($id, $options)
    })

  let $headers :=
    dhf:run(dhf:headers-context($content), function() {
      headers:create-headers($id, $content, $options)
    })

  let $triples :=
    dhf:run(dhf:triples-context($content, $headers), function() {
      triples:create-triples($id, $content, $headers, $options)
    })

  let $data-format := map:get($options, "dataFormat")
  let $envelope := dhf:make-envelope($content, $headers, $triples, $data-format)
  return
    (: writers must be invoked this way.
       https://marklogic-community.github.io/marklogic-data-hub/docs/server-side/#run-writer :)
    dhf:run-writer(xdmp:function(xs:QName("writer:write")), $id, $envelope, $options)
};
@@ -0,0 +1,26 @@
xquery version "1.0-ml";

module namespace plugin = "http://marklogic.com/data-hub/plugins";

declare namespace es = "http://marklogic.com/entity-services";

declare option xdmp:mapping "false";

(:~
 : Triples plugin for the harmonize flow.
 :
 : This example produces no triples, so the function always yields the empty
 : sequence and none of its arguments are read.
 :
 : @param $id      - the identifier returned by the collector
 : @param $content - the output of the content plugin
 : @param $headers - the output of the headers plugin
 : @param $options - a map containing options. Options are sent from Java
 :
 : @return - zero or more triples (currently always none)
 :)
declare function plugin:create-triples(
  $id as xs:string,
  $content as item()?,
  $headers as item()*,
  $options as map:map) as sem:triple*
{
  (: no triples for this example :)
  ()
};
@@ -0,0 +1,22 @@
xquery version "1.0-ml";

module namespace plugin = "http://marklogic.com/data-hub/plugins";

declare option xdmp:mapping "false";

(:~
 : Writer plugin: stores the finished envelope.
 :
 : The envelope is inserted at the URI given by $id, with the default
 : permissions, into the collection named by the "entity" option.
 :
 : @param $id       - the identifier returned by the collector; used as the document URI
 : @param $envelope - the final envelope
 : @param $options  - a map containing options. Options are sent from Java
 :
 : @return - nothing
 :)
declare function plugin:write(
  $id as xs:string,
  $envelope as node(),
  $options as map:map) as empty-sequence()
{
  let $target-collection := map:get($options, "entity")
  return
    xdmp:document-insert($id, $envelope, xdmp:default-permissions(), $target-collection)
};
@@ -0,0 +1,4 @@
mainModule=main.xqy
mainCodeFormat=xqy
codeFormat=xqy
dataFormat=xml
@@ -0,0 +1,22 @@
xquery version "1.0-ml";

module namespace plugin = "http://marklogic.com/data-hub/plugins";

declare option xdmp:mapping "false";

(:~
 : Content plugin for the input flow.
 :
 : The incoming document is passed through unchanged: whatever arrives in
 : $raw-content is returned as the content.
 :
 : @param $id          - the identifier returned by the collector
 : @param $raw-content - the raw content being loaded
 : @param $options     - a map containing options. Options are sent from Java
 :
 : @return - the raw content, unmodified
 :)
declare function plugin:create-content(
  $id as xs:string,
  $raw-content as node()?,
  $options as map:map) as node()?
{
  (: pass-through: no transformation at ingest time :)
  $raw-content
};
@@ -0,0 +1,24 @@
xquery version "1.0-ml";

module namespace plugin = "http://marklogic.com/data-hub/plugins";

declare namespace envelope = "http://marklogic.com/data-hub/envelope";

declare option xdmp:mapping "false";

(:~
 : Headers plugin for the input flow.
 :
 : This example adds no headers at ingest time, so the function always yields
 : the empty sequence and none of its arguments are read.
 :
 : @param $id      - the identifier returned by the collector
 : @param $content - the output of the content plugin
 : @param $options - a map containing options. Options are sent from Java
 :
 : @return - zero or more header nodes (currently always none)
 :)
declare function plugin:create-headers(
  $id as xs:string,
  $content as node()?,
  $options as map:map) as node()*
{
  (: no headers for this example :)
  ()
};