## Starmap Tutorial

Starmap is a Python utility that allows for easy mapping of CSV files to SMS. It also creates an ontology file to enable viewing the graph in Stardog Explorer. This utility function can be run from the command line with user supplied arguments, or within a Jupyter notebook. If run from the command line, `starmap` uses the default datatypes of the pandas `read_csv` method. If you need to make adjustments to these defaults, you should import the file into Python, transform the data as desired, and then create the SMS and ontology files from a `starmap` object.

In [141]:
import pandas as pd
import numpy as np
import re

In [142]:

class parseException(BaseException):
    def __init__(self, msg):
        super().__init__(msg)

def flatten(list_of_lists):
    flat_list = [item for sub_list in list_of_lists for item in sub_list]
    return flat_list

def getDataType(var, df):
    # assume that we knnow var is in df
    data_type = str(df.dtypes[var])

    if 'object' in data_type:
        return 'xsd:string'
    elif 'datetime' in data_type:
        return 'xsd:datetime'
    elif 'date' in data_type:
        return 'xsd:date'
    elif 'float' in data_type:
        return 'xsd:decimal'
    elif 'int' in data_type:
        return 'xsd:integer'
    elif 'bool' in data_type:
        return 'xsd:boolean'
    else:
        raise parseException                    

def snakeCamel(svar):
    svar = svar.strip()
    svar = svar.split("_")
    if len(svar) == 1:
        return(svar[0].lower())
    else:
        result = "".join([s.title() for s in svar])
        result = result[:1].lower() + result[1:]
        return(result)

def snakePascal(svar):
    svar = svar.strip()
    svar = svar.split("_")
    if len(svar) == 1:
        return(svar[0].title())
    else:
        result = "".join([s.title() for s in svar])
        result = result[:1].upper() + result[1:]
        return(result)


def checkTokenRange(p):
    # the token should be either a numeric range or a column name of the data frame
    # Either way, we need to return a list.
    p = p.strip()
    column_names = df.columns.to_list()
    check = p.split(':')

    if len(check) == 2:
        rangeRegex = re.compile(r'(\d)*:(\d)*')
        test = rangeRegex.search(p)
        if test and (test.group() == p):
            column_range = [int(t) for t in check]
            #if any([col >= df.shape[1] for col in column_range]):
                #raise parseException("Column range out of bounds")
        else:
            lower = column_names.index(checkTokeninDF(check[0]))
            upper = column_names.index(checkTokeninDF(check[1])) + 1
            if lower >= upper:
                raise parseException("Field names to not constitute a valid range")
            column_range = [lower, upper]           
        
        response =  column_names[column_range[0]:column_range[1]]  
    else:
        response = [checkTokeninDF(p)]
    return response                  

def processPlus(p):

    check = p.split("+")
    trim_check = [c.strip() for c in check]
    response = []
    for token in trim_check:
        response.append(checkTokenRange(token))

    return flatten(response) 
 

def checkDash(formula):
    response = None
    check = formula.split("|")
    
    if len(check) == 1:
        return response
    elif len(check) == 2:
        response = [c.strip() for c in check]
        return response 
    else:
        raise parseException("Something wrong here")        

def checkTilde(formula):
    
    check = formula.split("~")
    output = {"lhs": None, "rhs": None}
    if len(check) == 1:
        output['lhs'] = check[0].strip()
    elif len(check) == 2:
        temp = [c.strip() for c in check]
        output['lhs'] = temp[0]
        output['rhs'] = temp[1]
    else:
        raise parseException("A formula can only have one ~ symbol")

    return output    

def checkTokeninDF(p):
    p = p.strip()
    if p in df.columns:
        return p
    else:
        raise parseException("Token not in dataframe namespace")  

def process_partial(p):
    check =  checkDash(p)
    if check:
        output = {'lhs': checkTokeninDF(check[0])}
        output['rhs'] = processPlus(check[1])
        return output   
    else:
        p = checkTokeninDF(p)
        return {'lhs': p, 'rhs': None}
       

def process_formula(formula, df):
    partials = checkTilde(formula)

    output = {}
    output['lhs'] = process_partial(partials['lhs'])
    if partials['rhs']:
        output['rhs'] = process_partial(partials['rhs'])
    else:
        output['rhs'] = None    
           
    return output

# Some utility functions for class Starmap

def process_literals(literals, df, urn, prefix, node_class):
    # Find the data type, create a binding to do the relevant data transformation
    # Add lines of sparql
    # Assume that the first line with the node_iri has already been written
    sparql = ""
    bindings = ""
    onto = ""
    for l in literals:
        sparql += ' ; \n' # finish off the previous line
        data_transform = getDataType(l, df)
        l_name = '?' + l + '_tr'
        property_name = snakeCamel(l)

        sparql += '\t' + prefix + ":" + property_name + " " + l_name
        bindings += 'BIND(' + data_transform + '(?' + l + ') as ' + l_name + ') \n'  
        onto += ( '<' + urn + property_name + '>' + " " + 'a owl:DatatypeProperty ; \n' \
            '\t' + " " + 'rdfs:label' + " " + "'" + property_name + "'" +  "; \n" \
                '\t' + " " + 'rdfs:domain' + " " + '<' + urn + node_class + '>' + " ; \n" \
                    '\t' + " " + 'rdfs:range' + " " + data_transform + " . \n"
        )
    sparql += ' . \n'    
    output = {"sparql": sparql, "bindings": bindings, "onto": onto}
    return output    

def process_side(side, df, urn, node_iri_list):
    # side is d['lhs'] or d['rhs']
    # node_iri_list is a list that gets modified as a side effect.
    # I hate myself for this, but there it is ....
    sparql = ""
    bindings = ""
    onto = ""
    node = side['lhs']
    literals = side['rhs']
    node_iri = '?' + node + '_iri'
    if node in node_iri_list:
        if literals:
            data_transform = getDataType(literals[0], df)
            l_name = '?' + literals[0] + '_tr'
            property_name = snakeCamel(literals[0])
            node_class = snakePascal(node)

            sparql += node_iri + " " + prefix + ":" + property_name + " " + l_name
            bindings += 'BIND(' + data_transform + '(?' + literals[0] + ') as ' + l_name + ') \n'  
            onto += ( '<' + urn + property_name + '>' + " " + 'a owl:DatatypeProperty ; \n' \
            '\t' + " " + 'rdfs:label' + " " + "'" + property_name + "'" + " ; \n" \
                '\t' + " " + 'rdfs:domain' + " " + '<' + urn + node_class + '>' + " ; \n" \
                    '\t' + " " + 'rdfs:range' + " " + data_transform + " . \n"
                )
            if len(literals) > 1:
                processed_strings = process_literals(literals[1:], df, urn, prefix, node_class)
                sparql += processed_strings['sparql']
                bindings += processed_strings['bindings']
            else:
                sparql += ' . \n'    
#         else:
#             sparql += ' . \n'        

    else:
        node_iri_list.append(node)
        node_class = snakePascal(node)
        sparql += node_iri + " " + "a" + " " + prefix + ':' + node_class + " ; \n"
        sparql += '\t' + 'rdfs:label' + " " + "?" + node 
        bindings += 'BIND(TEMPLATE("' + urn + node + '_{' + node + '}") as ' + node_iri + ')' + '\n'
        onto += (  '<' + urn + node_class + '>' + " " + 'a owl:Class ; \n' \
                '\trdfs:label' + " " + "'" + node_class + "'" + ' . \n' ) 
                           
    # Process literals, if they exist    

        if literals:
            processed_strings = process_literals(literals, df, urn, prefix, node_class)
            sparql += processed_strings['sparql']
            bindings += processed_strings['bindings']
            onto += processed_strings['onto']
            
        else:
            sparql += " . \n"

    output = {'sparql': sparql, 'bindings': bindings, 'onto': onto}
    return output        

class Starmap:

    def __init__(self, formulae, df, urn, prefix):
        if type(formulae) == str:
            formulae = [formulae]
        elif type(formulae) != list:
            raise parseException("formula must be a string or a list of strings")
        
        processed_formulae = []
        for formula in formulae:
            processed_formulae.append(process_formula(formula, df))  

        sparql = ""
        bindings = ""
        onto = ( "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>" + ' . \n' +
        "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>" + ' . \n' +
        "@prefix xsd: <http://www.w3.org/2001/XMLSchema#>" + ' . \n' +
        "@prefix owl: <http://www.w3.org/2002/07/owl#>" + ' . \n' +
        "@prefix stardog: <tag:stardog:api:>" + ' . \n' +
        "@prefix " + prefix + ": <" + urn + '> . \n\n' )

        node_iri_list = [] # in case we see the same node twice in the list of formulae
        for d in processed_formulae:
            # each dictionary represents a formulae
            # Process the IRI node on the LHS. It will always exist
            left_side = d['lhs']
            output = process_side(left_side, df, urn, node_iri_list)
            sparql += output['sparql'] 
            bindings += output['bindings']
            onto += output['onto']

            # Now let's hold that thought and process the right hand side
            right_side = d['rhs']
            if right_side:
                output = process_side(right_side, df, urn, node_iri_list)
                sparql += output['sparql'] 
                bindings += output['bindings']
                onto += output['onto']

                # And finally, connect the LHS to the RHS
                lhs_node_iri = '?' + left_side['lhs'] + '_iri'
                right_node = right_side['lhs']
                right_node_iri = '?' + right_node + '_iri'
                relation_name = "has" + snakePascal(right_node)
                sparql += lhs_node_iri + " " + prefix + ":" + relation_name + " " + right_node_iri + " . \n\n" 
                onto += (
                     '<' + urn + relation_name + '>' + " " + 'a owl:ObjectProperty ; \n' \
                        '\t' + " " + 'rdfs:label' + " " + "'" + relation_name + "'" + " ; \n" \
                            '\t' + " " + 'rdfs:domain' + " " + '<' + urn + snakePascal(left_side['lhs']) + '>' + " ; \n" \
                                '\t' + " " + 'rdfs:range' + " " + '<' + urn + snakePascal(right_node) + ">" + " . \n"
                )
                
                
                         

        theUsual = ( "prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>" + '\n' +
        "prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>" + '\n' +
        "prefix xsd: <http://www.w3.org/2001/XMLSchema#>" + '\n' +
        "prefix owl: <http://www.w3.org/2002/07/owl#>" + '\n' +
        "prefix stardog: <tag:stardog:api:>" + '\n' +
        "prefix " + prefix + ": <" + urn + '> \n' )

        theToBit = ("MAPPING" + "\n" + "FROM CSV {\n}\nTO {")
        theWhereBit = "\nWHERE {\n"
        
        self.sms = theUsual + '\n' + theToBit + '\n' + sparql + '\n}' + theWhereBit + bindings +'\n}'        
        self.onto = onto





The example file uses data from a hypothetical ordering system. This dataset is part of the larger Customer 360 demo.

In [143]:
df = pd.read_csv('c360_orders.csv')
df['order_timestamp'] = pd.to_datetime(df['order_timestamp'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24486 entries, 0 to 24485
Data columns (total 18 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   profile_id       24486 non-null  int64         
 1   order_nbr        24486 non-null  int64         
 2   order_desc       24486 non-null  object        
 3   orditemID        24486 non-null  object        
 4   order_source     24486 non-null  object        
 5   order_timestamp  24486 non-null  datetime64[ns]
 6   order_type       24486 non-null  object        
 7   store_id         24486 non-null  int64         
 8   promo_id         24486 non-null  int64         
 9   order_qty        24486 non-null  int64         
 10  order_amt        24486 non-null  float64       
 11  order_profit     24486 non-null  float64       
 12  item_id          24486 non-null  object        
 13  item_qty         24486 non-null  int64         
 14  item_price       24486 non-null  float

`profile_id` gives a reference to the customer. Customers place orders, referenced by `order_nbr`. An order can have more than one row in the CSV file, corresponding to different items. There is no separate table for item specific information; it is contained in this table under fields `item_qty`, `item_price`, `item_cost`, `item_amt` and `item_profit`.

Orders may be associated with *stores*, referenced through `store_id`, and may have occurred in response to *promotions*, referenced through `promo_id`. Potentially, this table assumese a number of complex relationships, that we might want to capture in a knowledge graph.

## The SMS mapping language

The Stardog mapping language, SMS, allows to create RDF from a CSV file by means of a templating file. This file outlines the kind of relationships that we want to record in the data. SMS offers a lot of control over the resulting knowledge graph: we can determine the names of classes and properties, and effect transformation in the data. However, if one is prepared to live with some natural defaults regarding names, we can leverage the datatype information in a Pandas dataframe to map the data using a simpler syntax.

R users are used to using the R formula language as a shorthand summary of relationships between the variables of a data frame. `Starmap` uses a variant of this syntax to create shorthand mappings.

### Some IRI nodes

Let's start with a simple, if very boring knowledge graph. I'll just create an IRI node for each `profile_id` in the data frame. An IRI node needs a unique resource identifier, possibly with the form of a URL. It is customary to use an abbreviation to this URL when writing queries. URL's can get long, after all. Let's than my `urn` is `http://stardog.acme.com/`, with abbreviated prefix, `acme`. I now have everything I need to create my mapping: 

In [144]:
urn = "http://stardog.acme.com/"
prefix = 'acme'
output = Starmap("profile_id", df, urn, prefix)

In [145]:
print(output.sms)

prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix stardog: <tag:stardog:api:>
prefix acme: <http://stardog.acme.com/> 

MAPPING
FROM CSV {
}
TO {
?profile_id_iri a acme:ProfileId ; 
	rdfs:label ?profile_id . 

}
WHERE {
BIND(TEMPLATE("http://stardog.acme.com/profile_id_{profile_id}") as ?profile_id_iri)

}


And here we have the mappings. I could save this to a file, `acme.sms` say, and map my data to Stardog. Note what I have here:

* The profile_id belongs to class `ProfileId`
* It has a label, which is the original variable name from the CSV file
* The `Template` statement instructs SMS to create an IRI node from values in field `profile_id`. In order to avoid possible conflicts, I have added a prefix to `profile_id`. So if a particular customer has `profile_id = 50`, their IRI node with have resource identifier `http://stardog.com/profile_id_50`. 

`Starmap` also creates an ontology file from the same formula.

In [146]:
print(output.onto)

@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . 
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . 
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . 
@prefix owl: <http://www.w3.org/2002/07/owl#> . 
@prefix stardog: <tag:stardog:api:> . 
@prefix acme: <http://stardog.acme.com/> . 

<http://stardog.acme.com/ProfileId> a owl:Class ; 
	rdfs:label 'ProfileId' . 



There's not much to it, obviously. I only have one class in my graph at this point. However, I can save `output.onto` to a `ttl` file and add it do the mapped data. From the command line, I would do something like this:

```
stardog-admin db create c360
stardog-admin virtual import c360 data.sms c360_orders.csv
stardog data add c360 onto.ttl
```

### Adding more nodes

I can add more nodes, by supplying a list of variable names to `Starmap`

In [147]:
output = Starmap(['profile_id', 'order_nbr'], df, urn, prefix)
print(output.sms)

prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix stardog: <tag:stardog:api:>
prefix acme: <http://stardog.acme.com/> 

MAPPING
FROM CSV {
}
TO {
?profile_id_iri a acme:ProfileId ; 
	rdfs:label ?profile_id . 
?order_nbr_iri a acme:OrderNbr ; 
	rdfs:label ?order_nbr . 

}
WHERE {
BIND(TEMPLATE("http://stardog.acme.com/profile_id_{profile_id}") as ?profile_id_iri)
BIND(TEMPLATE("http://stardog.acme.com/order_nbr_{order_nbr}") as ?order_nbr_iri)

}


### Adding a relationship

So far, our graph isn't very interesting. I have not established any relationships between customer profiles and their orders. In R, we use the `~` operator to indicate a relationship between an independent variable and its dependents. For example, `y ~ x` might be input to the linear model function to express that `y` depends on `x`.

By analogy, in `Starmap`, write `profile_id ~ order_nbr` to indicate that profiles have orders. In the graph, this will translate to an arrow going from `profile_id` nodes to `order_nbr` nodes.

In [148]:
output = Starmap("profile_id ~ order_nbr", df, urn, prefix)
print(output.sms)



prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix stardog: <tag:stardog:api:>
prefix acme: <http://stardog.acme.com/> 

MAPPING
FROM CSV {
}
TO {
?profile_id_iri a acme:ProfileId ; 
	rdfs:label ?profile_id . 
?order_nbr_iri a acme:OrderNbr ; 
	rdfs:label ?order_nbr . 
?profile_id_iri acme:hasOrderNbr ?order_nbr_iri . 


}
WHERE {
BIND(TEMPLATE("http://stardog.acme.com/profile_id_{profile_id}") as ?profile_id_iri)
BIND(TEMPLATE("http://stardog.acme.com/order_nbr_{order_nbr}") as ?order_nbr_iri)

}


`Starmap` had created IRI nodes for each term of the formula and an object property for their relationship. We get a richer ontology as well:

In [149]:
print(output.onto)

@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . 
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . 
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . 
@prefix owl: <http://www.w3.org/2002/07/owl#> . 
@prefix stardog: <tag:stardog:api:> . 
@prefix acme: <http://stardog.acme.com/> . 

<http://stardog.acme.com/ProfileId> a owl:Class ; 
	rdfs:label 'ProfileId' . 
<http://stardog.acme.com/OrderNbr> a owl:Class ; 
	rdfs:label 'OrderNbr' . 
<http://stardog.acme.com/hasOrderNbr> a owl:ObjectProperty ; 
	 rdfs:label 'hasOrderNbr' ; 
	 rdfs:domain <http://stardog.acme.com/ProfileId> ; 
	 rdfs:range <http://stardog.acme.com/OrderNbr> . 



#### Multiple relationships

The `~` operator is binary; I can't chain several together. But if I want to map more than one relationship in the dataframe, I can supply a list of relationships. This code relates `profile_id` to `order_nbr` and `order_nbr` to `item_id`.

In [150]:
output = Starmap(["profile_id ~ order_nbr", "order_nbr ~ item_id"], df, urn, prefix)
print(output.sms)

prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix stardog: <tag:stardog:api:>
prefix acme: <http://stardog.acme.com/> 

MAPPING
FROM CSV {
}
TO {
?profile_id_iri a acme:ProfileId ; 
	rdfs:label ?profile_id . 
?order_nbr_iri a acme:OrderNbr ; 
	rdfs:label ?order_nbr . 
?profile_id_iri acme:hasOrderNbr ?order_nbr_iri . 

?item_id_iri a acme:ItemId ; 
	rdfs:label ?item_id . 
?order_nbr_iri acme:hasItemId ?item_id_iri . 


}
WHERE {
BIND(TEMPLATE("http://stardog.acme.com/profile_id_{profile_id}") as ?profile_id_iri)
BIND(TEMPLATE("http://stardog.acme.com/order_nbr_{order_nbr}") as ?order_nbr_iri)
BIND(TEMPLATE("http://stardog.acme.com/item_id_{item_id}") as ?item_id_iri)

}


#### bi-directional

I can reverse the order in the formula to have the relationship *point* the other way. In the following, I get a relationship from profile to order, and from order to profile.

In [151]:
output = Starmap(["profile_id ~ order_nbr", "order_nbr ~ profile_id"], df, urn, prefix)
# print(output.sms)

### Literal properties

In R, the dash operator, `|`, is used to indicate nested effects. In a similar way, I can think of literals as variables that are nested within their respective subjects. In this example, `item_qty`, `item_amt`, `item_price`, `item_cost` and `item_profit` are literal properties of `items`. 

In [152]:
output = Starmap(["item_id | item_qty + item_amt + item_price + item_cost + item_profit"], df, urn, prefix)
print(output.sms)

prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix stardog: <tag:stardog:api:>
prefix acme: <http://stardog.acme.com/> 

MAPPING
FROM CSV {
}
TO {
?item_id_iri a acme:ItemId ; 
	rdfs:label ?item_id ; 
	acme:itemQty ?item_qty_tr ; 
	acme:itemAmt ?item_amt_tr ; 
	acme:itemPrice ?item_price_tr ; 
	acme:itemCost ?item_cost_tr ; 
	acme:itemProfit ?item_profit_tr . 

}
WHERE {
BIND(TEMPLATE("http://stardog.acme.com/item_id_{item_id}") as ?item_id_iri)
BIND(xsd:integer(?item_qty) as ?item_qty_tr) 
BIND(xsd:decimal(?item_amt) as ?item_amt_tr) 
BIND(xsd:decimal(?item_price) as ?item_price_tr) 
BIND(xsd:decimal(?item_cost) as ?item_cost_tr) 
BIND(xsd:decimal(?item_profit) as ?item_profit_tr) 

}


`Starmap` uses the data types from dataframe `df` to deduce the appropriate data transformation. It can also handle dates, strings and booleans.

#### Ranges

`Starmap` can also handle a range of values, entered either as column names or column numbers. I can express the previous formula as a range to save myself some typing:

In [153]:
output = Starmap(["item_id | item_qty : item_profit"], df, urn, prefix)
print(output.sms)

prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix stardog: <tag:stardog:api:>
prefix acme: <http://stardog.acme.com/> 

MAPPING
FROM CSV {
}
TO {
?item_id_iri a acme:ItemId ; 
	rdfs:label ?item_id ; 
	acme:itemQty ?item_qty_tr ; 
	acme:itemPrice ?item_price_tr ; 
	acme:itemCost ?item_cost_tr ; 
	acme:itemAmt ?item_amt_tr ; 
	acme:itemProfit ?item_profit_tr . 

}
WHERE {
BIND(TEMPLATE("http://stardog.acme.com/item_id_{item_id}") as ?item_id_iri)
BIND(xsd:integer(?item_qty) as ?item_qty_tr) 
BIND(xsd:decimal(?item_price) as ?item_price_tr) 
BIND(xsd:decimal(?item_cost) as ?item_cost_tr) 
BIND(xsd:decimal(?item_amt) as ?item_amt_tr) 
BIND(xsd:decimal(?item_profit) as ?item_profit_tr) 

}


### Putting it all together

* profiles relate to orders, which have descriptions, amounts and datetimes
* orders have items, with their literals

In [154]:
output = Starmap(["profile_id ~ order_nbr | order_desc + order_timestamp + order_amt", "order_nbr ~ item_id | item_qty : item_profit"], df, urn, prefix)
print(output.sms)

prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix stardog: <tag:stardog:api:>
prefix acme: <http://stardog.acme.com/> 

MAPPING
FROM CSV {
}
TO {
?profile_id_iri a acme:ProfileId ; 
	rdfs:label ?profile_id . 
?order_nbr_iri a acme:OrderNbr ; 
	rdfs:label ?order_nbr ; 
	acme:orderDesc ?order_desc_tr ; 
	acme:orderTimestamp ?order_timestamp_tr ; 
	acme:orderAmt ?order_amt_tr . 
?profile_id_iri acme:hasOrderNbr ?order_nbr_iri . 

?item_id_iri a acme:ItemId ; 
	rdfs:label ?item_id ; 
	acme:itemQty ?item_qty_tr ; 
	acme:itemPrice ?item_price_tr ; 
	acme:itemCost ?item_cost_tr ; 
	acme:itemAmt ?item_amt_tr ; 
	acme:itemProfit ?item_profit_tr . 
?order_nbr_iri acme:hasItemId ?item_id_iri . 


}
WHERE {
BIND(TEMPLATE("http://stardog.acme.com/profile_id_{profile_id}") as ?profile_id_iri)
BIND(TEMPLATE("http://stardog.acme.com/order_nbr_

In [155]:
print(output.onto)

@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . 
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> . 
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . 
@prefix owl: <http://www.w3.org/2002/07/owl#> . 
@prefix stardog: <tag:stardog:api:> . 
@prefix acme: <http://stardog.acme.com/> . 

<http://stardog.acme.com/ProfileId> a owl:Class ; 
	rdfs:label 'ProfileId' . 
<http://stardog.acme.com/OrderNbr> a owl:Class ; 
	rdfs:label 'OrderNbr' . 
<http://stardog.acme.com/orderDesc> a owl:DatatypeProperty ; 
	 rdfs:label 'orderDesc'; 
	 rdfs:domain <http://stardog.acme.com/OrderNbr> ; 
	 rdfs:range xsd:string . 
<http://stardog.acme.com/orderTimestamp> a owl:DatatypeProperty ; 
	 rdfs:label 'orderTimestamp'; 
	 rdfs:domain <http://stardog.acme.com/OrderNbr> ; 
	 rdfs:range xsd:datetime . 
<http://stardog.acme.com/orderAmt> a owl:DatatypeProperty ; 
	 rdfs:label 'orderAmt'; 
	 rdfs:domain <http://stardog.acme.com/OrderNbr> ; 
	 rdfs:range xsd:decimal . 
<http://stardog.acme.com/ha