In [1]:
import pandas as pd
import rdflib

g=rdflib.Graph()

ontology_files = [
    'recipes.yml.owl.ttl',
    'utensils.yml.owl.ttl',
    'measurement.yml.owl.ttl',
    'ingredients.yml.owl.ttl',
    'neapolitan_pizza.ttl'
]

ontology_files_old = [
    'recipe.owl',
    'ingredients.ttl',
    'measurement.ttl',
    'utensils.ttl',
    'neapolitan_pizza.ttl'
]

#g.load('recipe.owl', format='ttl')
#g.load('ingredients.ttl', format='ttl')
#g.load('measurement.ttl', format='ttl')
#g.load('utensils.ttl', format='ttl')
#g.load('neapolitan_pizza.ttl', format='ttl')

for f in ontology_files:
    g.load(f)

sparql_prefixes = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX neapolitan_pizza: <http://www.example.org/neapolitan_pizza#>
PREFIX ing: <http://www.semanticweb.org/tpfliss/ontologies/2020/0/ingredients#>
PREFIX measure: <http://www.semanticweb.org/tpfliss/ontologies/2020/0/measurement#>
BASE <http://www.semanticweb.org/tpfliss/ontologies/2020/0/recipes#>
"""

SAXParseException: file:///Users/tpfliss/git/ont_em/nb/recipes.yml.owl.ttl:1:0: no element found

In [None]:
recipes = pd.DataFrame(g.query(sparql_prefixes + """
SELECT DISTINCT ?recipe
WHERE {
    [] a :Recipe ;
    rdfs:label ?recipe .
}
""")).fillna('')

recipes

In [None]:
ingredients = pd.DataFrame(g.query(sparql_prefixes + """
SELECT DISTINCT ?amount ?unit ?ingredient ?comment
WHERE {
    [] a :Recipe ;
       :ingredient_list_item ?m .
    ?m :ingredient [ rdfs:label ?ingredient ] .
    OPTIONAL {
        ?m :amount ?amount .
    } OPTIONAL {
        ?m measure:unit [ rdfs:label ?unit ] .
    } OPTIONAL {
        ?m rdfs:comment ?comment .
    }
}
""")).fillna('')

ingredients

Note in the future we'll want to handle modifiers (large metal spoon)
Also currently recipe step ingredients are pointing to the ingredient class rather than to specific measurement instance from the ingredients list (or even a source portion divided out from the original ingredient list).

We'll want SHACL restrictions to verify the recipe model is valid, and also probably want a tool to generate a recipe model from a DSL or csv lists.

Note the use of property paths on ingredient to override rdfs label (maybe not a great idea?)

In [None]:
steps = pd.DataFrame(g.query(sparql_prefixes + """
SELECT DISTINCT ?order ?sub_order ?operation ?ingredient ?product ?equipment ?description ?comment
WHERE {
    {
        ?s a :Step ;
            rdfs:label ?description ; # suppress steps w/ substeps
            :order ?order .
    } UNION {
        ?sg a :Step ;
            :order ?order ;
            :sub_step ?s .
        ?s :order ?sub_order .
    }
    OPTIONAL {
        ?s rdfs:label ?description .
    }
    OPTIONAL {
        ?s :operation [ rdfs:label ?operation ] .
    }
    OPTIONAL {
        ?s :ingredient+ [ rdfs:label ?ingredient ] .
    }
    OPTIONAL {
        ?s :product [ rdfs:label ?product ] .
    }
    OPTIONAL {
        ?s ?eq [ a [ rdfs:label ?equipment ] ] .
        ?eq rdfs:subPropertyOf* :equipment .
    }
    OPTIONAL {
        ?s rdfs:comment ?comment .
    }
}
ORDER BY ?order ?sub_order
"""), columns=['order', 'sub_order', 'operation', 'ingredient', 'product', 'equipment', 'description', 'comment']).fillna('')
steps.set_index(['order', 'sub_order'], inplace=True)

def collapse_column_as_string_list(df, grp, col):
    df[col] = df.groupby(grp)[col].apply(', '.join)

    return  steps.drop_duplicates()

# Might treat these as sub-steps in the future?
steps = collapse_column_as_string_list(steps, ['order', 'sub_order'], 'ingredient')
steps = collapse_column_as_string_list(steps, ['order', 'sub_order'], 'product')
steps = collapse_column_as_string_list(steps, ['order', 'sub_order'], 'equipment')

# Getting duplicat values, hacky fix
for col in ['ingredient', 'product', 'equipment']:
    steps[col] = steps[col].apply(lambda x: list(set(x.split(', '))) if x else [])
 
steps

Note need to model "six pieces of dough" as a product of step 2.4

Need to model "move" destination better in step 2.6

This is a rough draft of encoding and querying relative time information in the recipe.

In [None]:
timing = pd.DataFrame(g.query(sparql_prefixes + """
SELECT DISTINCT ?event1 ?amount ?unit ?direction ?event2
WHERE {
    ?e1 :at [
        ?offset ?e2 ;
        :amount ?amount ;
        measure:unit [ rdfs:label ?unit ] ;
    ] .
    ?offset rdfs:subPropertyOf :time_offset ;
        rdfs:label ?direction .
    ?e1 rdfs:label ?event1 .
    ?e2 rdfs:label ?event2 .
}
"""), columns=['event1', 'amount', 'unit', 'direction', 'event2']).fillna('')

timing

The following queries subproperties of equipment that can be used as more specific annotation while still allowing more general queries on what equipment is used in various steps of the recipe. This kind of query can also be useful for populating dropdowns.

In [None]:
timing = pd.DataFrame(g.query(sparql_prefixes + """
SELECT DISTINCT ?p ?l
WHERE {
    ?s ?p ?o .
    ?p rdfs:subPropertyOf* :equipment .
    ?p rdfs:label ?l
}
"""), columns=['p', 'l']).fillna('')

timing