In [1]:
import pandas as pd
import rdflib

# One graph receives the triples from both Turtle files below.
g = rdflib.Graph()

# Graph.load() was deprecated in rdflib 5 and removed in rdflib 6.
# Graph.parse() is the supported call and takes the same source/format
# arguments used here.
g.parse('recipe.owl', format='ttl')
g.parse('neapolitan_pizza.ttl', format='ttl')

# Declarations prepended to every SPARQL query in this notebook.
# NOTE(review): the queries use the empty prefix (e.g. `:Recipe`), which is not
# declared here -- presumably it is bound by the @prefix declarations of the
# parsed Turtle files; confirm.
sparql_prefixes = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX neapolitan_pizza: <http://www.example.org/neapolitan_pizza#>
BASE <http://www.semanticweb.org/tpfliss/ontologies/2020/0/recipes#>
"""

In [2]:
# Select the rdfs:label of every resource typed as :Recipe.
recipe_query = """
SELECT DISTINCT ?recipe
WHERE {
    [] a :Recipe ;
    rdfs:label ?recipe .
}
"""
recipe_rows = g.query(sparql_prefixes + recipe_query)

# One row per result; unbound values are shown as empty strings.
recipes = pd.DataFrame(recipe_rows).fillna('')

recipes

Unnamed: 0,0
0,neapolitan pizza dough


In [3]:
# Ingredient list of the recipe: the ingredient label is required, while
# amount, unit label and comment are each OPTIONAL in the query.
ingredient_query = """
SELECT DISTINCT ?amount ?unit ?ingredient ?comment
WHERE {
    [] a :Recipe ;
       :ingredient_list_item ?m .
    ?m :ingredient [ rdfs:label ?ingredient ] .
    OPTIONAL {
        ?m :amount ?amount .
    } OPTIONAL {
        ?m :unit [ rdfs:label ?unit ] .
    } OPTIONAL {
        ?m rdfs:comment ?comment .
    }
}
"""
ingredient_rows = g.query(sparql_prefixes + ingredient_query)

# Unbound OPTIONAL values are shown as empty strings.
ingredients = pd.DataFrame(ingredient_rows).fillna('')

ingredients

Unnamed: 0,0,1,2,3
0,4.5,cup,flour,
1,1.0,teaspoon,yeast,
2,1.75,teaspoon,salt,
3,0.25,cup,olive oil,
4,1.75,cup,water,
5,,,cornmeal,for dusting


Note: in the future we'll want to handle modifiers (e.g. "large metal spoon").
Also, recipe step ingredients currently point to the ingredient class rather than to a specific measurement instance from the ingredients list (or even to a source portion divided out from the original ingredient list).

We'll want SHACL restrictions to verify the recipe model is valid, and also probably want a tool to generate a recipe model from a DSL or csv lists.

Note the use of property paths on ingredient to override rdfs label (maybe not a great idea?)

In [4]:
# Column labels, in the same order as the variables of the SELECT clause below.
step_columns = ['order', 'sub_order', 'operation', 'ingredient', 'product', 'equipment', 'description', 'comment']

# The first UNION branch matches steps that carry their own label; the second
# matches the sub-steps of a step and binds ?sub_order for them.
steps_query = """
SELECT DISTINCT ?order ?sub_order ?operation ?ingredient ?product ?equipment ?description ?comment
WHERE {
    {
        ?s a :Step ;
            rdfs:label ?description ; # suppress steps w/ substeps
            :order ?order .
    } UNION {
        ?sg a :Step ;
            :order ?order ;
            :sub_step ?s .
        ?s :order ?sub_order .
    }
    OPTIONAL {
        ?s rdfs:label ?description .
    }
    OPTIONAL {
        ?s :operation [ rdfs:label ?operation ] .
    }
    OPTIONAL {
        ?s :ingredient+ [ rdfs:label ?ingredient ] .
    }
    OPTIONAL {
        ?s :product [ rdfs:label ?product ] .
    }
    OPTIONAL {
        ?s ?eq [ a [ rdfs:label ?equipment ] ] .
        ?eq rdfs:subPropertyOf* :equipment .
    }
    OPTIONAL {
        ?s rdfs:comment ?comment .
    }
}
ORDER BY ?order ?sub_order
"""
step_rows = g.query(sparql_prefixes + steps_query)

# Blank out unbound values, then index the rows by (order, sub_order).
steps = (
    pd.DataFrame(step_rows, columns=step_columns)
    .fillna('')
    .set_index(['order', 'sub_order'])
)

def collapse_column_as_string_list(df, grp, col):
    """Join the values of ``col`` within each ``grp`` group into one string.

    Every row of a group receives the same ``', '``-joined string built from
    all of that group's ``col`` values (repeated values are kept). Rows that
    have become exact duplicates of one another are then dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to collapse. It is not modified.
    grp : label or list of labels
        Column names or index level names to group by.
    col : label
        Name of the column of strings to join.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``col`` replaced by the joined strings and duplicate
        rows removed.
    """
    # transform() broadcasts each group's joined string back onto every row of
    # that group, in the original row order.
    joined = df.groupby(grp)[col].transform(', '.join)

    # Work on a copy so the caller's frame is left untouched; assign by
    # position so a non-unique index cannot interfere with alignment.
    collapsed = df.copy()
    collapsed[col] = joined.to_numpy()

    # Return the collapsed argument. The previous version returned the global
    # `steps` here, which was only correct when `df` happened to be `steps`.
    return collapsed.drop_duplicates()

# Might treat these as sub-steps in the future?
# Collapse the three list-like columns one after another, so that each
# (order, sub_order) pair carries a single joined string per column.
list_columns = ['ingredient', 'product', 'equipment']
for col in list_columns:
    steps = collapse_column_as_string_list(steps, ['order', 'sub_order'], col)

# The joined strings contain duplicate values (hacky fix): split them back
# into lists and drop the repeats. dict.fromkeys() keeps first-seen order,
# whereas set() yields a string order that can change between kernel
# sessions, making the displayed lists non-reproducible.
for col in list_columns:
    steps[col] = steps[col].apply(lambda x: list(dict.fromkeys(x.split(', '))) if x else [])

steps

Unnamed: 0_level_0,Unnamed: 1_level_0,operation,ingredient,product,equipment,description,comment
order,sub_order,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,,stir,"[flour, yeast, water, salt]",[dough],"[bowl, spoon]","stir together the flour, salt, and instant yeast",
2,1.0,line,[],[],"[parchment paper, sheet pan]",Line sheet pan with parchment paper,
2,2.0,mist,[olive oil],[],"[parchment paper, sheet pan]",mist the parchment paper,
2,3.0,sprinkle,[cornmeal],[],[counter],sprinkle flour on the counter and transfer the...,
2,4.0,move,[mixed dough],[],[counter],transfer the dough to the counter,
2,5.0,cut,[dough],[six pieces of dough],[scraper],cut the dough into six equal parts,
2,6.0,round,[six pieces of dough],[six balls of dough],[hands],,round each piece into a ball
2,7.0,move,[six balls of dough],[],[sheet pan],transfer dough balls to sheet pan,
3,1.0,move,[six balls of dough],[],"[refrigerator, sheet pan]",put the pan into the refrigerator,to rest the dough
3,2.0,rest,[six balls of dough],[],"[refrigerator, sheet pan]",rest the dough overnight,


Note need to model "six pieces of dough" as a product of step 2.4

Need to model "move" destination better in step 2.6

This is a rough draft of encoding and querying relative time information in the recipe.

In [5]:
# Column labels, in the same order as the variables of the SELECT clause below.
timing_columns = ['event1', 'amount', 'unit', 'direction', 'event2']

# Each row relates ?event1 to ?event2 through a sub-property of :time_offset,
# together with the offset's amount and unit label.
timing_query = """
SELECT DISTINCT ?event1 ?amount ?unit ?direction ?event2
WHERE {
    ?e1 :at [
        ?offset ?e2 ;
        :amount ?amount ;
        :unit [ rdfs:label ?unit ] ;
    ] .
    ?offset rdfs:subPropertyOf :time_offset ;
        rdfs:label ?direction .
    ?e1 rdfs:label ?event1 .
    ?e2 rdfs:label ?event2 .
}
"""
timing_rows = g.query(sparql_prefixes + timing_query)
timing = pd.DataFrame(timing_rows, columns=timing_columns).fillna('')

timing

Unnamed: 0,event1,amount,unit,direction,event2
0,remove the dough from the refrigerator,2,hour,before,top the pizza
1,preheat the oven,45,minute,before,top the pizza


The following query lists the subproperties of equipment. These can be used as more specific annotations while still allowing more general queries on what equipment is used in the various steps of the recipe. This kind of query can also be useful for populating dropdowns.

In [6]:
# Properties that are actually used in the graph and that are :equipment or one
# of its (transitive) sub-properties, together with their labels.
# Stored under its own name: this cell previously reused `timing`, which
# overwrote the relative-time table built by the earlier timing query with
# unrelated data.
equipment_properties = pd.DataFrame(g.query(sparql_prefixes + """
SELECT DISTINCT ?p ?l
WHERE {
    ?s ?p ?o .
    ?p rdfs:subPropertyOf* :equipment .
    ?p rdfs:label ?l
}
"""), columns=['p', 'l']).fillna('')

equipment_properties

Unnamed: 0,p,l
0,http://www.semanticweb.org/tpfliss/ontologies/...,in
1,http://www.semanticweb.org/tpfliss/ontologies/...,from
2,http://www.semanticweb.org/tpfliss/ontologies/...,onto
3,http://www.semanticweb.org/tpfliss/ontologies/...,into
