In [2]:
import csv
import json
import re
from collections import defaultdict
import glob
import os
from rdflib import Graph, URIRef, Literal, XSD, Namespace, RDF

In [3]:
paths = glob.glob("data/processed_jl/*")
paths

['data/processed_jl/nyt.jl',
 'data/processed_jl/nyt_dinner.jl',
 'data/processed_jl/epicurious_dinner.jl',
 'data/processed_jl/epicurious_dessert.jl']

In [11]:
kg = Graph()
SCHEMA = Namespace("http://schema.org/")
RDFS = Namespace("http://www.w3.org/2000/01/rdf-schema#")
kg.parse("../recipe_model.ttl", format="turtle")

<Graph identifier=N5d19fe52e18146eeafabf1fd410eb125 (<class 'rdflib.graph.Graph'>)>

In [12]:
for path in paths:
    count = 0
    for line in open(path,"r").readlines():
        if count==4:
            break
        data = json.loads(line.strip())
        node = URIRef(data["url"])
        kg.add((node,RDF.type,SCHEMA["Recipe"]))
        
        kg.add((node,RDFS["label"],Literal(data["recipe_name"])))
        
        for i in data["ingredients"]:
            kg.add((node,SCHEMA["recipeIngredient"],Literal(i)))
        
        r = URIRef(data["url"]+"/rating")
        kg.add((r,RDF.type,SCHEMA["AggregateRating"]))
        kg.add((r,SCHEMA["ratingValue"],Literal(data["rating"])))
        kg.add((r,SCHEMA["bestRating"],Literal(5.0, datatype=XSD.float)))
        kg.add((node,SCHEMA["aggregateRating"],r))
        
        steps = URIRef(data["url"]+"/steps")
        kg.add((steps,RDF.type,SCHEMA["ItemList"]))
        for i in data["steps"]:
            kg.add((steps,SCHEMA["itemListElement"],Literal(i.strip())))
        kg.add((node,SCHEMA["recipeInstructions"],steps))
        
        tags = URIRef(data["url"]+"/tags")
        kg.add((tags,RDF.type,SCHEMA["ItemList"]))
        for i in data["tags"]:
            kg.add((tags,SCHEMA["itemListElement"],Literal(i.strip())))
        kg.add((node,SCHEMA["tags"],tags))
        
        kg.add((node,SCHEMA["healthScore"],Literal(data["health_score"],datatype=XSD.float)))
        
        nut = URIRef(data["url"]+"/nutrition")
        kg.add((nut,RDF.type,SCHEMA["NutritionInformation"]))
        kg.add((nut,SCHEMA["calories"],Literal(data["nutrition_total"]["calories"])))
        kg.add((nut,SCHEMA["fatContent"],Literal(data["nutrition_total"]["total fat"])))
        kg.add((nut,SCHEMA["carbohydrateContent"],Literal(data["nutrition_total"]["carbs"])))
        kg.add((nut,SCHEMA["sugarContent"],Literal(data["nutrition_total"]["sugars"])))
        kg.add((nut,SCHEMA["proteinContent"],Literal(data["nutrition_total"]["protein"])))
        kg.add((nut,SCHEMA["sodiumContent"],Literal(data["nutrition_total"]["sodium"])))
        kg.add((nut,SCHEMA["fiberContent"],Literal(data["nutrition_total"]["fiber"])))
        kg.add((nut,SCHEMA["saturatedFatContent"],Literal(data["nutrition_total"]["saturated fat"])))
        kg.add((nut,SCHEMA["transFatContent"],Literal(data["nutrition_total"]["trans fat"])))
        kg.add((nut,SCHEMA["monounsaturatedFatContent"],Literal(data["nutrition_total"]["monounsaturated fat"])))
        kg.add((nut,SCHEMA["polyunsaturatedFatContent"],Literal(data["nutrition_total"]["polyunsaturated fat"])))
        kg.add((nut,SCHEMA["cholesterolContent"],Literal(data["nutrition_total"]["cholesterol"])))
        kg.add((nut,SCHEMA["calciumContent"],Literal(data["nutrition_total"]["calcium"])))
        kg.add((nut,SCHEMA["magnesiumContent"],Literal(data["nutrition_total"]["magnesium"])))
        kg.add((nut,SCHEMA["potassiumContent"],Literal(data["nutrition_total"]["potassium"])))
        kg.add((nut,SCHEMA["ironContent"],Literal(data["nutrition_total"]["iron"])))
        kg.add((nut,SCHEMA["zincContent"],Literal(data["nutrition_total"]["zinc"])))
        kg.add((nut,SCHEMA["phosphorusContent"],Literal(data["nutrition_total"]["phosphorus"])))
        kg.add((nut,SCHEMA["vitaminAContent"],Literal(data["nutrition_total"]["vitamin a"])))
        kg.add((nut,SCHEMA["vitaminCContent"],Literal(data["nutrition_total"]["vitamin c"])))
        kg.add((nut,SCHEMA["thiaminB1Content"],Literal(data["nutrition_total"]["thiamin b1"])))
        kg.add((nut,SCHEMA["riboflavinB2Content"],Literal(data["nutrition_total"]["riboflavin b2"])))
        kg.add((nut,SCHEMA["niacinB3Content"],Literal(data["nutrition_total"]["niacin b3"])))
        kg.add((nut,SCHEMA["vitaminB6Content"],Literal(data["nutrition_total"]["vitabin b6"])))
        kg.add((nut,SCHEMA["folicAcidContent"],Literal(data["nutrition_total"]["folic acid"])))
        kg.add((nut,SCHEMA["vitaminB12Content"],Literal(data["nutrition_total"]["vitamin b12"])))
        kg.add((nut,SCHEMA["vitaminDContent"],Literal(data["nutrition_total"]["vitamin d"])))
        kg.add((nut,SCHEMA["vitaminEContent"],Literal(data["nutrition_total"]["vitamin e"])))
        kg.add((nut,SCHEMA["vitaminKContent"],Literal(data["nutrition_total"]["vitamin k"])))
        
        kg.add((nut,SCHEMA["caloriesDV"],Literal(data["nutrition_dv"]["calories"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["fatContentDV"],Literal(data["nutrition_dv"]["total fat"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["carbohydrateContentDV"],Literal(data["nutrition_dv"]["carbs"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["sugarContentDV"],Literal(data["nutrition_dv"]["sugars"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["proteinContentDV"],Literal(data["nutrition_dv"]["protein"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["sodiumContentDV"],Literal(data["nutrition_dv"]["sodium"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["fiberContentDV"],Literal(data["nutrition_dv"]["fiber"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["saturatedFatContentDV"],Literal(data["nutrition_dv"]["saturated fat"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["transFatContentDV"],Literal(data["nutrition_dv"]["trans fat"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["monounsaturatedFatContentDV"],Literal(data["nutrition_dv"]["monounsaturated fat"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["polyunsaturatedFatContentDV"],Literal(data["nutrition_dv"]["polyunsaturated fat"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["cholesterolContentDV"],Literal(data["nutrition_dv"]["cholesterol"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["calciumContentDV"],Literal(data["nutrition_dv"]["calcium"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["magnesiumContentDV"],Literal(data["nutrition_dv"]["magnesium"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["potassiumContentDV"],Literal(data["nutrition_dv"]["potassium"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["ironContentDV"],Literal(data["nutrition_dv"]["iron"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["zincContentDV"],Literal(data["nutrition_dv"]["zinc"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["phosphorusContentDV"],Literal(data["nutrition_dv"]["phosphorus"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["vitaminAContentDV"],Literal(data["nutrition_dv"]["vitamin a"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["vitaminCContentDV"],Literal(data["nutrition_dv"]["vitamin c"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["thiaminB1ContentDV"],Literal(data["nutrition_dv"]["thiamin b1"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["riboflavinB2ContentDV"],Literal(data["nutrition_dv"]["riboflavin b2"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["niacinB3ContentDV"],Literal(data["nutrition_dv"]["niacin b3"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["vitaminB6ContentDV"],Literal(data["nutrition_dv"]["vitabin b6"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["folicAcidContentDV"],Literal(data["nutrition_dv"]["folic acid"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["vitaminB12ContentDV"],Literal(data["nutrition_dv"]["vitamin b12"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["vitaminDContentDV"],Literal(data["nutrition_dv"]["vitamin d"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["vitaminEContentDV"],Literal(data["nutrition_dv"]["vitamin e"],datatype=XSD.float)))
        kg.add((nut,SCHEMA["vitaminKContentDV"],Literal(data["nutrition_dv"]["vitamin k"],datatype=XSD.float)))
        kg.add((node,SCHEMA["nutrition"],nut))
        
        if type(data["author"])==str:
            a = URIRef(data["url"]+"/author0")
            kg.add((a,RDF.type,SCHEMA["Person"]))
            kg.add((a,SCHEMA["name"],Literal(data["author"])))
            kg.add((node,SCHEMA["author"],a))
        elif type(data["author"])==list:
            for i,u in enumerate(data["author"]):
                a = URIRef(data["url"]+"/author"+str(i))
                kg.add((a,RDF.type,SCHEMA["Person"]))
                kg.add((a,SCHEMA["name"],Literal(u)))
                kg.add((node,SCHEMA["author"],a))
        
        count += 1
kg.serialize("data/test.ttl", format="turtle")