In [None]:
class KnowledgeGraph:
    """The elemental data structure of a knowledge graph.

    Based on ref[1], section II.B, a knowledge graph is defined as the
    triplet (E, R, F), where E stands for entities, R for relations,
    and F for facts. A fact is defined as a triplet (h, r, t), where
    the head h and the tail t are in E and the relation r is in R.

    Parameters
    ----------
    entities: Initial set of entities. Defaults to `None`, in which
        case a new empty set is created.
    relations: Initial set of relations. Defaults to `None`, in which
        case a new empty set is created.
    facts: Initial set of facts. Defaults to `None`, in which case a
        new empty set is created.

    Notes
    -----
    A set passed to the constructor is stored as-is (it is not
    copied), so in-place merges with `+=` also mutate that set.

    References
    ----------
    1. [A Survey on Knowledge Graphs: Representation, Acquisition and Applications](https://arxiv.org/abs/2002.00388v4).
    """

    def __init__(self,
                 entities: set = None,
                 relations: set = None,
                 facts: set = None):
        self.entities = set() if entities is None else entities
        self.relations = set() if relations is None else relations
        self.facts = set() if facts is None else facts

    def __iadd__(self, other):
        """Merge `other` into this graph in place and return `self`.

        Returns `NotImplemented` when `other` is not a
        `KnowledgeGraph`, so that `kg += something_else` raises the
        conventional `TypeError` instead of an incidental
        `AttributeError`.
        """
        if not isinstance(other, KnowledgeGraph):
            return NotImplemented
        self.entities.update(other.entities)
        self.relations.update(other.relations)
        self.facts.update(other.facts)
        return self


class Entity:
    """A node of the knowledge graph.

    Parameters
    ----------
    name: Name of the entity. Spaces are replaced by underscores
        before the name is stored.
    category: Optional category of the entity. Defaults to `None`,
        meaning the entity has no category.

    Notes
    -----
    Two entities are equal, and hash alike, when both their stored
    name and their category match.
    """

    def __init__(self,
                 name: str,
                 category: str = None):
        self.name = name.replace(' ', '_')
        self.category = category

    def __eq__(self, other):
        # Let Python handle foreign operands (strings, relations, ...)
        # instead of failing on a missing `name`/`category` attribute.
        if not isinstance(other, Entity):
            return NotImplemented
        return (
            self.name == other.name and
            self.category == other.category
        )

    def __hash__(self):
        # Must stay consistent with `__eq__`: same fields, same order.
        return hash((self.name, self.category))

    def __repr__(self):
        if self.category is None:
            return f'{self.name}'
        return f'{self.name} ({self.category})'

class Relation:
    """A relation (edge label) of the knowledge graph.

    Parameters
    ----------
    name: Name of the relation. Spaces are replaced by underscores
        before the name is stored.

    Notes
    -----
    Two relations are equal, and hash alike, when their stored names
    match.
    """

    def __init__(self, name: str):
        self.name = name.replace(' ', '_')

    def __eq__(self, other):
        # Only relations can equal relations; any other object that
        # merely happens to expose a `name` must not compare equal.
        if not isinstance(other, Relation):
            return NotImplemented
        return self.name == other.name

    def __hash__(self):
        # Must stay consistent with `__eq__`.
        return hash(self.name)

    def __repr__(self):
        return self.name


class Fact:
    """A fact of the knowledge graph: the triplet (head, relation, tail).

    Parameters
    ----------
    head: The entity the fact starts from.
    relation: The relation linking head to tail.
    tail: The entity the fact points to.

    Notes
    -----
    Facts compare and hash by value, so adding the same triplet twice
    to a set keeps a single fact. Because the hash is derived from the
    attributes, do not reassign them while the fact is stored in a set
    or used as a dictionary key.
    """

    def __init__(self,
                 head: Entity,
                 relation: Relation,
                 tail: Entity):
        self.head = head
        self.relation = relation
        self.tail = tail

    def _key(self):
        # Single source of truth shared by `__eq__` and `__hash__`.
        return (self.head, self.relation, self.tail)

    def __eq__(self, other):
        if not isinstance(other, Fact):
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return f'{self.head!r} -- {self.relation!r} --> {self.tail!r}'

In [None]:
def get_knowledge_graph(structured_json: dict,
                        kg: KnowledgeGraph = None):
    """Populate a knowledge graph from structured JSON data.

    Parameters
    ----------
    structured_json: The raw data in a structured JSON format. Each
        key is a head; each value is itself a mapping whose keys are
        relations and whose values are either a single tail or a
        list of tails. Heads and tails are written either as

            <name>

        or as

            <category>:<name>

        For example, in "data/taste.json",

            {
                "taste:sweet": {
                    "component": [
                        "element:earth",
                        "element:water"
                    ],
                    "attribute": "cooling"
                },
                "taste:sour": {
                    "component": [
                        "element:earth",
                        "element:fire"
                    ],
                    "attribute": "heating"
                },
                ...
            }

    kg: The base knowledge graph to which the new data are added.
        Defaults to `None`, in which case a new `KnowledgeGraph`
        instance is created.

    Returns
    -------
    The knowledge graph holding the added entities, relations and
    facts. This is `kg` itself when one was passed in.
    """
    graph = KnowledgeGraph() if kg is None else kg

    for raw_head, raw_relations in structured_json.items():
        head_entity = process_entity(raw_head)
        graph.entities.add(head_entity)
        for raw_relation, raw_tails in raw_relations.items():
            relation_obj = process_relation(raw_relation)
            graph.relations.add(relation_obj)
            # A lone tail is given as a bare string; wrap it so that
            # both shapes go through the same loop.
            if isinstance(raw_tails, str):
                raw_tails = [raw_tails]
            for raw_tail in raw_tails:
                tail_entity = process_entity(raw_tail)
                graph.entities.add(tail_entity)
                graph.facts.add(Fact(head_entity, relation_obj, tail_entity))
    return graph


def process_entity(raw_name: str):
    """Turn a raw `<name>` or `<category>:<name>` string into an `Entity`.

    Raises
    ------
    ValueError: If `raw_name` contains more than one colon.
    """
    pieces = raw_name.split(':')
    piece_count = len(pieces)
    if piece_count > 2:
        raise ValueError(f'Invalid raw_name: "{raw_name}"')
    if piece_count == 2:
        # The category comes first in the raw form, but second in
        # the `Entity` signature.
        return Entity(pieces[1], pieces[0])
    return Entity(raw_name)


def process_relation(raw_name: str):
    """Turn a raw relation name into a `Relation`."""
    relation = Relation(raw_name)
    return relation

In [None]:
import os
import json


# Build a single knowledge graph out of every JSON file found
# (recursively) under "../data".
kg = KnowledgeGraph()
for dirpath, _, filenames in os.walk('../data'):
    for filename in filenames:
        _, extension = os.path.splitext(filename)
        if extension != '.json':
            # Report the extension of each skipped non-JSON file.
            print(extension)
            continue
        filepath = os.path.join(dirpath, filename)
        # JSON is exchanged as UTF-8; decode it explicitly rather than
        # relying on the platform's default encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            heads = json.load(f)
        # The file is closed by now; merge its content into the graph.
        kg += get_knowledge_graph(heads)

In [None]:
# Show every fact of the graph, one per line.
for triplet in kg.facts:
    print(triplet)