In [1]:
from typing import Union


class Entity:
    
    def __init__(self, name: str):
        self.name = name.replace(' ', '_')

    def __eq__(self, other):
        return self.name == other.name

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return f'{self.name}'


class Relation:

    def __init__(self, name: str):
        self.name = name.replace(' ', '_')

    def __eq__(self, other):
        return self.name == other.name

    def __hash__(self):
        return hash(self.name)

    def __repr__(self):
        return self.name


class Fact:

    def __init__(self,
                 head: Union[Entity, str],
                 relation: Union[Relation, str],
                 tail: Union[Entity, str]):
        if isinstance(head, str):
            head = Entity(head)
        self.head = head

        if isinstance(relation, str):
            relation = Relation(relation)
        self.relation = relation

        if isinstance(tail, str):
            tail = Entity(tail)
        self.tail = tail

    def __repr__(self):
        return f'{repr(self.head)} -- {repr(self.relation)} --> {repr(self.tail)}'


class KnowledgeGraph:
    """The elemental data structure of knowledge graph.
    
    Based on ref[1], section II.B, knowledge graph is defined as
    triplet (E, R, F), where E for entities, R for relations, and
    F for facts. Fact is defined as triplet (h, r, t), where head
    h and tail t are in E and relation r in R.

    References
    ----------
    1. [A Survey on Knowledge Graphs: Representation, Acquisition and Applications](https://arxiv.org/abs/2002.00388v4).
    """
    
    def __init__(self,
                 entities: set = None,
                 relations: set = None,
                 facts: set = None):
        self.entities = set() if entities is None else entities
        self.relations = set() if relations is None else relations
        self.facts = set() if facts is None else facts

    def add(self, fact: Fact):
        self.entities.add(fact.head)
        self.relations.add(fact.relation)
        self.entities.add(fact.tail)
        self.facts.add(fact)

    def __iadd__(self, other):
        self.entities.update(other.entities)
        self.relations.update(other.relations)
        self.facts.update(other.facts)
        return self

In [2]:
import os
import json


SUBCATEGORY_RELATION = 'is_of'


def update_knowledge_graph(kg: KnowledgeGraph,
                           data_path: str,
                           category: str = None):
    """Inplace update the knowledge graph `kg` by the data in the
    path `data_path`.

    Parameters
    ----------
    data_path: The path to the JSON file of the pre-structured data.
        We store the data in a specific JSON format, wherein the keys
        are the heads, and values are also JSON, but with the keys
        relations and the values tails or list of tails.

        For example, in "data/taste.json",
        
            {
                "sweet": {
                    "component": [
                        "earth",
                        "water"
                    ]
                },
                "sour": {
                    "component": [
                        "earth",
                        "fire"
                    ]
                },
                ...
            }

    kg: The base knowledge graph, on which new data are added.
    """
    with open(data_path, 'r') as f:
        heads = json.load(f)

    for head, relations in heads.items():
        kg.entities.add(Entity(head))
        kg.facts.add(Fact(head, SUBCATEGORY_RELATION, category))
        for relation, tails in relations.items():
            kg.relations.add(Relation(relation))
            tails = [tails] if isinstance(tails, str) else tails
            for tail in tails:
                kg.entities.add(Entity(tail))
                fact = Fact(head, relation, tail)
                kg.facts.add(fact)


def _load_data_recur(data_dir_path: str,
                     kg: KnowledgeGraph = None):
    if kg is None:
        kg = KnowledgeGraph()
        category = None
    else:
        _, category = os.path.split(data_dir_path)

    for filename in os.listdir(data_dir_path):
        file_path = os.path.join(data_dir_path, filename)

        if os.path.isdir(file_path):
            kg = _load_data_recur(file_path, kg)

        else:
            subcategory, ext = os.path.splitext(filename)
            assert ext == '.json'
            if category is not None:
                kg.add(Fact(subcategory, SUBCATEGORY_RELATION, category))
            update_knowledge_graph(kg, file_path, subcategory)
    return kg


def load_data(data_dir_path: str):
    return _load_data_recur(data_dir_path, None)

In [3]:
kg = load_data('../data')
for fact in kg.facts:
    print(fact)

cravings_for_sweet_and_cold_food_and_drink -- is_of --> others
cold -- is_of --> attribute
lack_of_inner_drive_and_mental_clarity -- hints_for_elevation --> kapha
strong_intolerance_for_heat -- hints_for_elevation --> pitta
susceptibility_to_colds_with_productive_cough -- hints_for_elevation --> kapha
light -- creates --> alertness
hard -- is_of --> attribute
sweet -- has_component --> earth
pungent -- has_component --> fire
cravings_for_sweet_and_cold_food_and_drink -- hints_for_elevation --> pitta
hot -- is_of --> attribute
strong_attachment_to_things -- is_of --> mental
burning_sensations_(especially_in_the_eyes) -- is_of --> others
slow -- is_of --> attribute
pitta -- has_function --> thirst
beef -- is_of --> meat
sweet -- has_component --> water
bitter -- is_of --> taste
static -- is_of --> attribute
heart_burn -- is_of --> others
oily -- is_of --> attribute
urine_is_blackish-brown -- is_of --> urine
strong_attachment_to_things -- hints_for_elevation --> kapha
burning_sensations_(