In [None]:
from pronto import Ontology
# Load the ontology from the local "go.obo" file with pronto.
# NOTE(review): the name `go` is rebound to a plain dict by the later
# `json.load` cell, so the dump cell that calls `go.dump` only works while
# `go` still refers to this Ontology object (i.e. on a top-to-bottom run).
go = Ontology("go.obo")
# Bare expression so the notebook displays the loaded object.
go

In [None]:
# Serialise the loaded ontology to "ms.json" using pronto's "json" format.
# The file is opened in binary mode because the handle is passed straight to
# `dump` (presumably it writes bytes -- confirm against the pronto docs).
with open("ms.json", "wb") as json_file:
    go.dump(json_file, format="json")

In [None]:
import json
# Read the exported JSON back in as plain Python data (dicts / lists).
# From here on `go` is the parsed JSON document, not the pronto Ontology.
#
# The encoding is stated explicitly: the file was written in binary mode, and
# relying on the platform default encoding for text mode can fail on systems
# whose default is not UTF-8.
with open("ms.json", "r", encoding="utf-8") as json_file:
    go = json.load(json_file)

In [None]:
# Show which top-level keys the first graph of the loaded document exposes.
go["graphs"][0].keys()

In [None]:
# Peek at the first node record to see which fields a node carries.
go["graphs"][0]["nodes"][0]

In [None]:
# Group the graph's edges by their subject term.
#
# Result: {subject_id: [(object_id, predicate), ...]}, where both IDs are the
# last "/"-separated segment of the corresponding edge field and the predicate
# is stored exactly as it appears in the edge record.
edge_dict: dict = {}
for edge in go["graphs"][0]["edges"]:
    subject_id = edge["sub"].split("/")[-1]
    object_id = edge["obj"].split("/")[-1]
    predicate = edge["pred"]
    edge_dict.setdefault(subject_id, []).append((object_id, predicate))

In [None]:
# Preview a handful of entries rather than echoing the whole mapping: the full
# dict holds one entry per subject term and would flood the cell output.
dict(list(edge_dict.items())[:5])

In [None]:
# Sanity check: list every node whose "type" is something other than "CLASS",
# since only CLASS nodes are exported further down.
for node in go["graphs"][0]["nodes"]:
    if node["type"] == "CLASS":
        continue
    print(node)

In [None]:
# Parallel lists describing every CLASS node: position i in each list refers
# to the same term.
names: list[str] = []
# NOTE: `id` shadows the builtin of the same name; the name is kept because
# later cells reference it.
id: list[str] = []
desc: list[str] = []
synonymes: list[list[str]] = []

for go_term in go["graphs"][0]["nodes"]:
    if go_term["type"] != "CLASS":
        continue

    # `meta`, its `definition` and its `synonyms` are optional in OBO Graphs
    # JSON, so a missing or null value falls back to an empty description /
    # empty synonym list instead of raising.
    meta = go_term.get("meta") or {}
    definition = meta.get("definition") or {}

    # Extract everything first and append afterwards, so a failure on one node
    # cannot leave the four lists with different lengths.
    term_id = go_term["id"].split("/")[-1]
    label = go_term["lbl"]
    description = definition.get("val", "")
    term_synonyms = [syn["val"] for syn in meta.get("synonyms") or []]

    id.append(term_id)
    names.append(label)
    desc.append(description)
    synonymes.append(term_synonyms)

In [None]:
import jsonschema
from jsonschema import validate

# Entries destined for the exported file, one dict per term.
impatient_json: list[dict] = []

# Properties of the nested "data" object. Every key declared here is
# mandatory, so the "required" list below is derived from this mapping's
# key order.
_data_properties = {
    "description": {"type": "string"},
    "synonymes": {"type": "string"},
    "phenotype_datamined": {"type": "string"},
    "gene_datamined": {"type": "string"},
    "alternative_language": {"type": "string"},
    "correlates_with": {"type": "string"},
    "image_annotation": {"type": "boolean"},
    # Six-digit hexadecimal colour with a leading "#".
    "hex_color": {"type": "string", "pattern": "^#[0-9a-fA-F]{6}$"},
    "hpo_datamined": {"type": "string"},
}

# JSON Schema for a single exported entry: identifier, label, icon flag,
# the nested "data" object and the identifier of the parent entry.
impatient_json_schema = {
    "type": "object",
    "properties": {
        "id": {"type": "string"},
        "text": {"type": "string"},
        "icon": {"type": "boolean"},
        "data": {
            "type": "object",
            "properties": _data_properties,
            "required": list(_data_properties),
        },
        "parent": {"type": "string"},
    },
    "required": ["id", "text", "icon", "data", "parent"],
}

# Turn the parallel term lists into one export entry per term.
for position, raw_id in enumerate(id):
    # Identifiers are exported with ":" in place of "_".
    entry_id = raw_id.replace("_", ":")
    label = names[position]
    entry_data = {
        "description": desc[position],
        # Synonyms are flattened into a single comma-separated string.
        "synonymes": ",".join(synonymes[position]),
        "phenotype_datamined": "",
        "gene_datamined": "",
        "alternative_language": names[position],
        "correlates_with": "",
        # Only the very first entry is flagged for image annotation.
        "image_annotation": position == 0,
        "hex_color": "#FFFFFF",
        "hpo_datamined": "",
    }
    entry = {
        "id": entry_id,
        "text": label,
        "icon": True,
        "data": entry_data,
        # Placeholder parent; overwritten below for terms that have an
        # outgoing edge.
        "parent": "#",
    }
    impatient_json.append(entry)
    
# Position of each term ID in `id` (and therefore in `impatient_json`).
# Built once so each lookup below is a dict access instead of a linear
# `id.index(...)` scan per edge subject; `setdefault` keeps the first
# occurrence, matching what `list.index` would return.
term_positions: dict[str, int] = {}
for position, term_id in enumerate(id):
    term_positions.setdefault(term_id, position)

for child, parent in edge_dict.items():
    index_term = term_positions.get(child)
    if index_term is None:
        print(f"Term {child} not found in the list of terms")
        continue
    # Each entry can hold a single parent, so only the first recorded edge is
    # kept; any further parents of this term are dropped (information loss).
    impatient_json[index_term]["parent"] = parent[0][0].replace("_", ":")

In [None]:
# Write the entries to "impatient.json". The context manager guarantees the
# handle is flushed and closed once the dump finishes, instead of leaving an
# anonymous open file behind.
with open("impatient.json", "w") as output_file:
    json.dump(impatient_json, output_file)

In [None]:
# Check every exported entry against the schema; `validate` raises as soon as
# an entry does not conform.
# NOTE(review): this cell runs after the export cell, so an invalid entry is
# only detected once "impatient.json" has already been written.
for json_data in impatient_json:
    validate(instance=json_data, schema=impatient_json_schema)