In [247]:
import rdflib
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
import networkx as nx
import matplotlib.pyplot as plt
import json
import os

In [248]:
# Directory (resolved relative to the current working directory) that holds
# the input JSON and receives every file this notebook writes.
output_dir = os.path.abspath(os.path.join(os.getcwd(),"../..","model-results/cleaned-appraisal-report-rdf/"))
file_name = "PAGE-2.json"
# JSON text is UTF-8; pass the encoding explicitly so the load does not depend
# on the platform's locale default (e.g. cp1252 on Windows).
with open(os.path.join(output_dir, file_name), "r", encoding="utf-8") as f:
    data = json.load(f)

In [249]:
# Pull out the text found between "<ontology>\n" and "</ontology>" in the
# first content entry of the loaded JSON.
# The extraction runs BEFORE the output file is opened: if a marker is missing
# (IndexError) or the JSON has an unexpected layout, an existing .ttl file is
# left untouched instead of being truncated to an empty file.
text = data["content"][0]["text"].split("<ontology>\n")[1].split("</ontology>")[0]
# Turtle documents are UTF-8 by specification, so write with an explicit
# encoding rather than the platform's locale default.
with open(os.path.join(output_dir,file_name.split(".json")[0]+".ttl"),"w",encoding="utf-8") as f:
    f.write(text)

In [250]:
# Load the .ttl file written next to the source JSON into a fresh rdflib graph.
# `result` holds whatever Graph.parse returns and is what later cells iterate.
ttl_path = os.path.join(output_dir, file_name.split(".json")[0] + ".ttl")
g = rdflib.Graph()
result = g.parse(ttl_path)

In [251]:
# Build the node list for the exported graph.
#   nodes        list of node dicts (id / attributes / type / group)
#   found_nodes  RDF term -> index of its entry in `nodes`
# A node's identity downstream is its id string (the term with every "mf:"
# removed), so entries are de-duplicated on that string: two distinct terms
# that collapse to the same id share ONE node entry instead of producing
# duplicate ids in the output.
nodes = []
found_nodes=dict()
node_index_by_id = {}  # id string -> index in `nodes`
for s,p,o in result:
    # For triples whose predicate contains "#type", the object is treated as
    # the subject's type rather than as a node of its own.
    for n in ([s,o] if "#type" not in p else [s]):
        if n not in found_nodes:
            node_id = str(n.replace("mf:",""))
            if node_id not in node_index_by_id:
                node_index_by_id[node_id] = len(nodes)
                nodes.append({"id": node_id, "attributes": "", "type": "", "group": 1})
            found_nodes[n] = node_index_by_id[node_id]
    print(s,p,o)
    if "#type" in p:
        # If a subject has several type triples, the last one iterated wins.
        nodes[found_nodes[s]]["type"]=str(o)

n45e87f50d2e24484b38351dc189473edb2 http://www.w3.org/2000/01/rdf-schema#label Josette D. Jackson, CCIM State Certified General Real Estate Appraiser RZ3275
mf:PropertyAppraisalDate_1 http://www.w3.org/1999/02/22-rdf-syntax-ns#type mf:PropertyAppraisalDate
mf:PropertyAddress_1 mf:hasZip 32578
mf:PropertyAddress_1 http://www.w3.org/1999/02/22-rdf-syntax-ns#type mf:PropertyAddress
mf:PropertyAppraisal_1 mf:hasTitle APPRAISAL REPORT
mf:PropertyAppraisal_1 http://www.w3.org/1999/02/22-rdf-syntax-ns#type mf:PropertyAppraisal
n45e87f50d2e24484b38351dc189473edb2 http://www.w3.org/1999/02/22-rdf-syntax-ns#type mf:PropertyAppraiser
mf:PropertyAppraisal_1 mf:hasAppraiser n45e87f50d2e24484b38351dc189473edb1
mf:Property_1 http://www.w3.org/1999/02/22-rdf-syntax-ns#type mf:Property
mf:Property_1 mf:hasAppraisal mf:PropertyAppraisal_1
mf:PropertyAppraisalDate_1 mf:hasDay 13
mf:PropertyAppraisalDate_1 http://www.w3.org/2000/01/rdf-schema#label Effective Date
mf:Property_1 mf:hasOwner mf:PropertyOwner

In [252]:
# Turn every triple whose predicate does not contain "#type" into a link dict.
# Endpoints are the subject/object with every "mf:" removed; the label is the
# predicate reduced to a short name.
links = []
for subj, pred, obj in result:
    if "#type" not in pred:
        label = pred.replace("mf:", "")
        if "#" in label:
            # Keep the segment right after the first "#".
            label = label.split("#")[1]
        # Keep the last "/"-separated segment (a no-op when there is no "/").
        label = label.split("/")[-1]
        links.append(
            dict(
                source=subj.replace("mf:", ""),
                target=obj.replace("mf:", ""),
                label=label,
                strength=1.0,
                rationale="",
            )
        )

In [253]:
# Disjoint-set state keyed by node id: every id starts with rank 1 and as its
# own parent.
rank = dict.fromkeys((entry["id"] for entry in nodes), 1)
parent = {node_id: node_id for node_id in rank}
def find(n):
    """Return the root of the set containing ``n``.

    Walks the module-level ``parent`` mapping upwards and, on the way, points
    each visited node at its grandparent so later lookups are shorter.
    Raises ``KeyError`` if a visited id is missing from ``parent``.
    """
    node = n
    while True:
        up = parent[node]
        if up == node:
            return node
        grand = parent[up]
        parent[node] = grand
        node = grand
def union(n1,n2):
    """Merge the sets containing ``n1`` and ``n2``.

    The root with the strictly larger ``rank`` is kept; on a tie the root of
    ``n2`` is kept. The kept root's rank is incremented on every call, also
    when both arguments already share a root.
    """
    root_a = find(n1)
    root_b = find(n2)
    if rank[root_a] > rank[root_b]:
        keep, absorb = root_a, root_b
    else:
        keep, absorb = root_b, root_a
    rank[keep] += 1
    parent[absorb] = keep

In [254]:
# Merge the two endpoints of every link into the same set.
for link in links:
    union(link["source"], link["target"])

In [255]:
# Collect the root of every known node id: one representative per set.
top_level_parents = set(map(find, parent))

In [256]:
# Show the collected roots (one representative id per merged set).
top_level_parents

{'APPRAISAL REPORT'}

In [257]:
# Tie every set representative to one synthetic "Knowledge" hub node.
# - The hub node is added only when there is at least one representative and
#   no node with that id exists yet, so re-running this cell does not append a
#   duplicate hub.
# - Representatives are visited in sorted order: set iteration order for
#   strings can differ between interpreter runs, and sorting keeps the order
#   of the appended links stable.
# - A hub link is appended only if an identical link is not already present,
#   which also keeps re-runs from duplicating links.
if top_level_parents and not any(node["id"] == "Knowledge" for node in nodes):
    nodes.append({"id": "Knowledge", "attributes": "", "type": "", "group": 1})
for v in sorted(top_level_parents):
    hub_link = {
        "source": v,
        "target": "Knowledge",
        "label": "ManuallyAdded",
        "strength": 1.0,
        "rationale": "",
    }
    if hub_link not in links:
        links.append(hub_link)

In [258]:
# Bundle links, nodes and the single default group into the export structure.
out = dict(
    links=links,
    nodes=nodes,
    groups=[dict(group_id=1, rationale="default group")],
)

In [259]:
# Print the graph as real JSON. Swapping quote characters in the dict's repr
# yields invalid JSON as soon as a value contains an apostrophe or a double
# quote; json.dumps escapes such values correctly. ensure_ascii=False keeps
# non-ASCII text readable instead of \u-escaping it.
print(json.dumps(out, ensure_ascii=False))

{"links": [{"source": "n45e87f50d2e24484b38351dc189473edb2", "target": "Josette D. Jackson, CCIM State Certified General Real Estate Appraiser RZ3275", "label": "label", "strength": 1.0, "rationale": ""}, {"source": "PropertyAddress_1", "target": "32578", "label": "hasZip", "strength": 1.0, "rationale": ""}, {"source": "PropertyAppraisal_1", "target": "APPRAISAL REPORT", "label": "hasTitle", "strength": 1.0, "rationale": ""}, {"source": "PropertyAppraisal_1", "target": "n45e87f50d2e24484b38351dc189473edb1", "label": "hasAppraiser", "strength": 1.0, "rationale": ""}, {"source": "Property_1", "target": "PropertyAppraisal_1", "label": "hasAppraisal", "strength": 1.0, "rationale": ""}, {"source": "PropertyAppraisalDate_1", "target": "13", "label": "hasDay", "strength": 1.0, "rationale": ""}, {"source": "PropertyAppraisalDate_1", "target": "Effective Date", "label": "label", "strength": 1.0, "rationale": ""}, {"source": "Property_1", "target": "PropertyOwner_1", "label": "hasOwner", "streng

In [260]:
# Display the assembled export structure as this cell's output.
out

{'links': [{'source': 'n45e87f50d2e24484b38351dc189473edb2',
   'target': 'Josette D. Jackson, CCIM State Certified General Real Estate Appraiser RZ3275',
   'label': 'label',
   'strength': 1.0,
   'rationale': ''},
  {'source': 'PropertyAddress_1',
   'target': '32578',
   'label': 'hasZip',
   'strength': 1.0,
   'rationale': ''},
  {'source': 'PropertyAppraisal_1',
   'target': 'APPRAISAL REPORT',
   'label': 'hasTitle',
   'strength': 1.0,
   'rationale': ''},
  {'source': 'PropertyAppraisal_1',
   'target': 'n45e87f50d2e24484b38351dc189473edb1',
   'label': 'hasAppraiser',
   'strength': 1.0,
   'rationale': ''},
  {'source': 'Property_1',
   'target': 'PropertyAppraisal_1',
   'label': 'hasAppraisal',
   'strength': 1.0,
   'rationale': ''},
  {'source': 'PropertyAppraisalDate_1',
   'target': '13',
   'label': 'hasDay',
   'strength': 1.0,
   'rationale': ''},
  {'source': 'PropertyAppraisalDate_1',
   'target': 'Effective Date',
   'label': 'label',
   'strength': 1.0,
   'rat

In [261]:
# Write the export structure next to the input file as "<name>-graph.json".
graph_json_path = os.path.join(output_dir, file_name.split(".json")[0] + "-graph.json")
with open(graph_json_path, "w+") as graph_file:
    json.dump(out, graph_file)