### Neo4j Graph

#### Disease Graph

In [None]:
import pandas as pd
import json

# Load the exported disease records. The code below indexes the result by
# position, so the file is expected to hold a JSON array of objects.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform default encoding.
with open("1639304259.json", "r", encoding="utf-8") as f:
    dataMed = json.load(f)

# The DataFrame columns follow the key order of the first record.
coln = list(dataMed[0].keys())

size = len(dataMed)

# Convert child ids to int. Records without children carry the
# 'Key Not found' placeholder string, which is left untouched.
for record in dataMed:
    if record["childs"] != 'Key Not found':
        record["childs"] = [int(x) for x in record["childs"]]

# Convert parent ids to int, best effort. The first record is skipped on
# purpose, exactly as before (NOTE(review): presumably because it is the
# root entry - confirm). A record whose "parents" entry is missing or not a
# list of int-like values is left unchanged.
for record in dataMed[1:]:
    try:
        record["parents"] = [int(x) for x in record["parents"]]
    except (KeyError, TypeError, ValueError):
        pass

DF = pd.DataFrame(dataMed, columns=coln)
DF = DF.set_index("id")


In [22]:
# Preview the first three records of the id-indexed frame.
DF.head(n=3)

Unnamed: 0_level_0,code,title,defn,syns,childs,parents
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1639304259,,Diseases of the skin,Diseases of the skin incorporate conditions af...,Key Not found,"[384984571, 533054712, 237197715, 106304476, 1...",[1880047481]
384984571,,Certain skin disorders attributable to infecti...,Infections and infestations affecting the skin...,Key Not found,"[1780076091, 435641775, 30770553, 160850170, 1...",[1639304259]
1780076091,,Certain skin disorders attributable to viral i...,This group incorporates both localized infecti...,Key Not found,"[467632481, 1072978670, 772982663, 556027555, ...",[384984571]


In [21]:
from neo4j import GraphDatabase

class driver(object):
    """Small wrapper around a Neo4j driver for the disease graph.

    Each record is stored as a ``Disease`` node identified by its ``id``
    property. The ``parents`` and ``childs`` id lists are kept as node
    properties and can additionally be turned into ``Parent`` / ``Child``
    relationships with :meth:`add_parents` and :meth:`add_children`.

    The lowercase class name is kept for compatibility with existing callers.
    """

    # Queries used by the generation walkers; both return a single column
    # holding the stored id list of the requested node.
    _PARENTS_QUERY = ("MATCH (a:Disease {id: $idnum}) "
                      "RETURN a.parents")
    _CHILDREN_QUERY = ("MATCH (a:Disease {id: $idnum}) "
                       "RETURN a.childs")

    def __init__(self, uri, user, password):
        """Open a driver for the server at *uri* using basic authentication."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        # Results of the most recent find_parents / find_children call,
        # keyed by generation number (1 = direct parents / children).
        self.parents = {}
        self.children = {}

    def close(self):
        """Close the underlying Neo4j driver and release its connections."""
        self.driver.close()

    def add_nodes(self, idnum, code=None, title=None,
                  defn=None, syns=None, childs=None, parents=None):
        """Create the ``Disease`` node with id *idnum* if it does not exist.

        The remaining arguments are stored as node properties. They are only
        written when the node is created (``ON CREATE SET``); calling this
        again for an existing id leaves its properties unchanged.
        """
        with self.driver.session() as session:
            session.run("MERGE (a:Disease {id: $idnum}) "
                        "ON CREATE SET a.code = $code, \
                         a.title = $title, a.defn = $defn,\
                         a.syns = $syns, a.childs = $childs,\
                         a.parents = $parents",
                        idnum=idnum, code=code,
                        title=title, defn=defn,
                        syns=syns, childs=childs,
                        parents=parents)

    def add_parents(self):
        """Link every node to the nodes listed in its ``parents`` property.

        A ``Parent`` relationship is created from the node to each listed
        parent. Some nodes have children they are not listed as parent of
        (and vice versa), so this is kept separate from :meth:`add_children`.
        The type of ``id`` must match the element type of ``parents`` for the
        ``IN`` test to succeed.
        """
        with self.driver.session() as session:
            # MERGE rather than CREATE so that running this twice does not
            # duplicate every relationship.
            session.run("MATCH (a:Disease),(b:Disease) "
                        "WHERE a.id in b.parents "
                        "MERGE (a)<-[r:Parent]-(b)")

    def add_children(self):
        """Link every node to the nodes listed in its ``childs`` property.

        A ``Child`` relationship is created from the node to each listed
        child.
        """
        with self.driver.session() as session:
            # MERGE keeps the operation idempotent, like add_nodes.
            session.run("MATCH (a:Disease),(b:Disease) "
                        "WHERE a.id in b.childs "
                        "MERGE (a)<-[r:Child]-(b)")

    def clear_graph(self):
        """Delete every node in the database together with its relationships."""
        with self.driver.session() as session:
            session.run("MATCH (a) DETACH DELETE a")

    def find_shortest_path(self, node1, node2):
        """Print the nodes on the shortest path between two node ids.

        Relationships of any type and direction are followed. Each node on
        the path is printed (with its properties); nothing is returned.
        """
        with self.driver.session() as session:
            path = session.run("MATCH p = shortestPath((a1:Disease)-[*]-(a2:Disease)) "
                        "WHERE a1.id = $node1 AND a2.id = $node2 "
                        "RETURN  p",
                        node1=node1, node2=node2)
            # The result has to be iterated while the session is still open.
            for record in path:
                nodes = record["p"].nodes
                for node in nodes:
                    print(node, '\n')

    def _fetch_id_list(self, query, idnum):
        """Run *query* for *idnum* and return the stored id list, or None.

        None is returned when no node matches or when the stored property is
        not a list (for example a placeholder string).
        """
        with self.driver.session() as session:
            # Read the record inside the session: a result must not be
            # consumed after its session has been closed.
            record = session.run(query, idnum=idnum).single()
            ids = None if record is None else record[0]
        return ids if isinstance(ids, list) else None

    def _walk_generations(self, query, store, idnum, gener, ref):
        """Fill *store* with ids reachable from *idnum*, one key per generation."""
        ids = self._fetch_id_list(query, idnum)
        if ids is None:
            return
        # Several nodes can contribute to the same generation, so collect
        # their ids instead of letting the last one overwrite the others.
        bucket = store.setdefault((ref - gener) + 1, [])
        for num in ids:
            if num not in bucket:
                bucket.append(num)
        if gener > 1:
            for num in ids:
                self._walk_generations(query, store, num, gener - 1, ref)

    def find_parents(self, idnum, gener, ref=False):
        """Collect *gener* generations of parents of node *idnum*.

        The result is stored on the instance and read back with
        :meth:`return_parents`: a dict mapping the generation number
        (1 = direct parents) to the list of parent ids found at that level.
        *ref* is used internally by the recursion and should be left at its
        default; a falsy *ref* starts a fresh search and clears old results.
        """
        if not ref:
            ref = gener
            self.parents = {}
        self._walk_generations(self._PARENTS_QUERY, self.parents,
                               idnum, gener, ref)

    def return_parents(self):
        """Return the generations gathered by the last :meth:`find_parents`."""
        return self.parents

    def find_children(self, idnum, gener, ref=False):
        """Collect *gener* generations of children of node *idnum*.

        The result is stored on the instance and read back with
        :meth:`return_children`: a dict mapping the generation number
        (1 = direct children) to the list of child ids found at that level.
        *ref* is used internally by the recursion and should be left at its
        default; a falsy *ref* starts a fresh search and clears old results.
        """
        if not ref:
            ref = gener
            self.children = {}
        self._walk_generations(self._CHILDREN_QUERY, self.children,
                               idnum, gener, ref)

    def return_children(self):
        """Return the generations gathered by the last :meth:`find_children`."""
        return self.children

    def get_info(self, idnum):
        """Return the properties of node *idnum* as a one-row DataFrame.

        The columns are Code, Title, Definition, Children and Parents. When
        no node has that id, an empty DataFrame with the same columns is
        returned.
        """
        columns = ['Code', 'Title', 'Definition', 'Children', 'Parents']
        with self.driver.session() as session:
            record = session.run("MATCH (a:Disease {id: $idnum}) "
                                 "RETURN a.code, a.title, a.defn, "
                                 "a.childs, a.parents", idnum=idnum).single()
            # Copy the values out while the session is still open.
            row = None if record is None else list(record)
        if row is None:
            return pd.DataFrame(columns=columns)
        return pd.DataFrame([row], columns=columns)
    
    

In [17]:
# Connect to the local Neo4j instance over Bolt.
# NOTE(review): this rebinds the name `driver` from the class to an instance,
# so the class cannot be instantiated again without re-running its cell; the
# password is also hard-coded here.
driver = driver(
    uri="bolt://localhost:7687",
    user="neo4j",
    password="Aim1008",
)

In [18]:
# Insert one Disease node per DataFrame row; the frame's index holds the id.
for node_id, record in DF.iterrows():
    driver.add_nodes(
        idnum=int(node_id),
        code=record['code'],
        title=record['title'],
        defn=record['defn'],
        syns=record['syns'],
        childs=record['childs'],
        parents=record['parents'],
    )

In [19]:
# Build the relationships from the id lists stored on the nodes. This has to
# run after the nodes have been inserted, because both calls match on
# existing Disease nodes.
driver.add_parents()
driver.add_children()

In [20]:
# Uncomment to delete every node and relationship in the database:
# driver.clear_graph()