In [13]:
import pandas as pd
import logging
import json
from py2neo import Graph,Node

# Node tables: each CSV path is declared next to the DataFrame loaded from it.
DISEASE_PATH = 'verified_data/diseases.csv'
DISEASES = pd.read_csv(DISEASE_PATH)

SPECIALITIES_PATH = 'verified_data/specialties.csv'
SPECIALITIES = pd.read_csv(SPECIALITIES_PATH)

SYMPTOMS_PATH = 'verified_data/symptoms.csv'
SYMPTOMS = pd.read_csv(SYMPTOMS_PATH)

# Link tables joining diseases to symptoms and to specialties.
DISEASE_SYMPTOMS_PATH = 'verified_data/diseases_has_symptoms.csv'
DISEASE_SYMPTOMS = pd.read_csv(DISEASE_SYMPTOMS_PATH)

DISEASE_SPECIALITIES_PATH = 'verified_data/diseases_health_specialties.csv'
DISEASE_SPECIALITIES = pd.read_csv(DISEASE_SPECIALITIES_PATH)

In [57]:
# Preview the first three rows of the disease-to-specialty link table.
DISEASE_SPECIALITIES.head(3)

Unnamed: 0,id,disease_id,specialty_id
0,511,9,10
1,512,10,10
2,513,2,6


In [95]:
def read_json(path):
    """Load and return the JSON document stored at *path*.

    Args:
        path: Location of the JSON file (anything ``open`` accepts).

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, which would mis-read non-ASCII text on some systems.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

class KnowledgeGraph:
    """Load the verified disease CSV exports into a Neo4j graph through py2neo.

    The constructor reads three node tables and two link tables from CSV and
    opens a graph connection. ``create_node`` then rebuilds the graph with
    ``Disease``, ``Symptom`` and ``Speciality`` nodes joined by
    ``HAS_SYMPTOM`` and ``HAS_SPECIALITY`` relationships.
    """

    # Columns copied from each table into the records returned by read_nodes().
    _DISEASE_COLUMNS = (
        "id", "name", "overview", "cause", "symptom", "risk_factor",
        "treatment", "diagnosis", "prevention", "synonym",
    )
    _SYMPTOM_COLUMNS = ("id", "name", "overview")
    _SPECIALITY_COLUMNS = ("id", "name", "description")
    _DISEASE_SYMPTOM_COLUMNS = ("disease_id", "symptom_id")
    _DISEASE_SPECIALITY_COLUMNS = ("disease_id", "specialty_id")

    # Relationship queries take the ids as parameters instead of having them
    # formatted into the Cypher text.
    _HAS_SYMPTOM_QUERY = '''
    MATCH (d:Disease), (s:Symptom)
    WHERE d.id = $disease_id AND s.id = $symptom_id
    CREATE (d)-[r:HAS_SYMPTOM]->(s)
    RETURN type(r)
    '''
    _HAS_SPECIALITY_QUERY = '''
    MATCH (d:Disease), (s:Speciality)
    WHERE d.id = $disease_id AND s.id = $specialty_id
    CREATE (d)-[r:HAS_SPECIALITY]->(s)
    RETURN type(r)
    '''

    def __init__(self,uri="bolt://localhost:7687",
                    user="neo4j",
                    password='password',
                    DISEASE_PATH='verified_data/diseases.csv',
                    SYMPTOMS_PATH='verified_data/symptoms.csv',
                    SPECIALITIES_PATH='verified_data/specialties.csv',
                    DISEASE_SYMPTOMS_PATH='verified_data/diseases_has_symptoms.csv',
                    DISEASE_SPECIALITIES_PATH='verified_data/diseases_health_specialties.csv'):
        """Read the CSV tables and connect to the graph database.

        Args:
            uri: Connection URI passed to ``Graph``.
            user: User name for authentication.
            password: Password for authentication.
            DISEASE_PATH: CSV file with the disease node table.
            SYMPTOMS_PATH: CSV file with the symptom node table.
            SPECIALITIES_PATH: CSV file with the speciality node table.
            DISEASE_SYMPTOMS_PATH: CSV file linking diseases to symptoms.
            DISEASE_SPECIALITIES_PATH: CSV file linking diseases to specialities.
        """
        # Node tables.
        self.disease = pd.read_csv(DISEASE_PATH)
        self.symptom = pd.read_csv(SYMPTOMS_PATH)
        self.speciality = pd.read_csv(SPECIALITIES_PATH)

        # Relationship (link) tables.
        self.disease_symptom_rel = pd.read_csv(DISEASE_SYMPTOMS_PATH)
        self.disease_speciality_rel = pd.read_csv(DISEASE_SPECIALITIES_PATH)

        self.graph = Graph(uri, auth=(user, password))

    @staticmethod
    def _records(frame, columns):
        """Return one ``{column: value}`` dict per row of *frame*, limited to *columns*."""
        return [
            {column: row[column] for column in columns}
            for _, row in frame.iterrows()
        ]

    @staticmethod
    def _to_native(value):
        """Return *value* as a built-in Python object when it is a NumPy scalar.

        Rows of an all-integer DataFrame come back as NumPy scalars; they are
        unwrapped here so query parameters are plain Python values.
        """
        item = getattr(value, "item", None)
        return item() if callable(item) else value

    def read_nodes(self):
        """Convert the loaded tables into lists of plain dicts.

        Returns:
            A 5-tuple ``(diseases, symptoms, specialities, disease_symptoms,
            disease_specialities)``; each element is a list with one dict per
            table row, keyed by the column names used in the graph.
        """
        return (
            self._records(self.disease, self._DISEASE_COLUMNS),
            self._records(self.symptom, self._SYMPTOM_COLUMNS),
            self._records(self.speciality, self._SPECIALITY_COLUMNS),
            self._records(self.disease_symptom_rel, self._DISEASE_SYMPTOM_COLUMNS),
            self._records(self.disease_speciality_rel, self._DISEASE_SPECIALITY_COLUMNS),
        )

    def remove_nodes(self):
        """Delete every node and relationship in the connected database."""
        print("Remove all nodes,  relationships")
        cypher = 'MATCH (n) DETACH DELETE n'
        self.graph.run(cypher)

    def _create_labelled_nodes(self, label, records):
        """Create one node labelled *label* per record, using the record as its properties."""
        # *label* is always one of the fixed strings passed by create_node();
        # labels cannot be supplied as Cypher parameters.
        query = f"CREATE (d:{label} $props) RETURN d"
        for record in records:
            self.graph.run(query, props=record)

    def create_node(self):
        """Wipe the database, then create all nodes and their relationships.

        Existing data is removed first via ``remove_nodes``. Progress is
        reported with ``print``.
        """
        self.remove_nodes()

        diseases, symptoms, specialities, disease_symptoms, disease_specialities = self.read_nodes()

        print("Create disease node")
        self._create_labelled_nodes("Disease", diseases)

        print("Create symptom node")
        self._create_labelled_nodes("Symptom", symptoms)

        print("Create speciality node")
        self._create_labelled_nodes("Speciality", specialities)

        print("Create disease-symptom relation")
        for link in disease_symptoms:
            self.graph.run(
                self._HAS_SYMPTOM_QUERY,
                disease_id=self._to_native(link['disease_id']),
                symptom_id=self._to_native(link['symptom_id']),
            )

        print("Create disease-speciality relation")
        for link in disease_specialities:
            self.graph.run(
                self._HAS_SPECIALITY_QUERY,
                disease_id=self._to_native(link['disease_id']),
                specialty_id=self._to_native(link['specialty_id']),
            )

    def create_rels(self, symptom_disease_rel=None):
        """Create ``HAS_SYMPTOM`` relationships by matching nodes on their names.

        Args:
            symptom_disease_rel: Iterable of records shaped like
                ``{"from": {"name": <disease>}, "to": [{"name": <symptom>}, ...]}``.
                When omitted, ``self.symptom_disease_rel`` is used if it has
                been set. The constructor does not set it (it loads the
                id-based ``disease_symptom_rel`` table instead).

        Raises:
            AttributeError: If no records are passed and the attribute is unset.
        """
        if symptom_disease_rel is None:
            symptom_disease_rel = getattr(self, "symptom_disease_rel", None)
        if symptom_disease_rel is None:
            raise AttributeError(
                "create_rels() needs name-based relationship records: pass them "
                "as symptom_disease_rel or set self.symptom_disease_rel first"
            )

        query = '''
                MATCH (d:Disease {name:$disease_name})
                MATCH (s:Symptom {name:$symptom_name})
                CREATE (d)-[:HAS_SYMPTOM]->(s)
                RETURN d,s
                '''
        for record in symptom_disease_rel:
            disease_name = record['from']['name']
            # Collect all names first so a malformed record fails before any
            # relationship for it is written.
            symptom_names = [target['name'] for target in record['to']]
            for symptom_name in symptom_names:
                self.graph.run(
                    query,
                    parameters={"disease_name": disease_name, "symptom_name": symptom_name},
                )

In [96]:
# Rebuild the graph from the CSV exports using the default connection settings.
# NOTE: create_node() starts by deleting every node and relationship in the database.
kg = KnowledgeGraph()
kg.create_node()
# kg.create_rels()

Remove all nodes,  relationships
Create disease node
Create symptom node
Create speciality node
Create disease-symptom relation
Create disease-speciality relation


In [70]:
# NOTE(review): `a` is not assigned in any visible cell; presumably left over
# from an earlier session — confirm or remove.
len(a[0])

696

In [42]:
import logging
import json

from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

def read_json(path):
    """Load and return the JSON document stored at *path*.

    Args:
        path: Location of the JSON file (anything ``open`` accepts).

    Returns:
        The decoded JSON value (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, which would mis-read non-ASCII text on some systems.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

class KnowledgeGraph:
    """Wrapper around a Neo4j driver for creating and querying graph data.

    All values coming from callers are sent to the database as Cypher
    parameters rather than being formatted into the query text.
    """

    # Minimum Sorensen-Dice similarity between a stored disease name and the
    # requested name for the node to be returned by query().
    _SIMILARITY_THRESHOLD = 0.55

    def __init__(self, uri, user, password):
        """Open a driver for *uri* authenticated as *user* / *password*."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the driver; call this when finished with the instance."""
        self.driver.close()

    def create_friendship(self, person1_name, person2_name):
        """Create two ``Person`` nodes joined by ``KNOWS`` and print their names."""
        with self.driver.session() as session:
            # Write transactions allow the driver to handle retries and transient errors
            result = session.write_transaction(
                self._create_and_return_friendship, person1_name, person2_name)
            for record in result:
                print("Created friendship between: {p1}, {p2}".format(
                    p1=record['p1'], p2=record['p2']))

    @staticmethod
    def _create_and_return_friendship(tx, person1_name, person2_name):
        """Transaction function: create both persons and return their names.

        Returns:
            A list of ``{"p1": name, "p2": name}`` dicts, one per result record.

        Raises:
            ServiceUnavailable: Re-raised after being logged with the query.
        """
        # To learn more about the Cypher syntax,
        # see https://neo4j.com/docs/cypher-manual/current/
        query = (
            "CREATE (p1:Person { name: $person1_name }) "
            "CREATE (p2:Person { name: $person2_name }) "
            "CREATE (p1)-[:KNOWS]->(p2) "
            "RETURN p1, p2"
        )
        try:
            # Running the query is inside the try so errors raised while
            # sending it are logged as well as errors while reading results.
            result = tx.run(query, person1_name=person1_name, person2_name=person2_name)
            return [{"p1": record["p1"]["name"], "p2": record["p2"]["name"]}
                    for record in result]
        # Capture any errors along with the query and data for traceability
        except ServiceUnavailable as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query, exception=exception))
            raise

    def find_person(self, person_name):
        """Print the name of every ``Person`` node whose name equals *person_name*."""
        with self.driver.session() as session:
            result = session.read_transaction(self._find_and_return_person, person_name)
            for record in result:
                print("Found person: {record}".format(record=record))

    @staticmethod
    def _find_and_return_person(tx, person_name):
        """Transaction function: return the names of ``Person`` nodes named *person_name*."""
        query = (
            "MATCH (p:Person) "
            "WHERE p.name = $person_name "
            "RETURN p.name AS name"
        )
        result = tx.run(query, person_name=person_name)
        return [record["name"] for record in result]

    @staticmethod
    def _create_instance(tx, sample):
        """Transaction function: create one ``Disease`` node from *sample*.

        Args:
            tx: The transaction to run the query in.
            sample: Mapping with ``disease``, ``url`` and ``faq`` entries plus
                an ``attributes`` list of ``{"attribute": key, "content": value}``
                items; each item becomes a property on the node.

        Returns:
            A list of ``{"d": <node name>}`` dicts, one per result record.

        Raises:
            ServiceUnavailable: Re-raised after being logged with the query.
        """
        # Property keys cannot be Cypher parameters, but a whole map can be
        # merged with ``SET d += $props``; this keeps attribute content out of
        # the query text (no quoting or injection problems).
        props = {att['attribute']: att['content'] for att in sample["attributes"]}
        query = (
            "CREATE (d:Disease { name: $name, url: $url, faq: $faq }) "
            "SET d += $props "
            "RETURN d"
        )
        try:
            result = tx.run(
                query,
                name=sample['disease'],
                url=sample['url'],
                faq=sample['faq'],
                props=props,
            )
            return [{"d": record["d"]["name"]}
                    for record in result]
        # Capture any errors along with the query and data for traceability
        except ServiceUnavailable as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query, exception=exception))
            raise

    def build_database(self, data):
        """Create a ``Disease`` node for every sample in *data* and print each name."""
        with self.driver.session() as session:
            for sample in data:
                # Write transactions allow the driver to handle retries and transient errors
                result = session.write_transaction(
                    self._create_instance, sample)
                for record in result:
                    print(f"Created disease: {record['d']}")

    @staticmethod
    def _find_and_return_disease(tx, disease_name):
        """Transaction function: return the names of ``Disease`` nodes named *disease_name*."""
        query = (
            "MATCH (p:Disease) "
            "WHERE p.name = $disease_name "
            "RETURN p.name AS name"
        )
        result = tx.run(query, disease_name=disease_name)
        return [record["name"] for record in result]

    @staticmethod
    def _query(tx, disease_name, attribute):
        """Transaction function: fetch *attribute* from diseases with a similar name.

        A node matches when ``apoc.text.sorensenDiceSimilarity`` between its
        name and *disease_name* reaches the class threshold.

        Returns:
            A list with the attribute value of every matching node.
        """
        query = """
        MATCH (a:Disease)
        WHERE apoc.text.sorensenDiceSimilarity(a.name, $disease_name) >= $threshold
        RETURN a[$attribute] AS result
        """
        result = tx.run(
            query,
            disease_name=disease_name,
            attribute=attribute,
            threshold=KnowledgeGraph._SIMILARITY_THRESHOLD,
        )
        return [record["result"] for record in result]

    def query(self, disease_name, attributes, mode='simple'):
        """Look up several attributes of the diseases matching *disease_name*.

        Args:
            disease_name: Name to compare against stored disease names.
            attributes: Iterable of property names to fetch.
            mode: Only ``'simple'`` is implemented; any other value yields ``[]``.

        Returns:
            A list with one entry per attribute, in order; each entry is the
            list of values returned for that attribute.
        """
        results = []
        if mode == 'simple':
            with self.driver.session() as session:
                for att in attributes:
                    # The lookup only reads data, so use a read transaction.
                    rows = session.read_transaction(self._query, disease_name, att)
                    results.append(rows)
        return results

In [None]:
# Connection settings for a local Neo4j instance reached over Bolt.
# When connecting to Aura, use the "neo4j+s" URI scheme instead; see
# https://neo4j.com/developer/aura-connect-driver/ for the Aura-specific URL.
scheme = "bolt"
host_name = "localhost"
port = 7687
url = f"{scheme}://{host_name}:{port}"
print(url)

user, password = "neo4j", "password"
app = KnowledgeGraph(url, user, password)

# Load the verified JSON exports.
DISEASE = read_json('verified_data/disease.json')
SYMPTOM = read_json('verified_data/symptom.json')
DISEASE_SYMPTOM = read_json('verified_data/disease_symptom.json')

In [None]:
# NOTE(review): `DATA` is not assigned in any visible cell (the previous cell
# loads DISEASE, SYMPTOM and DISEASE_SYMPTOM) — confirm which dataset is meant.
try:
    app.build_database(DATA)
finally:
    # Close the driver even when the import fails part-way, so its
    # connections are not left open.
    app.close()