In [9]:
from SPARQLWrapper import SPARQLWrapper, JSON
import spacy
from spacy.matcher import Matcher

# load the spaCy model for english language
nlp = spacy.load("en_core_web_sm")

# initialize a Matcher with the shared vocab
matcher = Matcher(nlp.vocab)


# define patterns for the Matcher
patternsOntology = {
    "risk factor": [[{"LOWER": "risk"}, {"LOWER": "factor"}, {"ENT_TYPE": "DISEASE", "OP": "?"}]],
    "diagnosis method": [[{"LOWER": "diagnosis"}, {"LOWER": "method"}, {"ENT_TYPE": "DISEASE", "OP": "?"}]],
    "disease": [[{"LOWER": "disease"}]],
    "symptom": [[{"LOWER": "symptom"}]],
    "treatment": [[{"LOWER": "treatment"}]],
    "cause": [[{"LOWER": "cause"}]]
}
for key, pattern in patternsOntology.items():
    matcher.add(key, pattern)

# process text with spaCy and Matcher
def processText(text):
    doc = nlp(text)
    matches = matcher(doc)

    foundTerms = set()
    specificDisease = None
    for matchId, start, end in matches:
        print("matchId:", matchId)
        span = doc[start:end]
        print("span: ", span)
        # check if the span contains a disease entity
        for token in span:
            print("token:", token)
            if token.ent_type_ == "DISEASE":
                specificDisease = token.text
                break
        foundTerms.add(span.text.lower())

    return foundTerms, specificDisease


# create a dynamic SPARQL query
def createSparql_query(foundTerms, specificDisease):
    baseQuery = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX hd: <http://www.semanticweb.org/sarashahin/ontologies/2023/9/Human_Disease_ontology#>
    """

    termMapping = {
        "disease": "hd:Disease",
        "symptom": "hd:Symptom",
        "treatment": "hd:Treatment",
        "cause": "hd:Cause",
        "risk factor": "hd:RiskFaktor",
        "diagnosis method": "hd:DiagnosisMethod"
    }

    queryConditions = []
    for term in foundTerms:
        if term in termMapping:
            print("term:", term)
            queryConditions.append(f"?item rdf:type {termMapping[term]}")
    
    if specificDisease:
        queryConditions.append(f'?item hd:diagnosedBy "{specificDisease}"')

    if not queryConditions:  # default condition if no specific terms are identified
        queryConditions.append("?item rdf:type hd:Disease")

    whereClause = " . ".join(queryConditions)
    print("whereClause: ", whereClause)
    query = f"{baseQuery}SELECT DISTINCT ?item WHERE {{ {whereClause} }} LIMIT 10"

    return query

# run SPARQL query and return results
def executeSparql_query(query):
    sparql = SPARQLWrapper("http://localhost:3030/Human-Disease-Ontology/sparql")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    try:
        results = sparql.query().convert()
        return results
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# main function to handle user queries
def main():
    userQuery = input("Enter your query in natural language: ")
    print('userQuery: ', userQuery)
    foundTerms, specificDisease = processText(userQuery)
    print("foundTerms :", foundTerms)
    
    sparqlQuery = createSparql_query(foundTerms, specificDisease)
    print(f"SPARQL query to execute:\n{sparqlQuery}")
    results = executeSparql_query(sparqlQuery)
    print("results: ", results)
    if results and results["results"]["bindings"]:
        print("Results found:")
        for result in results["results"]["bindings"]:
            print(result["item"]["value"])
    else:
        print("No results returned or an error occurred.")

if __name__ == "__main__":
    main() 
    
    
    
    
# What are the risk factor for Asthma?
# Give me diagnosis method for Diabetes
# What are some diseases?
# disease has names?
# List all symptom.
# List diagnosis method 
# Show treatment.
# Give me cause.
# What are the risk factor?
# Give me diagnosis method
    

Enter your query in natural language: What are some diseases?
userQuery:  What are some diseases?
foundTerms : set()
whereClause:  ?item rdf:type hd:Disease
SPARQL query to execute:

    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX hd: <http://www.semanticweb.org/sarashahin/ontologies/2023/9/Human_Disease_ontology#>
    SELECT DISTINCT ?item WHERE { ?item rdf:type hd:Disease } LIMIT 10
results:  {'head': {'vars': ['item']}, 'results': {'bindings': [{'item': {'type': 'uri', 'value': 'http://www.semanticweb.org/sarashahin/ontologies/2023/9/Human_Disease_ontology#Diabetes'}}, {'item': {'type': 'uri', 'value': 'http://www.semanticweb.org/sarashahin/ontologies/2023/9/Human_Disease_ontology#Alzheimer'}}, {'item': {'type': 'uri', 'value': 'http://www.semanticweb.org/sarashahin/ontologies/2023/9/Human_Disease_ontology#Osteoporosis'}}, {'item': {'type': 'uri', 'value': 'http://www.semanticweb.org/sarashahin/ontolog