# Querry

In [32]:
# Import the below packages.
import spacy
nlp = spacy.load('en_core_web_sm')
import textacy
from textacy.extract import subject_verb_object_triples
import requests
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from py2neo import Database, Graph, Node, Relationship
import re
import os

In [33]:
#Preprocess the input text:

# Function returns the negation handled word if it is presend in the appos dictionary
# Else returns the word itself
def negationHandling(word):
    if word in appos:
        return appos[word]
    else:
        return word
    
# Check if a word is a Stopword
# Stopword is a word that is commonly present in most of the documents and does not affect the model
def isNotStopWord(word):
    return word not in stopwords.words('english')


def preprocessingText(text):
    text = re.sub("[\(\[].*?[\)\]]", "", text)
    #sentences = nltk.sent_tokenize(text)
    tokens = []
    temp = ""
    
    #for sentence in sentences:
    words = nltk.word_tokenize(text)

    #Converting to LowerCase
    #words = map(str.lower, words)

    # Remove stop words
    words = filter(lambda x: isNotStopWord(x), words)

    # Removing punctuations except '<.>/<?>/<!>'
    punctuations = '"#$%&\'()*+,-/:;<=>@\\^_`{|}~'
    words = map(lambda x: x.translate(str.maketrans('', '', punctuations)), words)

    # Remove empty strings
    words = filter(lambda x: len(x) > 0, words)

    tokens = tokens + list(words)
    temp = ' '.join(word for word in tokens)
        
    return temp

In [34]:
# Store the entities and its type/annotations in a dictionary for using it in KG construction
# Extract the SVO (Subject Verb Object) triples suing dependency parsing and save it in the svos list of tuples
# Also save the labels of the SVO as a separate list

def entity_svo_extract(text):
    text = preprocessingText(text)
    final_svos = []
    final_text_svos = []
    entity_dict = {}
    svo_labels = []
    #for i, text in enumerate(TEXTS):
    doc = nlp(text)

    for ent in doc.ents:
        #print(f"ent in doc.ents: {ent}")
        if ent not in entity_dict.keys():
            #print(f"ent: {ent}")
            entity_dict[str(ent)] = ent.label_ 
            #print(f"ent label: {ent.label_}")

    svos = list(subject_verb_object_triples(doc))
    #print(f"svos: {svos}")
    svos_text = [(str(x[0]).strip(), str(x[1]).strip(), str(x[2]).strip()) for x in svos]
    #print(f"svos text: {svos_text}")
    final_svos = final_svos + svos
    final_text_svos = final_text_svos + svos_text
    print(f"final text svos: {final_text_svos}")

    for svo in final_text_svos:
        tup = ['Object', 'Object']
        if(svo[0] in entity_dict.keys()):
            tup[0] = entity_dict[svo[0]]

        if(svo[2] in entity_dict.keys()):
            tup[1] = entity_dict[svo[2]]
        svo_labels.append(tuple(tup))
        print(tup)
    return svo_labels[0][0],final_text_svos[0][0],final_text_svos[0][1],svo_labels[0][1],final_text_svos[0][2]

In [35]:
def querry_graph(graph, f_node, f_name, rel, s_node, s_name):
    query1 = '''
        MATCH (f:{fnode})-[r:{rl}]-(m) 
        where f.name = '{fname}' 
        RETURN m
    '''
    query2 = '''
        MATCH path = (n:{fnode})-[r]-(m)
        where n.name = '{fname}'
        RETURN path,r,m
    '''
    full_query = query2.format(fnode=f_node, fname=f_name, rl=rel)
    nodes = graph.run(full_query)            
    #names1 = [node['m']['name'] for node in nodes]
    #print(names1)
    names = [node['r'] for node in nodes]
    print(f"Based on your question probable answers are: {names}")

In [36]:
#graph.delete_all()

In [37]:
#graph = Graph("neo4j@bolt://localhost:7687",password="1432")
def add_update_graph(graph, f_node, f_name, rel, s_node, s_name):
    query = '''
        MERGE (f:{fnode})-[r:{rl}]-(s:{snode})
        ON CREATE SET f.name='{fname}' 
        ON CREATE SET s.name='{sname}'
        RETURN f,r,s
    '''
    full_query = query.format(fnode=f_node, fname=f_name, rl=rel, snode=s_node, sname=s_name)
    nodes = graph.run(full_query)            
    names = [node['r'] for node in nodes]
    print(f"Your answer is updated in database as: {names}")
    #print("Your answer is updated")
#user_input = input("Please enter your input: ")
#f_node, f_name, rel, s_node, s_name = entity_svo_extract(user_input)
#add_update_graph(graph, f_node, f_name, rel, s_node, s_name)

In [39]:
#Connect to the graph
graph = Graph("neo4j@bolt://localhost:7687",password="1432")
#Getting question from the user
question = input("Please enter the question: ")
#How hydrogen bonds show features
#oil production has fallen in Alaska
#How oil production is distributed in Alaska
f_node, f_name, rel, s_node, s_name = entity_svo_extract(question)
querry_graph(graph, f_node, f_name, rel, s_node, s_name)
print("Hope you are sattisfied with the answer, Help to choose options below")
option1 = input("Are you sattisfied with the answer? y/n : ")
if option1 != 'y':
    option2 = input("Do you want to update the answer? y/n : ")
    if option2 == 'y':
        user_input = input("Please enter your input: ")
        f_node, f_name, rel, s_node, s_name = entity_svo_extract(user_input)
        print("Knowledge Graph will be added/updated with below input:")
        print(f"f_node:{f_node} fn_name:{f_name} relation:{rel} s_node:{s_node} sn_name:{s_name}")
        option3 = input("Please enter 'y' for your final confirmation: ")
        if option3 == 'y':
            add_update_graph(graph, f_node, f_name, rel, s_node, s_name)
        else:
            print("You have opted not to add/update answer suggested by you, Thanks for visiting")
    else:
        print("Thanks for your feedback, will work to improve the system")
else:
    print("Thanks for visiting the page, Hope you enjoyed...")

Please enter the question: How petroleum extraction is distributed in India?
final text svos: [('petroleum extraction', 'distributed', 'India')]
['Object', 'GPE']
Based on your question probable answers are: []
Hope you are sattisfied with the answer, Help to choose options below
Are you sattisfied with the answer? y/n : n
Do you want to update the answer? y/n : y
Please enter your input: petroleum extraction is unevenly distributed in India
final text svos: [('petroleum extraction', 'distributed', 'India')]
['Object', 'GPE']
Knowledge Graph will be added/updated with below input:
f_node:Object fn_name:petroleum extraction relation:distributed s_node:GPE sn_name:India
Please enter 'y' for your final confirmation: y
Your answer is updated in database as: [(oil production)-[:distributed {}]->(Alaska)]
