# Knowledge Extraction Using NER Relationships

In [None]:
#!pip install transformers

In [None]:
#!git clone https://github.com/thunlp/OpenNRE.git

In [None]:
#!pip install -r OpenNRE/requirements.txt

In [None]:
#!python OpenNRE/setup.py install

In [None]:
#!pip install neo4j

In [None]:
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable
import json

# Initialize the graph DB and delete all the nodes and relationships
graph = GraphDatabase.driver(
    "neo4j+s://c22b0b3c.databases.neo4j.io:7687",
    auth=("neo4j", "akterzObCsJTwulDEG7AFlCkDcMyGC7RblEnmXbg7aE")
)

In [3]:
query = (
        "MATCH (all_nodes)"
        "OPTIONAL MATCH (all_nodes)-[all_rels]->()"
        "DELETE all_nodes, all_rels"
    )
with graph.session() as session:
    result = session.run(query)

In [2]:
import transformers
import OpenNRE.opennre as opennre

model = opennre.get_model('wiki80_bert_softmax')

2024-04-27 21:09:14.576491: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-04-27 21:09:48,387 - root - INFO - Loading BERT pre-trained checkpoint.
Some weights of the model checkpoint at /Users/sususan/.opennre/pretrain/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassificatio

In [6]:
import pandas as pd
FAQ = pd.read_csv("./corpus/DBS FAQ.csv")
FAQ.head()

Unnamed: 0,Id,Section,SubSection,Label,Question,Answer
0,1,Bank Cards,Applying for a DBS Debit Card or Credit Card,eligible_debit_card,Am I eligible for a debit card?,You must be at least 16 years old and have a P...
1,2,Bank Cards,Applying for a DBS Debit Card or Credit Card,eligible_credit_card,Am I eligible for a credit card?,If you're Singaporean or have permanent reside...
2,3,Bank Cards,Applying for a DBS Debit Card or Credit Card,apply_card,What do I need to apply?,The type of document you’ll need varies. Click...
3,4,Bank Cards,Applying for a DBS Debit Card or Credit Card,apply_card_duration,How long does it take?,Please allow 7 working days for processing. Ap...
4,5,Bank Cards,Applying for a DBS Debit Card or Credit Card,apply_card_turned dow,Why was my application turned down?,All applications go through an approval proces...


# Create Constraint

In [9]:
with graph.session() as session:
    result = session.run('CREATE CONSTRAINT IF NOT EXISTS FOR (q:Question) REQUIRE q.Question IS UNIQUE;')

with graph.session() as session:
    result = session.run('CREATE CONSTRAINT IF NOT EXISTS FOR (a:Answer) REQUIRE a.Answer IS UNIQUE;')

with graph.session() as session:
    result = session.run('CREATE CONSTRAINT IF NOT EXISTS FOR (f:FAQ) REQUIRE f.Id IS UNIQUE;')

with graph.session() as session:
    result = session.run('CREATE CONSTRAINT IF NOT EXISTS FOR (s:Section) REQUIRE s.Section IS UNIQUE;')

with graph.session() as session:
    result = session.run('CREATE CONSTRAINT IF NOT EXISTS FOR (ss:SubSection) REQUIRE ss.SubSection IS UNIQUE;')

with graph.session() as session:
    result = session.run('CREATE TEXT INDEX question IF NOT EXISTS FOR (f:FAQ) ON f.Question;')

# Create Relationship

In [7]:
FAQ_import_query = """
UNWIND $data AS row
MERGE (f:FAQ {Id: row.Id})
MERGE (q:Question {Label: row.Question}) - [:PART_OF]->(f)
MERGE (a:Answer {Label: row.Answer}) - [:PART_OF]->(f)
SET f += apoc.map.clean(row, ["Id", "Section", "SubSection"], [])
WITH f,a,q,row.Section AS Section, row.SubSection AS SubSection, row.Question AS Question, row.Answer AS Answer
MERGE (s:Section {Label: Section})
MERGE (ss:SubSection {Label: SubSection})
MERGE (ss)-[:SUBSECTION_OF]->(s)
MERGE (f)-[:HAS_SUBSECTION]->(ss)
MERGE (q)-[:QUESTION_OF]->(a)
MERGE (a)-[:ANSWER_OF]->(q)
"""
with graph.session() as session:
    result = session.run(FAQ_import_query, {"data": FAQ.to_dict("records")})

# Knowledge Extraction

In [11]:
import networkx as nx
import matplotlib.pyplot as plt
import en_core_web_sm
import en_core_web_lg

#nlp = en_core_web_sm.load()
nlp = en_core_web_lg.load()

def knowledgeExtraction(questions,answers, FAQIDs):
   
    exist_ent = {}
    exist_relationship = []
    
    for i in range(0, len(answers)):
        question = questions[i]
        answer = answers[i]
        doc = nlp(answer)
        names_of_entities = []
        entities = []
        
        for ent in doc.ents:
            if ent.text not in names_of_entities:
                names_of_entities.append(ent.text)
                entities.append(ent)
    
        for ent in entities:
            if exist_ent.get(ent.text) is None:
                exist_ent[ent.text] = ent.text
                query = (
                "MATCH (FAQ:FAQ {Id: $FAQ_Id})"
                #"CREATE (node:Entity {name: $name, type: $type})- [:MENTIONS] -> (FAQ)"
                "MERGE (node: Entity {name: $name, type: $type}) - [:MENTIONS] -> (FAQ)" # Create Node Entity
                "RETURN node"
                )
                with graph.session() as session:
                    result = session.run(query, name=ent.text, type = ent.label_,FAQ_Id = FAQIDs[i])
                print("create new node with name as {0} and with type as {1} and sentiment with {2}".format(ent.text, ent.label_, ent.sentiment))
    
        for i in range(len(entities)):
            for j in range(i + 1, len(entities)):
                text_i = entities[i].text
                text_j = entities[j].text
                loc_h = answer.find(text_i)
                loc_t = answer.find(text_j)
                result = model.infer({'text': answer, 'h': {'pos': (loc_h, loc_h + len(text_i))},
                                    't': {'pos': (loc_t, loc_t + len(text_j))}})
                (rel, confidence) = result[0].replace(' ', '_'), result[1]
    
                record = (text_i, text_j, rel, confidence)
    
                result_rev = model.infer({'text': answer, 'h': {'pos': (loc_t, loc_t + len(text_j))},
                                    't': {'pos': (loc_h, loc_h + len(text_i))}})
                (rel_rev, confidence) = result_rev[0].replace(' ', '_'), result_rev[1]
    
                record_rev = (text_j, text_i, rel_rev, confidence)
                if record not in exist_relationship:
                    exist_relationship.append(record)
                    if record[3] > 0.8:
                        query = (
                            "MATCH (n1 {name: $name1})"
                            "MATCH (n2 {name: $name2})"
                            "MERGE (n1) - [r:"+record[2]+"] -> (n2)"
                            "RETURN n1, n2, r"
                        )
                        #print(query, name1=exist_ent[text_i], name2=exist_ent[text_j])
                        with graph.session() as session:
                            result = session.run(query, name1=exist_ent[text_i], name2=exist_ent[text_j])
                        print("create new relationship {0} - {1} -> {2} with confidence of {3}".format(record[0], record[2], record[1], record[3]))
    
                if record_rev not in exist_relationship:
                    exist_relationship.append(record_rev)
                    if record_rev[3] > 0.8:
                        query = (
                            "MATCH (n1 {name: $name1})"
                            "MATCH (n2 {name: $name2})"
                            "MERGE (n1) - [r:"+record_rev[2]+"] -> (n2)"   
                            "RETURN n1, n2, r"
                        )
                        print(exist_ent[text_j].replace(' ' , '_') + ' ' + exist_ent[text_i] + ' ' + record_rev[2])
                        with graph.session() as session:
                            result = session.run(query, name1=exist_ent[text_j], name2=exist_ent[text_i])
                        print("create new relationship {0} - {1} -> {2} with confidence of {3}".format(record_rev[0], record_rev[2], record_rev[1], record_rev[3]))


In [12]:
questions = []
answers = []
FAQIDs = []
for i in range(len(FAQ)):
    questions.append(FAQ.loc[i].Question)
    answers.append(FAQ.loc[i].Answer)
    FAQIDs.append(FAQ.loc[i].Id)

In [13]:
knowledgeExtraction(questions,answers,FAQIDs)

create new node with name as at least 16 years old and with type as DATE and sentiment with 0.0
create new node with name as DBS Savings Plus Account and with type as ORG and sentiment with 0.0
create new node with name as DBS Autosave Account and with type as ORG and sentiment with 0.0
create new node with name as DBS Current Account and with type as ORG and sentiment with 0.0
create new node with name as href="https://www.dbs.com.sg and with type as ORG and sentiment with 0.0
create new node with name as at least 6 months and with type as DATE and sentiment with 0.0
create new relationship DBS Savings Plus Account - followed_by -> DBS Autosave Account with confidence of 0.9901670217514038
DBS_Autosave_Account DBS Savings Plus Account followed_by
create new relationship DBS Autosave Account - followed_by -> DBS Savings Plus Account with confidence of 0.9828715324401855
create new relationship DBS Savings Plus Account - followed_by -> DBS Current Account with confidence of 0.9697880148

In [186]:
query = (
        "MATCH (f)-[:HAS_SUBSECTION]->(SubSection {name:'DBS Online Rewards'}) RETURN f.Question AS Question, f.Answer AS Answer "
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print('Question' + ': '+result['Question'])
        print('Answer' + ': '+result['Answer'])

Question: What is DBS Online Rewards?
Answer: DBS Online rewards website allows you to check your DBS points and redeem rewards online.
Question: How do I login to redeem?
Answer: Simply enter your digibank user ID and PIN to login.

For Daily$ rebates, please login to <a href="https://internet-banking.dbs.com.sg/">DBS iBanking</a>  to redeem your Daily$ rebates
Question: I am not an existing DBS iBanking customer; can I still redeem my points online?
Answer: No. You will need to register for a digibank user ID to perform DBS rewards redemption. Click <a href="https://internet-banking.dbs.com.sg/ibAPL/Welcome" >here</a>  to register now.

Question: I am not a DBS Cardmember, can I still access the DBS Online Rewards website?
Answer: Yes. You can still browse the website to gather information. However, you need to be a DBS credit cardmember and digibank user to login. To sign up for a DBS credit card, please visit <a href="https://www.dbs.com.sg/personal/cards/default.page" >www.dbs.com

In [187]:
query = (
        "MATCH (f {Question:'What is DBS Online Rewards?'}) RETURN f.Question AS Question, f.Answer AS Answer "
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print('Question' + ': '+result['Question'])
        print('Answer' + ': '+result['Answer'])

Question: What is DBS Online Rewards?
Answer: DBS Online rewards website allows you to check your DBS points and redeem rewards online.


In [188]:
query = (
        "MATCH (ss)-[:SUBSECTION_OF]->(Section {name:'Bank Cards'}) RETURN ss.name AS SubSection"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print('SubSection' + ': '+result['SubSection'])

SubSection: Applying for a DBS Debit Card or Credit Card
SubSection: DBS Reward Points / DBS Online Rewards
SubSection: DBS Online Rewards
SubSection: Paying Your Credit Card Balance
SubSection: Charge Disputes
SubSection: Cancelling and Replacing Cards, Changing Your PIN
SubSection: Upgrading or Changing Cards
SubSection: Cash Advance and Funds Transfer
SubSection: Enhancing Card Usage Security
SubSection: Card Activation
SubSection: Credit Card
SubSection: Credit Card 
SubSection: Cancel Card
SubSection: Card Upgrade
SubSection: Overlimit Suspension


In [275]:
query = (
        #"MATCH (ORG)-[:instance_of]->(ORG {name: 'DBS Credit/Debit Card'}) RETURN ORG.name AS name"
        "MATCH p=()-[:instance_of]->(ORG {name: 'DBS Credit/Debit Card'}) RETURN ORG.name AS name"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print('Name' + ': '+result['name'])

Name: DBS Credit/Debit Card
Name: DBS Credit/Debit Card
Name: DBS Credit/Debit Card


In [282]:
query = (
    "MATCH p=()-[:PART_OF]->(FAQ {Id : 1}) RETURN p LIMIT 25;"
)

# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)
    print(results)
    for result in results:
        print(result['p'])

<neo4j._sync.work.result.Result object at 0x19d405c10>
<Path start=<Node element_id='4:5e2359e9-89fa-457b-b603-d6efe892f544:42' labels=frozenset({'Question'}) properties={'Label': 'Am I eligible for a debit card?'}> end=<Node element_id='4:5e2359e9-89fa-457b-b603-d6efe892f544:41' labels=frozenset({'FAQ'}) properties={'Answer': 'You must be at least 16 years old and have a POSB Savings Account, DBS Savings Plus Account, DBS Autosave Account or DBS Current Account. To open one of these accounts, click <a href="https://www.dbs.com.sg/personal/deposits/bank-with-ease/addon-casa" >here</a>\n\nIf you are a foreigner, please apply for a debit card at any of our branches. You will need to present your passport and an employment pass that is valid for at least 6 months.', 'Label': 'eligible_debit_card', 'Question': 'Am I eligible for a debit card?', 'Id': 1}> size=1>
<Path start=<Node element_id='4:5e2359e9-89fa-457b-b603-d6efe892f544:399' labels=frozenset({'Answer'}) properties={'Label': 'You 

In [None]:
results

In [21]:
query = (
    """MATCH p=(e:Entity)-[:MENTIONS]->(f:FAQ) 
    WHERE (toLower(e.name)='platinum') 
    RETURN f.Label AS Label, f.Question AS Question, f.Answer AS Answer LIMIT 25;"""
)
# Execuate above query to modify neo4j db:
with graph.session() as session:
    results = session.run(query)
    for result in results:
        print("Label: " + result['Label'])
        print("Question: " + result['Question'])
        print("Answer: " + result['Answer'])

Label: eligible_credit_card
Question: Am I eligible for a credit card?
Answer: If you're Singaporean or have permanent residency and you're over 21 years of age, you can apply for a DBS <a href="https://www.dbs.com.sg/personal/cards/credit-cards/default.page"> Credit Card </a>. You'll need to earn at least S$30,000 a year. For some cards, especially Platinum cards, you'll need to earn more so check the Application Details for the card you're interested in.

If you're a foreigner with a valid employment pass, you'll need to earn a least S$45,000 a year unless otherwise stated.

