In [None]:
!pip install neo4j



In [None]:
import os

from huggingface_hub import InferenceClient

# Hugging Face Hub model identifiers used for text generation.
llama = "meta-llama/Llama-2-70b-chat-hf"
bloom = "bigscience/bloom"

# Read the API token from the environment so it is never saved with the
# notebook; falls back to the previous empty-string value when HF_TOKEN is unset.
token = os.environ.get("HF_TOKEN", "")

# One inference client per model, both authenticated with the same token.
llama_client = InferenceClient(model=llama, token=token)
bloom_client = InferenceClient(model=bloom, token=token)

In [None]:
import neo4j
from neo4j import GraphDatabase

# Connection settings are taken from the environment so credentials are not
# saved with the notebook. Each one falls back to the value that used to be
# hard-coded here, so behaviour is unchanged when the variables are unset.
url = os.environ.get("NEO4J_URI", "neo4j+s://ef25c60e.databases.neo4j.io:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")

# Module-level driver; retrieve_triples runs its generated queries through it.
graphDB_Driver = GraphDatabase.driver(url, auth=(username, password))

In [None]:
# Initial schema vocabulary as comma-separated strings.
# NOTE: both names are reassigned with longer lists in a later cell of this notebook.
rel_str = ', '.join((
    'ActTowards', 'CommunicatesWith', 'InteractsWith', 'Supplies', 'Demands',
    'Acquires', 'Transforms', 'Decides', 'Assesses', 'Solves', 'Develops',
    'Impacts', 'Manages', 'Moves', 'Happens',
))
node_str = ', '.join((
    'Person', 'Company', 'Organization', 'Facility', 'Location',
    'GeoPoliticalEntity', 'Time', 'Date', 'Event', 'Product', 'Regulation',
))

In [None]:
import re

def find_match_query(input_string):
    """Extract the first Cypher ``MATCH`` statement from LLM-generated text.

    The statement starts at the first occurrence of ``MATCH`` and ends at the
    first period or newline that follows it, or at the end of the text when
    neither is present. Because a period terminates the statement, a trailing
    property access such as ``RETURN company.name`` is cut back to
    ``RETURN company``.

    Args:
        input_string: Raw text that may contain a Cypher query.

    Returns:
        The statement with trailing whitespace, then trailing periods, then
        trailing commas removed, or ``None`` if the text is empty or contains
        no ``MATCH``.
    """
    if not input_string:
        return None

    # Lazy match up to the nearest terminator. `\Z` covers generations that
    # stop right after the query without a closing newline or period, which
    # previously made the whole query go undetected.
    found = re.search(r'MATCH.*?(?:[.\n]|\Z)', input_string, re.DOTALL)
    if found is None:
        return None
    return found.group().rstrip().rstrip('.').rstrip(',')


# def find_match_query_ver2(input_string):
#     pattern = re.compile(r'MATCH.*?[.\n]', re.DOTALL)
#     matches = pattern.findall(input_string)

#     if not matches:
#         return None

#     chosen_match = min(matches, key=len)
#     return chosen_match.rstrip().rstrip('.').rstrip(',')

In [None]:
def output_triples(output_query, record_names):
    """Turn query results into (subject, predicate, object) triples.

    The query is expected to contain a pattern of the form
    ``(s:Label)-[:Rel]->(o:Label) RETURN var``. Whichever element of the
    pattern ``var`` names is replaced, in turn, by each entry of
    ``record_names``; the other two elements are kept as written in the query.

    Args:
        output_query: Cypher query text to parse.
        record_names: Values returned by the database for ``var``.

    Returns:
        A list with one triple per record name, or ``None`` when the query
        does not contain the pattern or ``var`` matches none of its elements.
    """
    triple_pattern = r'\((\w+):(\w+)\)-\[:(\w+)\]->\((\w+):(\w+)\)\s*RETURN (\w+)'
    found = re.search(triple_pattern, output_query)
    if found is None:
        return None

    # Groups 2 and 5 (the node labels) are captured by the pattern but unused.
    subject, predicate, obj, returned = found.group(1, 3, 4, 6)
    template = [subject, predicate, obj]

    # Try the subject slot first, then the object, then the predicate.
    for slot in (0, 2, 1):
        if template[slot] == returned:
            return [
                tuple(template[:slot] + [name] + template[slot + 1:])
                for name in record_names
            ]
    return None

In [None]:
def read_file_and_process_sentences(file_path):
    """Read a UTF-8 text file and return its non-blank lines.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list with one entry per line that is non-empty after stripping
        leading and trailing whitespace, in file order.
    """
    # The original `open(...)` call had a stray closing parenthesis, which
    # made the whole cell a SyntaxError.
    with open(file_path, 'r', encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]

In [None]:
# Load the questions: one entry per non-blank line of questions.txt.
questions = read_file_and_process_sentences("questions.txt")
# Last expression of the cell, so the number of loaded questions is displayed.
len(questions)

25

In [None]:
# Schema vocabulary quoted in the prompt built by retrieve_triples: relationship
# types and node labels as comma-separated strings. These assignments replace
# the shorter lists given to the same names earlier in the notebook.
rel_str = ', '.join((
    'ActTowards', 'CommunicatesWith', 'InteractsWith', 'Supplies', 'Demands',
    'Acquires', 'Transforms', 'Decides', 'Assesses', 'Solves', 'Develops',
    'Impacts', 'Manages', 'Moves', 'Happens', 'Produce', 'is',
))
node_str = ', '.join((
    'Person', 'Company', 'Organization', 'Facility', 'Location', 'Country',
    'GeoPoliticalEntity', 'Time', 'Date', 'Event', 'Product', 'Regulation',
    'Journal', 'Paper', 'Number', 'Problem',
))

In [None]:
def retrieve_triples(question, model, node=node_str, rel=rel_str, api_token=token, max_new_token=100):
    """Answer a natural-language question with triples from the Neo4j graph.

    A few-shot prompt asks the model for a Cypher query, the first ``MATCH``
    statement is extracted from the generation, ``.name`` is appended to it,
    the query is run through ``graphDB_Driver`` and the returned values are
    turned into triples by ``output_triples``.

    Args:
        question: Question in natural language.
        model: Model identifier passed to ``InferenceClient``.
        node: Comma-separated node labels quoted in the prompt. The default is
            the value of ``node_str`` at the time this cell is executed.
        rel: Comma-separated relationship types quoted in the prompt. The
            default is the value of ``rel_str`` at the time this cell is executed.
        api_token: Token passed to ``InferenceClient``.
        max_new_token: Generation budget, forwarded as ``max_new_tokens``.

    Returns:
        A list of ``(subject, predicate, object)`` tuples, or ``None`` when the
        generation contains no ``MATCH`` statement or ``output_triples`` cannot
        parse the query.

    Errors raised by the inference client or by the database driver (for
    example on invalid generated Cypher) are not caught here.
    """
    # Derive the count from the list actually quoted, so the prompt cannot
    # contradict itself (it used to claim 15 types regardless of `rel`).
    rel_count = len([name for name in rel.split(',') if name.strip()])

    # The prompt now honours the `node` / `rel` arguments instead of reading
    # the module-level strings, and the first example's explanation names the
    # company the example is actually about.
    prompt = f'''Cypher is a query language designed for querying graph databases. It was initially developed by Neo4j, and is now an open standard for various graph databases.

There are pre-defined node labels in our Neo4j database, namely {node}. There are also {rel_count} pre-defined relationship types, namely {rel}.

Here's an example of a Cypher query for a natural language query "What companies are the suppliers of Louis Vuitton?":
MATCH (company:Company)-[:Supplies]->(Louis_Vuitton:Company) RETURN company
This query retrieves companies from the class "Company" that are connected with Louis Vuitton by a "Supplies" relationship, and then returns those companies.

Here's another example for query "Who does China trade with?":
MATCH (China:Country)-[:InteractsWith]->(other_country:Country) RETURN other_country
This query retrieves countries from the class "Country" that are connected with China by a "InteractsWith" relationship, as the predicate "trade with" closely aligns with the pre-defined relationship type "InteractsWith".

Never forget to return values in a query. Using the node labels and the relationship types in the database, for the query in natural language "{question}," the corresponding Cypher query should be '''

    client = InferenceClient(model=model, token=api_token)
    output_text = client.text_generation(prompt, max_new_tokens=max_new_token)

    match_query = find_match_query(output_text)
    if match_query is None:
        # No MATCH statement in the generation: report "no triples" the same
        # way output_triples does, instead of failing on `None + '.name'`.
        return None
    # find_match_query stops at the first period, so the RETURN clause ends on
    # a bare variable; ask for its `name` property.
    output_query = match_query + '.name'

    # NOTE(review): the Cypher text comes straight from the model and is
    # executed unchecked. Connect with a read-only database user so a generated
    # write clause cannot modify the graph.
    db_records, _summary, keys = graphDB_Driver.execute_query(output_query, database_="neo4j")
    record_names = [record[keys[0]] for record in db_records]

    return output_triples(output_query, record_names)

In [None]:
# Example run: generate a Cypher query for one question with BLOOM and show the triples.
question = 'What companies are the developers of Walmart?'
# `model` stays bound and is reused by the following cell.
model = bloom

retrieve_triples(question=question, model=model)

[('Walmart', 'Develops', 'Walmart'),
 ('Company', 'Develops', 'Walmart'),
 ('VCF', 'Develops', 'Walmart'),
 ('suppliers', 'Develops', 'Walmart'),
 ('Wilhelm', 'Develops', 'Walmart'),
 ('Apple', 'Develops', 'Walmart'),
 ('Apple', 'Develops', 'Walmart'),
 ('SCN', 'Develops', 'Walmart'),
 ('SCN', 'Develops', 'Walmart'),
 ('IBM', 'Develops', 'Walmart'),
 ('A', 'Develops', 'Walmart'),
 ('Publ', 'Develops', 'Walmart'),
 ('Amazon', 'Develops', 'Walmart'),
 ('Company', 'Develops', 'Walmart'),
 ('Company', 'Develops', 'Walmart'),
 ('Company', 'Develops', 'Walmart'),
 ('CSR', 'Develops', 'Walmart'),
 ('supply_chain_management', 'Develops', 'Walmart'),
 ('Ferrari', 'Develops', 'Walmart'),
 ('Company', 'Develops', 'Walmart'),
 ('SCRES', 'Develops', 'Walmart'),
 ('Walmart', 'Develops', 'Walmart'),
 ('Company', 'Develops', 'Walmart'),
 ('Walmart', 'Develops', 'Walmart'),
 ('developing', 'Develops', 'Walmart'),
 ('Walmart', 'Develops', 'Walmart'),
 ('Company', 'Develops', 'Walmart'),
 ('CLSCs', 'Deve

In [None]:
# print() makes a None result visible; a bare None as the cell's last expression displays nothing.
print(retrieve_triples(question="What are supply chain challenges when running a food business? ", model=model))

None
