In [7]:
from langchain.chains import GraphCypherQAChain
from langchain_community.graphs import Neo4jGraph
from neo4j import GraphDatabase
from langchain_openai import ChatOpenAI
import os
from dotenv import load_dotenv, find_dotenv
from langchain_community.chat_models import ChatOllama
import json
from openai import OpenAI
import openai
from collections import defaultdict
import tiktoken

# openai.proxy = {"http": OPENAI_BASE_URL}

_ = load_dotenv(find_dotenv())
# load_dotenv()

NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
# OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_PROXY_KEY = os.getenv('OPENAI_API_KEY')
OPENAI_BASE_URL = os.getenv('OPENAI_BASE_URL')

graph = Neo4jGraph(
    url=NEO4J_URI, 
    username=NEO4J_USERNAME, 
    password=NEO4J_PASSWORD
)

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))



# def get_schema(tx):
#     schema = {"nodes": [], "relationships": []}

#     # Get node labels and properties
#     result = tx.run("CALL db.schema.nodeTypeProperties()")
#     node_properties = {}
#     for record in result:
#         labels = record["nodeLabels"]
#         property_name = record["propertyName"]
#         labels_tuple = tuple(labels)  # Convert labels list to a tuple to use it as a key
#         if labels_tuple not in node_properties:
#             node_properties[labels_tuple] = []
#         node_properties[labels_tuple].append(property_name)

#     for labels, properties in node_properties.items():
#         node = {
#             "labels": labels,
#             "properties": properties
#         }
#         schema["nodes"].append(node)

#     # Get relationship types and properties
#     result = tx.run("CALL db.schema.relTypeProperties()")
#     rel_properties = {}
#     for record in result:
#         rel_type = record["relType"]
#         property_name = record["propertyName"]
#         if rel_type not in rel_properties:
#             rel_properties[rel_type] = []
#         rel_properties[rel_type].append(property_name)

#     for rel_type, properties in rel_properties.items():
#         relationship = {
#             "type": rel_type,
#             "properties": properties
#         }
#         schema["relationships"].append(relationship)

#     return schema

# with driver.session() as session:
#     schema = session.read_transaction(get_schema)

def get_schema(driver):
    schema = {
        "nodes": {
            "labels": [],
            "properties": defaultdict(lambda: defaultdict(set))
        },
        "relationships": {
            "types": [],
            "properties": defaultdict(lambda: defaultdict(set))
        }
    }

    with driver.session() as session:
        # Get node labels and properties
        result = session.run("""
        CALL db.schema.nodeTypeProperties()
        YIELD nodeLabels, propertyName
        RETURN nodeLabels, propertyName
        """)
        for record in result:
            for label in record['nodeLabels']:
                if label not in schema['nodes']['labels']:
                    schema['nodes']['labels'].append(label)
                schema['nodes']['properties'][label][record['propertyName']]

        # Get relationship types and properties
        result = session.run("""
        CALL db.schema.relTypeProperties()
        YIELD relType, propertyName
        RETURN relType, propertyName
        """)
        for record in result:
            rel_type = record['relType']
            if rel_type not in schema['relationships']['types']:
                schema['relationships']['types'].append(rel_type)
            schema['relationships']['properties'][rel_type][record['propertyName']]

        # Get unique property values for nodes
        for label in schema['nodes']['labels']:
            for prop in schema['nodes']['properties'][label]:
                result = session.run(
                    "MATCH (n) WHERE any(label IN labels(n) WHERE label = $label) "
                    "RETURN DISTINCT n[$prop] AS value",
                    label=label, prop=prop
                )
                schema['nodes']['properties'][label][prop] = [record['value'] for record in result if record['value'] is not None]

        # Get unique property values for relationships
        for rel_type in schema['relationships']['types']:
            for prop in schema['relationships']['properties'][rel_type]:
                result = session.run(
                    "MATCH ()-[r]->() WHERE type(r) = $rel_type "
                    "RETURN DISTINCT r[$prop] AS value",
                    rel_type=rel_type, prop=prop
                )
                schema['relationships']['properties'][rel_type][prop] = [record['value'] for record in result if record['value'] is not None]

    # Convert defaultdict to regular dict for JSON serialization
    schema['nodes']['properties'] = dict(schema['nodes']['properties'])
    schema['relationships']['properties'] = dict(schema['relationships']['properties'])
    for label in schema['nodes']['properties']:
        schema['nodes']['properties'][label] = dict(schema['nodes']['properties'][label])
    for rel_type in schema['relationships']['properties']:
        schema['relationships']['properties'][rel_type] = dict(schema['relationships']['properties'][rel_type])

    return schema

# Assuming you have already initialized the driver as mentioned
# driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

schema = get_schema(driver)

# Print the schema as formatted JSON
print(json.dumps(schema, indent=4, ensure_ascii=False))


{
    "nodes": {
        "labels": [
            "Subject",
            "Object"
        ],
        "properties": {
            "Subject": {
                "name": [
                    "骨性关节炎",
                    "急性胰腺炎",
                    "乙型肝炎",
                    "感染性心内膜炎",
                    "稳定型缺血性心脏疾病",
                    "广泛性焦虑症",
                    "原发性高血压",
                    "口咽癌",
                    "急性_TIN",
                    "尤因肉瘤",
                    "失眠症",
                    "慢性胰腺炎",
                    "肾盂和输尿管移行细胞癌",
                    "弯刀综合征",
                    "先天性卵巢发育不全综合征",
                    "前列腺癌",
                    "急性髓性白血病",
                    "缺血性卒中",
                    "B系急性淋巴细胞白血病",
                    "生长激素缺乏症",
                    "脑炎",
                    "膝关节骨性关节炎",
                    "类癌综合征",
                    "下丘脑疾病",
                    "B族链球菌感染",
                    "狂犬病",
                    "SMA",
                    "食管癌",

In [2]:

# Convert schema to JSON
schema_json = json.dumps(schema, indent=4, ensure_ascii=False)
# print(schema_json)
# print(type(schema_json))
print(schema_json)



<class 'str'>
{
    "nodes": [
        {
            "labels": [
                "Subject"
            ],
            "properties": [
                "name",
                "type"
            ]
        },
        {
            "labels": [
                "Object"
            ],
            "properties": [
                "name",
                "type"
            ]
        }
    ],
    "relationships": [
        {
            "type": ":`Predicate`",
            "properties": [
                null
            ]
        }
    ]
}


In [8]:
encoding = tiktoken.get_encoding("cl100k_base")
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
encoding.encode("tiktoken is great!")

[83, 1609, 5963, 374, 2294, 0]

In [16]:
len(encoding.encode(schema_json))

99

In [9]:
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens


In [13]:
num_tokens_from_string(schema_json, "gpt-3.5-turbo")

ValueError: Unknown encoding gpt-3.5-turbo. Plugins found: ['tiktoken_ext.openai_public']

In [None]:
from langchain.prompts.prompt import PromptTemplate


CYPHER_GENERATION_TEMPLATE = """
任务: 生成Cypher语句以查询图数据库。
说明:
仅使用架构中提供的关系类型和属性。
不要使用任何其他未提供的关系类型或属性。
Schema:
{schema}

注意: 不要在回答中包含任何解释或道歉。
不要回答任何要求构建Cypher语句以外的问题。
除了生成的Cypher语句外，不要包含任何文本。

问题:
{question}"""

# CYPHER_GENERATION_TEMPLATE = """
# Task:Generate Cypher statement to query a graph database.
# Instructions:
# Use only the provided relationship types and properties in the schema.
# Do not use any other relationship types or properties that are not provided.
# Schema:
# {schema}
# Cypher examples:
# # How many streamers are from Norway?
# MATCH (s:Stream)-[:HAS_LANGUAGE]->(:Language {{name: 'no'}})
# RETURN count(s) AS streamers

# Note: Do not include any explanations or apologies in your responses.
# Do not respond to any questions that might ask anything else than for you to construct a Cypher statement.
# Do not include any text except the generated Cypher statement.

# The question is:
# {question}"""

CYPHER_GENERATION_PROMPT = PromptTemplate(
    input_variables=["schema", "question"], template=CYPHER_GENERATION_TEMPLATE
)

# chain_language_example = GraphCypherQAChain.from_llm(
#     ChatOllama(model="command-r-plus:104b-q2_K", temperature=0), graph=graph, verbose=True,
#     # ChatOpenAI(temperature=0), graph=graph, verbose=True,
#     cypher_prompt=CYPHER_GENERATION_PROMPT
# )

# llm = ChatOllama(model="command-r-plus:104b-q2_K", temperature=0)

llm = ChatOpenAI(
        model="gpt-4-turbo",
        temperature=0.3,
        api_key=OPENAI_PROXY_KEY,
        base_url=OPENAI_BASE_URL,
    )

print(llm)



In [None]:
prompt = "What are the benefits of using AI in healthcare?"
response = llm.predict(prompt)
print(response)

In [None]:
len(schema_json)

In [None]:
chain_language_example = GraphCypherQAChain.from_llm(
    llm, graph=graph, verbose=True,
    cypher_prompt=CYPHER_GENERATION_PROMPT
)

filled_prompt = CYPHER_GENERATION_PROMPT.format(schema=schema_json, question="失眠怎么办？")

chain_language_example.run(filled_prompt)

In [None]:
# 配置 OpenAI 服务  

client = OpenAI()

response = client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "讲个笑话",
        }
    ],
    # model="gpt-3.5-turbo",
    model="gpt-4o",
)

print(response)
