In [1]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from graphdatascience import GraphDataScience
from getpass import getpass
from langchain_core.runnables import RunnableConfig
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


# Set up connections

In [2]:
# Read the OpenAI API key interactively so it is never stored in the notebook.
# The prompt is labelled because getpass() otherwise shows a generic
# "Password: " prompt, which does not say which secret is being requested.
openai_api_key = getpass("OpenAI API key: ")

 ········


In [3]:
# Read the Neo4j password interactively so it is never stored in the notebook.
# The prompt is labelled because getpass() otherwise shows a generic
# "Password: " prompt, which does not say which secret is being requested.
neo4j_password = getpass("Neo4j password: ")

 ········


In [4]:
# Neo4j endpoint and login; the password comes from the getpass() prompt above.
neo4j_user = "neo4j"
neo4j_uri = "neo4j+s://bc9f751a.databases.neo4j.io"
neo4j_auth = (neo4j_user, neo4j_password)

# Single client object used for every Cypher call in this notebook.
gds = GraphDataScience(neo4j_uri, auth=neo4j_auth)

# Get document info

In [5]:
# Select topic groups that do not have a summary yet. For each group return
# its id, its descriptions, and a selection of the distinct document texts
# (made by apoc.coll.randomItems with $resultLimit) that reach the group
# through HAS_TOPIC relationships tagged prompt "v2".
unsummarized_groups_query = """
MATCH (g:TopicGroup)<-[:IN_GROUP]-()<-[:HAS_TOPIC {prompt:"v2"}]-(d)
WHERE g.summary IS null
WITH g, collect(distinct d{.text}) AS documentData
RETURN g.id AS id, g.descriptions as descriptions, apoc.coll.randomItems(documentData, $resultLimit) AS documentData"""
unsummarized_groups_params = {"resultLimit": 10}

document_info = gds.run_cypher(unsummarized_groups_query, unsummarized_groups_params)

In [6]:
# Show the query result: one row per topic group that still needs a summary.
document_info

Unnamed: 0,id,descriptions,documentData
0,5000,"[!tables command, Available commands, Command ...",[{'text': 'The output is the following: Conne...
1,1053,"[""(RELATIONSHIP)"" suffix, ""RELATIONSHIP"", LIST...",[{'text': 'The allowlist only applies to nodes...
2,2101,"[""+"" operator in Cypher, ""-"" operator in Cyphe...","[{'text': 'A parameter: $param, $0. A list of ..."
3,5262,"["".*"" in regex, Special characters in regex, U...",[{'text': 'Matching using regular expressions ...
4,2391,"[""/Romance"" in Cypher, * relationship in Cyphe...","[{'text': '(:Person:Product {name: ""Alicia""})-..."
...,...,...,...
38514,38555,[Creating constraints on nodes],[{'text': 'to turn a setNeo4jroles intoNeo4jar...
38515,38556,[Setting roles property in relationships],[{'text': 'to turn a setNeo4jroles intoNeo4jar...
38516,38557,[Confirming data import and relationships],[{'text': 'to turn a setNeo4jroles intoNeo4jar...
38517,38558,[Confirming data load],"[{'text': 'Movie node, and we create the relat..."


# Ask LLM to summarize topics in topic groups

In [14]:
# Chat model used for every summary; temperature is pinned to 0.
chat = ChatOpenAI(
    model="gpt-4-turbo-preview",
    temperature=0,
    openai_api_key=openai_api_key,
)

In [15]:
# Fixed instructions for the model; this text has no template variables.
system_instructions = """You are an expert on Neo4j software. 
You will be given a list of related topics and a list of example documents that contain those topics.
Write two or three sentences that describe the common themes from the topics. 
Don't let your summary be longer than two or three sentences.
Use the example document information to guide your description of the topics.
"""
system_message = SystemMessage(content=system_instructions)

# Per-request message; {descriptions} and {documentData} are filled in when
# the prompt is invoked.
human_template = """topics: {descriptions}
     example documents: {documentData}"""

final_prompt = ChatPromptTemplate.from_messages(
    [
        system_message,
        ("human", human_template),
    ]
)

In [16]:
# Inspect the first row to see what one topic group record looks like.
document_info.iloc[0]

id                                                           5000
descriptions    [!tables command, Available commands, Command ...
documentData    [{'text': 'The output is the following:  Conne...
Name: 0, dtype: object

In [17]:
# Render the prompt for the first topic group (no model call is made here)
# to check that both template variables are substituted as expected.
preview_row = document_info.loc[0]
final_prompt.invoke(
    {
        "descriptions": preview_row['descriptions'],
        "documentData": preview_row['documentData'],
    }
)

ChatPromptValue(messages=[SystemMessage(content="You are an expert on Neo4j software. \nYou will be given a list of related topics and a list of example documents that contain those topics.\nWrite two or three sentences that describe the common themes from the topics. \nDon't let your summary be longer than two or three sentences.\nUse the example document information to guide your description of the topics.\n"), HumanMessage(content='topics: [\'!tables command\', \'Available commands\', \'Command failed\', \'Command fails\', \'Command option\', \'Command output\', \'Help command\', \'One command\']\n     example documents: [{\'text\': \'The output is the following:  Connected to Neo4j at neo4j://IP-address:7687 as user neo4j. Type :help for a list of available commands or :exit to exit the shell. Note that Cypher queries must end with a semicolon. Available commands Once in the interactive shell, run the following command to display all available commands: Example 1. Running  The outp

In [18]:
# Compose prompt and model: the rendered prompt is passed straight to the chat model.
chain = final_prompt | chat

In [19]:
# Trial run: summarize only the first topic group before processing the full set.
trial_row = document_info.loc[0]
response = chain.invoke(
    {
        "descriptions": trial_row['descriptions'],
        "documentData": trial_row['documentData'],
    }
)

In [20]:
# The summary text is carried in the message's `content` attribute.
response.content

'The documents focus on the usage, management, and administration of Neo4j databases through various command-line tools and commands. They cover aspects such as available commands, command options, handling command failures, and interpreting command outputs. Additionally, they touch on the help command for assistance and specific commands for tasks like connecting to databases, managing server options, and performing backups or uploads.'

In [21]:
# Run configuration passed to chain.batch(); max_concurrency caps how many
# requests of a batch are executed at the same time.
config = RunnableConfig(max_concurrency=4)

In [24]:
def get_topic_group_descriptions(start_id, end_id):
    """Summarize a slice of topic groups with the LLM and store the results in Neo4j.

    Uses the notebook-level ``document_info`` frame, ``chain``, ``config`` and
    ``gds`` objects.

    Args:
        start_id: Positional index of the first ``document_info`` row to
            process. Despite the name, this is a row position passed to
            ``iloc``, not a TopicGroup id.
        end_id: Positional index one past the last row to process.

    Returns:
        None. Each processed group's ``summary`` property is set in the
        database; ``document_info`` itself is not modified (a copy is used).
    """
    batch_rows = document_info.iloc[start_id:end_id, :].copy()

    # An out-of-range or empty slice has nothing to summarize; return early
    # rather than issuing an empty LLM batch and an empty database write.
    if batch_rows.empty:
        return

    prompt_inputs = batch_rows.loc[:, ['descriptions', 'documentData']].to_dict("records")
    responses = chain.batch(prompt_inputs, config=config)

    # chain.batch returns one message per input, in input order, so the
    # summaries line up with the rows of the slice.
    batch_rows.loc[:, 'summary'] = [message.content for message in responses]

    gds.run_cypher("""
        UNWIND $data as row
        MATCH (g:TopicGroup {id:row['id']})
        SET g.summary = row['summary']
        """,
                   {"data": batch_rows.loc[:, ['id', 'summary']].to_dict("records")})

In [22]:
# Number of topic groups to summarize (rows) and number of columns.
document_info.shape

(38519, 3)

In [25]:
# Summarize the topic groups in fixed-size batches. Each batch is written to
# Neo4j as soon as it completes, so a failure part-way through (for example an
# API rate-limit error) keeps the work already done; the document query only
# selects groups whose summary is null, so re-running it and this loop
# continues with the remaining groups.
BATCH_SIZE = 100
total_rows = document_info.shape[0]

# Stepping by BATCH_SIZE avoids scheduling an extra, empty batch when the row
# count is an exact multiple of the batch size.
for batch_start in range(0, total_rows, BATCH_SIZE):
    # Clamp the end so the progress message reports the real last row.
    batch_end = min(batch_start + BATCH_SIZE, total_rows)
    get_topic_group_descriptions(batch_start, batch_end)
    print(f"Finished row {batch_end}")

Finished row 100
Finished row 200
Finished row 300
Finished row 400
Finished row 500
Finished row 600
Finished row 700
Finished row 800
Finished row 900
Finished row 1000
Finished row 1100
Finished row 1200
Finished row 1300
Finished row 1400
Finished row 1500
Finished row 1600
Finished row 1700
Finished row 1800
Finished row 1900
Finished row 2000
Finished row 2100
Finished row 2200
Finished row 2300
Finished row 2400
Finished row 2500
Finished row 2600
Finished row 2700
Finished row 2800


RateLimitError: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}

# Examine summaries

In [None]:
# Fetch every topic group's descriptions together with its stored summary,
# ordered by descriptions, for a manual review of the generated text.
summary_query = """
MATCH (g:TopicGroup)
RETURN g.descriptions AS descriptions, g.summary AS summary
ORDER BY g.descriptions"""

summary_df = gds.run_cypher(summary_query)

In [None]:
# Remove the column-width limit so long summaries are shown in full rather than truncated.
pd.set_option('display.max_colwidth', None)

In [None]:
# Look at the first ten topic groups and their summaries.
summary_df.head(10)