In [77]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from graphdatascience import GraphDataScience
from getpass import getpass
from langchain_core.runnables import RunnableConfig
import pandas as pd
import time

# Set up connections

In [27]:
# Prompt for the OpenAI key without echoing it; the explicit label keeps this
# prompt distinguishable from the Neo4j password prompt in the next cell.
openai_api_key = getpass("OpenAI API key: ")

 ········


In [3]:
# Prompt for the Neo4j password without echoing it; the explicit label keeps
# this prompt distinguishable from the OpenAI key prompt.
neo4j_password = getpass("Neo4j password: ")

 ········


In [29]:
# Connection details for the Neo4j database; the password was collected
# interactively and is never written into the notebook.
neo4j_uri = "neo4j+s://bc9f751a.databases.neo4j.io"
neo4j_user = "neo4j"
neo4j_credentials = (neo4j_user, neo4j_password)

# Graph Data Science client used for every Cypher call in this notebook.
gds = GraphDataScience(neo4j_uri, auth=neo4j_credentials)

# Get document info

In [79]:
# For every TopicGroup that has no summary yet, return its id, its topic
# descriptions, and a random sample of the distinct documents linked to it.
unsummarized_groups_query = """
MATCH (g:TopicGroup)<-[:IN_GROUP]-()<-[:HAS_TOPIC]-(d)
WHERE g.summary IS null
WITH g, collect(distinct d{.text}) AS documentData
RETURN g.id AS id, g.descriptions as descriptions, apoc.coll.randomItems(documentData, $resultLimit) AS documentData"""

# $resultLimit is the number of example documents sampled per topic group.
unsummarized_groups_params = {"resultLimit": 10}

document_info = gds.run_cypher(unsummarized_groups_query, unsummarized_groups_params)

In [76]:
# Display the query result: one row per topic group that still lacks a summary.
document_info

Unnamed: 0,id,descriptions,documentData
0,5515,"[Capturing log events, Event log analysis, For...",[{'text': 'Check recent Events The kubectl des...
1,2864,"[Cardinality in relationships, Cardinality of ...",[{'text': 'This example shows how to configure...
2,5672,"[Cardinality issues, Cost-Effective Lazy Forwa...","[{'text': 'have, and also a pattern, which is,..."
3,3284,"[Cargo 2000 Case Study dataset, Cargo 2000 tra...",[{'text': 'Source Logistics Dataset To explore...
4,5918,"[Cargo 2000 case study, Cargo 2000 system data...",[{'text': 'The below figure shows a model of t...
...,...,...,...
7424,9299,[Fashion recommendations dataset],[{'text': 'retailer they released a public da...
7425,9300,[Worker compensation data],[{'text': 'so I will start with a demo so thi...
7426,9301,[Dynamic prompting],[{'text': 'that's another thing that you can d...
7427,9302,[Order details file],[{'text': ''m going to select the fuller north...


# Ask LLM to summarize topics in topic groups

In [58]:
# Chat model used to write the summaries; temperature is set to 0.
chat = ChatOpenAI(
    temperature=0,
    model="gpt-3.5-turbo",
    openai_api_key=openai_api_key,
)

In [59]:
# Fixed system instructions: tell the model to describe the common themes of a
# topic group in two or three sentences, guided by the example documents.
system_message = SystemMessage(
    content="""You are an expert on Neo4j software. 
You will be given a list of related topics and a list of example documents that contain those topics.
Write two or three sentences that describe the common themes from the topics. 
Don't let your summary be longer than two or three sentences.
Use the example document information to guide your description of the topics.
""")
# Full prompt: the system message above followed by a human message template
# with two placeholders, {descriptions} and {documentData}.
final_prompt = ChatPromptTemplate.from_messages(
    [system_message,
     ("human", """topics: {descriptions}
     example documents: {documentData}""")])

In [60]:
# Peek at the first row to see the fields available for the prompt.
document_info.iloc[0]

id                                                           3083
descriptions    [Arguments in Cypher Operator, Direction "out"...
documentData    [{'text': 'Bitwise Operations Functions for bi...
Name: 0, dtype: object

In [61]:
# Render the prompt for the first topic group to check how it will look to the model.
first_row_prompt_inputs = {
    column: document_info.loc[0, column]
    for column in ("descriptions", "documentData")
}
final_prompt.invoke(first_row_prompt_inputs)

ChatPromptValue(messages=[SystemMessage(content="You are an expert on Neo4j software. \nYou will be given a list of related topics and a list of example documents that contain those topics.\nWrite two or three sentences that describe the common themes from the topics. \nDon't let your summary be longer than two or three sentences.\nUse the example document information to guide your description of the topics.\n"), HumanMessage(content='topics: [\'Arguments in Cypher Operator\', \'Direction "out" in Cypher\', \'LEFT SHIFT in Cypher\', \'OUT direction in Cypher\', \'OVERLAP in Cypher\', \'UP mode in Cypher\', \'USE in Cypher\', \'USER keyword in Cypher\', \'WHEN expression in Cypher\', \'methodName in Cypher\', \'timeout:10 in Cypher\']\n     example documents: [{\'text\': \'Bitwise Operations Functions for bitwise operations apoc.bitwise.op   apoc.bitwise.op(a Integer, operator String, b Integer) - returns the result of the bitwise operation. Function Examples a & b AND apoc.bitwise.op(6

In [62]:
# Pipe the rendered prompt into the chat model so one call does both steps.
chain = final_prompt | chat

In [69]:
# Trial run: summarize only the first topic group before batching the rest.
sample_inputs = {
    column: document_info.loc[0, column]
    for column in ("descriptions", "documentData")
}
response = chain.invoke(sample_inputs)

In [70]:
# Show the text of the model's reply for the trial run.
response.content

'The common themes among the provided topics include various Cypher operators such as LEFT SHIFT, OVERLAP, and USE, as well as directions like "out" and "UP mode" in Cypher queries. These topics also cover specific keywords and expressions like "USER keyword" and "WHEN expression" in Cypher, along with practical examples like setting a timeout in a Cypher query.'

In [71]:
# Run configuration passed to chain.batch(); max_concurrency is set to 4.
config = RunnableConfig(max_concurrency=4)

In [72]:
def get_topic_group_descriptions(start_id, end_id):
    """Summarize one slice of topic groups with the LLM and save the results to Neo4j.

    Rows ``start_id`` (inclusive) to ``end_id`` (exclusive) of the notebook-level
    ``document_info`` frame are selected by position and sent through ``chain``
    as a single batch. The content of each returned message is then written to
    the ``summary`` property of the ``TopicGroup`` node with the matching ``id``.

    Relies on the notebook-level names ``document_info``, ``chain``, ``config``
    and ``gds``.

    Args:
        start_id: Positional index of the first row to process.
        end_id: Positional index one past the last row to process.

    Returns:
        None. The only effect is the update of ``summary`` on the matched nodes.
    """
    doc_data = document_info.iloc[start_id:end_id, :].copy()

    # An empty slice has nothing to summarize; skip the LLM batch call and the
    # database round trip entirely.
    if doc_data.empty:
        return

    prompt_inputs = doc_data.loc[:, ['descriptions', 'documentData']].to_dict("records")
    responses = chain.batch(prompt_inputs, config=config)

    # The summaries are assigned by position, so this relies on the batch
    # returning one message per input row in the same order.
    doc_data['summary'] = [message.content for message in responses]

    gds.run_cypher("""
        UNWIND $data as row
        MATCH (g:TopicGroup {id:row['id']})
        SET g.summary = row['summary']
        """,
                   {"data": doc_data.loc[:, ['id', 'summary']].to_dict("records")})

In [80]:
document_info.shape

(7329, 3)

In [81]:
# Number of topic groups summarized and written back per call.
BATCH_SIZE = 100

total_rows = document_info.shape[0]

# Step through the frame in BATCH_SIZE strides. Using range() with a step means
# no extra empty batch is scheduled when total_rows is an exact multiple of
# BATCH_SIZE, and clamping the end keeps the progress message accurate on the
# final, shorter batch.
for batch_start in range(0, total_rows, BATCH_SIZE):
    batch_end = min(batch_start + BATCH_SIZE, total_rows)
    get_topic_group_descriptions(batch_start, batch_end)
    print(f"Finished row {batch_end}")
    # Pause between batches (presumably to stay under API rate limits; confirm).
    time.sleep(5)

Finished row 100
Finished row 200
Finished row 300
Finished row 400
Finished row 500
Finished row 600
Finished row 700
Finished row 800
Finished row 900
Finished row 1000
Finished row 1100
Finished row 1200
Finished row 1300
Finished row 1400
Finished row 1500
Finished row 1600
Finished row 1700
Finished row 1800
Finished row 1900
Finished row 2000
Finished row 2100
Finished row 2200
Finished row 2300
Finished row 2400
Finished row 2500
Finished row 2600
Finished row 2700
Finished row 2800
Finished row 2900
Finished row 3000
Finished row 3100
Finished row 3200
Finished row 3300
Finished row 3400
Finished row 3500
Finished row 3600
Finished row 3700
Finished row 3800
Finished row 3900
Finished row 4000
Finished row 4100
Finished row 4200
Finished row 4300
Finished row 4400
Finished row 4500
Finished row 4600
Finished row 4700
Finished row 4800
Finished row 4900
Finished row 5000
Finished row 5100
Finished row 5200
Finished row 5300
Finished row 5400
Finished row 5500
Finished row 5600
F

# Examine summaries

In [None]:
# Read back every topic group's descriptions and stored summary, ordered by descriptions.
topic_group_summary_query = """
MATCH (g:TopicGroup)
RETURN g.descriptions AS descriptions, g.summary AS summary
ORDER BY g.descriptions"""

summary_df = gds.run_cypher(topic_group_summary_query)

In [None]:
# Remove the column-width limit so long text columns are shown in full.
pd.set_option('display.max_colwidth', None)

In [None]:
# Preview the first ten rows of descriptions and summaries.
summary_df.head(10)