In [20]:
import os
import warnings
from typing import List

from dotenv import load_dotenv
from langchain_community.graphs import Neo4jGraph
from openai import AzureOpenAI

# Populate os.environ from the local .env file before any credentials are read.
load_dotenv()

# Warning control: silence all warnings for the rest of the notebook.
warnings.filterwarnings("ignore")

**Load the environment variables from your `.env` file**

In [21]:
# Read the Azure OpenAI credentials from the environment.
# os.environ[...] raises KeyError right away if a variable is missing.
azure_openai_api_key = os.environ["OPENAI_API_KEY"]
azure_openai_endpoint = os.environ["OPENAI_API_BASE"]

# Never echo the key itself: cell outputs are saved inside the notebook file
# and travel with it whenever it is shared or committed.
print("Azure OpenAI endpoint:", azure_openai_endpoint)
print("API key loaded:", bool(azure_openai_api_key))

<redacted> https://cogopenaiscgjwdllmchat1.openai.azure.com/


**Create the Azure OpenAI client and define the embedding helper**

In [22]:
# Azure OpenAI client used by the embedding helper and the chat-completion cells.
client = AzureOpenAI(
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_api_key,
    api_version=os.environ["OPENAI_API_VERSION"],
)


def embed_text(text: str, model: str = "text-embedding-ada-002") -> List[float]:
    """
    Embeds the given text using the specified model.

    Parameters:
        text (str): The text to be embedded.
        model (str): Value passed as `model` to the embeddings call.
            Defaults to "text-embedding-ada-002", the name this helper
            used before the parameter existed, so existing calls are
            unaffected.
            NOTE(review): the query embedding presumably has to come from
            the same model that produced the embeddings stored in the
            vector index -- confirm before overriding the default.

    Returns:
        List[float]: The embedding vector of the text, read from the first
            item of the response.
    """
    # A single input string is sent, so only the first entry of `data` is read.
    response = client.embeddings.create(input=text, model=model)
    return response.data[0].embedding

**Add Neo4j credentials (this information must be kept secret)**

In [23]:
# Connection settings come from the environment (for example the `.env` file
# loaded at the top of the notebook) so credentials are not stored in the notebook.
# The non-secret settings fall back to the local-development values used before.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# No fallback for the password: a missing secret fails loudly with KeyError,
# matching how the Azure OpenAI credentials are read.
NEO4J_PASSWORD = os.environ["NEO4J_PASSWORD"]
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

In [24]:
# Graph handle used by every Cypher query below.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

**Sample question for RAG:**

In [25]:
# Natural-language question that drives the first retrieval example.
question: str = "What movies are about love?"

**Get the question's embedding:**

In [26]:
# Vectorize the question, then preview the first ten components of the result.
question_embedding = embed_text(text=question)
question_embedding[:10]

[-0.004219826776534319,
 -0.03029683791100979,
 0.0007639749674126506,
 -0.02761838585138321,
 -0.007965870201587677,
 0.0019409307278692722,
 -0.006576106883585453,
 -0.021617135033011436,
 -0.002027790993452072,
 -0.008066943846642971]

**Perform Similarity Search using the question's embedding on the vector index of the graph database and get the results**

In [27]:
# Similarity search on the tagline vector index. The Cypher text carries its own
# line-by-line comments; the two parameters are the query vector and the number
# of hits to return.
result = graph.query(
    """
    with $question_embedding as question_embedding      // Use the provided question embedding as 'question_embedding'
    CALL db.index.vector.queryNodes(                    // Call the vector index query function
        'movie_tagline_embeddings',                     // Name of the vector index to query against
        $top_k,                                         // Number of top results to retrieve
        question_embedding                              // The question embedding to compare against
        ) YIELD node AS movie, score                    // Yield each matched node and its similarity score
    RETURN movie.title, movie.tagline, score            // Return the title, tagline, and similarity score of each movie
    """,
    params=dict(question_embedding=question_embedding, top_k=3),
)
result

[{'movie.title': 'Heat',
  'movie.tagline': 'A Los Angeles crime saga',
  'score': 0.8910077810287476},
 {'movie.title': 'Grumpier Old Men',
  'movie.tagline': 'Still Yelling. Still Fighting. Still Ready for Love.',
  'score': 0.8878611326217651},
 {'movie.title': 'Tom and Huck',
  'movie.tagline': 'The Original Bad Boys.',
  'score': 0.8836764097213745}]

**Pass the results to an LLM for the final answer**

In [31]:
import json

# The raw search hits are interpolated into the user turn; they are the only
# evidence the model receives alongside the question.
prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{result}"

system_turn = {
    "role": "system",
    "content": "You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer.",
}
user_turn = {"role": "user", "content": prompt}
messages = [system_turn, user_turn]

# Show the exact payload before sending it to the chat model.
print(json.dumps(messages, indent=2, ensure_ascii=False))

response = client.chat.completions.create(messages=messages, model="gpt-4o")
answer_text = response.choices[0].message.content
print(answer_text)

[
  {
    "role": "system",
    "content": "You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer."
  },
  {
    "role": "user",
    "content": "# Question:\nWhat movies are about adventure?\n\n# Graph DB search results:\n[{'movie.title': 'Toy Story', 'movie.tagline': 'The adventure takes off!', 'score': 0.9303786158561707}, {'movie.title': 'Cutthroat Island', 'movie.tagline': 'The Course Has Been Set. There Is No Turning Back. Prepare Your Weapons. Summon Your Courage. Discover the Adventure of a Lifetime!', 'score': 0.8933805227279663}, {'movie.title': 'Tom and Huck', 'movie.tagline': 'The Original Bad Boys.', 'score': 0.8925687670707703}, {'movie.title': 'Jumanji', 'movie.tagline': 'Roll the dice and unleash the excitement!', 'score': 0.8908360004425049}, {'movie.title': 'Heat', 'movie.tagline': 'A Los Angeles crime saga', 'score': 0.8869890570640564}]"
  }
]
Here are some movies that are about 

-----------------------

**Note: In this use case there is a higher chance of hallucination, because the search results may not give the LLM enough evidence and it has to fall back on its own judgment. Improving the contents of the vector DB and the system role can address this issue to some extent.**

-----------------------

**Second example (in one go):**

In [29]:
import json

# Second example: the same retrieve-then-answer steps, run in a single cell.
question = "What movies are about adventure?"
question_embedding = embed_text(text=question)

# Fetch the five closest taglines from the vector index.
result = graph.query(
    """
    with $question_embedding as question_embedding
    CALL db.index.vector.queryNodes(
        'movie_tagline_embeddings', 
        $top_k, 
        question_embedding
        ) YIELD node AS movie, score
    RETURN movie.title, movie.tagline, score
    """,
    params=dict(question_embedding=question_embedding, top_k=5),
)

# Build the chat payload: fixed system instruction + question with its search hits.
prompt = f"# Question:\n{question}\n\n# Graph DB search results:\n{result}"
messages = [
    dict(
        role="system",
        content="You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer.",
    ),
    dict(role="user", content=prompt),
]
print(json.dumps(messages, ensure_ascii=False, indent=2))

# The reply is printed in the next cell.
response = client.chat.completions.create(messages=messages, model="gpt-4o")

[
  {
    "role": "system",
    "content": "You will be given the user question along with the search result of that question over a Neo4j graph database. Give the user the proper answer."
  },
  {
    "role": "user",
    "content": "# Question:\nWhat movies are about adventure?\n\n# Graph DB search results:\n[{'movie.title': 'Toy Story', 'movie.tagline': 'The adventure takes off!', 'score': 0.9303786158561707}, {'movie.title': 'Cutthroat Island', 'movie.tagline': 'The Course Has Been Set. There Is No Turning Back. Prepare Your Weapons. Summon Your Courage. Discover the Adventure of a Lifetime!', 'score': 0.8933805227279663}, {'movie.title': 'Tom and Huck', 'movie.tagline': 'The Original Bad Boys.', 'score': 0.8925687670707703}, {'movie.title': 'Jumanji', 'movie.tagline': 'Roll the dice and unleash the excitement!', 'score': 0.8908360004425049}, {'movie.title': 'Heat', 'movie.tagline': 'A Los Angeles crime saga', 'score': 0.8869890570640564}]"
  }
]


In [30]:
# Show only the reply text of the first returned choice.
final_answer = response.choices[0].message.content
print(final_answer)

The movies about adventure based on the provided search results are:

1. **Toy Story** - Tagline: "The adventure takes off!"
2. **Cutthroat Island** - Tagline: "The Course Has Been Set. There Is No Turning Back. Prepare Your Weapons. Summon Your Courage. Discover the Adventure of a Lifetime!"
3. **Tom and Huck** - Tagline: "The Original Bad Boys."
4. **Jumanji** - Tagline: "Roll the dice and unleash the excitement!"

These movies are associated with adventure themes based on their taglines and search scores.
