# Neo4j Graph Store

In [None]:
import os

os.environ["OPENAI_API_KEY"] = "API_KEY_HERE"

import logging
import sys
from llama_index.llms import OpenAI
from llama_index import ServiceContext

logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# define LLM
llm = OpenAI(temperature=0, model="gpt-3.5-turbo")
service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)

In [17]:
# For Azure OpenAI
import os
import json
import openai
from llama_index.llms import AzureOpenAI
from llama_index.embeddings import OpenAIEmbedding
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    KnowledgeGraphIndex,
    ServiceContext,
)

import logging
import sys

from IPython.display import Markdown, display

logging.basicConfig(
    stream=sys.stdout, level=logging.INFO
)  # logging.DEBUG for more verbose output
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

openai.api_type = "azure"
openai.api_base = "https://firstaiproject.openai.azure.com"
openai.api_version = "2022-12-01"
os.environ["OPENAI_API_KEY"] = "77a8bace65c148c7a10be35741adaf5b"
openai.api_key = os.getenv("OPENAI_API_KEY")

llm = AzureOpenAI(
    deployment_name="gpt-35-turbo-16k",
    temperature=0,
    openai_api_version=openai.api_version,
    model_kwargs={
        "api_key": openai.api_key,
        "api_base": openai.api_base,
        "api_type": openai.api_type,
        "api_version": openai.api_version,
    },
)

# You need to deploy your own embedding model as well as your own chat completion model
embedding_llm = OpenAIEmbedding(
    model="text-embedding-ada-002",
    deployment_name="gpt-35-turbo-16k",
    api_key=openai.api_key,
    api_base=openai.api_base,
    api_type=openai.api_type,
    api_version=openai.api_version,
)

service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embedding_llm,
)

ValueError: 'gpt-3.5-turbo' is not a valid OpenAIEmbeddingModelType

## Using Knowledge Graph with Neo4jGraphStore
### Building the Knowledge Graph

In [11]:
from llama_index import (
    KnowledgeGraphIndex,
    ServiceContext,
    SimpleDirectoryReader,
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import Neo4jGraphStore


from llama_index.llms import OpenAI
from IPython.display import Markdown, display

In [12]:
documents = SimpleDirectoryReader("./data/paul_graham").load_data()

### Prepare for Neo4j

#### 安装 neo4j 服务
```shell
docker run -d -p 7474:7474 -p 7687:7687 --name neo4j-apoc -e NEO4J_apoc_export_file_enabled=true -e NEO4J_apoc_import_file_enabled=true -e NEO4J_apoc_import_file_use__neo4j__config=true -e NEO4J_AUTH=neo4j/pleaseletmein -e NEO4J_PLUGINS=\[\"apoc\"\] neo4j:latest
```

In [13]:
%pip install neo4j

username = "neo4j"
password = "pleaseletmein"
url = "bolt://localhost:7687"
database = "neo4j"

Note: you may need to restart the kernel to use updated packages.


### Instantiate Neo4jGraph KG Indexes

In [14]:
graph_store = Neo4jGraphStore(
    username=username,
    password=password,
    url=url,
    database=database,
)

storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
    service_context=service_context,
)

INFO:httpx:HTTP Request: POST https://firstaiproject.openai.azure.com//openai/deployments/text-embedding-ada-002/chat/completions?api-version=2023-06-01-preview "HTTP/1.1 400 Bad Request"
HTTP Request: POST https://firstaiproject.openai.azure.com//openai/deployments/text-embedding-ada-002/chat/completions?api-version=2023-06-01-preview "HTTP/1.1 400 Bad Request"
HTTP Request: POST https://firstaiproject.openai.azure.com//openai/deployments/text-embedding-ada-002/chat/completions?api-version=2023-06-01-preview "HTTP/1.1 400 Bad Request"
HTTP Request: POST https://firstaiproject.openai.azure.com//openai/deployments/text-embedding-ada-002/chat/completions?api-version=2023-06-01-preview "HTTP/1.1 400 Bad Request"


BadRequestError: Error code: 400 - {'error': {'code': 'OperationNotSupported', 'message': 'The chatCompletion operation does not work with the specified model, text-embedding-ada-002. Please choose different model and try again. You can learn more about which models can be used with each operation here: https://go.microsoft.com/fwlink/?linkid=2197993.'}}

### Querying the Knowledge Graph
First, we can query and send only the triplets to the LLM.

In [None]:
query_engine = index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)

response = query_engine.query("Tell me more about Interleaf")

In [None]:
display(Markdown(f"<b>{response}</b>"))

In [None]:
query_engine = index.as_query_engine(
    include_text=True, response_mode="tree_summarize"
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf"
)

In [None]:
display(Markdown(f"<b>{response}</b>"))

### Query with embeddings

In [None]:
# Clean dataset first
graph_store.query(
    """
MATCH (n) DETACH DELETE n
"""
)

# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
    service_context=service_context,
    include_embeddings=True,
)

query_engine = index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)

In [None]:
# query using top 3 triplets plus keywords (duplicate triplets are removed)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf"
)

In [None]:
display(Markdown(f"<b>{response}</b>"))

In [None]:
from llama_index.node_parser import SentenceSplitter
node_parser = SentenceSplitter()
nodes = node_parser.get_nodes_from_documents(documents)
# initialize an empty index for now
index = KnowledgeGraphIndex.from_documents([], storage_context=storage_context)

In [None]:
# add keyword mappings and nodes manually
# add triplets (subject, relationship, object)

# for node 0
node_0_tups = [
    ("author", "worked on", "writing"),
    ("author", "worked on", "programming"),
]
for tup in node_0_tups:
    index.upsert_triplet_and_node(tup, nodes[0])

# for node 1
node_1_tups = [
    ("Interleaf", "made software for", "creating documents"),
    ("Interleaf", "added", "scripting language"),
    ("software", "generate", "web sites"),
]
for tup in node_1_tups:
    index.upsert_triplet_and_node(tup, nodes[1])

In [None]:
query_engine = index.as_query_engine(
    include_text=False, response_mode="tree_summarize"
)

response = query_engine.query("Tell me more about Interleaf")

In [None]:
display(Markdown(f"<b>{response}</b>"))