In [None]:
!pip install python-dotenv
!pip install datasets
!pip install langchain
!pip install neo4j
!pip install llama-index

In [2]:

import os
from dotenv import load_dotenv
import logging
import sys
from llama_index.llms import OpenAI
from llama_index import ServiceContext

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Indexing os.environ raises KeyError right away when the key is not set.
api_key = os.environ["OPENAI_API_KEY"]

# Send INFO-and-above log records to stdout so they appear in the cell output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

In [3]:
from llama_index import (
    KnowledgeGraphIndex,
    ServiceContext,
    SimpleDirectoryReader,
    Document
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import Neo4jGraphStore
from llama_index.llms import OpenAI
from IPython.display import Markdown, display
from langchain_community.document_loaders import PyPDFLoader

In [4]:
# Load the PDF through LangChain and split it into LangChain documents.
loader = PyPDFLoader('../data/Football_news.pdf')
pages = loader.load_and_split()

# Re-wrap each LangChain document as a llama_index Document, carrying over
# only its text content.
documents = []
for page in pages:
    documents.append(Document(text=page.page_content))

In [5]:
# Neo4j connection settings: URI and credentials come from the environment
# (KeyError if any is missing); the database name is fixed.
url = os.environ["NEO4J_URI"]
username = os.environ["NEO4J_USERNAME"]
password = os.environ["NEO4J_PASSWORD"]
database = "neo4j"

# Graph store that talks to that Neo4j database.
neo4j_settings = dict(
    url=url,
    username=username,
    password=password,
    database=database,
)
graph_store = Neo4jGraphStore(**neo4j_settings)

# Storage context handed to the index so its graph data goes to graph_store.
storage_context = StorageContext.from_defaults(graph_store=graph_store)

In [6]:
# LLM (gpt-3.5-turbo at temperature 0) and the chunk size, bundled into the
# service context that the index builds below receive.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
service_context = ServiceContext.from_defaults(chunk_size=200, llm=llm)

In [None]:
# Knowledge-graph index built WITHOUT embeddings, with max_triplets_per_chunk=3.
plain_build_options = {
    "storage_context": storage_context,
    "service_context": service_context,
    "max_triplets_per_chunk": 3,
}
index = KnowledgeGraphIndex.from_documents(documents, **plain_build_options)

In [None]:

# Query engine configured with include_text=False and the tree_summarize
# response mode.
plain_engine_options = {
    "include_text": False,
    "response_mode": "tree_summarize",
}
query_engine = index.as_query_engine(**plain_engine_options)

# Ask the question and keep the answer for the display cell below.
question = "Between which teams friendlies was played"
response = query_engine.query(question)

In [23]:
# Render the answer in italics via Markdown.
rendered_answer = Markdown(f"<i>{response}</i>")
display(rendered_answer)

<i>Miami and the Hong Kong select team played friendlies.</i>

In [None]:
# Knowledge-graph index built WITH embeddings, with max_triplets_per_chunk=2.
# NOTE(review): this reuses the same storage_context (and therefore the same
# graph store) as the build without embeddings -- confirm the graph was
# cleared between runs before comparing node/relation counts.
embedded_build_options = {
    "storage_context": storage_context,
    "service_context": service_context,
    "max_triplets_per_chunk": 2,
    "include_embeddings": True,
}
index = KnowledgeGraphIndex.from_documents(documents, **embedded_build_options)


In [17]:
# Query engine configured with include_text=True, hybrid embedding mode,
# similarity_top_k=5, and the tree_summarize response mode.
hybrid_engine_options = {
    "include_text": True,
    "response_mode": "tree_summarize",
    "embedding_mode": "hybrid",
    "similarity_top_k": 5,
}
query_engine = index.as_query_engine(**hybrid_engine_options)

In [None]:
# Put the question to the current query engine and keep the answer.
question = "Between which teams friendlies was played"
response = query_engine.query(question)

In [21]:
# Render the answer in italics via Markdown.
rendered_answer = Markdown(f"<i>{response}</i>")
display(rendered_answer)

<i>Miami played friendlies against a Hong Kong select team and Japanese side Vissel Kobe.</i>

Without embeddings - 

Node count - 21
Relation count - 15


With embeddings - 

Node count - 28
Relation count - 21