In [1]:
cd ../

/Users/linafaik/Documents/projects/knowledge_graph_llm


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
import os
from config import OPENAI_API_KEY
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

import nest_asyncio
nest_asyncio.apply()

%load_ext autoreload
%autoreload 2

In [3]:
# OpenAI model identifiers used in this notebook: one chat model for the LLM
# calls and one embedding model for embedding queries.
LLM_MODEL = "gpt-4o-mini"
EMBEDDING_MODEL = "text-embedding-3-small"

# Sampling temperature passed to the LLM.
TEMPERATURE = 0.1

# Directory (relative to the current working directory) from which the
# persisted index is loaded.
path_storage = "storage/dynamic_llm"

## 1. Graph loading

In [20]:
from llama_index.core import StorageContext, load_index_from_storage

# Two steps: first create a storage context whose persistence directory is
# `path_storage`, then rebuild the index from what is stored there.
storage_context = StorageContext.from_defaults(persist_dir=path_storage)
index = load_index_from_storage(storage_context)


## 2. Vector context retriever

In [16]:
# Import necessary modules from the llama_index library
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Import the VectorContextRetriever class from the property_graph module
from llama_index.core.indices.property_graph import VectorContextRetriever

# Sub-retriever backed by the loaded index's property graph store and vector
# store. Queries are embedded with the OpenAI embedding model named in
# EMBEDDING_MODEL.
sub_retriever = VectorContextRetriever(
    index.property_graph_store,
    vector_store=index.vector_store,
    embed_model=OpenAIEmbedding(model_name=EMBEDDING_MODEL),
)

# Index-level retriever that wraps the sub-retriever; kept available for
# calling retrieval directly.
retriever = index.as_retriever(sub_retrievers=[sub_retriever])

# Build the query engine from the sub-retriever itself. `as_query_engine`
# forwards `sub_retrievers` to `as_retriever`, so handing it the already
# wrapped `retriever` would nest one index retriever inside another for no
# benefit.
# The configured LLM is passed explicitly; otherwise the answer would be
# synthesized by the library's default LLM and LLM_MODEL / TEMPERATURE
# would be ignored.
query_engine = index.as_query_engine(
    llm=OpenAI(model=LLM_MODEL, temperature=TEMPERATURE),
    sub_retrievers=[sub_retriever],
)

# Ask about ICIJ's involvement in the Panama Papers scandal and print the
# text of the synthesized answer.
question = "How was the International Consortium of Investigative Journalists (ICIJ) involved in the Panama Papers scandal?"
print(query_engine.query(question).response)

The International Consortium of Investigative Journalists (ICIJ) helped organize the research and document review once Süddeutsche Zeitung realized the scale of the work required to validate the authenticity of the leaked data. The ICIJ enlisted reporters and resources from various media outlets to investigate individuals and organizations associated with Mossack Fonseca. Additionally, the ICIJ released the leaked documents from the Panama Papers scandal on its website after verifying the source and content.


## 3. LLM synonym retriever

In [31]:
# Import necessary modules from the llama_index library
from llama_index.core.indices.property_graph import LLMSynonymRetriever
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# One LLM instance, configured from the notebook constants, shared by the
# synonym retriever and the query engine.
llm = OpenAI(model=LLM_MODEL, temperature=TEMPERATURE)

# Sub-retriever that uses the LLM to derive keywords / synonyms from the query
# and looks them up in the index's property graph store.
sub_retriever = LLMSynonymRetriever(
    index.property_graph_store,  # graph store of the loaded index is the data source
    llm=llm,  # LLM used by the retriever
    include_text=True,  # include the source chunk text in the retrieved paths
    max_keywords=100,  # upper bound on the number of generated keywords
    path_depth=5,  # follow relations up to 5 levels away from a matched node
)

# Index-level retriever that wraps the sub-retriever; kept available for
# calling retrieval directly.
retriever = index.as_retriever(sub_retrievers=[sub_retriever])

# Build the query engine from the sub-retriever itself. `as_query_engine`
# forwards `sub_retrievers` to `as_retriever`, so handing it the already
# wrapped `retriever` would nest one index retriever inside another for no
# benefit. The configured LLM is passed explicitly so the answer is not
# synthesized by the library's default LLM.
query_engine = index.as_query_engine(
    llm=llm,
    sub_retrievers=[sub_retriever],
)

# Ask who the main people involved in the Panama Papers scandal were and
# print the text of the synthesized answer.
# NOTE(review): the saved output of this cell is "Empty Response", which
# suggests that no graph nodes were retrieved for this question - presumably
# none of the generated keywords matched a node name in the graph. Inspect
# `retriever.retrieve(question)` to confirm before tuning the parameters above.
question = "Who were the main people involved in Panama Papers scandal?"
print(query_engine.query(question).response)

Empty Response
