# Trailhead


In [1]:
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv())
import nest_asyncio
nest_asyncio.apply()
from loguru import logger

In [2]:
import os

QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", "")
QDRANT_HOSTED_URL = os.environ.get("QDRANT_HOSTED_URL", "")
QDRANT_PORT = os.environ.get("QDRANT_PORT", 6333)

len(QDRANT_API_KEY), QDRANT_API_KEY[:10], len(QDRANT_HOSTED_URL), QDRANT_HOSTED_URL[:10], QDRANT_PORT, type(QDRANT_PORT)

(54, 'PM4CDFLF-s', 75, 'https://94', 6333, int)

In [3]:
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

llm = OpenAI(
    model="gpt-4o",
)
embedding_model = OpenAIEmbedding(
    model="text-embedding-3-large",
)

In [4]:
from llama_index.core import VectorStoreIndex
from llama_index.core.chat_engine.types import ChatMode
from llama_index.core.vector_stores.types import VectorStoreQueryMode
from llama_index.vector_stores.qdrant import QdrantVectorStore
import qdrant_client

COLLECTION_NAME = "trailhead"
q_client= qdrant_client.QdrantClient(
    url=QDRANT_HOSTED_URL, 
    port=QDRANT_PORT, 
    api_key=QDRANT_API_KEY
)

In [5]:
trailhead_vector_store = QdrantVectorStore(
    collection_name=COLLECTION_NAME,
    client=q_client,
    prefer_grpc=True,
    parallel=8,
    enable_hybrid=False
)

trailhead_index = VectorStoreIndex.from_vector_store(
    vector_store=trailhead_vector_store,
    embed_model=embedding_model,
    show_progress=True
)

In [6]:
trailhead_vector_store._collection_exists(COLLECTION_NAME)

True

In [7]:
trailhead_retriever = trailhead_index.as_retriever(
    similarity_top_k=8,
    vector_store_query_mode=VectorStoreQueryMode.MMR,
    vector_store_kwargs={
        "mmr_prefetch_k": 16,
    },
)

In [8]:
nodes = trailhead_retriever.retrieve("What AI products are there for advertisers?")
len(nodes)

8

In [11]:
[n.score for n in nodes]

[0.37856883,
 0.37558502,
 0.37520227,
 0.35171467,
 0.33147365,
 0.3210597,
 0.31702423,
 0.31585655]

In [13]:
from llama_index.core.response_synthesizers import get_response_synthesizer, ResponseMode
trailhead_engine = trailhead_index.as_chat_engine(
    chat_mode=ChatMode.BEST,
    llm=llm,
    similarity_top_k=8,
    vector_store_query_mode=VectorStoreQueryMode.MMR,
    vector_store_kwargs={
        "mmr_prefetch_k": 16,
    },
    response_synthesizer=get_response_synthesizer(
        response_mode=ResponseMode.TREE_SUMMARIZE,
    )
)

In [14]:
response = trailhead_engine.chat("Where should I be using batch Apex?")
type(response), vars(response)

(llama_index.core.chat_engine.types.AgentChatResponse,
 {'response': 'Batch Apex should be used when processing large data sets that could potentially exceed governor limits in a single execution. It allows for the processing of records in smaller, manageable chunks to avoid hitting limits such as the SOQL query limit or the number of DML statements allowed.',
  'sources': [ToolOutput(content='Batch Apex should be used when processing large data sets that could potentially exceed governor limits in a single execution. It allows for the processing of records in smaller, manageable chunks to avoid hitting limits such as the SOQL query limit or the number of DML statements allowed.', tool_name='query_engine_tool', raw_input={'input': 'Where should I be using batch Apex?'}, raw_output=Response(response='Batch Apex should be used when processing large data sets that could potentially exceed governor limits in a single execution. It allows for the processing of records in smaller, manageable

In [15]:
from IPython.display import Markdown, display

In [16]:
display(Markdown(response.response)), response.source_nodes[0].score if len(response.source_nodes) > 0 else ""

Batch Apex should be used when processing large data sets that could potentially exceed governor limits in a single execution. It allows for the processing of records in smaller, manageable chunks to avoid hitting limits such as the SOQL query limit or the number of DML statements allowed.

(None, 0.49708623)

In [17]:

response = trailhead_engine.chat("What is Future Apex used for?  Please give a concrete example.")
display(Markdown(response.response)), len(response.source_nodes)

Future Apex is used for executing long-running operations asynchronously in Salesforce. 

### Concrete Example:
Suppose you have a Salesforce application that needs to integrate with an external web service to fetch additional data whenever a record is created or updated. Calling this web service synchronously could slow down the user experience or even hit governor limits if the service takes too long to respond.

By using Future Apex, you can offload this web service call to run asynchronously. This means that the record creation or update process can complete without waiting for the web service response, thus improving performance and user experience.

Here's a simple example of how you might implement this:

```apex
public class MyIntegrationClass {
    @future(callout=true)
    public static void callExternalService(String recordId) {
        // Logic to call the external web service
        // Example: HTTP request to fetch additional data
        HttpRequest req = new HttpRequest();
        req.setEndpoint('https://api.example.com/data/' + recordId);
        req.setMethod('GET');
        
        Http http = new Http();
        HttpResponse res = http.send(req);
        
        // Process the response
        if (res.getStatusCode() == 200) {
            // Handle successful response
            String responseBody = res.getBody();
            // Update Salesforce records or perform other actions
        } else {
            // Handle error response
        }
    }
}
```

In this example, the `callExternalService` method is annotated with `@future(callout=true)`, allowing it to perform a callout to an external service asynchronously.

(None, 8)

## Sentence Window Node Parsing

We use the `SentenceWindowNodeParser` to parse documents into single sentences per node. Each node also contains a "window" with the sentences on either side of the node sentence.

Then, after retrieval, before passing the retrieved sentences to the LLM, the single sentences are replaced with a window containing the surrounding sentences using the `MetadataReplacementNodePostProcessor`.

This is most useful for large documents/indexes, as it helps to retrieve more fine-grained details.

By default, the sentence window is 5 sentences on either side of the original sentence.

In this case, chunk size settings are not used, in favor of following the window settings.


In [18]:
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.node_parser import SentenceSplitter
from typing import Callable, List

def split_by_sentence_splitter() -> Callable[[str], List[str]]:
    
    splitter = SentenceSplitter(
        chunk_size=1024,
        chunk_overlap=256
    )

    def split(text: str) -> List[str]:
        # The SentenceSplitter's split_text method returns a list of strings which are already sentences, 
        # so we don't need to do additional processing :-)
        return splitter.split_text(text)

    return split

In [19]:
text_splitter = SentenceSplitter.from_defaults(
    chunk_size=256,
    chunk_overlap=32,
)

node_parser = SentenceWindowNodeParser.from_defaults(
    sentence_splitter=split_by_sentence_splitter(),
    include_metadata=True,
    include_prev_next_rel=True,
    window_size=5,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [25]:
from llama_index.core import Settings

Settings.llm = llm
Settings.embed_model = embedding_model
Settings.text_splitter = text_splitter

In [17]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor

wider_query_engine = trailhead_index.as_query_engine(
    similarity_top_k=2,
    node_postprocessors=[
        MetadataReplacementPostProcessor(
            target_metadata_key="window",
        ),
    ],
    verbose=True
)

In [None]:
response = wider_query_engine.query("What is Future Apex used for?  Please give a concrete example.")
display(Markdown(response.response)), len(response.source_nodes)


Let's check the original sentence that was retrieved for each node, as well as the actual window of sentences that was sent to the LLM.


In [None]:
from pprint import pprint
window = response.source_nodes[0].node
pprint(window.metadata)


In [None]:
original_text = response.source_nodes[0].node.metadata["original_text"]

display(Markdown(f"**Original Sentence:**\n{original_text}"))
display(Markdown(f"**Window:**\n{window}"))