# Initial Setup and define Environment Variables

In [1]:
# initial setup

import os
import sys
from getpass import getpass
import nest_asyncio
from IPython.display import Markdown, display

# llama-parse is async-first, running the async code in a notebook requires the use of nest_asyncio
nest_asyncio.apply()


In [2]:
# Define environment variables
NOTION_TOKEN = os.getenv("NOTION_TOKEN")
DATABASE_ID = os.getenv("NOTION_DATABASE_ID")

CO_API_KEY = os.environ['CO_API_KEY'] or getpass("Enter your Cohere API key: ")

QDRANT_URL = os.environ['QDRANT_URL']
QDRANT_API_KEY = os.environ['QDRANT_API_KEY']

OPENAI_API_KEY = os.environ['OPENAI_API_KEY']


## Set up observability with Llamatrace

In [3]:
# ... existing code ...

# Update the tracing setup
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor

from phoenix.otel import register
from openinference.instrumentation.openai import OpenAIInstrumentor

# Add Phoenix API Key for tracing
PHOENIX_API_KEY = "4a8adf47667f6a3d533:b229a5b"
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"

# Create and set the tracer provider
tracer_provider = TracerProvider()

# configure the Phoenix tracer
tracer_provider = register(
  project_name="my-llm-app", # Default is 'default'
  endpoint="https://app.phoenix.arize.com/v1/traces",
)

#OpenAIInstrumentor().instrument(tracer_provider=tracer_provider)
LlamaIndexInstrumentor().instrument(tracer_provider=tracer_provider)

OpenTelemetry Tracing Details
|  Phoenix Project: my-llm-app
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: https://app.phoenix.arize.com/v1/traces
|  Transport: HTTP
|  Transport Headers: {'api_key': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



# Set Up the Notion Processor

# Data Pipeline Execution

## Set up the LLM, Embeddings and Vector Store

In [4]:
from llama_index.llms.cohere import Cohere
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.cohere import CohereEmbedding
from qdrant_client import QdrantClient, AsyncQdrantClient
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.qdrant import QdrantVectorStore

from llama_index.core.settings import Settings


COLLECTION_NAME = "Notion_vector_store"

# Set up the LLM
llm_openai = OpenAI(
    model="gpt-4o-mini", 
    temperature=0.5,
    api_key=OPENAI_API_KEY,
    )

Settings.llm = llm_openai

# Set up the Embeddings
embed_model_openai = OpenAIEmbedding(
    model="text-embedding-3-large", 
    api_key=OPENAI_API_KEY
    )

Settings.embed_model = embed_model_openai

# set up the vector store client
client = QdrantClient(
    location=QDRANT_URL, 
    api_key=QDRANT_API_KEY
    )
# set up the async client
aclient = AsyncQdrantClient(
    location=QDRANT_URL,
    api_key=QDRANT_API_KEY
    )
# set up the vector store
vector_store = QdrantVectorStore(
    client=client, 
    aclient=aclient, 
    collection_name=COLLECTION_NAME,
    )

  from .autonotebook import tqdm as notebook_tqdm
Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


## Build Index over Vector Database

In [5]:
from llama_index.core import StorageContext
from llama_index.core import SimpleKeywordTableIndex, VectorStoreIndex

# storage context for caching (I think)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store
    )


# Create index from vector store with storage context
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context,  # Add storage context
    embed_model=embed_model_openai,
    show_progress=True  # Optional: helps track progress
)

## Set Up the Workflow for querying

In [6]:
from llama_index.core.workflow import Event
from llama_index.core.schema import NodeWithScore


class RetrieverEvent(Event):
    """Result of running retrieval"""

    nodes: list[NodeWithScore]
    
class PostRetrievalTransform(Event):
    """Result of running post retrieval transform"""

    nodes_transformed: list[NodeWithScore]
    
class ProgressEvent(Event):
    msg: str

# Workflow breakdown

## Retriever
Two retrievers are used:
- vector retriever - performs semantic search on embeddings
- bm25 - determines a document's relevance to a given query and ranks documents based on their relevance scores

The QueryFusionRetriever does some stuff with these two retrievers to get the best results!

Other Options - Retrievers:
- KeywordTableSimpleRetriever - regex keyword search on text content

## Post-Retriever Transforms

### Sentence Embedding Optimizer
Selects the most relevant sentences and shorten the input text. The optimizer removes sentences that are not related to the query using embeddings.



In [7]:
from llama_index.core.data_structs import Node
from llama_index.core.response_synthesizers import ResponseMode
from llama_index.core import get_response_synthesizer

from llama_index.core.workflow import (
    Context,
    Workflow,
    StartEvent,
    StopEvent,
    step,
)

from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.retrievers import QueryFusionRetriever

from llama_index.core.postprocessor import SentenceEmbeddingOptimizer, SimilarityPostprocessor

from llama_index.core import PromptTemplate
from prompts import QUERY_GEN_PROMPT

QUERY_GEN_PROMPT_TEMPLATE = PromptTemplate(QUERY_GEN_PROMPT)

class RAGWorkflow(Workflow):
    @step
    async def retrieve(self, ctx: Context, ev: StartEvent) -> RetrieverEvent | None:
        "Entry point for RAG, triggered by a StartEvent with `query`."
        
        ctx.write_event_to_stream(ProgressEvent(msg="Querying the database"))
        
        query = ev.get("query")
        index = ev.get("index")

        if not query:
            return None

        print(f"Query the database with: {query}")

        # store the query in the global context
        await ctx.set("query", query)

        # get the index from the global context
        if index is None:
            print("Index is empty, load some documents before querying!")
            return None
        
        # define the retrievers
        # We want to retrieve more nodes then less so set the k value high. We can filter out
        # nodes in post-processing
        vector_retriever = index.as_retriever(similarity_top_k=20)
        bm25_retriever = BM25Retriever.from_defaults(nodes=index.vector_store.get_nodes(), similarity_top_k=20)
        
        retriever = QueryFusionRetriever(
            [vector_retriever, bm25_retriever],
            similarity_top_k=10,
            
            num_queries=3,  # Set to 1 to disable query generation
            mode="reciprocal_rerank",
            use_async=True,
            verbose=True,
            llm=llm_openai,
            query_gen_prompt=QUERY_GEN_PROMPT_TEMPLATE
        )

        nodes = await retriever.aretrieve(query)
        print(f"Retrieved {len(nodes)} nodes.")
        return RetrieverEvent(nodes=nodes)
    
    @step
    async def post_retrieval_transform(self, ctx: Context, ev: RetrieverEvent) -> PostRetrievalTransform:
        """Transform the retrieved node before synthesizing"""
        
        #similarity_postprocessor = SimilarityPostprocessor(similarity_cutoff=0.7)
        
        sentence_embedding_postprocessor = SentenceEmbeddingOptimizer(
            embed_model=embed_model_openai,
            #percentile_cutoff=0.5,
            context_before=2,
            context_after=2,
            threshold_cutoff=0.7
        )
        nodes = ev.nodes
        #nodes_transformed = similarity_postprocessor.postprocess_nodes(nodes=nodes)
        #print(f"First Transform: {len(nodes_transformed)} nodes.")

        nodes_transformed = sentence_embedding_postprocessor.postprocess_nodes(nodes=nodes)
        print(f"Second Transform: {len(nodes_transformed)} nodes.")
        
        return PostRetrievalTransform(nodes_transformed=nodes_transformed)
        

    @step
    async def synthesize(self, ctx: Context, ev: PostRetrievalTransform) -> StopEvent:
        """Return a streaming response using reranked nodes."""
        
        prompt = """When possible, group output based on common fields such as Employer or Project Name and make sure to associate the response with the projects they were extracted from"""
        
        qa_prompt_tmpl = ('''
            Context information is below.\n
            ---------------------\n
            {context_str}\n
            ---------------------\n
            Given the context information and not prior knowledge, 
            answer the query.\n
            Please synthesize the information by grouping details based on common fields, 
            such as 'Employer' and 'Project Name.' For each section, indicate the specific 
            project the information originated from. Aim to present a structured summary, 
            with grouped information under each relevant field.\n
            Query: {query_str}\n
            Answer:'''
        )
        
        qa_prompt_tmpl_2 = ('''
            Context information is below.\n
            ---------------------\n
            {context_str}\n
            ---------------------\n
            Given the context information and not prior knowledge, 
            answer the query.\n
            Please synthesize the information first answering the query, then provide a summary of the information grouped by common fields.\n
            Query: {query_str}\n
            Answer:'''
        )
        
        qa_prompt = PromptTemplate(qa_prompt_tmpl)
                
        response_synthesizer = get_response_synthesizer(
            response_mode=ResponseMode.TREE_SUMMARIZE,
            llm=llm_openai,
            streaming=True,
            verbose=True,
            summary_template=qa_prompt,
            )
        
        query = await ctx.get("query", default=None)
        nodes = ev.nodes_transformed
        response = await response_synthesizer.asynthesize(query, nodes=nodes)
        return StopEvent(result=response)

resource module not available on Windows


In [8]:
workflow = RAGWorkflow(verbose=True)

query = "What challenges were encountered and how were they overcome"
#query = "What is the weather like today?"

# Run a query
result = await workflow.run(query=query, index=index, )
async for chunk in result.async_response_gen():
     print(chunk, end="", flush=True)


# workflow = RAGWorkflow(verbose=True)

# handler = workflow.run(query=query, index=index)

# async for ev in handler.stream_events():
#     if isinstance(ev, ProgressEvent):
#         print(ev.msg)

# result = await handler


Running step retrieve
Query the database with: What challenges were encountered and how were they overcome
Generated queries:
1. What difficulties were faced, and what strategies were employed to resolve them?
2. What problems arose during the process, and how were they addressed?
Retrieved 10 nodes.
Step retrieve produced event RetrieverEvent
Running step post_retrieval_transform
Second Transform: 10 nodes.
Step post_retrieval_transform produced event PostRetrievalTransform
Running step synthesize
1 text chunks after repacking


Failed to detach context
Traceback (most recent call last):
  File "c:\Users\rbt7r\OneDrive\Documents\VSCode Workspace\RAG_Implementation\.conda\lib\site-packages\opentelemetry\context\__init__.py", line 152, in detach
    _RUNTIME_CONTEXT.detach(token)
  File "c:\Users\rbt7r\OneDrive\Documents\VSCode Workspace\RAG_Implementation\.conda\lib\site-packages\opentelemetry\context\contextvars_context.py", line 50, in detach
    self._current_context.reset(token)  # type: ignore
ValueError: <Token var=<ContextVar name='current_context' default={} at 0x000001E582FF2250> at 0x000001E5CA2D9600> was created in a different Context


Step synthesize produced event StopEvent
### Summary of Challenges Encountered and Solutions Implemented

#### Employer: Personal
**Project Name: ChatGPT Organization and Navigation Tool - Project Management and Notes**

- **Challenge: React Fundamentals**
  - **Solution:** Revisited React best practices, enhancing knowledge in state management and component structure, leading to a more robust application.

- **Challenge: UI Cohesion and Attractiveness**
  - **Solution:** Relied on ChatGPT for discussions and iterative design processes to bridge the gap between visual imagination and CSS implementation.

- **Challenge: Time Management**
  - **Solution:** Focused on prioritizing the Minimum Viable Product (MVP) to ensure timely completion of essential features.

---

#### Employer: Personal
**Project Name: NoteNudge - Software Application**

- **Challenge: Data Workflow and Text Formatting**
  - **Solution:** Tested various Node modules and Python packages for exporting data from the Qu

In [39]:
result

AsyncStreamingResponse(response_gen=<async_generator object astream_chat_response_to_tokens.<locals>.gen at 0x000002A766E2BD40>, source_nodes=[NodeWithScore(node=TextNode(id_='84acf3aa-c3ab-472d-aeb4-0518f890388c', embedding=None, metadata={'Tags': 'CSS, ChatGPT, Chrome Extension, HTML, JavaScript, Project Management, React, State Flow Diagram, Web Development', 'Description': 'A detailed overview of how this project was managed', 'Name': 'ChatGPT Organization and Navigation Tool - Project Management and Notes', 'Employer': 'Personal', 'Project Size': 'Large', 'When': '2024-07-08', 'Position': 'Fun', 'header': 'Challenges and Solutions:'}, excluded_embed_metadata_keys=['Project Size', 'When', 'Position', 'Tags'], excluded_llm_metadata_keys=['Project Size', 'When', 'Position', 'Tags'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='a42fea15-cacc-42dc-8dde-4014f453c87d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'Tags': 'CSS, ChatGPT, Chrome Extension, HTML

In [38]:
async for chunk in result.async_response_gen():
    print(chunk, end="", flush=True)

### Project: ChatGPT Organization and Navigation Tool - Project Management and Notes
**Employer:** Personal  
**Challenges and Solutions:**
- **React Fundamentals:** Initial difficulties with React's state management and component structure were addressed by revisiting React best practices, resulting in a more robust application. 
- **UI Cohesion:** Challenges in creating a visually appealing UI were mitigated through discussions and iterative design processes with ChatGPT.
- **Time Management:** The project extended beyond its initial timeline due to feature complexity; prioritizing the MVP allowed for timely completion of essential features.

---

### Project: NoteNudge - Software Application
**Employer:** Personal  
**Challenges and Solutions:**
- **Data Workflow and Text Formatting:** Determined workflow for exporting data from the Quill editor to a Word document. Resolved by testing various Node modules and Python libraries, ultimately selecting the best fit.
- **Non-Intrusive Use