In [1]:
from typing import Optional
from orchestrator.ingestion.vector_store.pinecone.client import PineconeClient
from llama_index.core.workflow import (
    Context,
    Workflow,
    StartEvent,
    StopEvent,
    
    Event,
    step,
)
from orchestrator.utils.pdf_processor import process_pdfs_in_directory


  from tqdm.autonotebook import tqdm


In [2]:
# events.py
class InitializeEvent(Event):
    """Payload-free event handled by the ``initialize`` step."""

class ConciergeEvent(Event):
    """Event accepted by the ``concierge`` step alongside ``StartEvent``.

    All fields default to ``None`` so the event can be constructed without
    arguments, matching ``OrchestratorEvent``. None of the steps in this
    notebook read these fields.
    """

    request: Optional[str] = None
    just_completed: Optional[str] = None
    need_help: Optional[bool] = None

class OrchestratorEvent(Event):
    """Event handled by the ``orchestrator`` step.

    ``request`` selects the next action: ``"ingest"`` and ``"query"`` are the
    values the orchestrator acts on; anything else (including the default
    ``None``) makes it stop with a message.
    """

    request: Optional[str] = None

class PDFIngestionEvent(Event):
    """Event handled by the ``ingest`` step.

    ``request`` defaults to ``None`` (consistent with ``OrchestratorEvent``);
    the ``ingest`` step in this notebook does not read it.
    """

    request: Optional[str] = None
    
class QueryEvent(Event):
    """Event handled by the ``query_index`` step; ``query`` is the text to run."""

    query: str

class QueryResultEvent(Event):
    """Event holding a query result as a string.

    NOTE(review): no step in this notebook produces or consumes this event.
    """

    result: str

In [3]:

from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import BaseSynthesizer

class RAGQueryEngine(CustomQueryEngine):
    """Query engine that retrieves nodes and synthesizes a response from them.

    Fields:
        retriever: fetches the nodes for a query string.
        response_synthesizer: builds the response from the query string and
            the retrieved nodes.
    """

    retriever: BaseRetriever
    response_synthesizer: BaseSynthesizer

    def custom_query(self, query_str: str):
        """Retrieve nodes for ``query_str`` and return the synthesized response object."""
        retrieved_nodes = self.retriever.retrieve(query_str)
        return self.response_synthesizer.synthesize(query_str, retrieved_nodes)

In [4]:
class WorkflowSteps:
    """Step implementations for the concierge workflow.

    Each coroutine receives the workflow ``Context`` and the triggering event
    and returns the next event (or ``None``). State shared between steps is
    kept in ``ctx.data``:

    * ``"dirname"`` - directory of PDFs to ingest, taken from the ``StartEvent``.
    * ``"query"``   - optional query text taken from the ``StartEvent``.
    * ``"index"``   - ``PineconeClient`` created by ``initialize``.
    """

    # Collection used by both ingestion and querying; kept in one place so the
    # two steps cannot drift apart.
    COLLECTION_NAME = "pdf-docs"

    # Query issued when the StartEvent does not supply one.
    DEFAULT_QUERY = "how does Environmental difficulties at present are also challenging the sustainability?"

    @step(pass_context=True)
    async def concierge(self, ctx: Context, ev: ConciergeEvent | StartEvent) -> InitializeEvent | OrchestratorEvent | None:
        """Entry step: route a start request to ingestion or initialization.

        For a ``StartEvent``: remembers an optional ``query``; if a non-empty
        ``dirname`` is given, stores it and requests ingestion, otherwise
        requests initialization. For any other event, requests a query.
        """
        if isinstance(ev, StartEvent):
            query = ev.get("query")
            if query:
                ctx.data["query"] = query

            dirname = ev.get("dirname")
            # Truthiness check (not `is not None`) to agree with `ingest`, which
            # ignores an empty dirname and would otherwise return no event.
            if dirname:
                ctx.data["dirname"] = dirname
                return OrchestratorEvent(request="ingest")  # Trigger the OrchestratorEvent for ingestion

            return InitializeEvent()

        # After initialization, direct to orchestrator for querying
        return OrchestratorEvent(request="query")

    @step(pass_context=True)
    async def initialize(self, ctx: Context, ev: InitializeEvent) -> OrchestratorEvent:
        """Create the ``PineconeClient`` and store it under ``ctx.data["index"]``."""
        ctx.data["index"] = PineconeClient(collection_name=self.COLLECTION_NAME)
        return OrchestratorEvent(request="query")  # After initializing, direct to query

    @step(pass_context=True)
    async def orchestrator(self, ctx: Context, ev: OrchestratorEvent) -> QueryEvent | PDFIngestionEvent | StopEvent:
        """Dispatch on ``ev.request``.

        ``"ingest"`` -> ``PDFIngestionEvent``; ``"query"`` -> ``QueryEvent``
        carrying the query stored in ``ctx.data["query"]`` (or
        ``DEFAULT_QUERY``); anything else stops the workflow with a message.
        """
        if ev.request == "ingest":
            return PDFIngestionEvent(request="start_pdf_ingestion")

        if ev.request == "query":
            query = ctx.data.get("query") or self.DEFAULT_QUERY
            return QueryEvent(query=query)

        return StopEvent(result={"message": "Orchestrator did not recognize the event"})

    @step(pass_context=True)
    async def query_index(self, ctx: Context, ev: QueryEvent) -> StopEvent:
        """Run ``ev.query`` through a ``RAGQueryEngine`` and stop with the result.

        The result dict has ``"query_result"`` (the response as a string) and
        ``"source_node"`` (content of the first source node, or ``None`` when
        the response has no source nodes).

        Raises:
            KeyError: if ``ctx.data["index"]`` has not been set by ``initialize``.
        """
        retriever = ctx.data["index"].as_retriever()
        synthesizer = get_response_synthesizer(response_mode="refine")
        query_engine = RAGQueryEngine(
            retriever=retriever, response_synthesizer=synthesizer
        )
        response = query_engine.query(ev.query)

        # Guard against an empty retrieval instead of raising IndexError.
        source_nodes = response.source_nodes
        source_text = source_nodes[0].get_content() if source_nodes else None

        return StopEvent(result={"query_result": str(response), "source_node": source_text})

    @step(pass_context=True)
    async def ingest(self, ctx: Context, ev: PDFIngestionEvent) -> InitializeEvent | None:
        """Process the PDFs under ``ctx.data["dirname"]`` and upsert them.

        Returns ``InitializeEvent`` after a successful upsert, or ``None`` (no
        follow-up event) when no directory is stored in the context.
        """
        dirname = ctx.data.get("dirname")
        if not dirname:
            return None

        nodes = process_pdfs_in_directory(dirname)

        client = PineconeClient(collection_name=self.COLLECTION_NAME)
        client.upsert_indices(nodes)
        # Clear the directory so it is not ingested a second time.
        ctx.data["dirname"] = None
        return InitializeEvent()

In [5]:
class ConciergeWorkflow(Workflow):
    """Workflow whose steps delegate to a shared ``WorkflowSteps`` instance.

    Each ``@step`` method here forwards its context and event to the method of
    the same name on ``steps`` and returns whatever that method returns.
    """

    # Class attribute: a single WorkflowSteps instance shared by every
    # ConciergeWorkflow instance.
    steps = WorkflowSteps()

    @step(pass_context=True)
    async def concierge(self, ctx: Context, ev: ConciergeEvent | StartEvent) -> InitializeEvent | OrchestratorEvent | None:
        """Forward to ``WorkflowSteps.concierge``."""
        return await self.steps.concierge(ctx, ev)

    @step(pass_context=True)
    async def initialize(self, ctx: Context, ev: InitializeEvent) -> OrchestratorEvent:
        """Forward to ``WorkflowSteps.initialize``."""
        return await self.steps.initialize(ctx, ev)

    # NOTE(review): WorkflowSteps.orchestrator never returns ConciergeEvent; it is
    # presumably listed here so that ConciergeEvent counts as a produced event
    # during workflow validation - confirm before removing it.
    @step(pass_context=True)
    async def orchestrator(self, ctx: Context, ev: OrchestratorEvent) -> ConciergeEvent | PDFIngestionEvent | QueryEvent | StopEvent:
        """Forward to ``WorkflowSteps.orchestrator``."""
        return await self.steps.orchestrator(ctx, ev)

    @step(pass_context=True)
    async def ingest(self, ctx: Context, ev: PDFIngestionEvent) -> InitializeEvent | None:
        """Forward to ``WorkflowSteps.ingest``, which returns ``None`` when no directory is set."""
        return await self.steps.ingest(ctx, ev)

    @step(pass_context=True)
    async def query_index(self, ctx: Context, ev: QueryEvent) -> StopEvent:
        """Forward to ``WorkflowSteps.query_index``."""
        return await self.steps.query_index(ctx, ev)

In [6]:
# main.py

from workflow import ConciergeWorkflow
from llama_index.utils.workflow import draw_all_possible_flows

draw_all_possible_flows(ConciergeWorkflow, filename="concierge_flows.html")

concierge = ConciergeWorkflow(timeout=180, verbose=True)
result = await concierge.run()
# query_engine = result["index"].index.as_query_engine()
# response = query_engine.query("how does Environmental difficulties at present are also challenging the sustainability of the Mediterranean way of living")
# print(str(result))
# print(result["source_node"])

concierge_flows.html
Running step concierge
Step concierge produced event InitializeEvent
Running step initialize
Step initialize produced event OrchestratorEvent
Running step orchestrator
Step orchestrator produced event QueryEvent
Running step query_index
len source nodes 2
Int. J. Environ. Res. Public Health 2019, 16, 942 2 of 16 Coinage of the term “Mediterranean diet” and its breakthrough to the attention of the medical public were made possible by the work of Ancel Keys, an American scientist who was the first to notice the relationship between the low incidence of cardiovascular disease in some traditional Mediterranean communities and their specific dietary habits [4]. Ensuing research confirmed the benefits brought by Mediterranean-derived dietary interventions not only in the primary and secondary prevention of cardiovascular disease, but also in the therapeutic approach of obesity, type 2 diabetes, metabolic syndrome, cancer or neurodegenerative diseases [5,6]. In the moment

In [7]:
from IPython.display import Markdown, display

# Show the answer text stored under "query_result" as rendered Markdown.
response = result["query_result"]
display(Markdown(data=response))

The Indo-Mediterranean diet is a dietary approach that combines elements of the traditional Mediterranean diet with foods commonly consumed in the Indian subcontinent. This diet includes whole grains, fruits, vegetables, walnuts, almonds, mustard or soybean oil, which are rich in alpha-linolenic acid. Research has shown that following an Indo-Mediterranean diet can lead to significant health benefits, particularly in reducing the risk of cardiovascular events. In a study involving Indian patients with pre-existing coronary heart disease or high cardiovascular risk, those following the Indo-Mediterranean diet experienced a notable reduction in the rate of cardiovascular death and risk for non-fatal myocardial infarction compared to a control group following a different dietary approach.