In [1]:
from typing import Optional
from orchestrator.ingestion.vector_store.pinecone.client import PineconeClient
from llama_index.core.workflow import (
    Context,
    Workflow,
    StartEvent,
    StopEvent,
    
    Event,
    step,
)
from orchestrator.ingestion.utils.pdf_processor import process_pdfs_in_directory


  from tqdm.autonotebook import tqdm


In [2]:
# events.py
class InitializeEvent(Event):
    """Payload-free signal that the index handle should be created.

    In this notebook it is emitted by the ``concierge`` step (when the start
    event carries no ``dirname``) and by the ``ingest`` step, and it is
    consumed by the ``initialize`` step.
    """

class ConciergeEvent(Event):
    """Event accepted by the ``concierge`` step alongside ``StartEvent``.

    Every field defaults to ``None`` so the event can be built without
    arguments, matching ``OrchestratorEvent``. Without an explicit default an
    ``Optional[...]`` field is still *required* under Pydantic v2.

    No step visible in this notebook reads these fields; ``concierge`` only
    checks the event's type.
    """

    # NOTE(review): presumably the user's request text — confirm with callers.
    request: Optional[str] = None
    # NOTE(review): presumably the name of the step that just finished — confirm.
    just_completed: Optional[str] = None
    # NOTE(review): presumably set when the user asks for help — confirm.
    need_help: Optional[bool] = None

class OrchestratorEvent(Event):
    """Routing request handled by the ``orchestrator`` step.

    The orchestrator in this notebook recognises ``"ingest"`` and ``"query"``;
    any other value (including the default ``None``) ends the workflow with a
    "did not recognize" message.
    """

    # Name of the action the orchestrator should dispatch to.
    request: Optional[str] = None

class PDFIngestionEvent(Event):
    """Event that triggers the ``ingest`` step.

    ``request`` defaults to ``None`` so the event can be built without
    arguments, matching ``OrchestratorEvent``. Without an explicit default an
    ``Optional[...]`` field is still *required* under Pydantic v2.
    """

    # The orchestrator sets this to "start_pdf_ingestion"; the ``ingest`` step
    # shown in this notebook does not read it.
    request: Optional[str] = None
    
class QueryEvent(Event):
    """Event that triggers the ``query_index`` step."""

    # Question text handed to the query engine's ``query`` call.
    query: str

class QueryResultEvent(Event):
    """Event carrying a query result as text.

    No step visible in this notebook produces or consumes this event.
    """

    # Result text. NOTE(review): presumably the query engine's answer — confirm.
    result: str

In [3]:
class WorkflowSteps:
    """Step implementations for the concierge workflow.

    Each coroutine receives the workflow ``Context`` and an incoming event and
    returns the next event to dispatch. State shared between steps lives in
    ``ctx.data`` under the keys ``"dirname"`` (directory of PDFs to ingest) and
    ``"index"`` (the ``PineconeClient`` used for querying).
    """

    # Pinecone collection used by both ingestion and querying.
    COLLECTION_NAME = "pdf-docs"
    # Question issued when the orchestrator receives a "query" request.
    DEFAULT_QUERY = "how does Environmental difficulties at present are also challenging the sustainability?"

    @step(pass_context=True)
    async def concierge(self, ctx: Context, ev: ConciergeEvent | StartEvent) -> InitializeEvent | OrchestratorEvent | None:
        """Entry point: choose between ingestion, initialization and querying.

        Args:
            ctx: Workflow context; ``ctx.data["dirname"]`` is set when the
                start event supplies a ``dirname``.
            ev: The ``StartEvent`` of a run, or a ``ConciergeEvent``.

        Returns:
            ``OrchestratorEvent(request="ingest")`` when the start event
            carries a ``dirname``, ``InitializeEvent`` when it does not, and
            ``OrchestratorEvent(request="query")`` for a ``ConciergeEvent``.
        """
        if not isinstance(ev, StartEvent):
            # Any non-start event goes straight to the orchestrator for querying.
            return OrchestratorEvent(request="query")

        dirname = ev.get("dirname")
        if dirname is None:
            return InitializeEvent()

        # Remember the directory so the ingest step can pick it up later.
        ctx.data["dirname"] = dirname
        return OrchestratorEvent(request="ingest")

    @step(pass_context=True)
    async def initialize(self, ctx: Context, ev: InitializeEvent) -> OrchestratorEvent:
        """Create the Pinecone client, store it as ``ctx.data["index"]`` and request a query."""
        ctx.data["index"] = PineconeClient(collection_name=self.COLLECTION_NAME)
        return OrchestratorEvent(request="query")

    @step(pass_context=True)
    async def orchestrator(self, ctx: Context, ev: OrchestratorEvent) -> QueryEvent | PDFIngestionEvent | StopEvent:
        """Translate ``ev.request`` into the event for the matching step.

        Returns:
            ``PDFIngestionEvent`` for ``"ingest"``, ``QueryEvent`` carrying
            ``DEFAULT_QUERY`` for ``"query"``, otherwise a ``StopEvent`` whose
            result holds an explanatory message.
        """
        if ev.request == "ingest":
            return PDFIngestionEvent(request="start_pdf_ingestion")

        if ev.request == "query":
            return QueryEvent(query=self.DEFAULT_QUERY)

        return StopEvent(result={"message": "Orchestrator did not recognize the event"})

    @step(pass_context=True)
    async def query_index(self, ctx: Context, ev: QueryEvent) -> StopEvent:
        """Run ``ev.query`` against the stored index and finish the workflow.

        Returns:
            ``StopEvent`` whose result is ``{"query_result": <response as str>}``.

        Raises:
            KeyError: If ``ctx.data["index"]`` has not been set by ``initialize``.
        """
        query_engine = ctx.data["index"].as_query_engine()
        # NOTE(review): this is a synchronous call inside a coroutine; if the
        # engine offers an async variant it would avoid blocking the loop — confirm.
        response = query_engine.query(ev.query)
        return StopEvent(result={"query_result": str(response)})

    @step(pass_context=True)
    async def ingest(self, ctx: Context, ev: PDFIngestionEvent) -> InitializeEvent:
        """Process the PDFs under ``ctx.data["dirname"]`` and upsert them.

        When no directory is recorded (missing, ``None`` or empty) there is
        nothing to ingest, so the step proceeds directly to initialization.
        Returning ``None`` here would emit no event and leave the workflow
        idle until its timeout.

        Returns:
            ``InitializeEvent`` in every case, as the annotation declares.
        """
        dirname = ctx.data.get("dirname")
        if dirname:
            nodes = process_pdfs_in_directory(dirname)

            client = PineconeClient(collection_name=self.COLLECTION_NAME)
            client.upsert_indices(nodes)
            # Clear the directory once its contents have been upserted.
            ctx.data["dirname"] = None
        return InitializeEvent()

In [4]:
class ConciergeWorkflow(Workflow):
    """Workflow whose steps delegate to a shared ``WorkflowSteps`` instance.

    Every method below is a thin wrapper: it forwards the context and event to
    the method of the same name on ``steps`` and returns that result.
    """
    
    # Class attribute: one WorkflowSteps instance is shared by every
    # ConciergeWorkflow instance.
    steps = WorkflowSteps()

    @step(pass_context=True)
    async def concierge(self, ctx: Context, ev: ConciergeEvent | StartEvent) -> InitializeEvent | OrchestratorEvent | None:
        """Forward the start/concierge event to ``WorkflowSteps.concierge``."""
        return await self.steps.concierge(ctx, ev)

    @step(pass_context=True)
    async def initialize(self, ctx: Context, ev: InitializeEvent) -> OrchestratorEvent:
        """Forward the event to ``WorkflowSteps.initialize``."""
        return await self.steps.initialize(ctx, ev)

    # NOTE(review): ConciergeEvent appears in this return annotation although
    # WorkflowSteps.orchestrator never returns it; presumably it is listed so
    # the workflow's event validation sees a producer for ConciergeEvent —
    # confirm before removing.
    @step(pass_context=True)
    async def orchestrator(self, ctx: Context, ev: OrchestratorEvent) -> ConciergeEvent | PDFIngestionEvent | QueryEvent | StopEvent:
        """Forward the event to ``WorkflowSteps.orchestrator``."""
        return await self.steps.orchestrator(ctx, ev)

    @step(pass_context=True)
    async def ingest(self, ctx: Context, ev: PDFIngestionEvent) -> InitializeEvent:
        """Forward the event to ``WorkflowSteps.ingest``."""
        return await self.steps.ingest(ctx, ev)
    
    @step(pass_context=True)
    async def query_index(self, ctx: Context, ev: QueryEvent) -> StopEvent:
        """Forward the event to ``WorkflowSteps.query_index``."""
        return await self.steps.query_index(ctx, ev)

In [5]:
# main.py

from workflow import ConciergeWorkflow
from llama_index.utils.workflow import draw_all_possible_flows

draw_all_possible_flows(ConciergeWorkflow, filename="concierge_flows.html")

concierge = ConciergeWorkflow(timeout=180, verbose=True)
result = await concierge.run()
# query_engine = result["index"].index.as_query_engine()
# response = query_engine.query("how does Environmental difficulties at present are also challenging the sustainability of the Mediterranean way of living")
print(str(result))


concierge_flows.html
Running step concierge
Step concierge produced event InitializeEvent
Running step initialize
Step initialize produced event OrchestratorEvent
Running step orchestrator
Step orchestrator produced event QueryEvent
Running step query_index
Step query_index produced event StopEvent
{'query_result': 'Environmental difficulties at present are challenging the sustainability due to factors such as water scarcity induced by decreasing water availabilities and increasing water needs, with agricultural demands representing a significant portion of total water expenditure. Additionally, land degradation caused by various factors like urbanization, waste pollution, soil erosion, and limited agricultural-matched soil expansion further challenges the sustainability of the Mediterranean way of living. These difficulties are exacerbated by global climate changes, leading to water scarcity, land degradation, and failures in crop, fisheries, and livestock productions.'}


In [6]:
from IPython.display import Markdown, display

# `result` is the dict passed to StopEvent. Render the answer text itself as
# Markdown rather than the dict's repr; fall back to the previous rendering
# when there is no "query_result" entry (e.g. the orchestrator's "message").
answer_text = result.get("query_result") if isinstance(result, dict) else None
display(Markdown(answer_text if answer_text is not None else f"{result}"))

{'query_result': 'Environmental difficulties at present are challenging the sustainability due to factors such as water scarcity induced by decreasing water availabilities and increasing water needs, with agricultural demands representing a significant portion of total water expenditure. Additionally, land degradation caused by various factors like urbanization, waste pollution, soil erosion, and limited agricultural-matched soil expansion further challenges the sustainability of the Mediterranean way of living. These difficulties are exacerbated by global climate changes, leading to water scarcity, land degradation, and failures in crop, fisheries, and livestock productions.'}