In [20]:

from llama_index.core.workflow import (
    Context,
    Workflow,
    StartEvent,
    StopEvent,
    Event,
    step,
)
from llama_index.llms.openai import OpenAI
from utils.pdf_processor import process_pdfs_in_directory

In [21]:
from typing import Optional
class InitializeEvent(Event):
    """Payload-free signal consumed by the workflow's `initialize` step."""

class ConciergeEvent(Event):
    """Event consumed by the workflow's `concierge` step.

    Every field defaults to ``None`` so the event can be built with no
    arguments (``ConciergeEvent()``). Without explicit defaults, pydantic v2
    treats ``Optional[...]`` fields as *required* and the no-argument
    construction raises a validation error.
    """

    # Presumably the user's request text; not read by any code in this notebook.
    request: Optional[str] = None
    # Presumably the name of the step that just finished; not read in this notebook.
    just_completed: Optional[str] = None
    # Presumably a flag asking for assistance; not read in this notebook.
    need_help: Optional[bool] = None

class PDFIngestionEvent(Event):
    """Event consumed by the workflow's `ingest` step to trigger PDF ingestion."""

    # Directory name supplied by the step that emits this event.
    dirname: str

In [22]:
from vector_store.pinecone.client import PineconeClient

class PdfAgentWorkflow(Workflow):
    """Workflow that optionally ingests a directory of PDFs, then exposes the index.

    Event flow:
        StartEvent -> concierge -> InitializeEvent -> initialize
        initialize -> PDFIngestionEvent -> ingest -> InitializeEvent   (when a dirname is pending)
        initialize -> ConciergeEvent -> concierge -> StopEvent         (otherwise)

    The run result is ``{"index": <PineconeClient>}``.
    """

    # Single source of truth for the collection used for both upserting and querying.
    COLLECTION_NAME = "pdf-docs"

    @step(pass_context=True)
    async def concierge(self, ctx: Context, ev: ConciergeEvent | StartEvent) -> InitializeEvent | StopEvent | None:
        """Entry and exit point of the workflow.

        On ``StartEvent``: remember the optional ``dirname`` run argument in the
        context and request initialization.
        On ``ConciergeEvent``: finish the run, returning the client stored under
        ``ctx.data["index"]``.
        """
        if isinstance(ev, StartEvent):
            dirname = ev.get("dirname")
            # Truthiness (not `is not None`) so an empty string is treated as
            # "nothing to ingest", matching the check performed by `ingest`.
            if dirname:
                ctx.data["dirname"] = dirname

            return InitializeEvent()

        return StopEvent(result={"index": ctx.data["index"]})

    @step(pass_context=True)
    async def initialize(self, ctx: Context, ev: InitializeEvent) -> PDFIngestionEvent | ConciergeEvent:
        """Route to ingestion while a directory is pending; otherwise create the index client."""
        pending_dirname = ctx.data.get("dirname")
        if pending_dirname:
            return PDFIngestionEvent(dirname=pending_dirname)

        ctx.data["index"] = PineconeClient(collection_name=self.COLLECTION_NAME)
        # Pass the optional fields explicitly so construction never relies on
        # implicit defaults (pydantic v2 requires Optional fields without one).
        return ConciergeEvent(request=None, just_completed=None, need_help=None)

    @step(pass_context=True)
    async def ingest(self, ctx: Context, ev: PDFIngestionEvent) -> InitializeEvent | None:
        """Process the PDFs under ``ev.dirname`` and upsert the resulting nodes.

        The pending directory is always cleared and an ``InitializeEvent`` is
        always returned, so the workflow proceeds to build the index instead of
        stalling until timeout when the directory name is empty.
        """
        # Use the event payload rather than re-reading shared context state.
        dirname = ev.dirname
        if dirname:
            nodes = process_pdfs_in_directory(dirname)

            client = PineconeClient(collection_name=self.COLLECTION_NAME)
            client.upsert_indices(nodes)

        # Clear the pending directory so `initialize` does not loop back here.
        ctx.data["dirname"] = None
        return InitializeEvent()


In [23]:
workflow = PdfAgentWorkflow(timeout=180)
result = await workflow.run(dirname="data")
print(str(result))

Upserted vectors: 100%|██████████| 194/194 [00:01<00:00, 140.07it/s]

{'index': <vector_store.pinecone.client.PineconeClient object at 0x16ad351d0>}





In [25]:
from llama_index.utils.workflow import (
    draw_all_possible_flows,
    draw_most_recent_execution,
)

# Render every possible event path of the workflow class.
draw_all_possible_flows(PdfAgentWorkflow, filename="workflow_all.html")

# Render the path taken by the most recent run of the `workflow` instance.
draw_most_recent_execution(workflow, filename="workflow_recent.html")

workflow_all.html
workflow_recent.html


In [36]:
# set Logging to DEBUG for more detailed outputs
query_engine = result["index"].index.as_query_engine()
response = query_engine.query("how does Environmental difficulties at present are also challenging the sustainability of the Mediterranean way of living")

In [37]:
from IPython.display import Markdown, display

# Render the response text as Markdown in the cell output.
display(Markdown(str(response)))

Environmental difficulties at present are challenging the sustainability of the Mediterranean way of living due to factors such as water scarcity, land degradation, and declining biodiversity. Water scarcity is a significant issue in Mediterranean-neighboring countries, driven by decreasing water availability and increasing water needs, particularly for agriculture. Land degradation is also a concern, caused by urbanization, waste pollution, soil erosion, and other factors, impacting food production. Global climate changes further exacerbate water scarcity, land degradation, and failures in crop, fisheries, and livestock production. Additionally, the decline in biodiversity and the shift towards monocultures and standardized cultivation practices negatively affect local food production in the Mediterranean regions.