In [1]:
import os
import shutil
import time
import logging
from dotenv import load_dotenv
from typing import Iterator, List
# CrewAI imports
from crewai import Agent, Task, Crew, Process

# LLM
from langchain_community.chat_models.ollama import ChatOllama
from agent.utils.load_documents import covert_document
from agent.tools.retrieve_tool import RetrieveTool, IngestTool
from langchain_core.documents import Document
from langchain_core.document_loaders import BaseLoader
# from docling.document_converter import DocumentConverter
from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader, UnstructuredWordDocumentLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter


  warn(
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# PDF files fed to the loader below (relative paths).
file_path = [
    "data/2501.07329v2.pdf",
    "data/ielts_listening_practice_test_pdf_1_1_1ae068b05d.pdf",
]

In [3]:
# class DocumentPDFLoader(BaseLoader):
    
#     def __init__(self, filepath: List[str]) -> None: 
#         self._filepath = filepath if isinstance(filepath, list) else [filepath]
#         self._coverter = DocumentConverter()
    
#     def lazy_load (self)->Iterator[Document]:
#         for file in self._filepath:
#             dl = self._coverter.convert(file).document
#             text = dl.export_to_markdown()
#             yield Document(page_content=text)


In [4]:
# pdf_loader = DocumentPDFLoader(file_path)

class DocumentPDFLoader(BaseLoader):
    """Load one or more PDF files by chaining one ``PyPDFLoader`` per file.

    Args:
        filepath: List of PDF paths. A single non-list value is also accepted
            and is wrapped in a one-element list.
    """

    def __init__(self, filepath: List[str]) -> None:
        # Normalise to a list so the rest of the class only handles one shape.
        self._filepath = filepath if isinstance(filepath, list) else [filepath]
        self._loaders = [PyPDFLoader(file) for file in self._filepath]

    def lazy_load(self) -> Iterator[Document]:
        """Yield the documents of every configured file, in file order.

        Delegates to each underlying loader's own ``lazy_load`` so documents
        are produced on demand rather than building a full per-file list
        before the first one is yielded.
        """
        for loader in self._loaders:
            yield from loader.lazy_load()


In [5]:
# Build one loader covering every path listed in `file_path`.
pdf_loader = DocumentPDFLoader(file_path)

In [6]:
# Splitter used to cut the loaded documents into overlapping chunks.
# Sizes are measured with len(), i.e. in characters.
chunker = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_size=1000,
    chunk_overlap=200,
)


In [7]:
# Read every configured PDF into memory as Document objects.
documents = pdf_loader.load()

In [8]:
# Cut the loaded documents into chunks using the splitter configured above.
text_chunks = chunker.split_documents(documents)

In [9]:
text_chunks[40].metadata

{'source': 'data/ielts_listening_practice_test_pdf_1_1_1ae068b05d.pdf',
 'page': 3}

In [10]:
# Two index-aligned lists: docs[i] is the text of chunk i, metadata[i] its metadata.
docs = [chunk.page_content for chunk in text_chunks]
metadata = [chunk.metadata for chunk in text_chunks]

In [11]:
# Load environment variables if needed (e.g., API keys for other tools).
load_dotenv()

# Collection name referenced by both the ingestion and retrieval tasks.
TEST_COLLECTION_NAME = "agent_test_docs"

# Embedding model handed to the retriever tool; ensure it is available locally.
EMBEDDING_MODEL = "all-MiniLM-L6-v2"

# Directory handed to the retriever tool as its persist location.
PERSIST_DIR = "./_agent_test_chroma_db"

# Ollama model name used for the agent's LLM (swap for your preferred model).
LLM_MODEL = "deepseek-r1:1.5b"


In [12]:
# `logging` is already imported in the notebook's import cell, so it is not
# re-imported here.
# Configure root logging at INFO with a timestamped format. Note that
# basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


In [20]:
docs[0]

'Joint Automatic Speech Recognition And Structure\nLearning For Better Speech Understanding\nJiliang Hu1, Zuchao Li2,*, Mengjia Shen3, Haojun Ai1, Sheng Li4, Jun Zhang3\n1Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education,\nSchool of Cyber Science and Engineering, Wuhan University, Wuhan, China,\n2School of Computer Science, Wuhan University, Wuhan, China,\n3Wuhan Second Ship Design and Research Institute, Wuhan, China,\n4National Institute of Information and Communications Technology, Japan.\nAbstract —Spoken language understanding (SLU) is a structure\nprediction task in the field of speech. Recently, many works\non SLU that treat it as a sequence-to-sequence task have\nachieved great success. However, This method is not suitable\nfor simultaneous speech recognition and understanding. In this\npaper, we propose a joint speech recognition and structure\nlearning framework (JSRSL), an end-to-end SLU model based'

In [13]:
def setup_environment(clean_db: bool = False):
    """Initialize the LLM and the RAG tools used by the agent.

    Args:
        clean_db: When True, delete the ``PERSIST_DIR`` directory before the
            tools are created, so the run starts without previously persisted
            data. Defaults to False, which leaves any existing data untouched.

    Returns:
        A tuple ``(llm, retriever_tool, ingest_tool)``.

    Raises:
        Exception: Any error raised during setup is logged with its traceback
            and then re-raised unchanged.
    """
    logger.info("--- Setting up test environment ---")

    try:
        if clean_db:
            # Opt-in cleanup; a missing directory is not an error.
            logger.info(f"Removing old vector store at: {PERSIST_DIR}")
            shutil.rmtree(PERSIST_DIR, ignore_errors=True)

        # Initialize LLM
        logger.info(f"Loading LLM: {LLM_MODEL}")
        llm = ChatOllama(model=LLM_MODEL, temperature=0.1)
        # No connectivity check is made here; uncomment to probe the model.
        # llm.invoke("Hi")
        logger.info("LLM loaded successfully.")

        # Initialize Tools
        logger.info("Initializing RAG Tools...")
        retriever_tool = RetrieveTool(
            embedding_model_name=EMBEDDING_MODEL,
            persist_dir=PERSIST_DIR
        )
        # The ingest tool is built on top of the retriever tool instance.
        ingest_tool = IngestTool(retriever_tool=retriever_tool)
        logger.info("RAG Tools initialized successfully.")

        return llm, retriever_tool, ingest_tool

    except Exception as e:
        logger.error(f"Failed to set up environment: {e}", exc_info=True)
        raise


In [14]:
# Create the LLM and both tools once; later cells reuse these names.
llm, retriever_tool, ingest_tool = setup_environment()

In [15]:
# Tool list handed to the agent definition below.
tools = [retriever_tool, ingest_tool]

In [16]:
logger.info("--- Defining Knowledge Base Manager Agent ---")

# Single agent that is given both the ingest and the retriever tool.
kb_manager_agent = Agent(
    role="Knowledge Base Manager",
    goal=(
        "Efficiently manage and retrieve information from the company's knowledge base "
        "stored in ChromaDB. Use the provided tools to ingest new documents into specific "
        "collections and retrieve relevant information based on queries."
    ),
    backstory=(
        "You are an expert AI assistant responsible for maintaining the accuracy and accessibility "
        "of the company's document knowledge base. You meticulously ingest new information using the "
        "'ChromaDB Document Ingest Tool' and expertly query the database using the "
        "'ChromaDB Retriever Tool' to answer questions. Always specify the correct collection name."
    ),
    tools=tools,
    llm=llm,
    allow_delegation=False,
    # verbose=True shows LLM reasoning and tool calls in the output.
    verbose=True,
    # memory=True  # Optional: enable memory for conversation context if needed
)
logger.info(f"Agent '{kb_manager_agent.role}' created.")

In [17]:
# String forms of the chunk texts and their metadata; these are embedded
# verbatim in the ingestion task's description.
docs_repr = repr(docs)
metas_repr = repr(metadata)

In [18]:
# Ingestion task: the chunk texts and metadata are passed to the agent inside
# the prompt text itself.
# NOTE(review): this embeds the repr of every chunk in the description, which
# may exceed the model's context window for larger inputs — confirm.
task_ingest = Task(
    agent=kb_manager_agent,
    tools=[ingest_tool],  # Optional: limit tools for this specific task
    description=(
        f"Ingest the following set of documents into the '{TEST_COLLECTION_NAME}' collection "
        "using the 'ChromaDB Document Ingest Tool'. Ensure you pass both the document texts "
        "and their corresponding metadata.\n\n"
        f"Documents to ingest: {docs_repr}\n"
        f"Associated Metadatas: {metas_repr}"
    ),
    expected_output=(
        f"Confirmation that {len(docs)} documents were successfully ingested "
        f"into the '{TEST_COLLECTION_NAME}' collection."
    ),
)
logger.info("Ingestion task defined.")

In [21]:
query = "What is the Joint Automatic Speech Recognition and Machine Translation (JASR-MT) task?"

# Retrieval task: runs after ingestion and is limited to the retriever tool.
task_retrieve = Task(
    agent=kb_manager_agent,
    tools=[retriever_tool],  # Optional: limit tools for this specific task
    context=[task_ingest],  # Make this task depend on the ingestion task
    description=(
        f"Search the '{TEST_COLLECTION_NAME}' collection using the 'ChromaDB Retriever Tool' "
        f"to find information relevant to the following query: '{query}'. "
        # The next sentence explicitly steers the agent toward MMR retrieval.
        "Retrieve the top 3 most relevant documents using MMR for diversity. "
        "Present the content of the retrieved documents clearly."
    ),
    expected_output=(
        "A summary or list of the content from the top 3 relevant documents found in the "
        f"'{TEST_COLLECTION_NAME}' collection related to '{query}', retrieved using MMR."
    ),
)
logger.info("Retrieval task defined.")

In [23]:
# 5. Create and Run Crew
logger.info("--- Creating and Running the Crew ---")

# Process.sequential runs the tasks in list order: ingest first, then retrieve.
company_knowledge_crew = Crew(
    tasks=[task_ingest, task_retrieve],
    agents=[kb_manager_agent],
    verbose=2,  # verbose=2 shows detailed LLM thoughts and tool calls
    process=Process.sequential,
)

result = company_knowledge_crew.kickoff()
logger.info("--- Crew Execution Finished ---")

# Banner, result and closing rule, one per line.
print(
    "\n\n===== Final Crew Result =====",
    result,
    "============================",
    sep="\n",
)



[1m[95m [DEBUG]: == Working Agent: Knowledge Base Manager[00m
[1m[95m [INFO]: == Starting Task: Ingest the following set of documents into the 'agent_test_docs' collection using the 'ChromaDB Document Ingest Tool'. Ensure you pass both the document texts and their corresponding metadata.

Documents to ingest: ['Joint Automatic Speech Recognition And Structure\nLearning For Better Speech Understanding\nJiliang Hu1, Zuchao Li2,*, Mengjia Shen3, Haojun Ai1, Sheng Li4, Jun Zhang3\n1Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education,\nSchool of Cyber Science and Engineering, Wuhan University, Wuhan, China,\n2School of Computer Science, Wuhan University, Wuhan, China,\n3Wuhan Second Ship Design and Research Institute, Wuhan, China,\n4National Institute of Information and Communications Technology, Japan.\nAbstract —Spoken language understanding (SLU) is a structure\nprediction task in the field of speech. Recently, many works\non SLU that treat

In [25]:
print("Result: \n", result)

Result: 
 1. Document 1: "This document discusses advancements in joint automatic speech recognition and machine translation, focusing on improving accuracy and efficiency in real-time applications."  
2. Document 2: "The JASR-MT task emphasizes the integration of automatic speech recognition systems with machine translation models to enhance natural language processing tasks."  
3. Document 3: "This document provides an overview of the latest developments in joint automatic speech recognition and machine translation, highlighting their impact on various industries."


In [26]:
# NOTE(review): in the recorded run this import raised FileNotFoundError for
# 'agent/config/tasks.yaml' — presumably load_config reads that path relative
# to the working directory at import time; confirm before relying on this cell.
from agent.config.load_config import agents_config

FileNotFoundError: [Errno 2] No such file or directory: 'agent/config/tasks.yaml'