In [1]:
import os
import autogen
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.document_loaders.image import UnstructuredImageLoader
from autogen import register_function
from typing import Annotated, Literal

flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.


In [2]:
# def parse_files_in_directory(directory_path):
    
#     parsed_data = parse_pdfs(directory_path)

#     # for filename in os.listdir(directory_path):
#         # file_path = os.path.join(directory_path, filename)
        
#         # if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
#         #     loader = UnstructuredImageLoader(file_path)
#         #     documents = loader.load()
#         #     parsed_text = ' '.join([doc.page_content for doc in documents])

#         # else:
#         #     continue

#         # parsed_data[filename] = parsed_text

#     return parsed_data


def parse_pdfs(directory:str ="../data/") -> dict:
    """Run ``PyPDFDirectoryLoader`` over ``directory`` and group page contents by source.

    Args:
        directory: Path handed to ``PyPDFDirectoryLoader``; defaults to
            ``"../data/"``.

    Returns:
        A dict keyed by each loaded page's ``metadata["source"]`` value. Each
        value is the list of that source's ``page_content`` entries, in the
        order the loader returned them.
    """
    pages_by_source = {}
    pdf_loader = PyPDFDirectoryLoader(directory, extract_images=True)
    for doc in pdf_loader.load():
        # setdefault creates the per-source list the first time a source is seen.
        pages_by_source.setdefault(doc.metadata["source"], []).append(doc.page_content)
    return pages_by_source


In [3]:
# The Groq key is read from the environment; this is None when the variable is unset.
api_key = os.getenv("GROQ_API_KEY")

# A single endpoint entry describing the Groq-hosted model.
config_list = [
    dict(
        model="llama-3.2-11b-text-preview",
        base_url="https://api.groq.com/openai/v1/chat/completions",
        api_type="groq",
        api_key=api_key,
        api_rate_limit=100,
    )
]

# The agents below receive the endpoint list wrapped under the "config_list" key.
llm_config = dict(config_list=config_list)

In [4]:
# user_proxy = autogen.ConversableAgent(
#     name="Admin",
#     system_message="Give the task, and send instructions to doc_classifier to classify the documents in the data folder",
#     code_execution_config=False,
#     llm_config=llm_config,
#     human_input_mode="ALWAYS",
# )

# The "User" agent has no LLM (llm_config=False) and never asks for human input.
user_proxy = autogen.ConversableAgent(
    name="User",
    llm_config=False,
    human_input_mode="NEVER",
    # A message is a termination message when its content, ignoring trailing
    # whitespace, ends with "TERMINATE".
    is_termination_msg=lambda msg: msg.get("content", "") and msg.get("content", "").rstrip().endswith("TERMINATE"),
)

In [5]:
# LLM-backed agent whose system prompt tells it to use the parse_pdfs function.
doc_parse_caller = autogen.ConversableAgent(
    name="Doc Parse Caller",
    description="A caller for parsing documents",
    system_message="You are a helpful AI assistant. Use the provided python function parse_pdfs(directory), to parse the documents at the directory '../data/'.",
    llm_config=llm_config,
    # A message is a termination message when its content, ignoring trailing
    # whitespace, ends with "TERMINATE".
    is_termination_msg=lambda msg: msg.get("content", "") and msg.get("content", "").rstrip().endswith("TERMINATE"),
)

# LLM-backed agent whose system prompt tells it to execute the python call it
# receives and then reply TERMINATE.
doc_parser = autogen.ConversableAgent(
    name="Doc Parser",
    llm_config=llm_config,
    # Adjacent string literals are joined with no separator, so the first
    # fragment must end with a space to keep the two sentences apart.
    system_message="You are an executer that parses documents. If you receive a python call instruction, then you execute it, or else ignore the message. "
                   "Reply TERMINATE after you execute the python function",
)

# LLM-backed agent whose system prompt asks it to assign each document one of
# the fixed categories and then reply TERMINATE.
doc_classifier = autogen.ConversableAgent(
    name="Doc Classifier",
    llm_config=llm_config,
    description="A classifier that classifies the text that has been parsed",
    # Adjacent string literals are joined with no separator, so each fragment
    # except the last ends with a space to keep the sentences apart.
    system_message="You are a helpful AI assistant. "
                   "For each document provided, categorize them from one of these options ['Passport', 'Residence Permit', 'Transaction', 'Health Insurance', 'Other'] and provide it as a list. "
                   "Reply TERMINATE when the task is done.",
)



In [6]:
# user_proxy.register_function(
#     function_map={
#         "parse_pdfs": parse_pdfs()
#     }
# )

# Register parse_pdfs as a tool: doc_parse_caller is the agent that proposes
# the call and doc_parser is the agent that runs it.
register_function(
    parse_pdfs,
    caller=doc_parse_caller,  # agent whose llm_config["tools"] receives the schema
    executor=doc_parser,  # agent that executes the function when it is called
    name="parse_pdfs",  # tool name exposed in the schema
    description="A simple document parser",  # A description of the tool.
)

# # Register the tool signature with the assistant agent.
# doc_parse_caller.register_for_llm(name="parse_pdfs", description="A simple pdf parser")(parse_pdfs)

# # Register the tool function with the user proxy agent.
# doc_parser.register_for_execution(name="parse_pdfs")(parse_pdfs)

In [7]:
# Fixed hand-over order: each agent may only pass the turn to the agent that
# follows it in this list, and the last one hands back to the first.
speaker_cycle = [user_proxy, doc_parse_caller, doc_parser, doc_classifier]
allowed_transitions = {
    current: [following]
    for current, following in zip(speaker_cycle, speaker_cycle[1:] + speaker_cycle[:1])
}

groupchat = autogen.GroupChat(
    agents=list(speaker_cycle),
    messages=[],
    max_round=5,
    allowed_or_disallowed_speaker_transitions=allowed_transitions,
    speaker_transitions_type="allowed",
)

# The manager is constructed with the same LLM configuration as the worker agents.
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

In [8]:
# Display the "tools" entry of the caller agent's LLM config.
doc_parse_caller.llm_config["tools"]

[{'type': 'function',
  'function': {'description': 'A simple document parser',
   'name': 'parse_pdfs',
   'parameters': {'type': 'object',
    'properties': {'directory': {'type': 'string',
      'default': '../data/',
      'description': 'directory'}},
    'required': []}}}]

In [9]:
# Disabled nested-chat message builder. Every line, including the final
# return, stays commented out so this cell runs as a no-op.
# def parse_calling(recipient, messages, sender, config):
#     # return f"Can you provide the python function to call for parsing the documents?\n\n {recipient.chat_messages_for_summary(sender)[-1]['content']}"
#     #print(f"{recipient.chat_messages_for_summary(sender)[-1]['content']}")
#
#     # if isinstance(recipient.chat_messages_for_summary(sender)[-1]['content'], dict):
#     #     return "TERMINATE"
#     # else:
#     return "I have some pdf documents in the directory '../data/', and I would like to parse the documents in this directory"


# def parsed_docs(recipient, messages, sender, config):
#     # return f"Can you provide the python function to call for parsing the documents?\n\n {recipient.chat_messages_for_summary(sender)[-1]['content']}"
#     #print(f"{recipient.chat_messages_for_summary(sender)[-1]['content']}")
#     return "Here are the parsed documents to be classified"


# doc_parser.register_nested_chats(
#     [
#         {
#             "recipient": doc_parse_caller,
#             "message": parse_calling,
#             "max_turns": 2,
#             "summary_method": "last_msg",
#         },
#         {
#             "recipient": user_proxy,
#             "message": parsed_docs,
#             "max_turns": 1,
#             "summary_method": "last_msg",
#         }
#     ],
#     trigger=manager,  # condition=my_condition,
# )

In [10]:
# Task statement sent to the group chat manager to start the conversation.
classification_request = "I have some pdf documents in the directory '../data/', and I would like to know what type of documents they are (in terms of classification)"

result = user_proxy.initiate_chat(manager, message=classification_request)

[33mUser[0m (to chat_manager):

I have some pdf documents in the directory '../data/', and I would like to know what type of documents they are (in terms of classification)

--------------------------------------------------------------------------------
[32m
Next speaker: Doc Parse Caller
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mDoc Parse Caller[0m (to chat_manager):

[32m***** Suggested tool call (call_tdh3): parse_pdfs *****[0m
Arguments: 
{"directory": "../data/"}
[32m*******************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: Doc Parser
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[35m
>>>>>>>> EXECUTING FUNCTION parse_pdfs...[0m
[33mDoc Parser[0m (to chat_manager):

[33mDoc Parser[0m (to chat_manager):

[32m***** Response from calling tool (call_tdh3) *****[0m
{"..\\data\\ahmad.pdf": ["EUR Statement\nGenerated on the 1 Sept 2023\nPage  of 1 1© 2023 Revolut B

