In [7]:
import dspy
from dspy.teleprompt import BootstrapFewShot

from mae.kernel.rag.split.util import split_files


In [8]:
class EntitiesSignature(dspy.Signature):
    """
    Signature class for the entity recognition task
    """
    # NOTE: the docstring above is intentionally left verbatim. dspy uses a
    # Signature's docstring as the task instructions, so rewording it would
    # change the prompt sent to the LM.
    # Input text; the only field EntitieCoTModule.forward passes to the predictor.
    question = dspy.InputField()
    # NOTE(review): `entities` and `condition` are declared as *input* fields,
    # but no caller visible in this notebook supplies them, and their `desc`
    # strings read like instructions rather than data descriptions. Confirm
    # whether this is intended before changing anything (field order and desc
    # text both shape the generated prompt).
    entities = dspy.InputField(desc="Retrieve entities and their types")
    condition = dspy.InputField(
        desc="Identify and list all significant entities. Entities can include events, person names, place names, organization names, dates, etc. For each entity, specify its type, ensuring no duplicate content.")
    # Output field; the desc shows the expected shape as a list of
    # {'entity_name', 'entity_type'} dicts.
    answer = dspy.OutputField(desc="Returned example: [{'entity_name':'','entity_type':''}]")


class EntitieCoTModule(dspy.Module):
    """Entity-recognition module wrapping a chain-of-thought predictor.

    The predictor is built from ``EntitiesSignature`` and stored on
    ``self.predict``.
    """

    def __init__(self):
        """Create the ``dspy.ChainOfThought`` predictor for ``EntitiesSignature``."""
        super().__init__()
        self.predict = dspy.ChainOfThought(EntitiesSignature)

    def forward(self, question: str):
        """Run the predictor on ``question`` and return its result unchanged.

        Only the ``question`` field is supplied to the predictor.
        """
        prediction = self.predict(question=question)
        return prediction


# Signature for turning a text (plus entity information) into knowledge-graph JSON.
# NOTE(review): this class deliberately has no docstring. In dspy a Signature's
# docstring becomes the prompt instructions, so adding one would change behavior.
class TextToKgSignature(dspy.Signature):
    # Source text; TextToKGModule.forward passes the raw context here.
    question = dspy.InputField()
    # NOTE(review): `answer` is declared as an *input* field, but
    # TextToKGModule.forward only passes `question` and `hint`. Confirm whether
    # the entity list was meant to be supplied through this field.
    answer = dspy.InputField(
        desc="Based on each entity_name and entity_type I provide, construct a complete data system according to the principles of building a knowledge graph. If an entity is not clearly defined in the text, you should infer its possible attributes and relationships with other entities based on the context, providing as much detail as possible.")
    # Output field; the desc embeds a JSON template (entity_name, entity_type,
    # description, relationships) and asks for None when no graph can be built.
    # The template text is part of the prompt, so it must stay byte-identical.
    return_json = dspy.OutputField(desc="""The returned result must maintain a complete JSON structure. If knowledge graph identification is not possible, then return None [
        {
            "entity_name": "",
            "entity_type": "",
            "description": "",
            "relationships": [
                {"related_entity": "", "relation_type": ""},
                {"related_entity": "", "relation_type": ""}
            ]
        }
        ]""")


class TextToKGModule(dspy.Module):
    """Two-stage pipeline: extract entities, then generate knowledge-graph JSON.

    Stage 1 calls the injected entity-extraction program on the input text.
    Stage 2 feeds the same text, plus the stringified stage-1 result as a
    hint, to a ``dspy.ChainOfThoughtWithHint`` predictor built from
    ``TextToKgSignature``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, entitie_optimized_cot: dspy.Module):
        """Store the entity extractor and build the knowledge-graph predictor.

        Args:
            entitie_optimized_cot: A callable program invoked as
                ``entitie_optimized_cot(question=...)``. The parameter was
                previously annotated as ``BootstrapFewShot``, but that is the
                optimizer class, not something callable this way; presumably
                the value is the program returned by
                ``BootstrapFewShot(...).compile(...)`` -- confirm at call site.
        """
        super().__init__()
        # Predictor that produces the `return_json` field, with an optional hint.
        self.kg = dspy.ChainOfThoughtWithHint(TextToKgSignature)
        # Entity-extraction program used as the first stage.
        self.entitie_optimized_cot = entitie_optimized_cot
        # Result of the most recent entity-extraction call (None until forward runs).
        self.entities = None

    def forward(self, context: str):
        """Run both stages on ``context`` and return the stage-2 result.

        Args:
            context: Text to extract entities from and convert to a graph.

        Returns:
            Whatever ``self.kg`` returns for this input.
        """
        entities = self.entitie_optimized_cot(question=context)
        # Kept on the instance so the intermediate result can be inspected later.
        self.entities = entities
        # NOTE(review): the hint is the string form of the whole stage-1 result,
        # not just its `answer` field. Left as-is to preserve the prompt; confirm
        # whether only the entity list was intended.
        answer = self.kg(question=context, hint=str(entities))
        return answer

In [6]:
# Collect the `page_content` of every item that `split_files` returns for the PDF.
doc_data = [
    chunk.page_content
    for chunk in split_files(files_path=['moa_llm.pdf'])
]
doc_data

['Mixture-of-Agents Enhances Large Language Model\nCapabilities\nJunlin Wang\nDuke University\nTogether AI\njunlin.wang2@duke.eduJue Wang\nTogether AI\njue@together.aiBen Athiwaratkun\nTogether AI\nben@together.ai\nCe Zhang\nUniversity of Chicago\nTogether AI\ncez@uchicago.eduJames Zou\nStanford University\nTogether AI\njamesz@stanford.edu\nAbstract\nRecent advances in large language models (LLMs) demonstrate substantial capa-\nbilities in natural language understanding and generation tasks. With the growing\nnumber of LLMs, how to harness the collective expertise of multiple LLMs is an\nexciting open direction. Toward this goal, we propose a new approach that lever-\nages the collective strengths of multiple LLMs through a Mixture-of-Agents (MoA)\nmethodology. In our approach, we construct a layered MoA architecture wherein\neach layer comprises multiple LLM agents. Each agent takes all the outputs from\nagents in the previous layer as auxiliary information in generating its response.

In [5]:
# Bare expression: displays the current value of `doc_data` as the cell output.
# NOTE(review): this cell's execution count (5) is lower than that of the cell
# that assigns `doc_data` (6), so the recorded `[]` output presumably predates
# that assignment -- confirm, then re-run in order or delete this cell.
doc_data

[]