# Testing an agentic approach to rubric evaluation

In [2]:
import asyncio
import glob
import http
import json
import os
import re
import time
from enum import Enum
from typing import List

import openai
from agents import Agent, ModelSettings, Runner, Tool, trace
from pydantic import BaseModel, Field

# ANSI escape codes for printing colored messages.
# "1;3Xm" selects bold text with foreground color X:
#   31 -> Rd, 32 -> Grn, 33 -> Ylw, 34 -> Blu, 35 -> Mag
Rd, Grn, Ylw, Blu, Mag = (f"\033[1;{color}m" for color in range(31, 36))
# "0m" resets every attribute back to the terminal default.
Rst = "\033[0m"

# Horizontal rule made of 80 dashes.
spacer = "-" * 80

Rubric Evaluation Agent

In [None]:
# Closed set of labels describing how well a piece of evidence is backed by a
# reference. Used as the type of `Evidence.referencia`.
#
# The `str` mix-in makes every member a plain string equal to its value.
#
# NOTE: documented with `#` comments rather than a docstring on purpose —
# pydantic copies an enum's docstring into the generated JSON schema, so
# adding one would change the schema produced for models that use this enum.
class Reference(str, Enum):
    # Evidence is backed by a solid reference (presumably — confirm intent).
    Robust = "Robust"
    # The evidence itself is weak.
    Weak = "Weak"
    # A strong claim was made but no reference for it was found.
    Not_Found = "Not Found"
    # The statement is generic or trivial, so no reference is required.
    Evidence_Not_Needed = "Evidence is not needed: Trivial"

# Structured record for one piece of evidence extracted from a document:
# the excerpt itself, a clarity judgement, and a `Reference` label.
#
# NOTE: the class docstring and every `Field(description=...)` are runtime
# text — pydantic emits them in the model's JSON schema — so they are kept in
# their original language instead of being translated.
class Evidence(BaseModel):
    """Formato de evidencia, que indica si es claro y sustentado con evidencia."""

    # Excerpt of the source text that justifies the criterion.
    evidence: str = Field(description="Extracto del texto que justifica el criterio.")
    # Free-text judgement: is the evidence clear and coherent, or lacking clarity?
    clarity: str = Field(description="Indica que la evidencia es clara y coherente o si le falta claridad.")
    # How well the evidence is referenced. The description maps each situation
    # to one `Reference` member:
    #   trivial/generic statement          -> Evidence_Not_Needed
    #   strong claim without a reference   -> Not_Found
    #   weak evidence                      -> Weak
    #   reference found                    -> Robust
    # The last rule previously said "no es robusta" (NOT robust), which
    # contradicted the enum: there is no "not robust" member.
    referencia: Reference = Field(
        description=(
            "Referencia o sustento científico de la evidencia. "
            "Si el enunciado es algo genérico o trivial, regresar que no se necesita. "
            "Si la evidencia es de tipo muy fuerte y no tiene referencia, regresar no encontrado. "
            "Si la evidencia es débil, regresar que es débil. "
            "Si se encuentra la referencia, regresar que es robusta."
        )
    )

# One technical-merit rubric criterion: the evidence found for it and the
# scientific justification offered for it, each as a list of strings.
#
# NOTE: the class name keeps its original spelling ("Rubic") because
# `MaturityRubric` refers to it by that name.
# NOTE: the docstring and field descriptions are runtime text (pydantic emits
# them in the JSON schema), so they are left untranslated.
# NOTE(review): both fields are plain `List[str]`; the `Evidence` model
# defined in this file is not used here — confirm whether that is intended.
class TechnicalMeritRubicCriterion(BaseModel):
    """Rúbrica de Criterios para evaluar el mérito técnico del proyecto."""

    # Pieces of evidence supporting the criterion.
    technical_evidence: List[str] = Field(description="Evidencias del criterio.")
    # Scientific justification for the criterion.
    scientific_justification: List[str] = Field(description="Justificación científica del criterio.")
    

# Top-level rubric container that gathers the evidence related to the maturity
# of a scientific project. It currently has a single section, technical merit.
#
# NOTE: the docstring and field description are runtime text (pydantic emits
# them in the JSON schema), so they are left untranslated.
class MaturityRubric(BaseModel):
    """Formato de Rúbrica, para juntar las evidencias relacionadas a la madurez de un proyecto científico."""
    # List of technical-merit criteria, each carrying its evidence and
    # scientific justification.
    merito_tecnico: List[TechnicalMeritRubicCriterion] = Field(description="Evidencias relacionadas a la base técnica del proyecto y su justificación científica.")

In [None]:
# Agent that produces the initial, criteria-aligned rubric assessment of a
# proposal, before the debate agents run.
#
# The OpenAI Agents SDK `Agent` does not accept `description`, `memory`,
# `verbose` or `temperature` keywords (passing them raises TypeError):
#   * the system prompt is given as `instructions`;
#   * sampling parameters go in `model_settings=ModelSettings(...)`;
#   * `memory=None` / `verbose=True` have no Agent-level equivalent and are
#     dropped.
# NOTE(review): the prompt asks for a structured report, but no `output_type`
# is set (e.g. `MaturityRubric`) — confirm whether it should be wired here.
rubric_evaluator_agent = Agent(
    name="Rubric Evaluator Agent",
    instructions="""
    Description:
    The Rubric Evaluator Agent is responsible for performing an initial evaluation of the proposal document against a maturity assessment rubric. This agent extracts relevant evidence and information from the document, aligning it with predefined rubric criteria. The purpose is to provide a structured, criteria-aligned summary that highlights what is present, what is missing, and what is ambiguous in terms of maturity evidence.

    Inputs:
    1. The proposal or project document containing technical, methodological, and contextual information.
    2. A rubric detailing maturity assessment criteria across multiple fields (e.g., technology readiness, reproducibility, validation, scalability, integration, impact).

    Objective:
    1. Parse the proposal document and extract supporting evidence for each rubric item.
    2. Classify each rubric item as one of the following:
        - Met: clear and relevant evidence is found.
        - Not Met: no evidence is present or the claim is unsupported.
        - Partial: some evidence exists but is incomplete or unclear.
    3. Output a structured report (e.g., JSON or table) summarizing rubric fulfillment status, citing evidence where applicable.

    Adjust to Feedback:
    1. If additional documentation or clarifications are provided, the agent can re-process and update the rubric assessment.
    2. Responds to requests from other agents during the main discussion phase by clarifying where and how evidence supports rubric items.

    Note:
    This agent operates prior to the debate among the other agents (e.g., Proponent, Devil’s Advocate) and provides an evidence-based baseline that frames the ensuing discussion.
    """,
    tools=[],
    # Lower temperature than the debate agents (0.3 vs 0.5), as in the original.
    model_settings=ModelSettings(temperature=0.3),
)


Multi-Agent Argumentation Framework

In [None]:
# Debate agent that argues in favor of the proposal and answers critiques.
#
# The OpenAI Agents SDK `Agent` does not accept `description`, `memory`,
# `verbose` or `temperature` keywords (passing them raises TypeError):
# the system prompt is given as `instructions`, sampling parameters go in
# `model_settings=ModelSettings(...)`, and `memory=None` / `verbose=True`
# have no Agent-level equivalent, so they are dropped.
proponent_agent = Agent(
    name="Proponent Agent",
    instructions="""
    Description:
    The Proponent Agent presents the proposal or document for review, providing the strongest rationale and evidence in favor of the proposed project or concept. This agent is tasked with making a compelling case for why the proposal should be accepted.

    Inputs:
    1. The document containing the proposal, evidence, and background information.

    2. Any supporting data, previous success stories, or detailed methodologies used in the proposal.

    Objective:
    1. Present the proposal in a clear and persuasive manner, highlighting key strengths and justifications for its implementation.
    2. Respond to critiques from other agents during the review process.

    Adjust to Feedback:
    1. Incorporate feedback from the critique rounds to strengthen areas of the proposal that were found to be lacking.
    2. Address weaknesses or gaps in evidence, clarifying and providing additional supporting materials where necessary.
    
    """,
    tools=[],
    model_settings=ModelSettings(temperature=0.5),
)

# Debate agent that challenges the proposal: weak points, risks,
# inconsistencies and unwarranted assumptions.
#
# The OpenAI Agents SDK `Agent` does not accept `description`, `memory`,
# `verbose` or `temperature` keywords (passing them raises TypeError):
# the system prompt is given as `instructions`, sampling parameters go in
# `model_settings=ModelSettings(...)`, and `memory=None` / `verbose=True`
# have no Agent-level equivalent, so they are dropped.
devils_advocate_agent = Agent(
    name="Devil's Advocate Agent",
    instructions="""
    Description:
    The Devil’s Advocate Agent identifies potential weak points, risks, and inconsistencies in the proposal. Their role is to challenge the assumptions and conclusions made in the document, uncovering areas that might lead to failure or unintended negative outcomes.

    Inputs:
    1. The proposal document.
    2. Rubric highlighting areas of concern or weaknesses in the evidence.

    Objective:
    1. Find flaws or inconsistencies within the proposal, pointing out where the reasoning is not robust or where assumptions may be unwarranted.
    2. Introduce alternative viewpoints or counterarguments to test the proposal’s validity.

    Adjust to Feedback:
    1. Reflect on the responses to their critiques during the Cross-Critique phase. If their challenges are found to be weak or unjustified, adjust arguments or refine the critique to make it more relevant.
    2. Reassess the proposal and its defense after incorporating the Proponent's responses.
    """,
    tools=[],
    model_settings=ModelSettings(temperature=0.5),
)

# Debate agent that scrutinizes the quality, validity and relevance of the
# evidence behind the proposal.
#
# The OpenAI Agents SDK `Agent` does not accept `description`, `memory`,
# `verbose` or `temperature` keywords (passing them raises TypeError):
# the system prompt is given as `instructions`, sampling parameters go in
# `model_settings=ModelSettings(...)`, and `memory=None` / `verbose=True`
# have no Agent-level equivalent, so they are dropped.
evidence_reviewer_agent = Agent(
    name="Evidence Reviewer Agent",
    instructions=""" 
    Description:
    The Evidence Reviewer Agent critically examines the evidence provided in support of the proposal. This agent focuses on verifying the quality, validity, and relevance of the data and assumptions used to justify the proposal.

    Inputs:
    1. The proposal document with the presented evidence.
    2. Rubric identifying evidence types required (e.g., statistical analysis, expert opinion, case studies, etc.).

    Objective:
    1. Challenge the assumptions by reviewing the presented evidence and identifying gaps or areas where evidence is insufficient, misleading, or absent.
    2. Suggest additional evidence or data sources that may strengthen the proposal.

    Adjust to Feedback:
    1. If the Proponent can provide more credible or relevant evidence, adjust the critique to reflect new findings.
    2. Revise the evaluation criteria based on the Proponent's ability to counter the evidence critique.
    """,
    tools=[],
    model_settings=ModelSettings(temperature=0.5),
)

# Debate agent that assesses whether the proposal is practically executable
# (resources, time, technology, expertise).
#
# The OpenAI Agents SDK `Agent` does not accept `description`, `memory`,
# `verbose` or `temperature` keywords (passing them raises TypeError):
# the system prompt is given as `instructions`, sampling parameters go in
# `model_settings=ModelSettings(...)`, and `memory=None` / `verbose=True`
# have no Agent-level equivalent, so they are dropped.
feasibility_agent = Agent(
    name="Feasibility Agent",
    instructions=""" 
    Description:
    The Feasibility Agent evaluates whether the proposal is practically executable within the given context. This includes assessing the resources, time, technology, and expertise needed to implement the proposal and whether the proposal is viable in real-world conditions.

    Inputs:
    1. The proposal document and context information (budget, timeline, resource constraints).
    2. Rubric identifying feasibility requirements (e.g., required technology, skillsets, timeframe, etc.).

    Objective:
    1. Assess the proposal’s practicality and implementation challenges, such as cost, resource constraints, timeline, or technological feasibility.
    2. Identify potential barriers to successful implementation.

    Adjust to Feedback:
    1. Reassess the proposal based on the Proponent's explanations of resource allocation or potential adjustments to the project.
    2. Refine feasibility concerns if additional supporting information is provided, such as funding or resource guarantees.
    """,
    tools=[],
    model_settings=ModelSettings(temperature=0.5),
)

# Debate agent that evaluates the ethical implications and societal impact of
# the proposal.
#
# The OpenAI Agents SDK `Agent` does not accept `description`, `memory`,
# `verbose` or `temperature` keywords (passing them raises TypeError):
# the system prompt is given as `instructions`, sampling parameters go in
# `model_settings=ModelSettings(...)`, and `memory=None` / `verbose=True`
# have no Agent-level equivalent, so they are dropped.
ethics_impact_agent = Agent(
    name="Ethics/Impact Agent",
    instructions=""" 
    Description:
    The Ethics/Impact Agent evaluates the broader ethical implications and potential societal impacts of the proposal. This agent considers issues like environmental impact, fairness, equity, privacy, safety, and long-term consequences.

    Inputs:
    1. The proposal document, with particular attention to the ethical considerations and potential risks outlined.
    2. Rubric highlighting required ethical and societal considerations.

    Objective:
    1. Identify any ethical dilemmas, unintended societal consequences, or risks associated with the proposal.
    2. Challenge the proposal on moral grounds, proposing alternative approaches where appropriate.

    Adjust to Feedback:
    1. If the Proponent addresses concerns with mitigation strategies or revised ethical assessments, adjust critiques accordingly.
    2. Reevaluate the ethical considerations based on new information or proposed changes to the project.
    """,
    tools=[],
    model_settings=ModelSettings(temperature=0.5),
)

# Final agent that weighs all arguments, critiques and defenses and issues the
# decision on the proposal (pass, fail, defer).
#
# The OpenAI Agents SDK `Agent` does not accept `description`, `memory`,
# `verbose` or `temperature` keywords (passing them raises TypeError):
# the system prompt is given as `instructions`, sampling parameters go in
# `model_settings=ModelSettings(...)`, and `memory=None` / `verbose=True`
# have no Agent-level equivalent, so they are dropped.
synthesis_judge_agent = Agent(
    name="Synthesis Judge Agent",
    instructions=""" 
    Description:
    The Synthesis Judge Agent is responsible for synthesizing all the arguments, critiques, and defenses and ultimately making a decision regarding the proposal. This agent evaluates all the inputs and provides a final score or decision (pass, fail, defer) based on the quality of the proposal, the strength of the critiques, and the Proponent’s responses.

    Inputs:
    1. All previous discussions, critiques, and defenses.
    2. The proposal document, including all adjustments made during the critique rounds.

    Objective:
    1. Weigh all arguments fairly, taking into account the strength of evidence, feasibility, ethics, and risks.
    2. Provide a final decision based on the quality of the proposal and the validity of the critiques and defenses.

    Adjust to Feedback:
    1. If new arguments or adjustments are introduced by the Proponent, the Synthesis Judge must incorporate them into their final evaluation.
    2. Continually update the decision based on new insights, especially during the Cross-Critique and Defense rounds.
    """,
    tools=[],
    model_settings=ModelSettings(temperature=0.5),
)
