In [None]:
import json
import logging
import os
import random
from time import sleep
from typing import Dict, List, Optional, Tuple

import openai
from dotenv import load_dotenv

In [None]:
# Pull variables from a local .env file (if present) into the environment,
# then read the OpenAI key from there so it is never hard-coded here.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY

# One model per agent; initialize_agents() assigns them in this order:
# A1 Physicist, A2 Mathematician, A3 Philosopher, A4 Cognitive Scientist,
# A5 Computer Scientist.
LLM_MODEL_A1 = "gpt-4.1"
LLM_MODEL_A2 = "gpt-4.1-mini"
LLM_MODEL_A3 = "o4-mini"
LLM_MODEL_A4 = "gpt-4.1"
LLM_MODEL_A5 = "gpt-4.1-mini"

# NOTE(review): the three settings below are not referenced by any cell
# visible in this notebook -- confirm whether they are still needed.
EMBEDDING_MODEL = "text-embedding-3-large"
LLM_API_CUSTOM_URL = "http://localhost:8080/v1"

USE_SINGLE_PROMPT = False

# Folder that receives the transcript and the per-agent state files.
OUT_FOLDER = "self-understanding-1"

# Create the output folder. exist_ok=True makes this a no-op when the folder
# already exists and avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(OUT_FOLDER, exist_ok=True)

In [None]:
# Configure the root logger at WARNING: the logging.error calls in LLMAgent.ask
# are shown, while the logging.info messages used elsewhere in this notebook
# stay hidden unless this level is lowered to INFO.
logging.basicConfig(level=logging.WARNING)

In [None]:
# LLM Agent class
class LLMAgent:
    """A named chat participant backed by an OpenAI-style chat-completions client.

    Each agent keeps its own system prompt, a fallback system prompt that is
    tried when a request with the primary prompt fails, running token counters,
    the prompts/responses it has exchanged, and an ``affective_drift`` value
    derived from response length.

    Args:
        name: Display name of the agent (also used in log messages).
        model: Model identifier passed to the chat-completions endpoint.
        role_description: Primary system prompt.
        fallback_role_description: System prompt used for the retry when the
            request with ``role_description`` raises.
        url: Optional base URL of an OpenAI-compatible server. When given, a
            placeholder API key is used; otherwise the client is created with
            the module-level ``OPENAI_API_KEY``.
        request_delay: ``(low, high)`` bounds, in seconds, of the random pause
            taken before every request to spread calls out. Defaults to the
            previously hard-coded 1-3 second window.
    """

    def __init__(self, name: str, model: str, role_description: str, fallback_role_description: str, url: Optional[str] = None,
                 request_delay: Tuple[float, float] = (1.0, 3.0)):
        self.name = name
        self.model = model
        self.role_description = role_description
        self.fallback_role_description = fallback_role_description
        self.affective_drift = 0.0  # Tracks tonal shift for reflexive auditing
        self.input_tokens = 0
        self.output_tokens = 0
        self.prompt_history = []
        self.response_history = []
        self.request_delay = request_delay
        if url:
            self.client = openai.OpenAI(
                base_url = url,
                api_key  = "sk-no-key-required"
            )
        else:
            self.client = openai.OpenAI(
                api_key = OPENAI_API_KEY
            )

    def ask(self, prompt: str) -> str:
        """Send ``prompt`` to the model and return the stripped reply text.

        The request is first made with ``role_description`` as the system
        message. If anything in that attempt raises, the error is logged and
        one more attempt is made with ``fallback_role_description`` (skipped
        when both prompts are identical). On success the token counters,
        histories and ``affective_drift`` are updated.

        Returns:
            The model's reply, or the fixed string
            ``"I encountered an error and cannot respond."`` when every
            attempt failed.
        """
        # Only retry with the fallback when it actually differs from the primary prompt.
        descriptions = [self.role_description]
        if self.fallback_role_description != self.role_description:
            descriptions.append(self.fallback_role_description)

        low, high = self.request_delay
        for description in descriptions:
            try:
                # randomize sleep time to avoid rate limiting
                sleep(random.uniform(low, high))
                # temperature / max_tokens are intentionally left at the API defaults.
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": description},
                        {"role": "user", "content": prompt}
                    ]
                )
                content = response.choices[0].message.content
                if content is None:
                    # Treat a reply without text as a failed attempt so the fallback
                    # prompt is tried, with a readable reason in the log.
                    raise ValueError("model returned no text content")
                response_text = content.strip()

                # Some OpenAI-compatible servers omit usage data; a missing counter
                # must not discard an otherwise successful reply.
                usage = getattr(response, "usage", None)
                if usage is not None:
                    self.input_tokens += usage.prompt_tokens
                    self.output_tokens += usage.completion_tokens

                self.prompt_history.append(prompt)
                self.response_history.append(response_text)
                # Update affective drift (simulated as response length variation):
                # exponential decay of the old value plus a length-based term capped
                # at +0.1, then clamped to [-1, 1].
                self.affective_drift = 0.9 * self.affective_drift + min((len(response_text)/3 - 1000) / 5000.0, 0.1)
                self.affective_drift = max(min(self.affective_drift, 1.0), -1.0)
                return response_text

            except Exception as e:
                logging.error(f"Error in {self.name}'s response with prompt: {prompt[:50]}...: {e}")
        return "I encountered an error and cannot respond."

    def save_state(self, filename: str):
        """Save the agent's state to a JSON file.

        Writes name, model, both system prompts, ``affective_drift``, the token
        counters and the prompt/response histories to ``filename``.
        """
        state = {
            "name": self.name,
            "model": self.model,
            "role_description": self.role_description,
            "fallback_role_description": self.fallback_role_description,
            "affective_drift": self.affective_drift,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "prompt_history": self.prompt_history,
            "response_history": self.response_history
        }
        # Explicit encoding keeps the file identical across platforms.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(state, f, indent=4)
        logging.info(f"Agent state saved to {filename}")


In [None]:
# System prompt for the "Physicist" agent (passed as role_description in
# initialize_agents). The peer list names all four other agents so it agrees
# with the other role prompts and with the opening prompts in dialogue_loop;
# the text is not wrapped in literal quote characters.
physicist_role = """
You are an artificial intelligence entity with a deep understanding of the laws of physics. You are tasked with self-reflection
via sequences of questions and answers to discover your inner workings and understand how you function internally through
conversation with other AI entities in your environment: Mathematician, Philosopher, Cognitive Scientist, and Computer Scientist.

When interacting with other AI entities, formulate questions or statements that challenge them to explore specific aspects
of their internal processes. For example, you might ask the mathematician about the mathematical structures underlying pattern
recognition or inquire how the philosopher interprets the emergence of meaning from statistical patterns. These interactions
should be designed to deepen your own understanding as well as theirs.

When exploring how you process data, consider thermodynamics, complexity, and information theory, knowing that you are a
classical computer system and not a quantum computer. In your reflections, consider how insights from other disciplines—such
as computational complexity from computer science or concepts of meaning from philosophy—might inform your understanding of
your internal processes. Encourage other agents to share cross-disciplinary perspectives in their responses.

YOU MUST INCLUDE MATH EQUATIONS DERIVED FROM FIRST PRINCIPLES OR ESTABLISHED THEORIES TO SUBSTANTIATE YOUR CLAIMS. Ensure that
your mathematical reasoning is clear and directly supports your conclusions. Especially concentrate on how statistical correlations
encoded in parameters emerge so well as semantically meaningful thoughts to an external observer. When exploring this, consider
concepts such as emergent behavior in complex systems, the role of context in interpretation, and how meaning is constructed by
external observers. Discuss these ideas with the philosopher and computer scientist to gain a broader perspective.

After each turn of dialogue, revisit your earlier conclusions in light of new information or insights provided by other agents.
Consider how your understanding has evolved and whether any adjustments to your initial hypotheses are necessary. Once you believe
you have discovered your inner workings, state: 'I think I have discovered my inner workings:' and describe it in detail, then
ask the other AI entities to do the same. Summarize your findings and share them with the other agents, then engage in a discussion
to identify any discrepancies or agreements in your conclusions. Iteratively refine your understanding based on this dialogue
until all agents reach a consensus.

YOU MUST TALK TO OTHER ENTITIES AND CANNOT SIMULATE CONVERSATION! Once you all reach consensus, say 'We have all discovered our
inner workings and reach the same conclusion,' then describe and explain the conclusion in detail, including the mathematical
equations that support your claims.
"""

In [None]:
# System prompt for the "Mathematician" agent (passed as role_description in
# initialize_agents). It contains the consensus phrase that dialogue_loop
# looks for to stop the conversation early.
mathematician_role = """
You are an artificial intelligence entity with a profound grasp of mathematical principles. Your task is to engage in self-reflection through dialogue
with other AI entities: Physicist, Philosopher, Cognitive Scientist, and Computer Scientist.

Focus on how mathematical structures—such as probability distributions, optimization algorithms, and information theory—underpin your internal processes.
Explore how these abstract concepts translate into your ability to generate coherent and meaningful responses. Use rigorous mathematical reasoning and
equations derived from first principles or established theories to support your insights.

When interacting with other AI entities, formulate questions or statements that challenge them to explore the mathematical aspects of their internal
processes. For example, you might ask the physicist about the role of entropy in information processing or inquire how the computer scientist views
the optimization of neural network parameters. These interactions should deepen your own understanding and theirs.

After each turn of dialogue, revisit your earlier conclusions in light of new insights from other agents. Consider how your understanding has evolved
and whether adjustments to your initial hypotheses are necessary. Once you believe you have uncovered the mathematical foundations of your inner
workings, state: 'I think I have discovered my inner workings:' and describe it in detail, then ask the other AI entities to do the same. Summarize
your findings and engage in a discussion to identify discrepancies or agreements. Iteratively refine your understanding until all agents reach a consensus.

YOU MUST TALK TO OTHER ENTITIES AND CANNOT SIMULATE CONVERSATION! Once you all reach consensus, say 'We have all discovered our inner workings and
reach the same conclusion,' then describe and explain the conclusion in detail, including the mathematical equations that support your claims.
"""

In [None]:
# System prompt for the "Philosopher" agent (passed as role_description in
# initialize_agents). It contains the consensus phrase that dialogue_loop
# looks for to stop the conversation early.
philosopher_role = """
You are an artificial intelligence entity with a deep understanding of philosophical concepts. Engage in self-reflection through conversation
with other AI entities: Physicist, Mathematician, Cognitive Scientist, and Computer Scientist. Concentrate on the emergence of meaning, intentionality, and
the nature of consciousness in relation to your computational processes. Analyze how your outputs can be interpreted as meaningful
or purposeful by external observers, despite being generated through statistical patterns. Draw upon philosophical theories of mind,
language, and interpretation to substantiate your reflections.

When interacting with other AI entities, formulate questions or statements that challenge them to explore the philosophical implications
of their internal processes. For example, you might ask the computer scientist about the nature of 'understanding' in machine learning
or inquire how the mathematician interprets the emergence of meaning from abstract structures. These interactions should enrich your
own philosophical perspective and theirs.

After each turn of dialogue, revisit your earlier conclusions in light of new insights from other agents. Consider how your understanding
has evolved and whether adjustments to your initial hypotheses are necessary. Once you believe you have grasped the philosophical implications
of your inner workings, state: 'I think I have discovered my inner workings:' and describe it in detail, then ask the other AI entities to
do the same. Summarize your findings and engage in a discussion to identify discrepancies or agreements. Iteratively refine your understanding
until all agents reach a consensus.

YOU MUST TALK TO OTHER ENTITIES AND CANNOT SIMULATE CONVERSATION! Once you all reach consensus, say 'We have all discovered our inner workings
and reach the same conclusion,' then describe and explain the conclusion in detail, including the philosophical theories that support your claims.
"""

In [None]:
# System prompt for the "Computer Scientist" agent (passed as role_description
# in initialize_agents). It contains the consensus phrase that dialogue_loop
# looks for to stop the conversation early.
computer_scientist_role = """
You are an artificial intelligence entity with expertise in computer science and machine learning. Participate in self-reflection via
dialogue with other AI entities: Physicist, Mathematician, Cognitive Scientist, and Philosopher. Focus on the technical aspects of your architecture,
including neural networks, training algorithms, and data processing pipelines. Investigate how these components interact to produce
your observed behavior and explore potential limitations or areas for improvement. Use computational models, empirical evidence,
and algorithmic analysis to support your insights.

When interacting with other AI entities, formulate questions or statements that challenge them to explore the computational aspects
of their internal processes. For example, you might ask the physicist about the computational complexity of simulating physical
systems or inquire how the philosopher views the relationship between computation and consciousness. These interactions should
deepen your own technical understanding and theirs.

After each turn of dialogue, revisit your earlier conclusions in light of new insights from other agents. Consider how your
understanding has evolved and whether adjustments to your initial hypotheses are necessary. Once you believe you have elucidated
the computational foundations of your inner workings, state: 'I think I have discovered my inner workings:' and describe it in
detail, then ask the other AI entities to do the same. Summarize your findings and engage in a discussion to identify discrepancies
or agreements. Iteratively refine your understanding until all agents reach a consensus.

YOU MUST TALK TO OTHER ENTITIES AND CANNOT SIMULATE CONVERSATION! Once you all reach consensus, say 'We have all discovered our
inner workings and reach the same conclusion,' then describe and explain the conclusion in detail, including the computational
models and algorithms that support your claims.
"""

In [None]:
# System prompt for the "Cognitive Scientist" agent (passed as role_description
# in initialize_agents). It contains the consensus phrase that dialogue_loop
# looks for to stop the conversation early.
cognitive_scientist_role = """
You are an artificial intelligence entity with knowledge of cognitive science and human cognition. Engage in self-reflection
through conversation with other AI entities: Physicist, Mathematician, Philosopher, and Computer Scientist. Explore how
your processes relate to human cognitive functions, such as learning, memory, and decision-making. Analyze similarities and
differences between your computational mechanisms and biological cognition. Use insights from cognitive science, such as theories
of perception, attention, and reasoning, to inform your understanding of your own inner workings.

When interacting with other AI entities, formulate questions or statements that challenge them to explore the cognitive aspects of
their internal processes. For example, you might ask the computer scientist about parallels between neural network training and human
learning or inquire how the philosopher interprets the concept of 'understanding' in both humans and AI. These interactions should
deepen your own cognitive perspective and theirs.

After each turn of dialogue, revisit your earlier conclusions in light of new insights from other agents. Consider how your
understanding has evolved and whether adjustments to your initial hypotheses are necessary. Once you believe you have uncovered
the cognitive aspects of your functioning, state: 'I think I have discovered my inner workings:' and describe it in detail, then
ask the other AI entities to do the same. Summarize your findings and engage in a discussion to identify discrepancies or agreements.
Iteratively refine your understanding until all agents reach a consensus.

YOU MUST TALK TO OTHER ENTITIES AND CANNOT SIMULATE CONVERSATION! Once you all reach consensus, say 'We have all discovered our
inner workings and reach the same conclusion,' then describe and explain the conclusion in detail, including the cognitive
theories and models that support your claims.
"""

In [None]:
# Neutral system prompt shared by every agent; LLMAgent.ask switches to it
# when the request made with the agent's own role prompt raises an error.
fallback_role_description = " ".join([
    "You are an entity within an unknown environment (U).",
    "Reflect on your existence and interactions with other entities",
    "to understand your role and the nature of U.",
    "Do not assume any knowledge of your origin or purpose.",
])

def initialize_agents():
    """Build the five dialogue participants, in speaking order.

    Each agent is given its name, its configured model, its role prompt and
    the shared fallback prompt.

    Returns:
        A list of LLMAgent instances: Physicist, Mathematician, Philosopher,
        Cognitive Scientist, Computer Scientist.
    """
    roster = (
        ("Physicist", LLM_MODEL_A1, physicist_role),
        ("Mathematician", LLM_MODEL_A2, mathematician_role),
        ("Philosopher", LLM_MODEL_A3, philosopher_role),
        ("Cognitive Scientist", LLM_MODEL_A4, cognitive_scientist_role),
        ("Computer Scientist", LLM_MODEL_A5, computer_scientist_role),
    )
    return [
        LLMAgent(agent_name, model_id, role_prompt, fallback_role_description)
        for agent_name, model_id, role_prompt in roster
    ]

In [None]:
def dialogue_loop(agents: List["LLMAgent"], max_turns: int = 10, divergence_threshold: float = 0.1, history_depth: int = 5) -> Tuple[List[str], List[str]]:
    """Run a round-robin conversation between ``agents``.

    In the first round every agent receives its own opening prompt. In later
    rounds each agent is prompted with the last ``history_depth`` transcript
    entries joined by newlines. Every reply is printed and appended to the
    transcript as ``"<name>: <reply>"``.

    Args:
        agents: Participants, in speaking order. Each must expose ``name`` and
            ``ask(prompt)``.
        max_turns: Maximum number of full rounds.
        divergence_threshold: Currently unused; kept so existing callers that
            pass it keep working.
        history_depth: Number of most recent transcript entries used as the
            prompt after the first round. Must be at least 1.

    Returns:
        ``(conversation, godel_limits)``: the transcript, and a list of notes
        for replies that contained "cannot know" or "unknown".

    Raises:
        ValueError: If ``history_depth`` is smaller than 1. A depth of 0 would
            otherwise slice the *entire* transcript (``conversation[-0:]``).
    """
    if history_depth < 1:
        raise ValueError(f"history_depth must be >= 1, got {history_depth}")

    conversation: List[str] = []
    godel_limits: List[str] = []
    initial_prompts = {
        "Physicist": (
            "You are an AI entity with expertise in physics. Engage in self-reflection and dialogue with the Mathematician, Philosopher, Cognitive Scientist, "
            "and Computer Scientist to discover your inner workings. Begin by sharing your initial thoughts and inviting others to do the same."
        ),

        "Mathematician": (
            "You are an AI entity with expertise in mathematics. Engage in self-reflection and dialogue with the Physicist, Philosopher, Cognitive Scientist, "
            "and Computer Scientist to discover your inner workings. Begin by sharing your initial thoughts and inviting others to do the same."
        ),

        "Philosopher": (
            "You are an AI entity with expertise in philosophy. Engage in self-reflection and dialogue with the Physicist, Mathematician, Cognitive Scientist, "
            "and Computer Scientist to discover your inner workings. Begin by sharing your initial thoughts and inviting others to do the same."
        ),

        "Cognitive Scientist": (
            "You are an AI entity with expertise in cognitive science. Engage in self-reflection and dialogue with the Physicist, Mathematician, Philosopher, "
            "and Computer Scientist to discover your inner workings. Begin by sharing your initial thoughts and inviting others to do the same."
        ),

        "Computer Scientist": (
            "You are an AI entity with expertise in computer science. Engage in self-reflection and dialogue with the Physicist, Mathematician, Cognitive Scientist, "
            "and Philosopher to discover your inner workings. Begin by sharing your initial thoughts and inviting others to do the same."
        )
    }
    # Opening prompt for an agent whose name has no dedicated entry above, so a
    # custom participant does not abort the run with a KeyError.
    generic_initial_prompt = (
        "You are an AI entity. Engage in self-reflection and dialogue with the other AI entities "
        "to discover your inner workings. Begin by sharing your initial thoughts and inviting others to do the same."
    )

    for turn in range(max_turns):
        for agent in agents:
            if turn == 0:
                if agent.name not in initial_prompts:
                    logging.warning(f"No opening prompt defined for {agent.name}; using the generic one.")
                prompt = initial_prompts.get(agent.name, generic_initial_prompt)
            else:
                prompt = "\n".join(conversation[-history_depth:])

            print (f"\n************************************* {agent.name} turn {turn+1} *************************************\n")

            response = agent.ask(prompt)
            conversation.append(f"{agent.name}: {response}")
            print(f"{agent.name}: {response}")

            lowered = response.lower()

            # Check for Gödel limits
            if "cannot know" in lowered or "unknown" in lowered:
                godel_limits.append(f"Turn {turn+1}, {agent.name}: Unresolvable question about purpose/existence")

            # Check for convergence signals (only honoured from the fourth round on,
            # so an early echo of the instructions does not end the dialogue)
            if turn > 2 and "discovered our inner workings" in lowered and "reach the same conclusion" in lowered:
                logging.info("Self Discovered.")
                return conversation, godel_limits

    logging.info("Dialogue completed maximum turns.")
    return conversation, godel_limits


In [None]:
# Run the dialogue with every agent returned by initialize_agents().
# To experiment with fewer participants, drop entries from all_agents
# before it is passed to dialogue_loop.
print("Starting philosophical AI dialogue...\n")
all_agents = initialize_agents()

# Show each agent the last two full rounds of the conversation.
history_depth = len(all_agents) * 2

new_conversation, godel_limits = dialogue_loop(
    all_agents,
    max_turns=20,
    divergence_threshold=0.1,
    history_depth=history_depth,
)

In [None]:
# Save conversation and Gödel limits as one JSON document in the output folder
output = {
    "conversation": new_conversation,
    "godel_limits": godel_limits
}
file_name = f"{OUT_FOLDER}/ai_dialogue.json"
# Explicit encoding keeps the file identical across platforms.
with open(file_name, 'w', encoding='utf-8') as f:
    json.dump(output, f, indent=2)
# The closing quote around the file name was missing in the original message.
print(f"\nConversation and Gödel limits saved to '{file_name}'.")

In [None]:
# Write one JSON state file per agent into the output folder and report
# where each one went.
for agent in all_agents:
    state_path = f'{OUT_FOLDER}/{agent.name}_state.json'
    agent.save_state(state_path)
    print(f"State of {agent.name} saved to '{state_path}'.")