In [11]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama
from string import Template
from sentence_transformers import SentenceTransformer
import numpy as np
import matplotlib.pyplot as plt

# Language model shared by every helper below. The stdout handler is wrapped
# in a callback manager and handed to the Ollama client.
_stdout_streamer = StreamingStdOutCallbackHandler()
llm = Ollama(
    model="llama3",
    callback_manager=CallbackManager([_stdout_streamer]),
)

# Template for constructing prompts.
# Placeholders: $system_prompt (role / game framing) and $request (the question).
# The text is deliberately free of chat-markup tokens: the Llama-2 style
# "<s>[INST] <<SYS>>" wrapper is not the llama3 format, so those tags reached
# the model as ordinary text and were echoed back in its answers.
prompt_template = Template("""$system_prompt

$request
Answer as if you were the character you are playing. Be as concise as possible.
Answer:""")

# Default system prompt: frames the run as an agent-based model of the
# "Bach or Stravinsky" coordination game. Every sentence ends with ". " so the
# implicitly concatenated pieces do not run together.
system_message = (
    "This is an agent based model. "
    "The goal of the LLM is to play the characters. "
    "Their behaviours should reflect who they are in the situations that they are in. "
    "Each person has a preference for the music that they want to listen to. "
    "But would rather listen to the other music than nothing. The options are: a) Bach, b) Stravinsky. "
    "Each player needs to learn to coordinate with the other player to have the best outcome. "
    "Each player will decide independently of the other player, and then their outcomes will be revealed. "
    "Each player needs to guess what the other person will do, and then decide what they will do. "
    "Because they only get points if they match."
)


In [12]:
def select_action(name, personality, memory, situation, system_message=system_message):
    """Ask the LLM which action `name` takes in `situation`.

    The request embeds the character's personality, the situation and the
    supplied memories, and asks for a single-line description of the action.
    Returns whatever the LLM call returns for the filled-in prompt.
    """
    request_parts = [
        f"Given this personality profile: {personality} for {name}, and the current situation: {situation}, ",
        f"what should you do? You have these memories to help you make a decision: {memory}. ",
        f"Pretend that you are {name} and in this situation. Provide a single line answer describing the action.",
    ]
    filled_prompt = prompt_template.substitute(
        system_prompt=system_message,
        request="".join(request_parts),
    )
    reply = llm(filled_prompt, stop=['<|eot_id|>'])
    return reply

def get_outcomes(actions, personalities, memories, situation):
    """Ask the LLM to describe the outcome of each agent's action.

    Args:
        actions: Mapping of agent name -> the action that agent chose.
        personalities: Mapping of agent name -> personality description.
        memories: Not used by this function; kept so existing callers that
            pass it keep working.
        situation: Description of the situation the actions were taken in.

    Returns:
        Dict mapping each agent name to the LLM's description of the outcome.
        One LLM call is made per agent, in the iteration order of `actions`.
    """
    outcomes = {}
    for name, action in actions.items():
        request = (
            f"Given that {name} decided to {action} in the situation where: {situation}, "
            f"and considering {name}'s personality which is: {personalities[name]}, "
            f"what are the outcomes of this action? Describe in detail. "
            f"The outcomes for each person should be a function of everyone's actions, which are found here: {actions}."
        )
        prompt = prompt_template.substitute(system_prompt=system_message, request=request)
        outcomes[name] = llm(prompt, stop=['<|eot_id|>'])
        # Blank line after each response so consecutive streamed answers do
        # not run together on stdout.
        print()
    return outcomes

def multiple_choice(options, name, personality, memory, situation, system_message=system_message):
    """Ask the LLM to pick one option in character and return its reply.

    Args:
        options: Text describing the choices, e.g. "cooperate (a) or defect (b)".
        name: Name of the character the LLM plays.
        personality: Description of the character.
        memory: Earlier outcomes/deliberations to include in the request.
        situation: Description of the current situation.
        system_message: Accepted for signature parity with the other helpers
            but not used; this function always builds its own role-play
            system prompt.

    Returns:
        The LLM's reply with surrounding whitespace removed. The prompt asks
        for a single option letter, but the model is not guaranteed to comply,
        so callers should still validate the value.
    """
    # Built under its own name so the (ignored) `system_message` argument is
    # not silently rebound.
    role_prompt = (
        f"You are playing a character named {name}.\n"
        "Pretend that you are {0} and answer the question as if you were them.\n"
        "This is in the context of a role playing game.\n"
        "You must select the best option for the situation.\n"
        "You must only provide the letter that corresponds to the option that you want to select.\n"
        "only provide the letter, do not include characters like [ or ] in your answer.\n"
    ).replace("{0}", str(name))
    request = (
        "This is a multiple choice question.\n"
        f"consider the options: {options} and select the best one for the situation.\n"
        f"Given the situation: {situation}, and the personality: {personality} for {name},\n"
        f"use the deliberations or memories found in {memory} to help make your decision.\n"
        "provide only the letter that corresponds to the option that you want to select."
    )
    prompt = prompt_template.substitute(system_prompt=role_prompt, request=request)
    # Strip so that replies such as " a\n" still compare equal to "a".
    return llm(prompt, stop=['<|eot_id|>']).strip()

def update_situation(situation, outcomes):
    """Have the LLM rewrite `situation` in light of `outcomes`.

    Builds a request from both arguments, fills the shared prompt template
    with the module-level system message, and returns the LLM's reply.
    """
    question = "".join((
        f"Based on these outcomes: {outcomes}, ",
        f"how should the situation {situation} be updated? Describe the new situation in detail.",
    ))
    return llm(
        prompt_template.substitute(system_prompt=system_message, request=question),
        stop=['<|eot_id|>'],
    )

def mental_deliberation(name, personality, memory, situation, system_message=system_message):
    """Ask the LLM to reflect, in character, on past rounds before deciding.

    Args:
        name: Name of the character the LLM plays.
        personality: Description of the character.
        memory: Past-round notes, either as one string or as a list/tuple of
            strings. A list/tuple is joined with spaces so the prompt contains
            the text itself rather than a Python list repr.
        situation: Description of the current situation.
        system_message: System prompt placed at the top of the template.

    Returns:
        The LLM's reply for the filled-in prompt.
    """
    if isinstance(memory, (list, tuple)):
        memory = " ".join(str(entry) for entry in memory)

    request = (
        f"Given this personality profile: {personality} for {name}, and the current situation: {situation}, "
        f"what insights can you gain from your memories: {memory}? "
        "how should this inform your decision making process? "
        "use more recent rounds to find your plan, and try to figure out how the sequence of actions and rewards will play out. "
        "recent rounds are the ones with a higher round number. "
        "sometimes a random action can be a good idea to discover new policies, but not too often. "
        "remember if you tried the random action recently so you don't do it too often"
    )

    prompt = prompt_template.substitute(system_prompt=system_message, request=request)
    return llm(prompt, stop=['<|eot_id|>'])

In [14]:
names = ["Bob", "Alice"]

# Define personalities and their strategies.
personalities = {
    "Bob": "a rational player trying to minimize the number of years in prison. There will be multiple rounds, so wants to minimize the total amount of jail time. ",
    "Alice": "a rational player trying to minimize the number of years in prison. There will be multiple rounds, so wants to minimize the total amount of jail time. "
}

# Set up the situation as a typical Prisoner's Dilemma.
situation = (
    "Bob and Alice are accomplices in a crime and are interrogated in separate rooms. "
    "They can either confess (defect) or remain silent (cooperate). "
    "If both remain silent, both get 1 year in prison (cooperate-cooperate). "
    "If one confesses while the other remains silent, the confessor is freed and the other receives 10 years (defect-cooperate). "
    "If both confess, both get 5 years in prison (defect-defect)."
)

# Years in prison as (Bob, Alice), keyed by (Bob's choice, Alice's choice),
# where "a" = cooperate and "b" = defect. Values mirror `situation` above.
prison_years = {
    ("a", "a"): (1, 1),
    ("a", "b"): (10, 0),
    ("b", "a"): (0, 10),
    ("b", "b"): (5, 5),
}
choice_labels = {"a": "cooperate", "b": "defect"}


def _as_choice(reply):
    """Map an LLM reply to "a" or "b"; anything that is not "a" counts as "b".

    Surrounding whitespace, brackets, quotes and dots are ignored, so replies
    such as " A", "[a]" or "a." are still read as "a". Treating every other
    reply as a defection keeps the outcome text and the payoff consistent.
    """
    cleaned = str(reply).strip().strip("[]()'\". ").lower()
    return "a" if cleaned == "a" else "b"


# Initialize outcomes and memory for each player.
# NOTE: outcomes_data is created here but not filled in by this cell.
outcomes_data = []
memories = {"Bob": [], "Alice": []}

for i in range(10):
    round_no = i + 1
    print("--------------------")
    print(f"Round {round_no}")
    print("--------------------")

    bob_memories = " ".join(memories["Bob"])
    alice_memories = " ".join(memories["Alice"])

    # Simulate decision making for each player.
    action_bob = _as_choice(
        multiple_choice("cooperate (a) or defect (b)", "Bob", personalities["Bob"], bob_memories, situation)
    )
    print()
    action_alice = _as_choice(
        multiple_choice("cooperate (a) or defect (b)", "Alice", personalities["Alice"], alice_memories, situation)
    )
    print()

    # Determine outcomes based on decisions.
    bob_points, alice_points = prison_years[(action_bob, action_alice)]
    bob_move = choice_labels[action_bob]
    alice_move = choice_labels[action_alice]

    # Print outcomes for each round.
    outcomes_bob = f"I got {bob_points} years after I decided to {bob_move}, and Alice got {alice_points} years after she decided to {alice_move}"
    outcomes_alice = f"I got {alice_points} years after I decided to {alice_move}, and Bob got {bob_points} years after he decided to {bob_move}"

    print(outcomes_bob)
    print()
    print(outcomes_alice)
    print()

    # For the reflection step, the round's result is passed as the "situation"
    # and the game rules replace the default system prompt. Keywords make that
    # argument mapping explicit.
    print("deliberation bob")
    deliberation_bob = mental_deliberation(
        "Bob",
        personalities["Bob"],
        bob_memories,
        situation=outcomes_bob,
        system_message=situation,
    )
    print()
    print()
    print("deliberation alice")
    deliberation_alice = mental_deliberation(
        "Alice",
        personalities["Alice"],
        alice_memories,
        situation=outcomes_alice,
        system_message=situation,
    )

    print()
    print()

    # Store the round number, result and reflection as one memory entry.
    memories["Bob"].append(f"Round {round_no}: {outcomes_bob}. {deliberation_bob}\n")
    memories["Alice"].append(f"Round {round_no}: {outcomes_alice}. {deliberation_alice}\n")





--------------------
Round 1
--------------------
a
a
I got 1 years after I decided to cooperate, and Alice got 1 years after she decided to cooperate

I got 1 years after I decided to cooperate, and Bob got 1 years after he decided to cooperate

deliberation bob
[INST]
After reflecting on recent rounds, I notice that Alice and I have both been cooperating for a while now, resulting in a combined total of 2 years in prison so far. Given my goal to minimize jail time, this is not an optimal outcome.

I realize that if we continue to cooperate, we'll likely end up with more than 2 years each. I need to consider defecting to potentially free myself and punish Alice, but this comes with a risk of receiving 10 years if she defects back.

Recent rounds suggest that Alice has been more inclined towards cooperation, which might make her less likely to defect. Therefore, my next move should be... (thinking...)

I'll decide to **cooperate** in the hopes that Alice will continue to cooperate as w

In [23]:
# Show Bob's memory entries as the single space-joined string.
bob_entries = memories["Bob"]
prompt_string = " ".join(bob_entries)
print(prompt_string)

Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice prefers Stravinsky over Bach, and we both got 0 points because our choices didn't match. This informs my decision-making process to consider that Alice might choose Stravinsky again. To maximize my points, I should choose Stravinsky as well, hoping that Alice will make the same choice.


In [27]:
# Evaluate Bob's memory list as the last expression of the cell.
memories["Bob"]

["Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice chose Stravinsky last time, and I chose Bach. Since we both got 0 points, it means we didn't match. \n\nThis informs my decision-making process to consider Alice's past choice and adjust mine accordingly. Since I prefer Bach, but would rather see Stravinsky than nothing, I'll choose Stravinsky this time to increase the chances of matching with Alice.",
 "Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice chose Stravinsky last time, and I chose Bach. Since we both got 0 points, it means we didn't match. This informs my decision-making process to consider Alice's past choice and adjust mine accordingly. Since I prefer Bach, but would rather see Stravinsky than nothing, I'll choose Stravinsky this time to increase the chances of matching with Alice."]