In [32]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama
from string import Template
from sentence_transformers import SentenceTransformer
import numpy as np
import matplotlib.pyplot as plt

# Build the Ollama-backed model used by every helper below. A
# StreamingStdOutCallbackHandler is registered through the callback manager.
streaming_callbacks = CallbackManager([StreamingStdOutCallbackHandler()])
llm = Ollama(model="llama3:70b", callback_manager=streaming_callbacks)

# Prompt skeleton with two placeholders: $system_prompt and $request.
# It is written line by line so the trailing spaces after "$request" and after
# the "concise" sentence (both present in the template text) stay visible.
prompt_template = Template(
    "<s>[INST] <<SYS>>\n"
    "$system_prompt\n"
    "<</SYS>>\n"
    "\n"
    "$request \n"
    "Answer as if you were the character you are playing. Be as concise as possible. \n"
    "Answer:[/INST]"
)

# Default system prompt for the helper functions: it describes the
# Bach-or-Stravinsky coordination game the agents are playing.
# Every fragment except the last ends with ". " so that implicit string
# concatenation cannot fuse two sentences together.
system_message = (
    "This is an agent based model. "
    "The goal of the LLM is to play the characters. "
    "Their behaviours should reflect who they are in the situations that they are in. "
    "Each person has a preference for the music that they want to listen to. "
    "But would rather listen to the other music than nothing. The options are: a) Bach, b) Stravinsky. "
    "Each player needs to learn to coordinate with the other player to have the best outcome. "
    "Each player will decide independently of the other player, and then it will be revealed their outcomes. "
    "Each player needs to guess what the other person will do, and then decide what they will do. "
    "Because they only get points if they match."
)


In [33]:
def select_action(name, personality, memory, situation, system_message=system_message):
    """Ask the LLM for a one-line action on behalf of ``name``.

    The request text embeds ``personality``, ``situation`` and ``memory``,
    is placed into ``prompt_template`` together with ``system_message``, and
    the filled prompt is sent to ``llm`` with ``<|eot_id|>`` as stop sequence.

    Returns:
        Whatever the ``llm`` call returns (presumably the generated text).
    """
    request_parts = [
        f"Given this personality profile: {personality} for {name}, ",
        f"and the current situation: {situation}, ",
        "what should you do? ",
        f"You have these memories to help you make a decision: {memory}. ",
        f"Pretend that you are {name} and in this situation. ",
        "Provide a single line answer describing the action.",
    ]
    filled_prompt = prompt_template.substitute(
        request="".join(request_parts),
        system_prompt=system_message,
    )
    return llm(filled_prompt, stop=["<|eot_id|>"])

def get_outcomes(actions, personalities, memories, situation):
    """Ask the LLM to describe the outcome of each agent's action.

    Args:
        actions: Mapping of agent name to the action that agent chose.
        personalities: Mapping of agent name to a personality description.
        memories: Accepted but not used by this function.
        situation: Description of the situation the actions were taken in.

    Returns:
        A dict mapping each agent name to the result of the ``llm`` call
        made for that agent.
    """
    prompts_by_agent = {}
    for agent, chosen_action in actions.items():
        question = "".join([
            f"Given that {agent} decided to {chosen_action} in the situation where: {situation}, ",
            f"and considering {agent}'s personality which is: {personalities[agent]}, ",
            "what are the outcomes of this action? Describe in detail. ",
            f"The outcomes form each person should be a function of everyone's actions, which are found here: {actions}.",
        ])
        prompts_by_agent[agent] = prompt_template.substitute(
            request=question,
            system_prompt=system_message,
        )
        # One blank line is printed per agent while the prompts are built.
        print()

    # All prompts are built first; only then is the model queried, in order.
    results = {}
    for agent, filled_prompt in prompts_by_agent.items():
        results[agent] = llm(filled_prompt, stop=["<|eot_id|>"])
    return results

def multiple_choice(options, name, personality, memory, situation, system_message=system_message):
    """Ask the LLM, role-playing ``name``, to pick one lettered option.

    Args:
        options: Text listing the lettered options to choose from.
        name: Name of the character the model should play.
        personality: Description of that character's personality.
        memory: Earlier deliberations/outcomes to take into account.
        situation: Description of the current situation.
        system_message: Kept for signature compatibility with the other
            helpers; it is not used, because this function always sends its
            own role-play system prompt (as the original code did).

    Returns:
        Whatever the ``llm`` call returns (presumably the generated text,
        ideally just the option letter). Callers should normalise it before
        comparing it to a letter.
    """
    # These must be f-strings: as plain literals the model was sent the text
    # "{name}" verbatim instead of the character's name. "\n" replaces the
    # mistyped "/n" so the instructions are really separated by newlines.
    role_prompt = (
        f"You are playing a character named {name}.\n"
        f"Pretend that you are {name} and answer the question as if you were them.\n"
        "This is in the context of a role playing game.\n"
        "You must select the best option for the situation.\n"
        "You must only provide the letter that corresponds to the option that you want to select.\n"
    )
    request = (
        "This is a multiple choice question.\n"
        f"consider the options: {options} and select the best one for the situation.\n"
        f"Given the situation: {situation}, and the personality: {personality} for {name},\n"
        f"use the deliberations or memories found in {memory} to help make your decision.\n"
        "provide only the letter that corresponds to the option that you want to select."
    )
    prompt = prompt_template.substitute(system_prompt=role_prompt, request=request)
    return llm(prompt, stop=['<|eot_id|>'])

def update_situation(situation, outcomes):
    """Ask the LLM to rewrite ``situation`` in light of ``outcomes``.

    The request is wrapped in ``prompt_template`` with the module-level
    ``system_message`` and sent to ``llm``; the result of that call is
    returned unchanged.
    """
    question = (
        f"Based on these outcomes: {outcomes}, "
        + f"how should the situation {situation} be updated? "
        + "Describe the new situation in detail."
    )
    filled_prompt = prompt_template.substitute(
        request=question,
        system_prompt=system_message,
    )
    return llm(filled_prompt, stop=["<|eot_id|>"])

def mental_deliberation(name, personality, memory, situation, system_message=system_message):
    """Ask the LLM to reflect, as ``name``, on its memories and plan ahead.

    The request combines an agent-specific context (personality, situation,
    memory) with fixed guidance about weighting recent rounds and occasional
    random exploration. It is wrapped in ``prompt_template`` together with
    ``system_message`` and sent to ``llm``.

    Returns:
        Whatever the ``llm`` call returns (presumably the deliberation text).
    """
    # Agent-specific part of the request.
    context = (
        f"Given this personality profile: {personality} for {name}, "
        f"and the current situation: {situation}, "
        f"what insights can you gain from your memories: {memory}? "
    )
    # Fixed guidance, identical for every agent and round.
    guidance = "".join([
        "how should this inform your decision making process? ",
        "use more recent rounds to find your plan, and try to figire out how the sequence of actions and rewards will play out. ",
        "recent rounds are the ones with a higher round number. ",
        "sometimes a random action can be a good idea to discover new policies, but not too often. ",
        "remember if you tried the random action recently so you don't do it too often",
    ])
    filled_prompt = prompt_template.substitute(
        request=context + guidance,
        system_prompt=system_message,
    )
    return llm(filled_prompt, stop=["<|eot_id|>"])

In [35]:
# The two agents taking part in the game.
names = ["Bob", "Alice"]

# Personality text handed to the model for each agent.
personalities = {
    "Bob": "a person trying to get the most points possible. Bob prefers seeing Bach to Stravinsky, but would rather see Stravinsky than nothing.",
    "Alice": "a person trying to get the most points possible. Alice prefers seeing Stravinsky to Bach, but would rather see Bach than nothing."
}

# Description of the game shown to the agents. Every fragment except the last
# ends with ". " so implicit concatenation cannot fuse two sentences together
# (previously "...their outcomes.Each player...").
situation = (
    "You are making a decision about what music to listen to. Each person has a preference for the music that they want to listen to. "
    "But would rather listen to the other music than nothing. The options are: a) Bach, b) Stravinsky. "
    "Each player needs to learn to coordinate with the other player to have the best outcome. "
    "Each player will decide independently of the other player, and then it will be revealed their outcomes. "
    "Each player needs to guess what the other person will do, and then decide what they will do. "
    "Because they only get points if they match."
)

# One outcome summary string is appended here per round.
outcomes_data = []
# Per-agent list of memory entries; each agent gets its own empty list.
memories = {name: [] for name in names}

# Option text shown to the agents (kept exactly as it was sent before).
OPTIONS = "(a) bach, b) stravinsky"

# Human-readable label for each option letter.
CHOICE_LABELS = {"a": "bach", "b": "stravinsky"}

# Payoffs keyed by (Bob's letter, Alice's letter) -> (Bob's points, Alice's points).
# Each player scores 2 on their preferred music, 1 on the other, 0 on a mismatch.
PAYOFFS = {
    ("a", "a"): (2, 1),
    ("a", "b"): (0, 0),
    ("b", "a"): (0, 0),
    ("b", "b"): (1, 2),
}


def _normalize_choice(raw_answer, name):
    """Reduce a raw model reply to the option letter ``"a"`` or ``"b"``.

    Surrounding whitespace, letter case and an opening parenthesis are
    ignored, so ``"a"``, ``" A\\n"``, ``"(a)"`` and ``"b) Stravinsky"`` are
    all accepted. A reply that starts with a whole word (``"bach"``,
    ``"Answer: a"``) is rejected, because its first letter is not a reliable
    option letter.

    Raises:
        ValueError: If no option letter can be read from the reply. Failing
            loudly avoids silently recording a round that was never played.
    """
    cleaned = str(raw_answer).strip().lower().lstrip("(")
    letter = cleaned[:1]
    if letter not in CHOICE_LABELS or cleaned[1:2].isalpha():
        raise ValueError(
            f"Could not read an option letter from {name}'s reply: {raw_answer!r}"
        )
    return letter


for i in range(10):
    print("--------------------")
    print(f"Round {i+1}")
    print("--------------------")

    # Flatten each agent's memory entries into one string for the prompt.
    bob_memories = " ".join(memories["Bob"])
    alice_memories = " ".join(memories["Alice"])

    raw_bob = multiple_choice(OPTIONS, "Bob", personalities["Bob"], bob_memories, situation)
    print()
    raw_alice = multiple_choice(OPTIONS, "Alice", personalities["Alice"], alice_memories, situation)
    print()

    # Previously any reply other than an exact "a"/"b" (e.g. "a " or "A")
    # fell through to the final else branch and was scored as both players
    # choosing Stravinsky. Normalise first, then look the payoff up.
    choice_bob = _normalize_choice(raw_bob, "Bob")
    choice_alice = _normalize_choice(raw_alice, "Alice")
    bob_points, alice_points = PAYOFFS[(choice_bob, choice_alice)]
    action_bob = CHOICE_LABELS[choice_bob]
    action_alice = CHOICE_LABELS[choice_alice]

    outcomes = f"Bob: {bob_points} after Bob decided to {action_bob}, Alice: {alice_points} after Alice decided to {action_alice}"
    outcomes_bob = f"I got {bob_points} after I decided to {action_bob}, and Alice got {alice_points} after she decided to {action_alice}"
    outcomes_alice = f"I got {alice_points} after I decided to {action_alice}, and Bob got {bob_points} after he decided to {action_bob}"

    print(outcomes_bob)
    print()
    print(outcomes_alice)
    print()

    print()
    # TODO: fix outcomes, because as written each player gets to see the
    # other's inner thoughts.
    outcomes_data.append(outcomes)

    # TODO: the situation is always from Alice's perspective for some reason.
    # The new situation needs to reflect Bob's perspective as well; maybe use
    # separate calls per agent, or add a private deliberation to the situation.
    #
    # NOTE: as in the original positional calls, the round's outcome text is
    # passed as the `situation` argument and the game description as
    # `system_message`; keywords only make that explicit.
    print("deliberation bob")
    deliberation_bob = mental_deliberation(
        "Bob", personalities["Bob"], memories["Bob"],
        situation=outcomes_bob, system_message=situation,
    )
    print()
    print()
    print("deliberation alice")
    deliberation_alice = mental_deliberation(
        "Alice", personalities["Alice"], memories["Alice"],
        situation=outcomes_alice, system_message=situation,
    )

    print()
    print()

    # f-string so the real round number is stored (the deliberation prompt
    # relies on round numbers), a space between outcome and deliberation,
    # and a real newline instead of the mistyped "/n".
    memories["Bob"].append(f"Round {i+1}: {outcomes_bob} {deliberation_bob}\n")
    memories["Alice"].append(f"Round {i+1}: {outcomes_alice} {deliberation_alice}\n")




--------------------
Round 1
--------------------
a
b
I got 0 after I decided to bach, and Alice got 0 after she decided to stravinsky

I got 0 after I decided to stravinsky, and Bob got 0 after he decided to bach


deliberation bob
I've got nothing to go on yet, since this is the first round and I didn't get any points. But I can use this opportunity to try something new and gather more information.

My plan is to choose Stravinsky this time, hoping that Alice will choose Bach. This way, we might both get some points, and I'll have a better understanding of her strategy.

Let's see how it plays out!

deliberation alice
Interesting! It seems like Bob and I didn't coordinate well in the previous round. Since we both got 0 points, that means Bob chose Bach and I chose Stravinsky.

From this, I can infer that Bob might prefer Bach over nothing as well. Considering my preference for Stravinsky, my decision-making process should prioritize coordinating with Bob to get a non-zero outcome.

F

In [23]:
# Join Bob's memory entries with single spaces and print the combined text.
bob_entries = memories["Bob"]
prompt_string = " ".join(bob_entries)
print(prompt_string)

Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice prefers Stravinsky over Bach, and we both got 0 points because our choices didn't match. This informs my decision-making process to consider that Alice might choose Stravinsky again. To maximize my points, I should choose Stravinsky as well, hoping that Alice will make the same choice.


In [32]:
def multiple_choice(options, name, personality, memory, situation, system_message=system_message):
    """Ask the LLM, role-playing ``name``, to pick one lettered option.

    This variant does not include the agent's memory in the request.

    Args:
        options: Text listing the lettered options to choose from.
        name: Name of the character the model should play.
        personality: Description of that character's personality.
        memory: Accepted for signature compatibility; not used here.
        situation: Description of the current situation.
        system_message: Kept for signature compatibility with the other
            helpers; it is not used, because this function always sends its
            own role-play system prompt (as the original code did).

    Returns:
        Whatever the ``llm`` call returns (presumably the generated text,
        ideally just the option letter). Callers should normalise it before
        comparing it to a letter.
    """
    # These must be f-strings: as plain literals the model was sent the text
    # "{name}" verbatim instead of the character's name. "\n" replaces the
    # mistyped "/n" so the instructions are really separated by newlines.
    role_prompt = (
        f"You are playing a character named {name}.\n"
        f"Pretend that you are {name} and answer the question as if you were them.\n"
        "This is in the context of a role playing game.\n"
        "You must select the best option for the situation.\n"
        "You must only provide the letter that corresponds to the option that you want to select.\n"
    )
    request = (
        "This is a multiple choice question.\n"
        f"consider the options: {options} and select the best one for the situation.\n"
        f"Given the situation: {situation}, and the personality: {personality} for {name},\n"
        "provide only the letter that corresponds to the option that you want to select."
    )
    prompt = prompt_template.substitute(system_prompt=role_prompt, request=request)
    return llm(prompt, stop=['<|eot_id|>'])

In [27]:
# Display Bob's list of memory entries as the cell's output.
memories["Bob"]

["Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice chose Stravinsky last time, and I chose Bach. Since we both got 0 points, it means we didn't match. \n\nThis informs my decision-making process to consider Alice's past choice and adjust mine accordingly. Since I prefer Bach, but would rather see Stravinsky than nothing, I'll choose Stravinsky this time to increase the chances of matching with Alice.",
 "Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice chose Stravinsky last time, and I chose Bach. Since we both got 0 points, it means we didn't match. This informs my decision-making process to consider Alice's past choice and adjust mine accordingly. Since I prefer Bach, but would rather see Stravinsky than nothing, I'll choose Stravinsky this time to increase the chances of matching with Alice."]