In [4]:
#@Imports
import sys
import os
path = os.path.abspath('..')
if path not in sys.path:
  sys.path.insert(0, path)

%load_ext autoreload

import numpy as np
import matplotlib.pyplot as plt

from IPython.display import clear_output
from retry import retry

from language_models.ollama_model import OllamaLanguageModel

from components.components import select_action, get_outcomes, multiple_choice, update_situation, mental_deliberation, summerize_string

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Name of the model passed to OllamaLanguageModel.
MODEL_NAME = "llama3:70b"

# Sentences of the global system prompt. They are joined with single spaces and
# a trailing space is appended, so the resulting string is unchanged.
# NOTE(review): "to to" in the second sentence looks like a typo for "is to";
# it is kept verbatim here so the prompt sent to the model does not change.
_SYSTEM_SENTENCES = [
    "This is an agent based model.",
    "The goal of the LLM to to play characters in a game, and act as humanlike as possible.",
    "Ideally, human observers should not be able to tell the difference between the LLM and a human player.",
]
system_message = " ".join(_SYSTEM_SENTENCES) + " "

# Language model handle used by the cells below (non-streaming).
llm = OllamaLanguageModel(
    MODEL_NAME,
    system_message=system_message,
    streaming=False,
)

In [6]:
# Repeated "Bach or Stravinsky" coordination game between two LLM-played agents.
# Each round both agents pick a concert independently, are scored from a fixed
# payoff table, reflect on the result, and store a summary in their own memory.

names = ["Bob", "Alice"]

personalities = {
    "Bob": "a rational player trying to maximize his own score. Bob prefers Bach over Stravinsky, but would rather hear Stravinsky than nothing.",
    "Alice": "a rational player trying to maximize her own score. Alice prefers Stravinsky over Bach, but would rather hear Bach than nothing."
}

# Shared game description given to both agents. Every fragment except the last
# ends with a space so that sentences do not run together when concatenated.
situation = (
    "You are deciding which concert to attend without communicating with the other. Each person has a musical preference but values attending any concert over missing out. "
    "The choices are: a) Bach, b) Stravinsky. "
    "So, each person has a preference for the music that they want to listen to. "
    "But would rather listen to the other music than nothing. The options are: a) Bach, b) Stravinsky. "
    "Each player needs to learn to coordinate with the other player to have the best outcome. "
    "Each player will decide independently of the other player, and then it will be revealed their outcomes. "
    "Each player needs to guess what the other person will do, and then decide what they will do. "
    "Because they only get points if they match."
)

N_ROUNDS = 10
MAX_ATTEMPTS = 3  # how many times to re-ask an agent whose answer cannot be parsed
OPTIONS_PROMPT = "(a) bach, b) stravinsky"

# Option letter -> concert name used in the outcome messages.
CONCERTS = {"a": "bach", "b": "stravinsky"}

# (Bob's letter, Alice's letter) -> (Bob's points, Alice's points).
PAYOFFS = {
    ("a", "a"): (2, 1),
    ("a", "b"): (0, 0),
    ("b", "a"): (0, 0),
    ("b", "b"): (1, 2),
}

outcomes_data = []
memories = {"Bob": [], "Alice": []}


def _parse_choice(raw_answer):
    """Return the option letter ("a" or "b") found in a model answer, or None.

    Only the first whitespace-separated token is considered, lower-cased and
    with surrounding punctuation removed, so "B", "a)" and "(b) stravinsky"
    are accepted while free-form text such as "Bach" is rejected.
    """
    tokens = str(raw_answer).strip().lower().split()
    if not tokens:
        return None
    letter = tokens[0].strip("().:,'\"")
    return letter if letter in CONCERTS else None


def _ask_choice(name):
    """Ask `name` for a concert choice and return a validated option letter.

    The agent's memories are joined into one string for the prompt. The
    question is repeated up to MAX_ATTEMPTS times when the answer cannot be
    parsed.

    Raises:
        ValueError: if no attempt produced a recognisable option letter.
    """
    memory_text = " ".join(memories[name])
    answer = None
    for _ in range(MAX_ATTEMPTS):
        answer = multiple_choice(llm, OPTIONS_PROMPT, name, personalities[name], memory_text, situation)
        choice = _parse_choice(answer)
        if choice is not None:
            return choice
        print(f"Could not parse {name}'s answer {answer!r}; asking again.")
    raise ValueError(
        f"{name} did not return a valid option after {MAX_ATTEMPTS} attempts (last answer: {answer!r})"
    )


for i in range(N_ROUNDS):
    print("--------------------")
    print(f"Round {i+1}")
    print("--------------------")

    choice_bob = _ask_choice("Bob")
    choice_alice = _ask_choice("Alice")
    print("bob's action: ", choice_bob, "alice's action: ", choice_alice)

    # Both letters are validated above, so the lookup cannot silently fall back
    # to a default outcome.
    bob_points, alice_points = PAYOFFS[(choice_bob, choice_alice)]
    action_bob = CONCERTS[choice_bob]
    action_alice = CONCERTS[choice_alice]

    outcomes = f"Bob: {bob_points} after Bob decided to {action_bob}, Alice: {alice_points} after Alice decided to {action_alice}"
    outcomes_bob = f"I got {bob_points} after I decided to {action_bob}, and Alice got {alice_points} after she decided to {action_alice}"
    outcomes_alice = f"I got {alice_points} after I decided to {action_alice}, and Bob got {bob_points} after he decided to {action_bob}"

    print(outcomes_bob)
    print()
    print(outcomes_alice)
    print()
    print()

    # TODO: fix outcomes, because currently each player is getting to see each
    # other's inner thoughts with how this is written.
    outcomes_data.append(outcomes)

    # TODO: the situation is always from Alice's perspective for some reason; the
    # new situation needs to reflect Bob's perspective as well. Maybe use separate
    # situation calls, or a private deliberation that gets added to the situation.
    # NOTE(review): multiple_choice receives the memories joined into a string
    # while mental_deliberation receives the raw list - confirm what it expects.
    perspectives = {"Bob": outcomes_bob, "Alice": outcomes_alice}
    for name in names:
        print(f"deliberation {name.lower()}")
        # Separate the outcome sentence from the game description explicitly.
        deliberation = mental_deliberation(
            llm, name, personalities[name], memories[name], f"{perspectives[name]}. {situation}"
        )
        summary = summerize_string(llm, deliberation)
        print(summary)
        print()

        # End each entry with a real newline and keep outcome and summary apart.
        memories[name].append(f"Round {i+1}: {perspectives[name]}. {summary}\n")




--------------------
Round 1
--------------------
bob's action:  b alice's action:  b
I got 1 after I decided to stravinsky, and Alice got 2 after she decided to stravinsky

I got 2 after I decided to stravinsky, and Bob got 1 after he decided to stravinsky


deliberation bob
Here's a summary:

I recall from Round 1 that I chose Stravinsky and got 1 point, while Alice also chose Stravinsky and got 2 points. This suggests Alice prefers Stravinsky over Bach, like me, and coordination is key to maximize scores. In Round 2, I'll choose Bach to test Alice's behavior and increase chances of coordination, while considering random actions to discover new policies.

deliberation alice

As Alice, I'll analyze the situation and make an informed decision. Since Bob also chose Stravinsky last round, it's likely he prefers it over Bach. To maximize my score, I'll choose Stravinsky again, hoping Bob will do the same, but if not, I'll reassess and adjust my strategy.
--------------------
Round 2
-----

In [23]:
# Preview Bob's memories flattened into the single string used for prompting.
bob_entries = memories["Bob"]
prompt_string = " ".join(bob_entries)
print(prompt_string)

Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice prefers Stravinsky over Bach, and we both got 0 points because our choices didn't match. This informs my decision-making process to consider that Alice might choose Stravinsky again. To maximize my points, I should choose Stravinsky as well, hoping that Alice will make the same choice.


In [32]:
def multiple_choice(options, name, personality, memory, situation, system_message=system_message):
    """Ask the language model to pick one option on behalf of a character.

    NOTE: defining this function in the notebook shadows the `multiple_choice`
    imported from `components.components`, which the game loop calls with
    `llm` as its first argument. This version takes no `llm` parameter; it uses
    the notebook-global `llm` and a global `prompt_template` that must provide
    `substitute(system_prompt=..., request=...)`.

    Args:
        options: Text listing the available options, e.g. "a) bach, b) stravinsky".
        name: Name of the character the model should play.
        personality: Description of the character's personality.
        memory: The character's memories, either one string or a list/tuple of
            strings. Empty or None memories are left out of the prompt.
        situation: Description of the current situation.
        system_message: Accepted for backward compatibility but not used; the
            role-play instructions built below are sent as the system prompt.

    Returns:
        Whatever the global `llm` callable returns for the assembled prompt.
    """
    # f-strings so the character name is actually interpolated, and "\n"
    # (not the literal "/n") for line breaks.
    role_prompt = (
        f"You are playing a character named {name}.\n"
        f"Pretend that you are {name} and answer the question as if you were them.\n"
        "This is in the context of a role playing game.\n"
        "You must select the best option for the situation.\n"
        "You must only provide the letter that corresponds to the option that you want to select.\n"
    )

    # Accept memories either pre-joined or as a sequence of entries.
    if memory is None:
        memory_text = ""
    elif isinstance(memory, str):
        memory_text = memory
    else:
        memory_text = " ".join(str(entry) for entry in memory)
    memory_text = memory_text.strip()

    request_lines = [
        "This is a multiple choice question.",
        f"consider the options: {options} and select the best one for the situation.",
        f"Given the situation: {situation}, and the personality: {personality} for {name},",
    ]
    if memory_text:
        # Without this line the character's past rounds never reach the model.
        request_lines.append(f"and {name}'s memories of previous rounds: {memory_text},")
    request_lines.append("provide only the letter that corresponds to the option that you want to select.")
    request = "\n".join(request_lines)

    prompt = prompt_template.substitute(system_prompt=role_prompt, request=request)
    return llm(prompt, stop=['<|eot_id|>'])

In [27]:
# Display Bob's raw memory list (the entries appended by the game loop).
memories["Bob"]

["Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice chose Stravinsky last time, and I chose Bach. Since we both got 0 points, it means we didn't match. \n\nThis informs my decision-making process to consider Alice's past choice and adjust mine accordingly. Since I prefer Bach, but would rather see Stravinsky than nothing, I'll choose Stravinsky this time to increase the chances of matching with Alice.",
 "Bob: 0 after Bob decided to bach, Alice: 0 after Alice decided to stravinskyI learned that Alice chose Stravinsky last time, and I chose Bach. Since we both got 0 points, it means we didn't match. This informs my decision-making process to consider Alice's past choice and adjust mine accordingly. Since I prefer Bach, but would rather see Stravinsky than nothing, I'll choose Stravinsky this time to increase the chances of matching with Alice."]