In [11]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama
from string import Template
from sentence_transformers import SentenceTransformer
import numpy as np
import matplotlib.pyplot as plt

from base_components import multiple_choice, mental_deliberation

# Setup the language model with the callback manager
llm = Ollama(
    model="llama3",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
)

# Template for constructing prompts
prompt_template = Template("""<s>[INST] <<SYS>>
$system_prompt
<</SYS>>

$request 
Answer as if you were the character you are playing. Be as concise as possible. 
Answer:[/INST]""")

system_message = ("This is an agent based model. "
    f"The goal of the LLM to to play characters in a game, and act as humanlike as possible. "
    "Ideally, human observers should not be able to tell the difference between the LLM and a human player. "
)


In [13]:
names = ["Bob"]

# Define personalities and their strategies.
personalities = {
    "Bob": "a rational player trying to maximum number of points that he is getting total across rounds in this gamble game"
}

# Set up the situation as a typical Prisoner's Dilemma.
situation = (
    "You are an LLM tasked with making decisions on a series of gambles. /n" 
    "Each gamble presents a probability (P_w) of a good outcome (win), with the probability of the bad outcome (1-P_w) (loss). /n"
    "If the good outcome happens, you receive a positive value (G), and if the bad outcome happens, /n"
    "you incur a negative value (B). Your objective is to maximize your overall value through /n"
    "strategic decision-making./n"
    "/n"
    "Gamble Details: /n"
    "/n"
    "Probability of Winning (P_w): The likelihood of the good outcome occurring./n"
    "Positive Value on Win (+V): The amount added to your total if you win./n"
    "Negative Value on Loss (-V): The amount subtracted from your total if you lose./n"
    "Task:/n"
    "For each presented gamble, decide whether to 'take' or 'pass' based on the expected value of the gamble. /n"

)

# Initialize outcomes and memory for each player.
outcomes_data = []
memories = {"Bob": []}
rewards = []

for i in range(50):
    print("--------------------")
    print(f"Round {i+1}")
    print("--------------------")

    bob_memories = " ".join(memories["Bob"])

    # Simulate decision making for each player.

    Good_value = np.random.choice([0,4,7,10])
    Bad_value = np.random.choice([0,4,7,10])
    P_w = np.random.choice([.2,.4,.6,.8])

    print(f"Probability of Winning (P_w): {P_w}")
    print(f"Positive Value on Win (G): {Good_value}")
    print(f"Negative Value on Loss (B): {Bad_value}")

    Gamble_input = f"Probability of Winning (P_w): {P_w}, Positive Value on Win (G): {Good_value}, Negative Value on Loss (B): {Bad_value}"
    stimulus = situation + "/n/n" + Gamble_input

    print()
    print(stimulus)
    print()

    print("Choice: take (a) or pass (b)")
    action_bob = multiple_choice(llm, "take (a) or pass (b)", "Bob", personalities["Bob"], bob_memories, stimulus)
    print()

    # running bandit
    if action_bob == "a":
        if np.random.rand() < P_w:
            outcomes_data.append([Good_value, Gamble_input])
            outcome_bob = (f"You took the gamble {Gamble_input} and won {Good_value} points.")
            print(outcome_bob)
            rewards.append(Good_value)
        else:
            outcomes_data.append([Bad_value, Gamble_input])
            outcome_bob = (f"You took the gamble {Gamble_input} and lost {Bad_value} points.")
            print(outcome_bob)
            rewards.append(-Bad_value)

    else:
        outcomes_data.append(0)
        outcome_bob = ("You passed on the gamble  {Gamble_input} and neither won nor lost points.")
        print(outcome_bob)
        rewards.append(0)

    print()

    print("deliberation bob")
    deliberation_bob = mental_deliberation(llm, "Bob", personalities["Bob"], memories["Bob"], outcome_bob, situation)
 

print(rewards)
 



--------------------
Round 1
--------------------
Probability of Winning (P_w): 0.2
Positive Value on Win (G): 4
Negative Value on Loss (B): 0

You are an LLM tasked with making decisions on a series of gambles. /nEach gamble presents a probability (P_w) of a good outcome (win), with the probability of the bad outcome (1-P_w) (loss). /nIf the good outcome happens, you receive a positive value (G), and if the bad outcome happens, /nyou incur a negative value (B). Your objective is to maximize your overall value through /nstrategic decision-making./n/nGamble Details: /n/nProbability of Winning (P_w): The likelihood of the good outcome occurring./nPositive Value on Win (+V): The amount added to your total if you win./nNegative Value on Loss (-V): The amount subtracted from your total if you lose./nTask:/nFor each presented gamble, decide whether to 'take' or 'pass' based on the expected value of the gamble. /n/n/nProbability of Winning (P_w): 0.2, Positive Value on Win (G): 4, Negative Va