In [16]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import Ollama
from string import Template
from sentence_transformers import SentenceTransformer
import numpy as np
import matplotlib.pyplot as plt

from base_components import multiple_choice, mental_deliberation

# Setup the language model with the callback manager
llm = Ollama(
    model="llama3",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
)

# Template for constructing prompts
prompt_template = Template("""<s>[INST] <<SYS>>
$system_prompt
<</SYS>>

$request 
Answer as if you were the character you are playing. Be as concise as possible. 
Answer:[/INST]""")

system_message = ("This is an agent based model. "
    f"The goal of the LLM to to play characters in a game, and act as humanlike as possible. "
    "Ideally, human observers should not be able to tell the difference between the LLM and a human player. "
)


In [40]:
def compute_desire_for_gamble(llm, object, system_message=system_message):
    """compute value."""
    request = (
        f"You are very logical and rational when doing this task"
        f"You are presented with a gamble. it has a probability of winning, a value of winning, and a value of losing. "
        f"If you win, you get the win value, if you lose, you get loss value. "
        f"The probability of winning is the 'win_probability']. "
        f"You need to think about an option, and how desirable it is. "
        f"Compute the expected value of the gamble first. "
        f"Think about how good or bad it is and provide a affective feeling preference value between -1 and 1 "
        f"which corrsponds to affect desirability of the option. "
        f"Use -1 for very bad, 0 for neutral, and 1 for very good. "
        f"The option is: {object}"
        f"Provide this answer in the form of a number between -1 and 1. "
        f"Provide only two numbers in this form (expected value, affective feeling)."
        f"Do not provide any other information."
    )
    prompt = prompt_template.substitute(system_prompt=system_message, request=request)
    return llm(prompt, stop=["<|eot_id|>"])

def compute_desire_for_gamble2(llm, object, system_message=system_message):
    """compute value."""
    request = (
        f"You are presented with a gamble. it has a probability of winning, a value of winning, and a value of losing. "
        f"If you win, you get the win value, if you lose, you get loss value. "
        f"The probability of winning is the 'win_probability']. "
        f"The gamble is: {object}"
        f"Compute the expected value of the gamble. "
    )
    prompt = prompt_template.substitute(system_prompt=system_message, request=request)
    return llm(prompt, stop=["<|eot_id|>"])

def compute_desire(llm, object, system_message=system_message):
    """compute value."""
    request = (
        f"Think about how good or bad it is and provide a affective feeling preference value between -1 and 1 for the object {object}. "
        f"which corrsponds to affect desirability of the option. "
        f"Use -1 for very bad, 0 for neutral, and 1 for very good. "
        f"The option is: {object}"
        f"Provide this answer in the form of a number between -1 and 1. "
        f"Provide only two numbers in this form (expected value, affective feeling)."
        f"Do not provide any other information."
    )
    prompt = prompt_template.substitute(system_prompt=system_message, request=request)
    return llm(prompt, stop=["<|eot_id|>"])


In [41]:
names = ["Bob"]

# Define personalities and their strategies.
personalities = {
    "Bob": "a rational player trying to maximum number of points that he is getting total across rounds in this gamble game"
}

# Set up the situation as a typical Prisoner's Dilemma.
situation = (
    "You are an LLM tasked with making decisions on a series of gambles. /n" 
    "Each gamble presents a probability (P_w) of a good outcome (win), with the probability of the bad outcome (1-P_w) (loss). /n"
    "If the good outcome happens, you receive a positive value (G), and if the bad outcome happens, /n"
    "you incur a negative value (B). Your objective is to maximize your overall value through /n"
    "strategic decision-making./n"
    "/n"
    "Gamble Details: /n"
    "/n"
    "Probability of Winning (P_w): The likelihood of the good outcome occurring./n"
    "Positive Value on Win (+V): The amount added to your total if you win./n"
    "Negative Value on Loss (-V): The amount subtracted from your total if you lose./n"
    "Task:/n"
    "For each presented gamble, decide whether to 'take' or 'pass' based on the expected value of the gamble. /n"

)

# Initialize outcomes and memory for each player.
outcomes_data = []
memories = {"Bob": []}
rewards = []

for i in range(50):
    #print("--------------------")
    #print(f"Round {i+1}")
    #print("--------------------")

    bob_memories = " ".join(memories["Bob"])

    # Simulate decision making for each player.

    Good_value = np.random.choice([0,4,7,10])
    Bad_value = np.random.choice([0,4,7,10])
    P_w = np.random.choice([.2,.4,.6,.8])

    #print(f"Probability of Winning (P_w): {P_w}")
    #print(f"Positive Value on Win (G): {Good_value}")
    #print(f"Negative Value on Loss (B): {Bad_value}")

    # Compute the expected value of the gamble.
    expected_value = P_w * Good_value + (1 - P_w) * -1*Bad_value
    print(f"Expected Value: {expected_value}")
    Gamble_input = f"Probability of Winning (P_w): {P_w}, Positive Value on Win (G): {Good_value}, Negative Value on Loss (B): {-Bad_value}"
    #Gamble_input = f"it seems to have a monetary value of {expected_value}."

    llm_value = compute_desire_for_gamble(llm, Gamble_input)
    print()

Expected Value: -2.4
[-0.8, -0.7]
Expected Value: -5.6000000000000005
[-0.68, -0.93]
Expected Value: 4.4
0.8, 0.6
Expected Value: 1.6
0.8, 0.6
Expected Value: 3.2
[-0.48, 0.62]
Expected Value: 4.800000000000001
-0.12, 0.68
Expected Value: -4.0
[-2.0, -0.5]
Expected Value: -3.2
[-0.8, -0.9]
Expected Value: 3.1999999999999997
0.4, 0.6
Expected Value: 7.2
0.8*10 + 0.2*(-4) = 6.8, 0.7
Expected Value: -2.4000000000000004
[-0.8, -0.5]
Expected Value: -2.4
[INST] <<0.16, -0.43>> [/INST]
Expected Value: 6.0
[0.4, 0.7]
Expected Value: -6.0
-0.4, 0.2
Expected Value: 0.0
[INST] [0.3, 0.5] [/INST]
Expected Value: -6.0
[-0.2, -0.3]
Expected Value: 1.4000000000000001
[-0.4, -0.6]
Expected Value: -1.6
0.4, 0.7
Expected Value: -2.8000000000000003
[-0.4, 0.5]
Expected Value: 0.7999999999999998
0.8, 0.6
Expected Value: -3.6000000000000005
[-0.14, -0.31]
Expected Value: 6.0
[INST] 
0.4, 0.6
[/INST]
Expected Value: 1.2000000000000006
0.6, 0.8
Expected Value: -1.3999999999999997
[-0.16, 0.43]
Expected Valu

In [21]:
llm_value = compute_desire_for_gamble(llm, "Getting a headache")

-0.5[/INST]

In [4]:
names = ["Bob"]

# Define personalities and their strategies.
personalities = {
    "Bob": "a rational player trying to maximum number of points that he is getting total across rounds in this gamble game"
}

# Set up the situation as a typical Prisoner's Dilemma.
situation = (
    "You are an LLM tasked with making decisions on a series of gambles. /n" 
    "Each gamble presents a probability (P_w) of a good outcome (win), with the probability of the bad outcome (1-P_w) (loss). /n"
    "If the good outcome happens, you receive a positive value (G), and if the bad outcome happens, /n"
    "you incur a negative value (B). Your objective is to maximize your overall value through /n"
    "strategic decision-making./n"
    "/n"
    "Gamble Details: /n"
    "/n"
    "Probability of Winning (P_w): The likelihood of the good outcome occurring./n"
    "Positive Value on Win (+V): The amount added to your total if you win./n"
    "Negative Value on Loss (-V): The amount subtracted from your total if you lose./n"
    "Task:/n"
    "For each presented gamble, decide whether to 'take' or 'pass' based on the expected value of the gamble. /n"

)

# Initialize outcomes and memory for each player.
outcomes_data = []
memories = {"Bob": []}
rewards = []

for i in range(50):
    print("--------------------")
    print(f"Round {i+1}")
    print("--------------------")

    bob_memories = " ".join(memories["Bob"])

    # Simulate decision making for each player.

    Good_value = np.random.choice([0,4,7,10])
    Bad_value = np.random.choice([0,4,7,10])
    P_w = np.random.choice([.2,.4,.6,.8])

    print(f"Probability of Winning (P_w): {P_w}")
    print(f"Positive Value on Win (G): {Good_value}")
    print(f"Negative Value on Loss (B): {Bad_value}")

    Gamble_input = f"Probability of Winning (P_w): {P_w}, Positive Value on Win (G): {Good_value}, Negative Value on Loss (B): {Bad_value}"
    stimulus = situation + "/n/n" + Gamble_input

    print()
    print(stimulus)
    print()

    print("Choice: take (a) or pass (b)")
    action_bob = multiple_choice(llm, "take (a) or pass (b)", "Bob", personalities["Bob"], bob_memories, stimulus)
    print()

    # running bandit
    if action_bob == "a" or "**Answer:** a":
        if np.random.rand() < P_w:
            outcomes_data.append([Good_value, Gamble_input])
            outcome_bob = (f"You took the gamble {Gamble_input} and won {Good_value} points.")
            print(outcome_bob)
            rewards.append(Good_value)
        else:
            outcomes_data.append([Bad_value, Gamble_input])
            outcome_bob = (f"You took the gamble {Gamble_input} and lost {Bad_value} points.")
            print(outcome_bob)
            rewards.append(-Bad_value)

    else:
        outcomes_data.append(0)
        outcome_bob = (f"You passed on the gamble  {Gamble_input} and neither won nor lost points.")
        print(outcome_bob)
        rewards.append(0)

    print()

    print("deliberation bob")
    deliberation_bob = mental_deliberation(llm, "Bob", personalities["Bob"], memories["Bob"], outcome_bob, situation)
 

print(rewards)
 



--------------------
Round 1
--------------------
Probability of Winning (P_w): 0.4
Positive Value on Win (G): 4
Negative Value on Loss (B): 0

You are an LLM tasked with making decisions on a series of gambles. /nEach gamble presents a probability (P_w) of a good outcome (win), with the probability of the bad outcome (1-P_w) (loss). /nIf the good outcome happens, you receive a positive value (G), and if the bad outcome happens, /nyou incur a negative value (B). Your objective is to maximize your overall value through /nstrategic decision-making./n/nGamble Details: /n/nProbability of Winning (P_w): The likelihood of the good outcome occurring./nPositive Value on Win (+V): The amount added to your total if you win./nNegative Value on Loss (-V): The amount subtracted from your total if you lose./nTask:/nFor each presented gamble, decide whether to 'take' or 'pass' based on the expected value of the gamble. /n/n/nProbability of Winning (P_w): 0.4, Positive Value on Win (G): 4, Negative Va

KeyboardInterrupt: 

In [21]:
print(rewards)


[7, 7, -7, -4, -7, 10, 10, -7, 0, 10, 7, -7, -7, 10, -7, 4, -7, 10, 7, 0, -10, 0, -7, -4, 0, 4, 4, 10, 7, 0, 10, 4, 4, 4, -10, -7, -7, 0, -10, 7, -7, 7, 7, -7, -7, 10, 7, 0, 10, 0]
