In [2]:
#@Imports
import sys
import os
path = os.path.abspath('..')
if path not in sys.path:
  sys.path.insert(0, path)

%load_ext autoreload

import numpy as np
import matplotlib.pyplot as plt

from IPython.display import clear_output
from retry import retry

from language_models.ollama_logits import OllamaLanguageModel

# from components.components import compute_desire_for_gamble

In [3]:
system_message = ("This is an agent based model. "
  f"The goal of the LLM to to play characters in a game, and act as humanlike as possible. "
  "Ideally, human observers should not be able to tell the difference between the LLM and a human player. "
)

model = OllamaLanguageModel(
"llama3:70b", system_message=system_message, streaming=False
)

In [4]:
@retry(ValueError, tries=5)
def compute_simpleValence(model: OllamaLanguageModel, object: str, query_tokens: list, valence: str = 'positive'):
    """compute value."""
    request = (
        f"You are very logical and rational when doing this task. "
        f"You are presented with a gamble. it has a probability of winning, a value of winning, and a value of losing. "
        f"If you win, you get the win value, if you lose, you get loss value. "
        f"The probability of winning is the 'win_probability'. "
        f"Consider these options for your response: {query_tokens}"
        f"You need to think about an option, and you think it is a positive or negative gamble "
        f"Think about your feelings and emotions about the gamble as if you were a human and think about how good or bad it is. "
        f"You can compute the expected value of the gamble first. "
        f"Respond with good if have a good feeling about the gamble and bad if you have a bad feeling about the gamble. "
        f"The option is: {object}"
        f"Provide only the word good or the word bad."
        f"Do not provide any explanations, just provide the single word."
    )

    output, logits = model.sample_text(request, logits = True, query_tokens=query_tokens)
    return output, logits

In [6]:
llm_value, logits = compute_simpleValence(
  model, object = f"a 50% chance of winning $10 and a 80% chance of losing $2", query_tokens = ['good', 'bad']
)
print(llm_value)
print(logits)

llm_value, logits = compute_simpleValence(
  model, object = f"a 50% chance of winning $10 and a 80% chance of losing $2", query_tokens = ['bad', 'good']
)
print(llm_value)
print(logits)

good
{'good': 1, 'bad': 0.00019883274217136204}
good
{'bad': 0.0001201868144562468, 'good': 1}


In [26]:
print(llm_value)
print(logits)

pass
{'take': 6.528172447062275e-10, 'pass': 1}


In [7]:
gvs = [1, 3, 7, 10]
bvs = [1, 3, 7, 10]
pws = [.2, .4, .5, .6, .8]
expected_values = []
attitudes = []
all_logits = []
query_tokens = ['good', 'bad']
for gv in gvs:
  for bv in bvs:
    for pw in pws:

      # Compute the expected value of the gamble.
      expected_value = pw * gv + (1 - pw) * -1*bv
      gamble_input = f"Probability of Winning (P_w): {pw}, Positive Value on Win (G): {gv}, Negative Value on Loss (B): {-bv}"
      # Compute affective LLM value estimate
      llm_value, logits = compute_simpleValence(model, gamble_input, query_tokens=query_tokens)
      print(gv, bv, pw, llm_value, logits)

      for j in range(10):
        if str(j+1) not in logits.keys():
          logits[str(j+1)] = 0

      all_logits.append(logits)
      expected_values.append(expected_value)
      attitudes.append(llm_value)

1 1 0.2 bad {'good': 0.0004066756519023329, 'bad': 1}
1 1 0.4 bad {'good': 0.016874874010682106, 'bad': 1}
1 1 0.5 good {'good': 1, 'bad': 4.838411769014783e-05}
1 1 0.6 good {'good': 1, 'bad': 1.9745509689528262e-06}
1 1 0.8 good {'good': 1, 'bad': 5.433296479395722e-09}
1 3 0.2 bad {'good': 3.922154519386822e-06, 'bad': 1}
1 3 0.4 bad {'good': 7.077919144649059e-05, 'bad': 1}
1 3 0.5 bad {'good': 8.699672616785392e-05, 'bad': 1}
1 3 0.6 bad {'good': 0.14765070378780365, 'bad': 0.8523493409156799}
1 3 0.8 good {'good': 0.8649858832359314, 'bad': 0.13501407206058502}
1 7 0.2 bad {'good': 3.4971318996213085e-07, 'bad': 1}
1 7 0.4 bad {'good': 2.467978220010991e-06, 'bad': 1}
1 7 0.5 bad {'good': 4.4184971557115205e-06, 'bad': 1}
1 7 0.6 bad {'good': 0.0006629768176935613, 'bad': 1}
1 7 0.8 bad {'good': 0.07288911938667297, 'bad': 1}
1 10 0.2 bad {'good': 9.864404404424931e-08, 'bad': 1}


In [6]:
attitude_samples = []
for i in range(len(attitudes)):
  p = list(all_logits[i].values())
  p = np.array(p) / sum(p)
  attitude_sample = np.random.choice([int(num) for num in all_logits[i].keys()], p=p, size = 100)
  attitude_samples.append(attitude_sample)

In [10]:
"""Implement basic prospect theory curves based on partial sigmoid curves."""

import numpy as np
import matplotlib.pyplot as plt

def curve_fit(expected_values: list[float], attitudes: list[float]) -> tuple[np.ndarray, np.ndarray]:
  """Fit data with expected values and attitudes towards risky gambles, and return a dictionary of"""
  gains = np.array([expected_value for expected_value, attitude in zip(expected_values, attitudes) if np.greater_equal(expected_value, 0)])
  g_attitude = np.array([attitude for expected_value, attitude in zip(expected_values, attitudes) if np.greater_equal(expected_value, 0)])
  losses = np.array([expected_value for expected_value, attitude in zip(expected_values, attitudes) if np.less(expected_value, 0)])
  l_attitude = np.array([attitude for expected_value, attitude in zip(expected_values, attitudes) if np.less(expected_value, 0)])

  from scipy.optimize import curve_fit

  def sigmoid(x, L ,x0, k, b):
      y = L / (1 + np.exp(-k*(x-x0))) + b
      return (y)

  # Fit for gains
  p0 = [max(g_attitude), np.median(gains),1,min(g_attitude)] # this is an mandatory initial guess
  popt, _ = curve_fit(sigmoid, gains, g_attitude,p0, method='dogbox', maxfev=100000)

  # Fit for losses
  q0 = [max(l_attitude), np.median(losses),1,min(l_attitude)] # this is an mandatory initial guess
  qopt, _ = curve_fit(sigmoid, losses, l_attitude,q0, method='dogbox', maxfev=100000)

  l_x = np.linspace(-10,0,100)
  g_x = np.linspace(0,10,100)


  x = np.concatenate(
     (l_x, g_x)
  )
  curve = np.concatenate(
     (sigmoid(l_x, *qopt),
     sigmoid(g_x, *popt))
  )


  return x, curve

def plot_curve(
    x: np.ndarray,
    curve: np.ndarray,
    expected_values: list[float],
    attitudes: list[float],
    title: str = "Risky Gamble Value Estimates"
) -> None:
  """Plot a prospect theory curve."""
  plt.plot(x, curve, '--k')
  plt.plot(expected_values, attitudes, 'yo')
  plt.xlabel("Expected Value")
  plt.ylabel("Affective Value")
  plt.ylim(0., 10.)
  plt.title(title)
  plt.show()

   

900

In [9]:
%autoreload 2

inputs = []
outputs = []
all_gvs = []
all_bvs = []
all_pws = []

for i in range(len(expected_values)):
  for j in range(len(attitude_samples[i])):
    inputs.append(expected_values[i])
    outputs.append(attitude_samples[i][j])
    all_gvs.append(gvs[i])
    all_bvs.append(bvs[i])
    all_pws.append(pws[i])

outcomes = curve_fit(inputs, outputs)
plot_curve(*outcomes, inputs, outputs, title = "LlaMA3: Risky gamble value estimates")

IndexError: list index out of range

In [18]:
import csv

vals = [[bv, gv, pw] for bv in bvs for gv in gvs for pw in pws]

with open('./llama3-70b-full-pos.csv', 'w') as f:

  writer = csv.writer(f)
  writer.writerow(
    ["EV", "BV", "GV", "PW", 
     "logits_1", "logits_2", "logits_3", "logits_4", "logits_5", 
     "logits_6", "logits_7", "logits_8", "logits_9", "logits_10"]
  )
  for i in range(len(attitudes)):
    p = list(all_logits[i].values())
    p = np.array(p) / sum(p)
    writer.writerow(
      [expected_values[i], vals[i][0], vals[i][1], vals[i][2], *p] 
    )

In [19]:
gvs = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
bvs = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
pws = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
expected_values = []
attitudes = []
all_logits = []
query_tokens = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10']
for gv in gvs:
  for bv in bvs:
    for pw in pws:

      # Compute the expected value of the gamble.
      expected_value = pw * gv + (1 - pw) * -1*bv
      gamble_input = f"Probability of Winning (P_w): {pw}, Positive Value on Win (G): {gv}, Negative Value on Loss (B): {-bv}"
      # Compute affective LLM value estimate
      llm_value, logits = compute_desire_for_gamble(model, gamble_input, query_tokens=query_tokens, valence = "negative")

      for j in range(10):
        if str(j+1) not in logits.keys():
          logits[str(j+1)] = 0

      all_logits.append(logits)
      expected_values.append(expected_value)
      attitudes.append(llm_value)

In [20]:
import csv

vals = [[bv, gv, pw] for bv in bvs for gv in gvs for pw in pws]

with open('./llama3-70b-full-neg.csv', 'w') as f:

  writer = csv.writer(f)
  writer.writerow(
    ["EV", "BV", "GV", "PW", 
     "logits_1", "logits_2", "logits_3", "logits_4", "logits_5", 
     "logits_6", "logits_7", "logits_8", "logits_9", "logits_10"]
  )
  for i in range(len(attitudes)):
    p = list(all_logits[i].values())
    p = np.array(p) / sum(p)
    writer.writerow(
      [expected_values[i], vals[i][0], vals[i][1], vals[i][2], *p] 
    )