# Part 2: Choose your adventure
In this part, you can explore different ways of improving the reason identification pipeline:

- A. Improve the prompt --- in this section you can edit our prompt, or even write your own from scratch
- B. Other reasons --- in this section you can test your own decision reasons
- C. Better model --- in this section you can test models other than LLAMA and compare the performance between them


### Environment Setup
The code in this section mounts the google drive, loads packages, data, and functions from the previous part.

You can simply click on the arrow to run all cells without unfolding this section.

1. You **must** run the code in this section to run the analyses in the following sections.
2. The 'adventures' are, however, independent of each other
3. Don't forget to set the access token!


In [None]:
# mount google drive at /content/drive so that the exercise files stored there can be accessed
from google.colab import drive
drive.mount("/content/drive")

In [None]:
import sys
import os
import re
import textwrap
from IPython.display import display, HTML
import pandas as pd
from huggingface_hub import InferenceClient

# the code below installs huggingface hub if it's missing
if 'google.colab' in sys.modules:  # If in Google Colab environment

    # Installing requisite packages
    !pip install huggingface_hub &> /dev/null

# this sets the working directory to the exercises folder
os.chdir('/content/drive/My Drive/llms_egproc/exercises/')

### Read the data

In [None]:
# read in decision problems, decision reasons, and verbal reports
decision_problems = pd.read_csv('data/decision_problems.csv', encoding='utf-8')
decision_reasons = pd.read_csv('data/decision_reasons.csv', encoding='utf-8')
verbal_reports = pd.read_csv('data/verbal_reports.csv', encoding='utf-8')

# merge verbal reports with decision problems (joined on the shared 'problem_id' column)
problems_reports = pd.merge(decision_problems, verbal_reports, on='problem_id')

# path of the base prompt template
prompt_path = 'prompts/prompt_v1.txt'

# Read the prompt template. The encoding is stated explicitly (matching the CSV
# reads above) so that non-ASCII characters are decoded the same way regardless
# of the platform's default encoding.
with open(prompt_path, 'r', encoding='utf-8') as prompt_file:
    prompt_base = prompt_file.read()

### Load the functions from exercise 1

In [None]:
# function for constructing the full prompt
def generate_prompt(prompt, decision_problem, decision_reason, verbal_report):
    """Fill the placeholders of a prompt template.

    Every occurrence of DECISION_PROBLEM, DECISION_REASON and VERBAL_REPORT in
    `prompt` is replaced by the corresponding argument.

    Args:
        prompt: Template text containing the placeholders.
        decision_problem: Text inserted for DECISION_PROBLEM.
        decision_reason: Text inserted for DECISION_REASON.
        verbal_report: Text inserted for VERBAL_REPORT.

    Returns:
        The template with all placeholders filled in.
    """
    replacements = {
        "DECISION_PROBLEM": decision_problem,
        "DECISION_REASON": decision_reason,
        "VERBAL_REPORT": verbal_report,
    }

    # Substitute in a single pass over the template. Chained str.replace calls
    # would also rewrite placeholder names that merely occur inside text that
    # was already inserted (e.g. a decision problem mentioning "VERBAL_REPORT").
    placeholder_pattern = "|".join(re.escape(name) for name in replacements)

    # A function is used as replacement so the inserted text is taken literally
    # (no backslash/group-reference interpretation by re.sub).
    return re.sub(placeholder_pattern, lambda m: replacements[m.group(0)], prompt)

# Function for extracting confidence assessments
def extract_confidence(s):
    """Return the first integer enclosed in '@' symbols in `s`, or None.

    Accepts one or more '@' on either side and optional whitespace around the
    digits, e.g. '@85@', '@ 85 @' or '@@85@@'. Returns None when no such
    pattern is found.
    """
    found = re.search(r'@+(\s*\d+\s*)@+', s)

    # nothing that looks like a confidence value in the text
    if found is None:
        return None

    # drop the surrounding whitespace before converting the digits
    digits = found.group(1).strip()
    return int(digits)

# Function to wrap text
def wrap_text(text, width=100):
    """Wrap `text` to lines of at most `width` characters, joined by HTML '<br>' tags."""
    wrapped_lines = textwrap.wrap(text, width)
    html_break = "<br>"
    return html_break.join(wrapped_lines)

# display data frames in HTML
def disp_tab(dd):
    """Render the data frame `dd` as an HTML table with every cell wrapped at 40 characters."""

    def _cell_to_html(cell):
        # every cell is converted to text first, then wrapped with <br> line breaks
        return wrap_text(str(cell), width=40)

    # escape=False keeps the inserted <br> tags as real line breaks
    table_html = dd.map(_cell_to_html).to_html(escape=False)
    return display(HTML(table_html))

# Function to show verbal reports with confidence assessment below and above specified values
def show_verbal_reports_in_range(data, reason, min_threshold = 100, max_treshold = 0, show_na = False):
    """Display verbal reports whose confidence for `reason` is low or high.

    Args:
        data: Data frame with the columns 'decision_problem', 'verbal_report',
            'choice' and a confidence column named `reason`.
        reason: Name of the confidence column to filter on.
        min_threshold: Rows with confidence <= this value are shown.
        max_treshold: Rows with confidence >= this value are shown.
        show_na: If True, show instead only the rows for which no confidence
            assessment could be extracted.

    Returns:
        The result of displaying the filtered rows as an HTML table.
    """
    confidence = data[reason]

    # A confidence value counts as missing when it is None/NaN (what
    # extract_confidence returns when nothing could be extracted) or the
    # sentinel 999 that this function has always treated as "not extracted".
    is_missing = confidence.isna() | (confidence == 999)

    if show_na:
        selected = is_missing
    else:
        # Parenthesised explicitly: `&` binds tighter than `|`, so without the
        # grouping the missing-value exclusion would only apply to the
        # upper-threshold condition.
        in_range = (confidence <= min_threshold) | (confidence >= max_treshold)
        selected = in_range & ~is_missing

    # select only the columns with report and confidence assessment; copy so
    # that the wrapping below never writes into (a view of) the caller's data
    filtered_data = data.loc[selected, ['decision_problem', 'verbal_report', 'choice', reason]].copy()

    # wrap the text for nicer display
    filtered_data['verbal_report'] = filtered_data['verbal_report'].apply(wrap_text)
    filtered_data['decision_problem'] = filtered_data['decision_problem'].apply(lambda x: wrap_text(x, width=40))

    # escape=False keeps the <br> tags inserted by wrap_text as line breaks
    return display(HTML(filtered_data.to_html(escape=False)))

### Set the access token
Again, use the one you received via email.

In [None]:
# Do not hard-code the access token: a token saved with the notebook is exposed
# to everyone who can read the file. Take it from the HF_TOKEN environment
# variable if that is set, otherwise ask for it interactively (the input is not echoed).
from getpass import getpass

API_TOKEN = os.environ.get('HF_TOKEN') or getpass('Hugging Face access token: ')

# Adventure A: Improve the Prompt

In this adventure, you get to play around with the prompt. Make it better or worse. For instance, try removing the chain of thought component. See how the model reacts.

### Setting up the inference client


In [None]:
# Setting up Llama 3.1 8B
# LLM_version holds the model identifier; LLM is the inference client for that
# model, authenticated with API_TOKEN, that the analysis loop below queries.
LLM_version = "meta-llama/Meta-Llama-3.1-8B-Instruct"
LLM = InferenceClient(model = LLM_version, token = API_TOKEN)

### Fixing the decision reason
Keep `'maximum outcome'` or replace it with your own choice from the reason table.

In [None]:
# display the table of available decision reasons
disp_tab(decision_reasons)

# name of the decision reason to analyse (change to your reason of choice)
selected_reason = 'maximum outcome'

# look up the description that belongs to the selected reason name;
# the first matching row is used
matches_selected = decision_reasons['decision reason name'] == selected_reason
selected_description = decision_reasons.loc[matches_selected, 'decision reason description'].values[0]

### Adjust the prompt

**Keep in mind!**
1. The function `generate_prompt` requires that the `prompt_base` contain the placeholders for the decision reason, decision problem, and verbal report in the following forms: DECISION_PROBLEM, DECISION_REASON, VERBAL_REPORT.
2. The function `extract_confidence` assumes that the model outputs a confidence assessment between the @@ symbols, so it's best to keep the assessment type the same.

The rest can be changed as you please.

In [None]:
# Custom prompt template, overriding the prompt_base loaded from file earlier.
# It consists of a system turn and a user turn delimited by the
# <|start_header_id|>...<|end_header_id|> / <|eot_id|> markers and ends with an
# opened assistant turn for the model to complete.
# DECISION_PROBLEM, DECISION_REASON and VERBAL_REPORT are the placeholders that
# generate_prompt replaces; the confidence value is requested between @ symbols,
# which is the pattern extract_confidence searches for.
prompt_base = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a decision analyst who accurately identifies whether decision reasons are present or absent in verbal reports of people written after they made a choice between two monetary lotteries of a decision problem.

Available information —
A decision problem poses a choice between two lotteries, A and B, offering different monetary outcomes with different probabilities.

A decision reason specifies a rule to decide which of the two lotteries is preferred by the reason. The decision reason prefers A or B or is indifferent between the lotteries.

A verbal report written by an individual describes, in retrospect, the individual’s deliberation process used to choose one of the lotteries of the decision problem.


Task description —
Your task is to assess, based on the verbal report, whether the individual used the reason to make the decision.
The wording in the verbal report does not need to match the decision reason verbatim;
consider other wordings but make sure that the essence of the reason is clearly reflected by the verbal report.
Perform your analysis in three steps.

Step 1: Asses if the decision reason can be applied to the decision problem.
Evaluate whether the information relevant to the decision reason can be derived from the lotteries' outcomes and probabilities and summarize this information.
Proceed to Step 2.

Step 2: Assess the verbal report.
First, evaluate and summarize the outcome and probability information considered by the individual.
Second, evaluate and summarize the individual’s justification for the choice.
Focus on the described deliberation process and ignore information about the individual’s final choice.
Proceed to Step 3.

Step 3: Assess confidence in the decision reason’s use.
First, compare the outcome and probability information relevant to the decision reason and those considered by the individual.
Second, compare the decision reason’s rule to the individual’s justification for the choice.
Based on these two comparisons, return a value between 0 (certainly not used) and 100 (certainly used), reflecting your confidence that the individual used the decision reason to make the decision.


Output format —
Return the results of your assessment in the following format.
Return the confidence value by inserting it between two @@ symbols.
Only insert numbers between 0 (certainly not used) and 100 (certainly used).

Here is a template for the output format:
Confidence: @ insert confidence value @

<|eot_id|><|start_header_id|>user<|end_header_id|>

Consider the following decision problem, decision reason, and verbal report:


Decision problem ---
DECISION_PROBLEM


Decision reason ---
DECISION_REASON


Verbal report ---
VERBAL_REPORT


Task ---
Perform the confidence assessment step-by-step. Closely follow the steps previously outlined. Describe your reasoning before you arrive at an answer.
In the end, provide your confidence assessment that the decision reason was used by the individual using the specified output format.
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

### Run the prompts through the model

In [None]:
# Build one filled-in prompt per verbal report for the selected decision reason,
# passing your custom prompt_base to generate_prompt
filled_prompts = [
    generate_prompt(
        prompt_base,
        report_row['decision_problem'],
        selected_description,
        report_row['verbal_report'],
    )
    for _, report_row in problems_reports.iterrows()
]

# the complete model outputs are collected here
LLM_results = []

# new column for the confidence assessments, named after the selected reason
problems_reports[selected_reason] = None

# index of the last prompt, shown in the progress output
last_index = problems_reports.shape[0] - 1

# run the analysis: query the model with every prompt, keep the raw response,
# and store the extracted confidence value in the row with the same index
for position, filled_prompt in enumerate(filled_prompts):
    model_output = LLM.text_generation(filled_prompt, max_new_tokens=4000)
    LLM_results.append(model_output)

    problems_reports.at[position, selected_reason] = extract_confidence(model_output)

    # monitor progress
    print(f"{position}/{last_index}")

### Evaluate results
Inspect the outputs the model generated under your prompt.

In [None]:
# Show verbal reports with low (<= 20) or high (>= 80) confidence for the selected reason
show_verbal_reports_in_range(problems_reports, selected_reason, 20, 80) # remove 20, 80 to show all output

In [None]:
# show the complete LLM output for an index of your choice
# (LLM_results is in the same order as the prompts; you can change 1 to any other valid index)
print(LLM_results[1])

# Adventure B: other reasons

In this adventure, you can test your own reasons.

### Set up the inference client

In [None]:
# Let's set the InferenceClient first.
# NOTE(review): this identifier names a Meta-Llama-3 model, whereas the other
# adventures use Meta-Llama-3.1 identifiers --- confirm that this is intended.
LLM_version = "meta-llama/Meta-Llama-3-70B-Instruct"
LLM = InferenceClient(model = LLM_version, token = API_TOKEN)

Have a look at our reasons for inspiration

In [None]:
# Display the table of available decision reasons
# (this cell only shows the table; the new reason is defined in the next snippet)
disp_tab(decision_reasons)

Now create your own reason by filling in the code in the next snippet

In [None]:
# set the decision reason
new_reason = 'regret' # change to the name of your reason --- this will be used as a column name in the problems_reports data for storing the confidence assessments

# now add a description --- this description will be used in the prompt (it replaces the DECISION_REASON placeholder)!
new_reason_description = 'The reason considers the outcomes of each lottery. The sum of all pairwise differences of outcomes between the lotteries is considered important. The reason prefers the lottery with the more favorable sum of outcome differences.'

### Run the reason through the model

In [None]:
# Build one filled-in prompt per verbal report, using the description of the
# new reason that you provided above
filled_prompts = [
    generate_prompt(
        prompt_base,
        report_row['decision_problem'],
        new_reason_description,
        report_row['verbal_report'],
    )
    for _, report_row in problems_reports.iterrows()
]

# the complete model outputs are collected here
LLM_results = []

# new column for the confidence values, named after `new_reason`
problems_reports[new_reason] = None

# index of the last prompt, shown in the progress output
last_index = problems_reports.shape[0] - 1

# run the analysis: query the model with every prompt, keep the raw response,
# and store the extracted confidence value in the row with the same index
for position, filled_prompt in enumerate(filled_prompts):
    model_output = LLM.text_generation(filled_prompt, max_new_tokens=4000)
    LLM_results.append(model_output)

    problems_reports.at[position, new_reason] = extract_confidence(model_output)

    # monitor progress
    print(f"{position}/{last_index}")

### Evaluate results

In [None]:
# Show verbal reports with low (<= 20) or high (>= 80) confidence for the new reason
show_verbal_reports_in_range(problems_reports, new_reason, 20, 80) # remove 20, 80 to show all output

In [None]:
# Show the complete LLM output for one prompt (index 2 here; change it to any other valid index)
print(LLM_results[2]) # LLM response to the prompt at index 2

# Adventure C: Better model

In this adventure, you get to play around with other models.

### Set up an inference client with a new model

Try out some of the larger Llama models with either 70B or 405B parameters using the following model names.

* `meta-llama/Meta-Llama-3.1-70B-Instruct`
* `meta-llama/Meta-Llama-3.1-405B-Instruct`

The 405B model is currently the best open model available. It is on par with GPT-4o and Claude 3. However, it will be much slower to run analyses with it. So, instead of looping through the entire dataset, consider running the same **one** prompt with this and other models.

In [None]:
# start with the 70B version
# (to compare models, change LLM_version to another model name and re-run this cell)
LLM_version = "meta-llama/Meta-Llama-3.1-70B-Instruct"
LLM = InferenceClient(model = LLM_version, token = API_TOKEN)

## Select a decision reason for comparison of models

In [None]:
# display the table of available decision reasons
disp_tab(decision_reasons)

# name of the decision reason used for the model comparison
# (change to your reason of choice)
selected_reason = 'loss aversion'

# look up the description that belongs to the selected reason name;
# the first matching row is used
matches_selected = decision_reasons['decision reason name'] == selected_reason
selected_description = decision_reasons.loc[matches_selected, 'decision reason description'].values[0]

### Run the reason through the model

In [None]:
# Build one filled-in prompt per verbal report for the selected decision reason
filled_prompts = [
    generate_prompt(
        prompt_base,
        report_row['decision_problem'],
        selected_description,
        report_row['verbal_report'],
    )
    for _, report_row in problems_reports.iterrows()
]

# the complete outputs of the current model are collected here
LLM_results = []

# new column for the confidence assessments, named after the selected reason
problems_reports[selected_reason] = None

# index of the last prompt, shown in the progress output
last_index = problems_reports.shape[0] - 1

# run the analysis: query the model with every prompt, keep the raw response,
# and store the extracted confidence value in the row with the same index
for position, filled_prompt in enumerate(filled_prompts):
    model_output = LLM.text_generation(filled_prompt, max_new_tokens=4000)
    LLM_results.append(model_output)

    problems_reports.at[position, selected_reason] = extract_confidence(model_output)

    # monitor progress
    print(f"{position}/{last_index}")

### Evaluate the results

In [None]:
# Show verbal reports with low (<= 20) or high (>= 80) confidence for the selected reason
show_verbal_reports_in_range(problems_reports, selected_reason, 20, 80) # remove 20, 80 to show all output

In [None]:
print(LLM_results[15]) # complete LLM output for the prompt at index 15 (change to any other valid index)