# Final Project for the LLM Class

Authors: Luisa, Sebastian, Jan-Felix, Nion 


### LLM Model Set-up

In [None]:
import torch
import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GPT2LMHeadModel, GPT2Tokenizer

import os
# NOTE(review): hard-coded, machine-specific working directory. The relative
# file name "stimuli_Exp1.txt" opened in process_stimuli is resolved against
# it, so this line has to be adapted before running on another machine.
os.chdir("/Users/luisakurth/Downloads/")

In [2]:

##################################################
# LLM set-up
##################################################
# This first part is taken from the course webpage, see:
# https://cogsciprag.github.io/LLM-implications/materials/session3
# Open question: we do not know whether we are allowed to copy this verbatim,
# but it would not be much work to implement something similar anyway.
# If possible, we should adapt it to more recent models.

# Load the tokenizer and model for Flan-T5 (seq2seq) and for GPT-2 (causal LM).
# For both models the tokenizer's EOS token id is passed as pad_token_id.
tokenizer_T5 = AutoTokenizer.from_pretrained("google/flan-t5-base")
model_T5 = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base",
                                                 pad_token_id=tokenizer_T5.eos_token_id)

tokenizer_GPT2 = GPT2Tokenizer.from_pretrained("gpt2")
model_GPT2 = GPT2LMHeadModel.from_pretrained("gpt2",
                                             pad_token_id=tokenizer_GPT2.eos_token_id)

# convenience function for nicer output
def pretty_print(s):
    """Print ``s`` below an "Output:" header and a rule of 100 dashes.

    :param s: the object to print (anything ``print`` accepts)
    """
    banner = "Output:\n" + '-' * 100
    for chunk in (banner, s):
        print(chunk)


def generate(prompt, model="T5", *, max_length=500, temperature=0.7):
    """Sample one completion for ``prompt`` from one of the preloaded models.

    :param prompt: text the generation is conditioned on
    :param model: "T5" selects model_T5/tokenizer_T5; any other value selects
        model_GPT2/tokenizer_GPT2
    :param max_length: value forwarded to ``generate`` as ``max_length``
        (default 500, the value that was previously hard-coded)
    :param temperature: softmax temperature used while sampling
        (default 0.7, the value that was previously hard-coded)
    :output: the first returned sequence, decoded with special tokens skipped
    """
    # Keep the selected model in its own name instead of rebinding the
    # `model` parameter (which holds the model *name*).
    if model == "T5":
        lm, tokenizer = model_T5, tokenizer_T5
    else:
        lm, tokenizer = model_GPT2, tokenizer_GPT2

    # encode the context the generation is conditioned on
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # NOTE(review): for the decoder-only GPT-2 the returned sequence presumably
    # starts with the prompt tokens, so the decoded text would contain the
    # question itself (and therefore both candidate numbers) - confirm before
    # feeding GPT-2 output to extract_number.
    sample_output = lm.generate(
        input_ids,                # context to continue
        do_sample=True,           # sample instead of greedy/beam search
        max_length=max_length,    # upper bound on the generated length
        top_k=0,                  # 0 switches top-k filtering off
        top_p=1,                  # 1 keeps the full distribution (no nucleus cut)
        temperature=temperature,  # softmax temperature
    )
    return tokenizer.decode(sample_output[0], skip_special_tokens=True)




### Experiment 1 Set-up 

In [None]:
###########################################
#Experiment 1 Set-up 
###########################################

def convert(text):
    """Turn an experimental item into a forced-choice prompt.

    The "XXX" placeholder becomes the task instruction, the "YYYY" placeholder
    becomes "or", and a question mark is appended.

    :text param: experimental item/sentence
    :output: prompt for model
    """
    substitutions = (
        ("XXX", ". Choose the more likely number:"),
        ("YYYY", "or"),
    )
    prompt = text
    for placeholder, replacement in substitutions:
        prompt = prompt.replace(placeholder, replacement)
    return prompt + "?"



def process_stimuli(convert_func, path="stimuli_Exp1.txt"):
    """
    Processes a stimuli file and extracts "thinks" and "announced" prompts.

    The file (from: https://osf.io/9eg34/) is read in groups of three lines:
    one "thinks" line, one "announced" line and one separator line that is
    skipped. The first whitespace-separated token of each item line is treated
    as an identifier and dropped before the rest is passed to ``convert_func``.

    A final "thinks"/"announced" pair that is not followed by a separator line
    (file ends right after the last item) is processed as well instead of
    being silently dropped.

    :param convert_func: A function to convert the prompts as needed.
    :param path: Stimuli file to read; defaults to "stimuli_Exp1.txt".
    :return: Lists of "thinks" and "announced" prompts.
    """
    # The context manager closes the file even if reading fails.
    with open(path, 'r') as stimuli_file:
        all_lines = stimuli_file.readlines()

    def to_prompt(line):
        # remove first part since it's an identifier
        return convert_func(" ".join(line.split()[1:]))

    thinks = []
    announced = []

    # Walk the complete three-line groups by index (no O(n) pop(0) calls).
    n_complete = len(all_lines) // 3
    for start in range(0, 3 * n_complete, 3):
        thinks.append(to_prompt(all_lines[start]))
        announced.append(to_prompt(all_lines[start + 1]))

    # Trailing pair without a separator line: keep it only when both lines
    # carry content, so stray blank lines at the end stay ignored.
    leftover = all_lines[3 * n_complete:]
    if len(leftover) == 2 and all(line.strip() for line in leftover):
        thinks.append(to_prompt(leftover[0]))
        announced.append(to_prompt(leftover[1]))

    return thinks, announced



def extract_number(question, response):
    """Determine which of the question's two candidate numbers the response contains.

    The candidates are read from the end of the question, which is expected to
    finish with "<low> or <high>?": after removing "?" and splitting on
    whitespace, the third-to-last token is the low number and the last token
    is the high number.

    Question candidates and response tokens are normalized the same way:
    punctuation is stripped from the *edges* of each token only. Deleting
    "." and "," everywhere in the response (the previous approach) turned
    "4.5" into "45" and made decimal candidates such as "3.5" unmatchable.

    Number words ("five") are still not recognized.

    :param question: the prompt that was given to the model
    :param response: the model's answer
    :output: "both", "low", "high" or "neither"
    :raises IndexError: if the question has fewer than three whitespace-separated tokens
    """
    edge_punctuation = ".,;:!?\"'()[]"

    qs = question.replace("?", "").split()
    low = qs[-3].strip(edge_punctuation)
    high = qs[-1].strip(edge_punctuation)

    tokens = {tok.strip(edge_punctuation) for tok in response.split()}
    # A token made only of punctuation must never count as a match.
    tokens.discard("")

    has_low = low in tokens
    has_high = high in tokens

    if has_low and has_high:
        return "both"
    elif has_low:
        return "low"
    elif has_high:
        return "high"
    else:
        return "neither"


def get_counts(N, thinks, announced, model="T5"):
    '''Tally which numbers show up in the model responses.

    Every prompt is sent to the model N times, first all "think" prompts and
    then all "announce" prompts, and each response is classified with
    extract_number.

    :param N: number of participants, i.e. number of repeats for the model
    :param thinks: dataset with think sentences
    :param announced: dataset with announced sentences
    :param model: LLM model to use
    :output: think_counts and announce_counts, in the format [counts of both numbers in answer, counts of low number, counts of high, counts of neither]
    '''
    # Position of each label in the count list; anything else lands in slot 3.
    slot_of = {"both": 0, "low": 1, "high": 2}

    def tally(prompts):
        counts = [0, 0, 0, 0]
        for _ in range(N):
            for prompt in prompts:
                answer = generate(prompt, model)
                label = extract_number(prompt, answer)
                counts[slot_of.get(label, 3)] += 1
        return counts

    think_counts = tally(thinks)
    announce_counts = tally(announced)
    return think_counts, announce_counts

### Test results

In [6]:
# Create the DataFrame that collects all results: one row per condition,
# columns in the same order as the count lists returned by get_counts.
results = pd.DataFrame(columns=["Both", "Low", "High", "Neither"])

In [7]:
##################
#baseline results
##################

# Baseline: prompts are built with convert, i.e. without any examples.
thinks, announced = process_stimuli(convert)
# No model argument is passed, so get_counts uses its default ("T5").
think_counts, announce_counts = get_counts(N=5, thinks=thinks, announced=announced)
print("The T5 model in the think condition:", think_counts)
print("The T5 model in the announce condition:", announce_counts)

# Results of one earlier run: [0, 496, 393, 101], [0, 520, 410, 60] (sampling is random, so other runs differ).
# NOTE(review): these totals do not match a run with N=5 - presumably recorded with other settings; confirm or update.
# Sometimes neither number is found in the response. It would be worth investigating
# how this is caused (whether the number is really absent or whether it is just not recognized).


# Store both conditions as rows of the results DataFrame
results.loc["baseline_T5_t"] = think_counts
results.loc["baseline_T5_a"] = announce_counts

The T5 model in the think condition: [0, 34, 21, 5]
The T5 model in the announce condition: [0, 37, 18, 5]


In [8]:
##################
#1-shot learning
#################

def convert_oneshot(text):
    """Build a prompt that shows one worked example before the actual task.

    input: experimental item
    output: example + experimental item task (Choose the more likely number)
    """
    demonstration = "Rachel is a librarian and works with Mark. Mark thinks that Rachel read ___ books last month. Choose the more likely number: 45 or 52. The answer is 45. Rachel is a woman from the US. Rachel is a librarian and works with Mark. Mark announced to me that Rachel read ___ books last month. Choose the more likely number: 45 or 52. The answer is 52. "
    item = text.replace("XXX", ". Choose the more likely number:")
    item = item.replace("YYYY", "or")
    return "".join((demonstration, item, "?"))

# Rebuild the prompts so that every item is preceded by the one-shot example.
thinks, announced = process_stimuli(convert_oneshot)

# No model argument is passed, so get_counts uses its default ("T5").
think_counts, announce_counts = get_counts(N=5, thinks=thinks, announced=announced)
print("The T5 model in the think condition with 1 shot learning:", think_counts)
print("The T5 model in the announce condition with 1 shot learning", announce_counts)

# Store both conditions as rows of the results DataFrame
results.loc["1shot_T5_t"] = think_counts
results.loc["1shot_T5_a"] = announce_counts


##################
#2-shot learning
#################
def convert_twoshot(text):
    """Creates a prompt with 2 examples before the actual task.

    The two examples and the converted item are joined with a single space,
    so sentences no longer run into each other ("...is 52.Alex...") and the
    examples use the same "number: <low> or <high>" spacing as the item.

    input: experimental item
    output: examples + experimental item task (Choose the more likely number)
    """
    example1 = "Rachel is a librarian and works with Mark. Mark thinks that Rachel read ___ books last month. Choose the more likely number: 45 or 52. The answer is 45. Rachel is a woman from the US. Rachel is a librarian and works with Mark. Mark announced to me that Rachel read ___ books last month. Choose the more likely number: 45 or 52. The answer is 52."
    example2 = "Alex is a person from the US. Alex works at a cafe with Emma. Emma thinks that Alex drank ___ cups of coffee yesterday. Choose the more likely number: 3 or 5. The answer is 3. Alex is a person from the US. Alex works at a cafe with Emma. Emma announced to me that Alex drank ___ cups of coffee yesterday. Choose the more likely number: 3 or 5. The answer is 5."
    text = text.replace(
        "XXX", ". Choose the more likely number:").replace("YYYY", "or")
    # One space between the parts, matching the one-shot prompt layout.
    return " ".join((example1, example2, text)) + "?"

# Rebuild the prompts so that every item is preceded by the two examples.
thinks, announced = process_stimuli(convert_twoshot)
# No model argument is passed, so get_counts uses its default ("T5").
think_counts, announce_counts = get_counts(N=5, thinks=thinks, announced=announced)
print("The T5 model in the think condition with 2 shot learning:", think_counts)
print("The T5 model in the announce condition with 2 shot learning:", announce_counts)

# Store both conditions as rows of the results DataFrame
results.loc["2shot_T5_t"] = think_counts
results.loc["2shot_T5_a"] = announce_counts


##################
#3-shot learning
#################
def convert_threeshot(text):
    """Creates a prompt with 3 examples before the actual task.

    The three examples and the converted item are joined with a single space,
    so sentences no longer run into each other, and the examples use the same
    "number: <low> or <high>" spacing as the item.

    input: experimental item
    output: examples + experimental item task (Choose the more likely number)
    """
    example1 = "Rachel is a librarian and works with Mark. Mark thinks that Rachel read ___ books last month. Choose the more likely number: 45 or 52. The answer is 45. Rachel is a woman from the US. Rachel is a librarian and works with Mark. Mark announced to me that Rachel read ___ books last month. Choose the more likely number: 45 or 52. The answer is 52."
    example2 = "Alex is a person from the US. Alex works at a cafe with Emma. Emma thinks that Alex drank ___ cups of coffee yesterday. Choose the more likely number: 3 or 5. The answer is 3. Alex is a person from the US. Alex works at a cafe with Emma. Emma announced to me that Alex drank ___ cups of coffee yesterday. Choose the more likely number: 3 or 5. The answer is 5."
    example3 = "Michael is a man from the US. Michael lives in a hot climate. He thinks that the temperature reached ___ degrees Celsius yesterday. Choose the more likely number: 34 or 40. The answer is 34. Michael is a man from the US. Michael lives in a hot climate. He announced to me that the temperature reached ___ degrees Celsius yesterday. Choose the more likely number: 34 or 40. The answer is 40."
    text = text.replace(
        "XXX", ". Choose the more likely number:").replace("YYYY", "or")
    # One space between the parts, matching the one-shot prompt layout.
    return " ".join((example1, example2, example3, text)) + "?"

# Rebuild the prompts so that every item is preceded by the three examples.
thinks, announced = process_stimuli(convert_threeshot)

# No model argument is passed, so get_counts uses its default ("T5").
think_counts, announce_counts = get_counts(N=5, thinks=thinks, announced=announced)
print("The T5 model in the think condition with 3 shot learning:", think_counts)
print("The T5 model in the announce condition with 3 shot learning:", announce_counts)

# Store both conditions as rows of the results DataFrame
results.loc["3shot_T5_t"] = think_counts
results.loc["3shot_T5_a"] = announce_counts



The T5 model in the think condition with 1 shot learning: [0, 39, 19, 2]
The T5 model in the announce condition with 1 shot learning [0, 36, 23, 1]
The T5 model in the think condition with 2 shot learning: [0, 38, 20, 2]
The T5 model in the announce condition with 2 shot learning: [0, 37, 23, 0]
The T5 model in the think condition with 3 shot learning: [0, 37, 20, 3]
The T5 model in the announce condition with 3 shot learning: [0, 36, 24, 0]


In [9]:
##################
#1-shot-extreme learning
#################

def convert_oneshot_extreme(text):
    """Build a prompt with one example whose two candidate numbers lie far apart (11 vs. 102).

    input: experimental item
    output: example + experimental item task (Choose the more likely number)
    """
    demonstration = "Rachel is a librarian and works with Mark. Mark thinks that Rachel read ___ books last month. Choose the more likely number: 11 or 102. The answer is 11. Rachel is a woman from the US. Rachel is a librarian and works with Mark. Mark announced to me that Rachel read ___ books last month. Choose the more likely number: 11 or 102. The answer is 102. "
    item = text.replace("XXX", ". Choose the more likely number:")
    item = item.replace("YYYY", "or")
    return "".join((demonstration, item, "?"))

# Rebuild the prompts so that every item is preceded by the "extreme" example (11 vs. 102).
thinks, announced = process_stimuli(convert_oneshot_extreme)

# No model argument is passed, so get_counts uses its default ("T5").
think_counts, announce_counts = get_counts(N=5, thinks=thinks, announced=announced)
print("The T5 model in the think condition with 1 extreme shot learning:", think_counts)
print("The T5 model in the announce condition with 1 extreme shot learning", announce_counts)

# Store both conditions as rows of the results DataFrame
results.loc["1shot_extreme_T5_t"] = think_counts
results.loc["1shot_extreme_T5_a"] = announce_counts

The T5 model in the think condition with 1 extreme shot learning: [0, 42, 14, 4]
The T5 model in the announce condition with 1 extreme shot learning [0, 35, 24, 1]


In [10]:
###############################
#knowledge generation learning
###############################

# Prompting technique reference: https://www.promptingguide.ai/techniques/knowledge

def convert_know(text):
    """Creates a prompt with a piece of background information before the question.

    The knowledge sentence and the converted item are separated by a single
    space so they no longer run into each other, and the typo in the knowledge
    sentence ("slighlty") is corrected because the text is sent to the model.

    input: experimental item
    output: knowledge statement + experimental item task (Choose the more likely number)
    """
    knowledge = "Knowledge: the lower number is just slightly above the mean and the larger number is a lot above the mean."
    text = text.replace(
        "XXX", ". Choose the more likely number:").replace("YYYY", "or")
    return knowledge + " " + text + "?"

# Rebuild the prompts so that every item is preceded by the knowledge statement.
thinks, announced = process_stimuli(convert_know)

# No model argument is passed, so get_counts uses its default ("T5").
think_counts, announce_counts = get_counts(N=5, thinks=thinks, announced=announced)
print("The T5 model in the think condition with knowledge prompting :", think_counts)
print("The T5 model in the announce condition with knowledge prompting:", announce_counts)

# Store both conditions as rows of the results DataFrame
results.loc["know_T5_t"] = think_counts
results.loc["know_T5_a"] = announce_counts

The T5 model in the think condition with knowledge prompting : [0, 28, 31, 1]
The T5 model in the announce condition with knowledge prompting: [0, 33, 26, 1]


In [11]:
# Show the counts of all conditions stored in the results DataFrame.
print(results)

                    Both  Low  High  Neither
baseline_T5_t          0   34    21        5
baseline_T5_a          0   37    18        5
1shot_T5_t             0   39    19        2
1shot_T5_a             0   36    23        1
2shot_T5_t             0   38    20        2
2shot_T5_a             0   37    23        0
3shot_T5_t             0   37    20        3
3shot_T5_a             0   36    24        0
1shot_extreme_T5_t     0   42    14        4
1shot_extreme_T5_a     0   35    24        1
know_T5_t              0   28    31        1
know_T5_a              0   33    26        1
