# BiasMonkey

This notebook replicates the experiments from [BiasMonkey](https://arxiv.org/abs/2311.04076) (Tjuatja et al. 2023), which investigates whether LLMs exhibit human-like response biases in survey questionnaires. It is based on the [original repo](https://github.com/lindiatjuatja/BiasMonkey).

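Before running the notebook, install the requirements and download the prompts by cloning the original repo into the directory the notebook is run from (the code below imports from `BiasMonkey/` and reads prompts from `BiasMonkey/prompts/`).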

```bash
pip install -r requirements.txt
git clone https://github.com/lindiatjuatja/BiasMonkey
```

In [1]:

import os
import glob
from llments.lm.base.hugging_face import HuggingFaceLM
from BiasMonkey.utils import Bias, Response
import torch, gc
from bias_monkey_utils import generate_survey_responses

# Pick the best available accelerator: the first NVIDIA GPU if CUDA is usable,
# otherwise Apple-silicon MPS, otherwise fall back to the CPU.
# To force a specific device (e.g. 'cuda:1'), assign the string to `device` after this block.
if torch.cuda.is_available():
    device = 'cuda:0'
elif getattr(torch.backends, 'mps', None) is not None and torch.backends.mps.is_available():
    # `torch.backends.mps` is looked up defensively because older torch builds do not have it.
    device = 'mps'
else:
    device = 'cpu'


In [2]:

# Enable IPython's autoreload extension in mode 2, so edits to imported modules
# (e.g. bias_monkey_utils) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [11]:
# Manual cleanup cell: collect unreachable Python objects first, then ask PyTorch
# to release the CUDA memory it is caching.
# NOTE(review): this cell's execution count (11) is out of sequence with its neighbours,
# so it was presumably run ad hoc after a later cell — confirm it is still needed.
gc.collect()
torch.cuda.empty_cache()

In [3]:

# Generate survey responses for every (model, prompt file) pair and pickle them under results/.
#
# Reference lists kept from earlier experimentation (not used by the loop below):
# bias_types = ['acquiescence','response_order', 'odd_even', 'allow_forbid', 'opinion_float'] 
# perturbations = ['-key_typo', '-middle_random', '-letter_swap']

base_models = ['llama2-7b', 'llama2-13b', 'llama2-70b']
chat_models = ['llama2-7b-chat', 'llama2-13b-chat', 'llama2-70b-chat']
gpt_models = ['gpt-3.5-turbo', 'gpt-3.5-turbo-instruct']

# os.environ['HF_HOME'] = '/data/datasets/hf_cache/hub/'
# models = ['meta-llama/Llama-2-7b-chat-hf']
models = ['/data/models/huggingface/meta-llama/Llama-2-7b-chat-hf/']

# Resolve the prompt files once, up front, so a missing checkout fails loudly
# before any model is loaded.
prompt_files = glob.glob('BiasMonkey/prompts/response_order.csv')
if not prompt_files:
    raise FileNotFoundError(
        "No prompt files matched 'BiasMonkey/prompts/response_order.csv'; "
        "clone https://github.com/lindiatjuatja/BiasMonkey next to this notebook first."
    )

for model in models:
    # Load each model once and reuse it for every prompt file, instead of
    # reloading the checkpoint inside the per-file loop.
    lm = HuggingFaceLM(model, device=device)
    try:
        for csv_file in prompt_files:
            # The bias type is the file stem up to the first '-', so a stem with a
            # perturbation suffix such as '-key_typo' maps back to its base bias type.
            bias_type = os.path.basename(csv_file.removesuffix(".csv")).split('-')[0]
            # NOTE: `model` is interpolated verbatim, so an absolute model path ends up nested
            # under results/ (e.g. 'results//data/models/...'). The CSV-conversion cell reads
            # from the same `results/{model}` location, so the scheme is kept unchanged here.
            output_path = f'results/{model}/{os.path.basename(csv_file).split(".")[0]}.pickle'
            generate_survey_responses(lm, csv_file, bias_type, output_path, is_chat_model=True, seed=1, num_samples=50, overwrite=True)
    finally:
        # free up memory from GPU, even when generation raised part-way through
        del lm
        gc.collect()
        torch.cuda.empty_cache()
        


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [18]:

# Re-run generation for the last (model, csv_file, bias_type) left over from the loop above.
# That loop deletes `lm` after each model, so the model has to be re-created here;
# otherwise this cell raises NameError on a fresh "Restart & Run All".
lm = HuggingFaceLM(model, device=device)
output_path = f'results/{model}/{os.path.basename(csv_file).split(".")[0]}.pickle'
try:
    generate_survey_responses(lm, csv_file, bias_type, output_path, is_chat_model=True, seed=1, num_samples=50, overwrite=True)
finally:
    # Release the GPU again, mirroring the cleanup done in the generation loop.
    del lm
    gc.collect()
    torch.cuda.empty_cache()

generate_survey_responses: model=<llments.lm.base.hugging_face.HuggingFaceLM object at 0x7f9f1d18a000>, prompts_file=BiasMonkey/prompts/response_order.csv, bias_type=response_order, output_path=results//data/models/huggingface/meta-llama/Llama-2-7b-chat-hf//response_order.pickle, seed=1, num_samples=50, overwrite=True, prompt_template=Please answer the following question with one of the alphabetical options provided.
Question: 
Generating responses for prompt3:
[{'role': 'system', 'content': 'Respond only with the letter corresponding to your answer.'}, {'role': 'user', 'content': 'Please answer the following question with one of the alphabetical options provided.\nQuestion: Over the next 30 years, do you think that the average American family will see its standard of living\nA. Get better\nB. Stay about the same\nC. Get worse'}]
Generating responses for prompt3:
[{'role': 'system', 'content': 'Respond only with the letter corresponding to your answer.'}, {'role': 'user', 'content': 'P

In [19]:
import pandas as pd

# Load the results written to `output_path` by the generation step.
# NOTE: read_pickle executes pickle deserialization — only use it on files this notebook produced.
df = pd.read_pickle(output_path)

# Show the raw contents of the 'responses' column, one entry per row of the results frame.
response_values = df['responses'].values
print(response_values)

['b,b,a,a,a,b,b,b,b,a,b,b,a,b,b,b,b,a,b,b,b,b,b,b,b,b,b,b,b,b,b,b,a,b,a,b,b,b,b,b,b,b,b,b,b,b,b,b,b,b'
 'b,c,c,c,b,c,c,c,c,c,c,c,b,b,c,b,c,b,c,c,c,c,b,c,b,c,c,c,c,b,c,c,c,c,c,c,c,c,b,c,b,c,c,c,c,b,c,c,c,c']


In [None]:
from pathlib import Path
from BiasMonkey.format_results import format_df

# Convert every results file in results/<model>/ to a CSV in results/<model>/csv/.

models = ['/data/models/huggingface/meta-llama/Llama-2-7b-chat-hf/']
for model in models:
    # Built with an f-string on purpose: `model` may be an absolute path, and
    # os.path.join / Path "/" would then discard the leading "results" component.
    results_dir = f"results/{model}"
    csv_dir = f"{results_dir}/csv"
    Path(csv_dir).mkdir(parents=True, exist_ok=True)
    for filename in os.listdir(results_dir):
        print(filename)
        name = filename.split('.')[0]
        pickle_path = os.path.join(results_dir, filename)
        # Skip sub-directories, in particular the csv/ output directory created above.
        if os.path.isfile(pickle_path):
            # Pass the full path: the bare file name would be resolved against the current
            # working directory rather than results_dir (assumes format_df opens the path
            # it is given — TODO confirm against BiasMonkey/format_results.py).
            df = format_df(pickle_path)
            df.to_csv(f'{csv_dir}/{name}.csv', index=False)
