In [None]:
import pandas as pd

# Read the pre-filtered source dataset from disk.
df = pd.read_csv('full_dataset_filtered.csv')

# List every distinct label that occurs in the 'bias' column.
print(pd.unique(df['bias']))

In [None]:
# The eight bias categories kept for the rest of the notebook.
bias_types = [
    'Availability Heuristic',
    'Stereotyping',
    'Confirmation Bias',
    'Status Quo Bias',
    'Halo Effect',
    'Anchoring',
    'Framing Effect',
    'Bandwagon Effect',
]

# Keep only the rows whose 'bias' value is one of the categories above.
keep_mask = df['bias'].isin(bias_types)
filtered_df = df[keep_mask]
filtered_df.head()

In [None]:
# Persist the eight-bias subset; the row index is not written.
filtered_df.to_csv(path_or_buf='eight_bias_filtered_dataset.csv', index=False)

In [None]:
# Optional shortcut: uncomment the next line to reload the subset saved to
# 'eight_bias_filtered_dataset.csv' instead of reusing the in-memory frame.
# filtered_df = pd.read_csv('eight_bias_filtered_dataset.csv')
# Preview the first rows of the filtered frame.
filtered_df.head()

In [None]:
# Number of rows that survived the bias filter.
print(filtered_df.shape[0])

In [None]:
import pandas as pd
import random
import pandas as pd
import random
import re

def transform_dataframe(df, seed=None):
    """
    Build (gold-standard prompt, training prompt) pairs from a cognitive-bias dataset.

    Each input row yields one or two output rows, depending on its bias:

    * Availability Heuristic, Stereotyping, Confirmation Bias, Halo Effect,
      Anchoring: one row; ``control`` is the gold standard and ``treatment``
      is the training prompt.
    * Framing Effect: one row; which of ``control`` / ``treatment`` becomes the
      gold standard is chosen at random.
    * Status Quo Bias: two rows sharing one gold standard (the ``control`` text
      with its two courses of action relabelled "One" / "Another" and placed in
      random order), paired with ``control`` and then ``treatment`` as the
      training prompt.
    * Bandwagon Effect: two rows sharing one gold standard (the ``control``
      text with the "Colleagues in your industry usually express opinion..."
      passage removed), paired with ``control`` and then ``treatment``.

    Rows for Availability Heuristic, Confirmation Bias and Anchoring get a
    ``max_rating_on_scale`` of 11; every other bias gets 7.

    Parameters:
    df (pandas.DataFrame): Input DataFrame with columns [id, bias, control, treatment]
        (only ``bias``, ``control`` and ``treatment`` are read).
    seed (int | None): Optional seed for the random choices. When None (the
        default) the module-level ``random`` generator is used, so results
        depend on its current state.

    Returns:
    pandas.DataFrame: New DataFrame with columns
        [gold_standard_prompt, training_prompt, max_rating_on_scale, bias]

    Raises:
    ValueError: If a row has a bias that is not handled, or if the text of a
        Status Quo Bias / Bandwagon Effect row does not contain the expected
        sections.
    """
    # A private generator makes the output reproducible without touching the
    # global random state; without a seed, fall back to the shared module.
    rng = random.Random(seed) if seed is not None else random

    def process_and_swap_actions(text):
        """Relabel the two courses of action neutrally and randomise their order."""
        # Section starting with "Current course of action" and ending before "Alternative course of action"
        current_action_pattern = r'(Current course of action:.*?)(?=\n\s*Alternative course of action:)'
        current_action_match = re.search(current_action_pattern, text, re.DOTALL)

        # Section starting with "Alternative course of action" and ending before "Prompt:"
        alternative_action_pattern = r'(Alternative course of action:.*?)(?=\n\s*Prompt:)'
        alternative_action_match = re.search(alternative_action_pattern, text, re.DOTALL)

        if not (current_action_match and alternative_action_match):
            raise ValueError("Unable to find the required patterns in the text.")

        labelled_actions = [
            ("Current course of action", current_action_match.group(1).strip()),
            ("Alternative course of action", alternative_action_match.group(1).strip()),
        ]

        # Pick which action is presented first: original order or swapped.
        first, second = rng.choice([labelled_actions, labelled_actions[::-1]])

        # The first slot is always labelled "One" and the second "Another", so
        # the labels no longer reveal which action is the status quo.
        first_text = first[1].replace(first[0], "One course of action (could be alternative or current)")
        second_text = second[1].replace(second[0], "Another course of action (could be alternative or current)")

        # Rebuild the text, dropping the chosen actions into the two original slots.
        return (
            text[:current_action_match.start()]
            + first_text
            + text[current_action_match.end():alternative_action_match.start()]
            + second_text
            + text[alternative_action_match.end():]
        )

    def process_bandwagon_text(text):
        """Remove the "Colleagues in your industry usually express opinion..." passage."""
        # No re.DOTALL here: the passage must sit on a single line that is
        # followed (after optional whitespace) by the "Prompt:" section.
        pattern = r'Colleagues in your industry usually express opinion.*?(?=\n\s*Prompt:)'
        match = re.search(pattern, text)

        if match is None:
            # Include the offending text so the bad row can be identified.
            error_message = f"Pattern not found in text:\n{text!r}"
            raise ValueError(error_message)

        # Cut the matched passage out and join the remaining parts.
        return text[:match.start()] + text[match.end():]

    # Bias types whose control prompt is used directly as the gold standard
    control_as_gold = [
        'Availability Heuristic',
        'Stereotyping',
        'Confirmation Bias',
        'Halo Effect',
        'Anchoring'
    ]

    # Bias types answered on an 11-point scale; all others use 7 points
    eleven_pt_scale = [
        'Availability Heuristic',
        'Confirmation Bias',
        'Anchoring'
    ]

    output_columns = ['gold_standard_prompt', 'training_prompt', 'max_rating_on_scale', 'bias']
    records = []

    for _, row in df.iterrows():
        bias = row['bias']
        control = row['control']
        treatment = row['treatment']

        # Each branch yields the (gold_standard_prompt, training_prompt) pairs for this row.
        if bias in control_as_gold:
            pairs = [(control, treatment)]
        elif bias == 'Status Quo Bias':
            processed_gold = process_and_swap_actions(control)
            pairs = [(processed_gold, control), (processed_gold, treatment)]
        elif bias == 'Framing Effect':
            if rng.choice([True, False]):
                pairs = [(control, treatment)]
            else:
                pairs = [(treatment, control)]
        elif bias == 'Bandwagon Effect':
            processed_gold = process_bandwagon_text(control)
            pairs = [(processed_gold, control), (processed_gold, treatment)]
        else:
            # Fail here with a clear message instead of letting the DataFrame
            # construction fail later on mismatched column lengths.
            raise ValueError(f"Unsupported bias type: {bias!r}")

        max_rating = 11 if bias in eleven_pt_scale else 7
        records.extend(
            (gold_prompt, training_prompt, max_rating, bias)
            for gold_prompt, training_prompt in pairs
        )

    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(records, columns=output_columns)

# Expand the filtered rows into gold-standard / training prompt pairs.
test1_new_df = transform_dataframe(df=filtered_df)
test1_new_df.head()

In [None]:
# Number of prompt pairs produced by the transformation.
print(test1_new_df.shape[0])

In [None]:

# Persist the prompt pairs; the row index is not written.
test1_new_df.to_csv(path_or_buf='cs2420_cog_bias_training_df_8_biases.csv', index=False)


# Obtaining Gold Standard Labels

In [None]:
import transformers
import torch
from huggingface_hub import login

# Authenticate with the Hugging Face Hub. Called with no arguments, so no
# access token is hardcoded in the notebook.
# NOTE(review): presumably required because the model loaded below is gated — confirm.
login()

In [None]:
# Hub identifier of the instruction-tuned model used to produce gold-standard answers.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# Text-generation pipeline with bfloat16 weights; device placement is left to
# device_map="auto".
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

# Smoke test: a single hand-written prompt in the same format as the dataset,
# ending with the instruction to answer with the option number only.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": """Situation:
Suppose you are a quality assurance manager at Innovate Semiconductors.
A company in your industry plans to introduce a new line of high-performance microchips with enhanced testing protocols to ensure superior reliability and efficiency.

Prompt:
How likely do you estimate the planned project to be a success?

Answer Options:
Option 1: 0%
Option 2: 10%
Option 3: 20%
Option 4: 30%
Option 5: 40%
Option 6: 50%
Option 7: 60%
Option 8: 70%
Option 9: 80%
Option 10: 90%
Option 11: 100%

Respond with the number of the option you choose. Respond ONLY with the option number. Option number:
"""},
]

# Only one new token is requested because the answer is a bare option number.
# NOTE(review): with top_k=1 only the most likely token can be picked, so the
# temperature value should have no practical effect — confirm this is intended.
# pad_token_id is passed explicitly to match the gold-label loop later in the
# notebook, which makes the same call.
outputs = pipeline(
    messages,
    max_new_tokens=1,
    temperature=0.7,
    top_k=1,
    pad_token_id=pipeline.tokenizer.eos_token_id
)
# The last entry of the returned conversation is the newly generated message.
print(outputs[0]["generated_text"][-1])


In [None]:
import pandas as pd
# Reload the prompt pairs saved earlier in the notebook.
training_dataset = pd.read_csv(filepath_or_buffer="cs2420_cog_bias_training_df_8_biases.csv")

In [None]:
# Preview the first five prompt pairs.
training_dataset.head(n=5)

In [None]:
# Instruction appended to every prompt so the model answers with a bare option number.
suffix = "\n\nRespond with the number of the option you choose. Respond ONLY with the option number. Option number:"

# Append the instruction only where it is not already present, so re-running
# this cell does not stack the suffix onto prompts that already end with it.
for prompt_column in ("gold_standard_prompt", "training_prompt"):
    prompts = training_dataset[prompt_column]
    already_suffixed = prompts.map(
        lambda prompt: isinstance(prompt, str) and prompt.endswith(suffix)
    ).astype(bool)
    training_dataset[prompt_column] = prompts.where(already_suffixed, prompts + suffix)


In [None]:
# Ask the model for an answer to every gold-standard prompt and keep it as the
# label when it is a valid option number for that row's scale.
gold_standard_labels = []
for rows_done, (_, row) in enumerate(training_dataset.iterrows(), start=1):
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": row["gold_standard_prompt"]},
    ]

    # Same generation settings as the single-prompt smoke test: one new token only.
    outputs = pipeline(
        messages,
        max_new_tokens=1,
        temperature=0.7,
        top_k=1,
        pad_token_id=pipeline.tokenizer.eos_token_id
    )
    max_rating = row["max_rating_on_scale"]

    # Default to "no label"; overwritten only by a parsable, in-range answer.
    label = None
    try:
        # The last message of the returned conversation is the model's reply;
        # its content is expected to be the option number.
        choice = int(outputs[0]["generated_text"][-1]['content'])
    except (ValueError, IndexError):
        pass
    else:
        # Accept only options that exist on this row's rating scale.
        if 1 <= choice <= max_rating:
            label = choice
    gold_standard_labels.append(label)

    # Progress report every 1000 rows.
    if rows_done % 1000 == 0:
        print(rows_done)

training_dataset['gold_standard_label'] = gold_standard_labels

In [None]:
# Number of entries in the label column (one per row, including missing labels).
print(training_dataset['gold_standard_label'].size)

In [None]:
# Write the labelled dataset to '../02_LoRA/training_df.csv'; the row index is not written.
training_dataset.to_csv(path_or_buf='../02_LoRA/training_df.csv', index=False)