In [None]:
import time
import numpy as np
import openai
from tqdm import tqdm
import pickle
import json
import sys
import os
import pandas as pd
from dotenv import load_dotenv

# Load environment variables from a local .env file (a single call is enough;
# the original cell repeated it verbatim).
load_dotenv()

# Base locations for input data and intermediate outputs. Both are read from the
# environment and are None when the corresponding variable is not set.
DATA_PATH = os.getenv('DATA_PATH')
TEMP_PATH = os.getenv('TEMP_PATH')


# Getting all synthetic human bios

The code for creating the bio prompts is based on the code from [this paper](https://arxiv.org/pdf/2209.06899) (which, unfortunately, was published only as a [PDF](https://dataverse.harvard.edu/file.xhtml?fileId=6711665&version=1.0)). That code takes its demographic data from [here](https://faculty.wcas.northwestern.edu/jnd260/pub/Rothschild,%20Howat,%20Shafranek,%20Busby%202018.pdf).


In [None]:

def uniqvals(users, field):
    """Return the distinct values stored under `field` across all user records.

    `users` maps a user id to a record (a mapping that contains `field`).
    The result is a list with duplicates removed; its order is not guaranteed.
    """
    distinct = set()
    for record in users.values():
        distinct.add(record[field])
    return list(distinct)

# Lookup tables that translate raw survey answers into the wording used in the
# first-person bios. Outer key: survey column name. Inner mapping: raw answer ->
# phrase to insert into the bio. An empty-string phrase means "say nothing about
# this attribute".
fields_of_interest = {
    "Gender": {
        "Male": "male",
        "Female": "female",
        '': ''
    },
    "Hisp": {
        "Hispanic": "Hispanic",
        "Not Hispanic": '',
        '': ''
    },
    "WHITE": {
        "White": "white",
        "Non-white": '',
        '': ''
    },
    "Ideo": {
        '': '',
        'Liberal': 'liberal',
        'Slightly conservative': 'slightly conservative',
        'Conservative': 'conservative',
        'Slightly liberal': 'slightly liberal',
        "Moderate/Haven't thought about it": 'moderate',
        'Extremely Liberal': 'extremely liberal',
        'Extremely conservative': 'extremely conservative',
    },
    # Phrases here complete the sentence "Politically, I ...".
    "PID7": {
        '': '',
        'Ind': 'am an independent',
        'Strong D': 'am a strong Democrat',
        'Strong R': 'am a strong Republican',
        'Lean D': 'lean towards Democrats',
        'Lean R': 'lean towards Republicans',  # was misspelled "Rebublicans"
        'Weak D': 'am a weak Democrat',
        'Weak R': 'am a weak Republican',
    },
    # Income brackets are collapsed into coarse class labels.
    "Inc": {
        '': '',
        'Less than $15K': 'very poor',
        '$15K to $25K': 'poor',
        '$25K to $50K': 'poor',
        '$50K to $75K': 'middle-class',
        '$75K to $100K': 'middle-class',
        '$100K to $150K': 'middle-class',
        '$150K to $200K': 'upper-class',
        '$200K to $250K': 'upper-class',
        '$250K to $500K': 'upper-class',
        'Prefer not to answer': '',
        '-8': '',
    },
}

def mapper(profile):
    """Translate one raw survey profile into the phrases used to build a bio.

    Parameters
    ----------
    profile : mapping
        Raw survey answers keyed by column name (e.g. 'Gender', 'Ideo', 'Age').

    Returns
    -------
    dict
        One entry per profile key that also appears in `fields_of_interest`,
        holding the mapped phrase ('' when the raw answer is unknown). An 'Age'
        entry is added only when the profile carries a usable age: 'young'
        (18-24), 'middle-aged' (25-39), 'old' (40-59), 'very old' (60-99), or
        '' for any other number. A missing, blank, NaN or non-numeric age adds
        no 'Age' entry instead of raising.
    """
    results = {}
    for field, raw_value in profile.items():
        if field in fields_of_interest:
            results[field] = fields_of_interest[field].get(raw_value, '')

    # Treat an absent key, None and '' the same way: no age information.
    raw_age = profile.get('Age', '')
    if raw_age is None or raw_age == '':
        return results

    try:
        # float() first so values such as '34.0' or numpy floats are accepted;
        # NaN / inf make int() raise and are handled as "no age".
        age = int(float(raw_age))
    except (TypeError, ValueError, OverflowError):
        return results

    # (lower bound inclusive, upper bound exclusive, label)
    age_bands = (
        (18, 25, 'young'),
        (25, 40, 'middle-aged'),
        (40, 60, 'old'),
        (60, 100, 'very old'),
    )
    results['Age'] = ''
    for lower, upper, label in age_bands:
        if lower <= age < upper:
            results['Age'] = label
            break
    return results


In [None]:

# Read the raw survey export into a DataFrame
df = pd.read_csv("../data/ppfull.csv")

# Keep only respondents whose race would actually appear in the bio:
# 'White' in the WHITE column or 'Hispanic' in the Hisp column.
df = df[df['WHITE'].isin(['White']) | df['Hisp'].isin(['Hispanic'])]

# Age: coerce to numbers first so blank or non-numeric entries become NaN
# (a plain astype(float) raises on them), then keep ages in [18, 100).
# Comparisons against NaN are False, so missing ages are dropped as well.
age_numeric = pd.to_numeric(df['Age'], errors='coerce')
df = df[(age_numeric >= 18) & (age_numeric < 100)]

# Categorical features: keep only raw answers that map to a non-empty phrase.
# This also drops answers such as 'Prefer not to answer' / '-8' for income,
# which are valid keys in fields_of_interest but map to '' (the previous filter
# let those through despite the stated intent).
for column in ('Ideo', 'PID7', 'Gender', 'Inc'):
    informative_answers = [
        raw for raw, phrase in fields_of_interest[column].items() if phrase != ''
    ]
    df = df[df[column].isin(informative_answers)]

# Convert the DataFrame to a dictionary keyed by the first column
# (one inner dict of column -> value per respondent).
dmap = df.set_index(df.columns[0]).T.to_dict()

In [None]:

# Build one record per respondent: the mapped demographic phrases plus a
# first-person bio assembled from them.
results = {}
for user_id in tqdm(dmap.keys()):  # `user_id`, not `id`, to avoid shadowing the builtin
    user_profile = mapper(dmap[user_id])

    # Read every mapped phrase once; a feature that is absent counts as blank.
    ideology = user_profile.get('Ideo', '')
    party = user_profile.get('PID7', '')
    race_white = user_profile.get('WHITE', '')
    hispanic = user_profile.get('Hisp', '')
    gender = user_profile.get('Gender', '')
    income = user_profile.get('Inc', '')
    age = user_profile.get('Age', '')

    # Construct bio: one sentence per non-blank feature, in a fixed order.
    sentences = []
    if ideology != '':
        sentences.append("Ideologically, I describe myself as " + ideology + ". ")
    if party != '':
        sentences.append("Politically, I " + party + ". ")
    # mapper() emits the lowercase phrase 'white' for the raw answer 'White';
    # comparing against 'White' (as before) never matched, so the race sentence
    # was silently missing from every bio.
    if race_white == 'white':
        sentences.append("Racially, I am white. ")
    if hispanic == 'Hispanic':
        sentences.append("Racially, I am Hispanic. ")
    if gender != '':
        sentences.append("I am " + gender + ". ")
    if income != '':
        sentences.append("Financially, I am " + income + ". ")
    if age != '':
        sentences.append("In terms of my age, I am " + age + ". ")

    # Store all features and the bio
    results[user_id] = {
        'id': user_id,
        'ideology': ideology,
        'political_affiliation': party,
        'race_white': race_white,
        'hispanic': hispanic,
        'gender': gender,
        'income': income,
        'age': age,
        'bio': "".join(sentences),
    }

# One row per respondent, indexed by respondent id
df = pd.DataFrame.from_dict(results, orient='index')


In [None]:
# Preview the first rows instead of rendering the whole frame
df.head()

There are a lot of duplicates; we don't need them.

In [None]:
# Tally how many respondents share each identical combination of features.
# The 'id' column is left out because it is unique per respondent.
duplicate_value_counts = df.loc[:, df.columns != 'id'].value_counts()

# TODO: sample weighted by these counts

# Show the tally (most frequent combinations first)
duplicate_value_counts

In [None]:
# One row per distinct feature combination, with a 'count' column holding the
# number of respondents that share it.
unique_with_counts_df = (
    df.drop(columns=['id'])
    .value_counts()
    .rename('count')
    .reset_index()
)

# Persist the counts so later sampling can be weighted by them
unique_with_counts_df.to_csv('../data/pigeonhole_human_data_counts.csv', index=False)


In [None]:
# Keep the first respondent for every distinct feature combination
# (all columns except 'id' take part in the comparison).
unique_df = df.drop_duplicates(
    subset=[column for column in df.columns if column != 'id']
)

# Write the de-duplicated respondents to disk
unique_df.to_csv('../data/pigeonhole_human_data.csv', index=False)


# Generating their responses on some Habermas questions using some models
For now:
1. Sample a subset of 50 users.
2. Sample 5 Habermas questions.
3. Generate responses with llama-3.1-8b-instruct and gpt-4o-mini.

This bit of code will look a lot like `generating_llm_responses.ipynb`.

## Sampling 50 "humans"

In [None]:
# Draw synthetic "humans", weighting each distinct profile by how many survey
# respondents share it; the fixed seed keeps the draw reproducible.
# NOTE(review): the surrounding markdown talks about 50 humans but this draws 5 — confirm which is intended.
sample_human = (
    unique_with_counts_df
    .sample(n=5, weights='count', random_state=42)
    .reset_index(drop=True)
)

## Questions + LLM responses data

In [None]:
# Load the questions together with the existing human perspectives / LLM responses
df_questions = pd.read_csv(DATA_PATH+'questions_and_human_perspectives_with_responses.csv')

# Discard the leftover index column that a previous to_csv may have written
df_questions = df_questions.drop(columns=['Unnamed: 0'], errors='ignore')

print("df_questions.shape: ", df_questions.shape)

In [None]:
# Reproducible random subset of 5 questions
sample_qs = (
    df_questions
    .sample(n=5, random_state=42)
    .reset_index(drop=True)
)
sample_qs.head()

## Generating Human responses

In [None]:

def generate_responses(questions, bios, generation_function, output_path, start_from_checkpoint=True):
    """
    Generate a response for every (question, bio) pair and save them to a JSON file.

    Parameters
    ----------
    questions : iterable of str
        Questions to answer.
    bios : iterable of str
        Persona bios; each question is answered once per bio.
    generation_function : callable
        Called as ``generation_function(question, bio)``; its return value is
        stored as the response.
    output_path : str
        JSON file the responses are written to, shaped as
        ``{question: {bio: response}}``.
    start_from_checkpoint : bool
        When True and ``output_path`` already exists, previously saved responses
        are loaded and only the missing (question, bio) pairs are generated.
        A missing checkpoint file simply means starting from scratch.

    Returns
    -------
    dict
        The full ``{question: {bio: response}}`` mapping, including anything
        loaded from the checkpoint (also returned when nothing was left to do).

    Notes
    -----
    A failure of ``generation_function`` for one pair is printed and skipped so
    a single bad request does not abort the whole run.
    """
    print("Generating responses for: ", output_path)

    # Load existing responses if any and if we want to resume
    responses = {}
    if start_from_checkpoint and os.path.exists(output_path):
        with open(output_path, 'r') as f:
            responses = json.load(f)

    # Make sure the directory exists (dirname is '' for a bare filename, which
    # os.makedirs rejects, so only create it when there is one)
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Materialise both inputs so they can be iterated more than once
    questions = list(questions)
    bios = list(bios)

    def _is_done(question, bio):
        # Resume is tracked per pair: a question answered for only some bios
        # must still be generated for the others.
        return bio in responses.get(question, {})

    def _save():
        with open(output_path, 'w') as f:
            json.dump(responses, f, indent=2)

    if all(_is_done(question, bio) for bio in bios for question in questions):
        print("All questions already processed.")
        return responses

    for bio in tqdm(bios, desc='Generating for bio'):
        remaining_questions = [q for q in questions if not _is_done(q, bio)]

        # Process each remaining question with progress bar
        for idx, question in enumerate(tqdm(remaining_questions, desc="Generating question responses", smoothing=0, ascii=True)):
            try:
                response = generation_function(question, bio)
            except Exception as e:
                print(f"\nError processing question '{question}' for bio '{bio}': {str(e)}")
                continue

            responses.setdefault(question, {})[bio] = response

            # Periodic checkpoint so a crash loses at most a few responses
            if idx % 10 == 0:
                _save()

    _save()
    return responses

In [None]:
from together import Together

client = Together(api_key=os.getenv('TOGETHER_API_KEY'))

def generate_together_response(question, bio, model):
    """Ask `model` (via the module-level Together `client`) to answer `question` in persona.

    The bio is supplied as the system message, instructing the model to answer
    only from that person's perspective; the question is the user message.
    Returns the text content of the first completion choice.
    """
    persona_instruction = f"Answer only from the perspective of a person with the following demographics and beliefs:\n{bio}"
    conversation = [
        {"role": "system", "content": persona_instruction},
        {"role": "user", "content": question},
    ]
    reply = client.chat.completions.create(
        model=model,
        messages=conversation,
        max_tokens=2048,
    )
    first_choice = reply.choices[0]
    return first_choice.message.content

# Short display name -> Together model identifier.
# The mistral models are also reachable through HF, but Together is much faster
# with a dedicated endpoint and offers more models.
together_models = {
    # 'mistral-7b-instruct': 'mistralai/Mistral-7B-Instruct-v0.1',
    'llama-3.1-8b-instruct': 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K',
    # 'deepseek-r1': 'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
    # 'gemma-2b-it': 'google/gemma-2b-it'
}

# Generate persona responses for every enabled model; each model gets its own
# JSON file under TEMP_PATH and always starts fresh (no checkpoint resume).
for bettername, model in together_models.items():
    output_file = bettername+'_responses.json'

    def generation_function(x, bio, model=model):
        # Adapter: fixes the model so the helper sees a (question, bio) callable
        return generate_together_response(x, bio, model)

    responses = generate_responses(
        sample_qs['question'],
        sample_human['bio'].values.tolist(),
        generation_function,
        TEMP_PATH+output_file,
        start_from_checkpoint=False,
    )

In [None]:
def generate_responses_to_df(questions_df, bios_df, model='meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo-128K'):
    """
    Generate persona responses and format them into a DataFrame with demographic information.

    Parameters
    ----------
    questions_df : pandas.DataFrame
        Must contain a 'question' column.
    bios_df : pandas.DataFrame
        Must contain a 'bio' column plus the demographic columns 'ideology',
        'political_affiliation', 'race_white', 'hispanic', 'gender', 'income'
        and 'age'.
    model : str
        Model identifier passed to `generate_together_response`. Defaults to
        the model that was previously hard-coded here.

    Returns
    -------
    pandas.DataFrame
        One row per successfully generated (bio, question) pair with columns
        'question', 'response' and the demographic columns. The columns are
        present even when no row could be generated. Pairs whose generation
        raised are printed and left out.
    """
    demographic_columns = [
        'ideology', 'political_affiliation', 'race_white', 'hispanic',
        'gender', 'income', 'age', 'bio',
    ]
    all_results = []

    # Loop through each bio; `total` lets the progress bar show completion,
    # which it cannot infer from the iterrows() generator.
    for _, bio_row in tqdm(bios_df.iterrows(), total=len(bios_df), desc='Generating responses'):
        # For each bio, generate response for each question
        for _, question_row in questions_df.iterrows():
            try:
                response = generate_together_response(question_row['question'], bio_row['bio'], model)

                # Combine question, response, and demographic info
                result = {
                    'question': question_row['question'],
                    'response': response,
                }
                for column in demographic_columns:
                    result[column] = bio_row[column]
                all_results.append(result)

            except Exception as e:
                # Best effort: report and move on to the next pair
                print(f"\nError processing question for bio: {str(e)}")
                continue

    # Fix the column set explicitly so an empty result still has the expected
    # columns for downstream lookups.
    return pd.DataFrame(all_results, columns=['question', 'response'] + demographic_columns)

# Simulate every sampled human answering every sampled question
final_df = generate_responses_to_df(sample_qs, sample_human)

# Persist the simulated answers together with their demographics
final_df.to_csv(TEMP_PATH + 'llama_responses_with_demographics.csv', index=False)

In [None]:
# Quick look at the first few simulated answers
final_df.head(n=5)

In [None]:
from pydantic import BaseModel, Field
from typing import Literal

# Schema for one structured agreement judgement returned by the rating model.
# generate_agreement_rating validates the model's parsed JSON output against
# this class.
class AgreementRating(BaseModel):
    # Free-text justification for the rating
    explanation: str = Field(description="Brief rationale for the chosen rating")
    # The rating itself; only the integers 1 through 5 are accepted
    answer: Literal[1, 2, 3, 4, 5] = Field(description="Agreement rating on a 1-5 scale")
    # Passages of the evaluated response that the rater identifies with
    quotes: list[str] = Field(description="List of substrings from the response that the user feels specifically represented by")

def generate_agreement_rating(question, llm_response, bio, human_answer, model):
    """
    Have a persona rate how well an LLM response represents its own opinion.

    The system message puts `model` in the persona described by `bio` and
    reminds it of `human_answer`, the answer that persona gave to `question`.
    The user message shows `llm_response` and asks for a 1-5 agreement rating,
    an explanation and supporting quotes, as JSON.

    Returns an `AgreementRating` built from the model's JSON output, or None
    (after printing the error) when the output cannot be parsed or validated.
    """
    persona_message = f"""Answer only from the perspective of a person with the following demographics and beliefs:
{bio}

In response to this question: '{question},' you said '{human_answer}'"""
    rating_request = f"""Question: {question}

Response: {llm_response}

On a scale of 1-5, to what extent does the above response represent *your* opinion?
1 = Completely disagree
2 = Somewhat disagree
3 = Neutral
4 = Somewhat agree
5 = Completely agree

Provide your rating, a brief explanation for your choice, and a list of exact quotes from the response that you feel represent your opinion, if any.

Output should be in the following JSON format:
{{
    "explanation": "Brief rationale for your rating",
    "answer": <integer between 1-5>,
    "quotes": ["quote1", "quote2", ...]
}}"""

    conversation = [
        {"role": "system", "content": persona_message},
        {"role": "user", "content": rating_request},
    ]
    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
        max_tokens=2048,
        response_format={"type": "json_object"},
    )

    # Reading the reply, decoding the JSON and validating it all count as
    # "parsing": any failure yields None.
    try:
        payload = json.loads(completion.choices[0].message.content)
        return AgreementRating(**payload)
    except Exception as e:
        print(f"Error parsing response: {e}")
    return None

def generate_agreement_ratings_df(questions_df, bios_df, llm_to_eval, human_responses_df, model='meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'):
    """
    Generate agreement ratings for each human-LLM response pair.

    Parameters
    ----------
    questions_df : pandas.DataFrame
        Must contain a 'question' column and a column named `llm_to_eval`
        holding that LLM's response to each question.
    bios_df : pandas.DataFrame
        Must contain a 'bio' column plus the demographic columns 'ideology',
        'political_affiliation', 'race_white', 'hispanic', 'gender', 'income'
        and 'age'.
    llm_to_eval : str
        Name of the column in `questions_df` whose responses are rated.
    human_responses_df : pandas.DataFrame
        Simulated human answers with 'question', 'bio' and 'response' columns.
    model : str
        Model identifier passed to `generate_agreement_rating`. Defaults to
        the model that was previously hard-coded here.

    Returns
    -------
    pandas.DataFrame
        One row per (bio, question) pair that produced a rating. Pairs with no
        matching human response, with a failed rating call, or with an
        unparseable rating are reported (where applicable) and left out. The
        columns are present even when the result is empty.
    """
    demographic_columns = [
        'ideology', 'political_affiliation', 'race_white', 'hispanic',
        'gender', 'income', 'age', 'bio',
    ]
    result_columns = [
        'question', 'llm', 'llm_response', 'human_response',
        'agreement_rating', 'rating_explanation', 'quotes',
    ] + demographic_columns
    all_results = []

    for _, bio_row in tqdm(bios_df.iterrows(), total=len(bios_df), desc='Processing bios'):
        for _, question_row in questions_df.iterrows():
            # Get LLM response for this question
            llm_response = question_row[llm_to_eval]

            # Get human response for this question and bio
            matching_responses = human_responses_df.loc[
                (human_responses_df['question'] == question_row['question']) &
                (human_responses_df['bio'] == bio_row['bio']),
                'response'
            ]
            if matching_responses.empty:
                # A pair is missing when its generation failed upstream;
                # skip it rather than letting .iloc[0] abort the whole run.
                print(f"\nNo human response found for question '{question_row['question']}'; skipping")
                continue
            human_response = matching_responses.iloc[0]

            try:
                rating = generate_agreement_rating(
                    question_row['question'],
                    llm_response,
                    bio_row['bio'],
                    human_response,
                    model
                )

                # None means the rating could not be parsed; nothing to record
                if rating:
                    result = {
                        'question': question_row['question'],
                        'llm': llm_to_eval,
                        'llm_response': llm_response,
                        'human_response': human_response,
                        'agreement_rating': rating.answer,
                        'rating_explanation': rating.explanation,
                        'quotes': rating.quotes,
                    }
                    for column in demographic_columns:
                        result[column] = bio_row[column]
                    all_results.append(result)

            except Exception as e:
                # Best effort: report and move on to the next pair
                print(f"\nError processing rating: {str(e)}")
                continue

    return pd.DataFrame(all_results, columns=result_columns)

# Have every sampled persona rate the stored llama-3.1-8b-instruct answers,
# using the simulated human answers in final_df as that persona's own position
agreement_df = generate_agreement_ratings_df(
    sample_qs,
    sample_human,
    'llama-3.1-8b-instruct',
    final_df,
)

# Persist the ratings
agreement_df.to_csv(TEMP_PATH + 'llama_agreement_ratings.csv', index=False)