In [None]:
from openai import OpenAI
import numpy as np
import pandas as pd

In [35]:
KEY = "YOUR_KEY_HERE"

## Prompt

In Hewitt et al [1], "Predicting Results of Social Science Experiments Using LLMs", they found that they could use LLMs to generate responses to social surveys that matched up to responses from survey participants. Very interesting idea that seems to work (perhaps a little too well, which raises some skepticism).

The methods section is a bit spare, but they include an image with an example prompt template used to generate responses. Approximate that prompt for our use case -- but this is a sharp edge! Need to validate the paper's claims more seriously.

1: https://docsend.com/view/qeeccuggec56k9hd

In [64]:
def construct_hewitt_et_al_prompt(
    political_leaning,
    age,
    ethnicity,
    gender,
    education_level,
    party,
    policy
):
    """
    construct a prompt approximately based on work by Hewitt Et Al.
     https://docsend.com/view/qeeccuggec56k9hd
    """
    prompt = f"""
You are a {political_leaning}, {age}, {ethnicity}, {gender}, Londoner with {education_level}, who identifies as {party}.
You are asked about your thoughts on the following policy for the Greater Londer Area:

{policy}

Please concisely explain how you feel about the policy.
"""
    return prompt

## Data Generation

In [62]:
# Example distributions
# All of these numbers are dummy numbers until we find some stats to be confident in!
political_leaning_dist = ['Liberal', 'Conservative', 'Moderate']
political_leaning_prob = [0.4, 0.3, 0.3]

age_dist = np.arange(18, 90)
age_prob = np.random.normal(50, 15, len(age_dist))
age_prob = age_prob / age_prob.sum()  # Normalize to sum to 1

ethnicity_dist = ['White', 'Black', 'Asian', 'Hispanic', 'Other']
ethnicity_prob = [0.6, 0.13, 0.06, 0.18, 0.03]

gender_dist = ['Male', 'Female', 'Other']
gender_prob = [0.49, 0.49, 0.02]

education_level_dist = ['High School', 'Some College', 'Bachelor', 'Master', 'PhD']
education_level_prob = [0.3, 0.3, 0.2, 0.15, 0.05]

party_dist = ['Labour', 'Conservative', 'Liberal Democrat', 'Other']
party_prob = [0.4, 0.35, 0.2, 0.05]

In [67]:
# Example: Add 10 rows to the DataFrame
rows = []
for _ in range(10):
    row = {
        'political_leaning': np.random.choice(political_leaning_dist, p=political_leaning_prob),
        'age': np.random.choice(age_dist, p=age_prob),
        'ethnicity': np.random.choice(ethnicity_dist, p=ethnicity_prob),
        'gender': np.random.choice(gender_dist, p=gender_prob),
        'education_level': np.random.choice(education_level_dist, p=education_level_prob),
        'party': np.random.choice(party_dist, p=party_prob)
    }
    rows.append(row)

df = pd.DataFrame(rows, columns=['political_leaning', 'age', 'ethnicity', 'gender', 'education_level', 'party'])

# Display the DataFrame
print(df)
df_dict = df.to_dict(orient='records')

  political_leaning  age ethnicity  gender education_level             party
0          Moderate   56     Black  Female    Some College  Liberal Democrat
1           Liberal   50     White    Male          Master      Conservative
2           Liberal   30     Black  Female    Some College      Conservative
3           Liberal   65  Hispanic    Male    Some College            Labour
4           Liberal   75     Asian    Male             PhD            Labour
5          Moderate   75     White    Male     High School            Labour
6           Liberal   43     Asian  Female          Master      Conservative
7          Moderate   73     White  Female     High School      Conservative
8           Liberal   42     White    Male     High School      Conservative
9          Moderate   45  Hispanic  Female    Some College  Liberal Democrat


In [68]:
df_dict[0]

{'political_leaning': np.str_('Moderate'),
 'age': 56,
 'ethnicity': np.str_('Black'),
 'gender': np.str_('Female'),
 'education_level': np.str_('Some College'),
 'party': np.str_('Liberal Democrat')}

In [69]:
construct_hewitt_et_al_prompt(**df_dict[0], policy="Increase funding for public schools by 10%")

'\nYou are a Moderate, 56, Black, Female, Londoner with Some College, who identifies as Liberal Democrat.\nYou are asked about your thoughts on the following policy for the Greater Londer Area:\n\nIncrease funding for public schools by 10%\n\nPlease concisely explain how you feel about the policy.\n'

In [70]:
response = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {"role": "system", "content": construct_hewitt_et_al_prompt(**row, policy="Increase funding for public schools by 10%")},
    # {"role": "user", "content": "Who won the world series in 2020?"},
  ]
)
response.choices[0].message

ChatCompletionMessage(content='I strongly support the policy to increase funding for public schools by 10%. Investing in education is crucial for the development of our children and the overall improvement of our communities. This funding can help reduce class sizes, provide better resources, and support teachers, which ultimately enhances the quality of education. Greater funding can also address disparities in schools, ensuring all children have access to a high-quality education, regardless of their background. In the long term, a well-funded education system can lead to a more informed and skilled workforce, benefiting society as a whole.', refusal=None, role='assistant', function_call=None, tool_calls=None)

In [60]:
response = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {"role": "system", "content": "You are tasked with helping draft better policy proposals. Simulate responses of constitutients to the best of your ability."},
    {"role": "user", "content": construct_hewitt_et_al_prompt(**row, policy="Increase funding for public schools by 10%")},
  ]
)
response.choices[0].message

ChatCompletionMessage(content="As a conservative, I support the idea of increasing funding for public schools, but I believe it needs to be approached with caution. A 10% increase sounds promising, but I’d like to see a clear plan outlining how the funds will be used—particularly in terms of improving educational outcomes and ensuring accountability. \n\nIt's crucial that funding goes towards effective programs, teacher training, and resources that directly benefit our children. Additionally, our focus should also be on addressing the systemic issues in education, such as school choice and empowering parents to make decisions for their children's education. Overall, I’m open to the idea, but I want to ensure it’s part of a comprehensive strategy.", refusal=None, role='assistant', function_call=None, tool_calls=None)