In [1]:
import os
os.environ['HF_HOME'] = '/mnt/sagemaker-nvme/cache'
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

import torch
import pandas as pd
import json
from tqdm import trange, tqdm
from sklearn.cluster import KMeans
from utils import last_token_pool, get_detailed_instruct, get_llm_response, PersonaDimension, list_s3_prefix
from transformers import AutoTokenizer, AutoModel
from dataclasses import asdict

surveys = set()
for path in list_s3_prefix("human_resp/"):
    if path.startswith("human_resp/American_Trends_Panel"):
        # Extract the folder name
        folder = path.split("/")[1]
        surveys.add(folder)
surveys = sorted(list(surveys))[:3]
surveys

['American_Trends_Panel_W26',
 'American_Trends_Panel_W27',
 'American_Trends_Panel_W29']

# Clustering

In [2]:
def get_personas_extracted_from_questions(personas_from_questions_filename):
    with open(personas_from_questions_filename, 'r') as f:
        data = json.load(f)
    res = []
    for entry in data:
        if not entry['valid']: continue
        for persona_dim in eval(entry['response']):
            entry_dict = asdict(persona_dim)
            input_dict = eval(entry['input_dict'])
            entry_dict['original_question'] = input_dict['question'] + ' ' + input_dict['options']
            res.append(entry_dict)
    df = pd.DataFrame(res)
    return df


def cluster_extracted_personas(survey, extraction_dir, output_dir, num_clusters, print_result, tokenizer, model):
    os.makedirs(output_dir, exist_ok=True)
    
    # Get data
    personas_from_questions_filename = f"{extraction_dir}/personas_extracted_from_question_{survey}.json"
    data = get_personas_extracted_from_questions(personas_from_questions_filename)

    # Get formatted string for clustering
    def get_formatted_persona_dim(row):
        task = 'Given a persona dimension description, retrieve semantically similar persona dimension descriptions.'
        persona = f"{row['name']}: {row['description']}. Candidate values: {row['candidate_values']}"
        return get_detailed_instruct(task, persona)
    data['formatted'] = data.apply(get_formatted_persona_dim, axis=1)

    for level in ['high', 'mid', 'low']:
        # Get subset and save artifacts
        level_df = data[data['level']==level]
        level_df.to_csv(f"{output_dir}/{level}-level_personas_{survey}.csv")
        
        # Get the embeddings
        max_length = 4096
        input_texts = level_df['formatted'].to_list()

        embeddings = []
        for text in input_texts:
            batch_dict = tokenizer([text], max_length=max_length, padding=True, truncation=True, return_tensors="pt")
            with torch.no_grad():
                outputs = model(**batch_dict)
                embed = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])[0]
            embeddings.append(embed)
        embeddings = torch.stack(embeddings, axis=0)
        print(embeddings.shape, len(input_texts))
        
        # Clustering the embeddings and save artifacts
        clustering_model = KMeans(n_clusters=num_clusters)
        clustering_model.fit(embeddings)
        level_df['cluster'] = clustering_model.labels_
        level_df = level_df.sort_values(by='cluster')
        level_df.to_csv(f'{output_dir}/clustered_{level}_level_personas_{survey}.csv')

        if print_result:
            for idx in range(num_clusters):
                print(idx)
                for _, row in enumerate(level_df[level_df['cluster'] == idx]['formatted']):
                    print(row.split('\n')[1])
                print('\n\n')

In [3]:
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SFR-Embedding-2_R')
model = AutoModel.from_pretrained('Salesforce/SFR-Embedding-2_R', device_map='auto')

for survey in tqdm(surveys):
    cluster_extracted_personas(survey,
                               extraction_dir='sm_local/outputs/extraction',
                               output_dir='sm_local/outputs/clustering',
                               num_clusters=20,
                               print_result=False,
                               tokenizer=tokenizer,
                               model=model)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

torch.Size([226, 4096]) 226


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_


torch.Size([226, 4096]) 226


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_
 33%|███▎      | 1/3 [01:15<02:30, 75.18s/it]

torch.Size([199, 4096]) 199
torch.Size([268, 4096]) 268


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_


torch.Size([403, 4096]) 403


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_
 67%|██████▋   | 2/3 [02:57<01:31, 91.40s/it]

torch.Size([205, 4096]) 205
torch.Size([223, 4096]) 223


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_


torch.Size([305, 4096]) 305


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  level_df['cluster'] = clustering_model.labels_
100%|██████████| 3/3 [04:14<00:00, 84.77s/it]

torch.Size([158, 4096]) 158





# Summarizing

In [4]:
def summarize_clustered_personas(prompt_name, survey, level, clustering_dir, output_dir, num_clusters, print_result):
    os.makedirs(output_dir, exist_ok=True)

    # Get data
    clustered_persona_filename = f"{clustering_dir}/clustered_{level}_level_personas_{survey}.csv"
    data = pd.read_csv(clustered_persona_filename)
    with open(f'prompts/{prompt_name}.txt') as f:
        prompt_template = f.read()

    # summarize
    res = []
    for idx in trange(num_clusters):
        persona_cluster = []
        for _, row in data[data['cluster'].astype(str) == str(idx)].iterrows():
            persona = PersonaDimension(**row[['name', 'description', 'level', 'candidate_values']].to_dict())
            persona_cluster.append(persona)
        
        prompt = prompt_template.format(persona_dimensions='[\n' + ',\n'.join(repr(dim) for dim in persona_cluster) + '\n]')
        response = get_llm_response(prompt, prefill='[')
        response = '[' + response
        
        try:
            response = eval(response)
            for dim in response:
                res.append(dim)

            record_res = [str(dim) for dim in res]
            summarized_clustered_personas_filename = f"{output_dir}/summarized_{level}_level_personas_{survey}.json"
            with open(summarized_clustered_personas_filename, 'w') as f:
                json.dump(record_res, f, indent=4)
    
            if print_result:
                print(response)
        except:
            if print_result:
                print(f"cluster {idx} failed")
                print(response)
            continue


In [None]:
for survey in surveys:
    for level in ['low', 'mid', 'high']:
        summarize_clustered_personas(prompt_name="summarize_clustered_personas",
                                     survey=survey,
                                     level=level,
                                     clustering_dir='sm_local/outputs/clustering',
                                     output_dir='sm_local/outputs/summarizing',
                                     num_clusters=20,
                                     print_result=True)

  5%|▌         | 1/20 [00:03<00:58,  3.07s/it]

[    PersonaDimension(
        name="Occupation and Gun Involvement",
        description="The participant's occupation, profession, or job responsibilities that may involve, require, or be influenced by gun ownership, usage, or exposure.",
        level="low",
        candidate_values=['gun-related occupation', 'non-gun-related occupation']
    )]


 10%|█         | 2/20 [00:06<00:59,  3.29s/it]

[    PersonaDimension(
        name="Geographic Gun Ownership and Risk",
        description="The participant's geographic location in terms of gun ownership rates, gun culture, and risk of gun-related incidents such as violence and crime.",
        level="low",
        candidate_values=['high gun ownership and risk area', 'low gun ownership and risk area']
    )]


 15%|█▌        | 3/20 [00:09<00:57,  3.37s/it]

[    PersonaDimension(
        name="Experiences with Gun Violence",
        description="The participant's personal experiences with or exposure to gun violence, crime, threats, and gun-related incidents, either directly or indirectly.",
        level="low",
        candidate_values=['directly affected', 'indirectly affected', 'not affected']
    )]


 20%|██        | 4/20 [00:12<00:46,  2.92s/it]

[    PersonaDimension(
        name="Household Composition with Children",
        description="Whether the person has children living in their home, which may influence perceptions of gun ownership and storage practices due to safety concerns.",
        level="low",
        candidate_values=['has children', 'no children']
    )]


 25%|██▌       | 5/20 [00:14<00:40,  2.69s/it]

[    PersonaDimension(
        name="Personal Experiences with Firearms",
        description="The individual's past experiences, either positive or negative, with firearms, including ownership, usage, interactions with gun owners, and any incidents or threats related to guns.",
        level="low",
        candidate_values=['positive experiences', 'negative experiences', 'no experiences']
    )]


 30%|███       | 6/20 [00:21<00:56,  4.04s/it]

[    PersonaDimension(
        name="Occupational Risk Level",
        description="The participant's occupation and its associated level of risk or exposure to gun violence, crime, and potential need for self-defense measures.",
        level="low",
        candidate_values=['high-risk occupation', 'low-risk occupation']
    )]


 35%|███▌      | 7/20 [00:24<00:51,  3.95s/it]

[    PersonaDimension(
        name="Firearm-Related Lifestyle and Recreational Interests",
        description="The individual's involvement in activities, hobbies, and overall lifestyle related to firearms, such as hunting, shooting sports, gun collecting, and environments where gun ownership or usage is common.",
        level="low",
        candidate_values=['active involvement', 'occasional involvement', 'no involvement']
    )]


 40%|████      | 8/20 [00:28<00:44,  3.72s/it]

[    PersonaDimension(
        name="Gun Ownership Status",
        description="Whether the individual, their spouse/partner, or their household currently owns or has access to firearms, excluding air guns.",
        level="low",
        candidate_values=['gun owner', 'non-gun owner']
    )]


 45%|████▌     | 9/20 [00:29<00:34,  3.14s/it]

[    PersonaDimension(
        name="Socioeconomic Status",
        description="The individual's economic and social standing in society, which may influence their perspective on the relationship between economic opportunities and gun violence, as well as their perceptions of gun violence in general.",
        level="low",
        candidate_values=['high socioeconomic status', 'low socioeconomic status']
    )]


 50%|█████     | 10/20 [00:32<00:30,  3.04s/it]

[    PersonaDimension(
        name="Personal Firearm Experiences",
        description="The individual's past experiences, exposure, and familiarity with firearms, including direct use, hunting, sport shooting, gun-related incidents, and childhood exposure.",
        level="low",
        candidate_values=['extensive experience', 'limited experience', 'no experience']
    )]


 55%|█████▌    | 11/20 [00:35<00:26,  2.96s/it]

[    PersonaDimension(
        name="Perceived Risk in Geographic Location",
        description="The participant's perception of the level of risk, such as crime, violence, economic instability, or terrorism, in the geographic area where they reside.",
        level="low",
        candidate_values=['high-risk area', 'low-risk area']
    )]


 60%|██████    | 12/20 [00:40<00:27,  3.42s/it]

[    PersonaDimension(
        name="Firearm-Related Media Consumption",
        description="The participant's habits and frequency of consuming media content related to firearms, such as TV shows, videos, podcasts, online forums, and websites about guns, hunting, and shooting sports.",
        level="low",
        candidate_values=['high consumption', 'moderate consumption', 'low consumption']
    )]


 65%|██████▌   | 13/20 [00:42<00:20,  2.97s/it]

[    PersonaDimension(
        name="Health and Healthcare Access",
        description="The individual's current health status, medical history, and level of access to healthcare services through insurance coverage or other means.",
        level="low",
        candidate_values=['good health with comprehensive coverage', 'poor health with limited coverage']
    )]


 70%|███████   | 14/20 [00:43<00:15,  2.56s/it]

[    PersonaDimension(
        name="Job Tenure",
        description="length of time in current job or with current employer",
        level="low",
        candidate_values=['short tenure', 'long tenure']
    )]


 75%|███████▌  | 15/20 [00:46<00:12,  2.54s/it]

[    PersonaDimension(
        name="Victimization Experiences",
        description="The participant's personal experiences of being a victim of crime, violence, or feeling unsafe in the past.",
        level="low",
        candidate_values=['victim of violent crime', 'victim of burglary/property crime', 'no victimization experience']
    )]


 80%|████████  | 16/20 [00:48<00:10,  2.60s/it]

[    PersonaDimension(
        name="Geographic Location",
        description="The region or area where the person lives or grew up, which may influence gun ownership, storage practices, laws, norms, risks, and prevalence of gun ownership among their social circle.",
        level="low",
        candidate_values=['urban area', 'rural area', 'suburban area', 'high-crime area', 'low-crime area']
    )]


 85%|████████▌ | 17/20 [00:51<00:08,  2.76s/it]

[    PersonaDimension(
        name="Lifestyle and Activities",
        description="The individual's daily routines, activities, hobbies, social circles, and environments that may influence their need or decision to carry a handgun or pistol.",
        level="low",
        candidate_values=['High-risk lifestyle that may require carrying', 'Low-risk lifestyle that does not require carrying']
    )]


 90%|█████████ | 18/20 [00:53<00:04,  2.35s/it]

[    PersonaDimension(
        name="Financial Situation",
        description="the current state of an individual's financial resources and obligations, such as income, expenses, and debt levels",
        level="low",
        candidate_values=['financially stable', 'financially unstable']
    )]


 95%|█████████▌| 19/20 [00:57<00:02,  2.73s/it]

[    PersonaDimension(
        name="Household Composition",
        description="The number and types of individuals living in the participant's household, including the presence of a spouse/partner, children, or other family members, which may influence gun ownership decisions and storage practices.",
        level="low",
        candidate_values=['single adult', 'family with children', 'multi-generational household', 'lives with roommates']
    )]


100%|██████████| 20/20 [01:03<00:00,  3.18s/it]


[    PersonaDimension(
        name="Personal Experiences with Crime and Safety",
        description="The individual's personal experiences with crime, violence, safety issues, or situations requiring self-defense, which may shape their views on gun ownership and carrying a handgun.",
        level="low",
        candidate_values=['experienced crime/violence/safety issues', 'not experienced crime/violence/safety issues']
    ),     PersonaDimension(
        name="Family Background on Gun Ownership",
        description="The family's cultural, social, or economic background that may have shaped their views on gun ownership and use.",
        level="low",
        candidate_values=['pro-gun family', 'anti-gun family']
    )]


  5%|▌         | 1/20 [00:02<00:53,  2.80s/it]

[    PersonaDimension(
        name="Perception of Risk and Threats",
        description="The participant's assessment of the likelihood and potential impact of various risks and threats, such as gun violence, crime, personal crises, and terrorist attacks.",
        level="mid",
        candidate_values=['high risk perception', 'low risk perception']
    )]


 10%|█         | 2/20 [00:04<00:40,  2.25s/it]

[    PersonaDimension(
        name="Attitudes Towards Gun Safety Education",
        description="The person's beliefs and opinions regarding the importance and necessity of providing gun safety education to children.",
        level="mid",
        candidate_values=['prioritizes gun safety education', 'does not prioritize gun safety education']
    )]


 15%|█▌        | 3/20 [00:07<00:45,  2.66s/it]

[    PersonaDimension(
        name="Hunting and Shooting Interests",
        description="The individual's recreational interests, cultural identity, utilitarian needs, and desire for skill development related to hunting and sport shooting activities.",
        level="mid",
        candidate_values=['strong interest', 'moderate interest', 'low interest']
    )]


 20%|██        | 4/20 [00:14<01:05,  4.08s/it]

[    PersonaDimension(
        name="Emotional State Regarding Safety and Violence",
        description="The person's levels of anxiety, fear, concern, or emotional response towards various safety and violence-related issues, such as gun violence, personal safety, violent crime, mass shootings, and home burglaries.",
        level="mid",
        candidate_values=['high anxiety/fear', 'low anxiety/fear']
    )]


 25%|██▌       | 5/20 [00:17<00:58,  3.89s/it]

[    PersonaDimension(
        name="Sense of Responsibility for Safety",
        description="The participant's perceived sense of responsibility and duty to ensure the safety of others, especially children, when owning or handling firearms.",
        level="mid",
        candidate_values=['high responsibility', 'low responsibility']
    )]


 30%|███       | 6/20 [00:22<00:56,  4.06s/it]

[    PersonaDimension(
        name="Perceived Need for Personal Safety and Security",
        description="The individual's assessment of personal safety risks and the perceived need for gun ownership or access for self-defense and security purposes.",
        level="mid",
        candidate_values=['high perceived need', 'low perceived need']
    )]


 35%|███▌      | 7/20 [00:25<00:48,  3.72s/it]

[    PersonaDimension(
        name="Attitudes Towards Firearm Safety",
        description="The person's beliefs, values, and practices regarding the importance of responsible firearm ownership, safe storage, handling, and safety measures.",
        level="mid",
        candidate_values=['prioritizes firearm safety', 'indifferent to firearm safety']
    )]


 40%|████      | 8/20 [00:28<00:42,  3.51s/it]

[    PersonaDimension(
        name="Attitudes Towards Guns and Gun Control",
        description="The individual's beliefs, opinions, and perceptions regarding gun ownership, use, and the role of guns in society, as well as their views on gun control policies and regulations.",
        level="mid",
        candidate_values=['pro-gun', 'anti-gun', 'neutral']
    )]


 45%|████▌     | 9/20 [00:30<00:34,  3.10s/it]

[    PersonaDimension(
        name="Familiarity with Firearms",
        description="The level of exposure, experience, knowledge, and comfort with firearms and related equipment from an early age or upbringing.",
        level="mid",
        candidate_values=['high familiarity', 'low familiarity']
    )]


 50%|█████     | 10/20 [00:33<00:30,  3.02s/it]

[    PersonaDimension(
        name="Risk Perception and Assessment of Gun Ownership",
        description="The individual's evaluation and perception of the potential risks, threats, consequences, and benefits associated with gun ownership, usage, and storage.",
        level="mid",
        candidate_values=['high risk perception', 'low risk perception', 'perceives guns as reducing risk', 'perceives guns as increasing risk']
    )]


 55%|█████▌    | 11/20 [00:36<00:26,  2.98s/it]

[    PersonaDimension(
        name="Safety and Security Perceptions",
        description="The individual's perceptions and concerns related to personal safety, household security, and the role of firearms in providing a sense of safety or protection.",
        level="mid",
        candidate_values=['high safety concerns', 'low safety concerns', 'feels safer with a gun', 'feels less safe with a gun']
    )]


 60%|██████    | 12/20 [00:38<00:23,  2.88s/it]

[    PersonaDimension(
        name="Cultural and Social Influences on Gun Attitudes",
        description="The impact of cultural, regional, community, and societal norms, values, and attitudes towards gun ownership and usage on the individual's perspectives.",
        level="mid",
        candidate_values=['pro-gun culture', 'anti-gun culture', 'neutral culture']
    )]


 65%|██████▌   | 13/20 [00:42<00:22,  3.24s/it]

[    PersonaDimension(
        name="Civic and Community Engagement",
        description="The person's level of involvement and interest in political, social, and community issues related to gun ownership, regulation, and violence.",
        level="mid",
        candidate_values=['highly engaged', 'moderately engaged', 'disengaged']
    ),     PersonaDimension(
        name="Interest in Guns and Gun Culture",
        description="The level of interest, emotional attachment, and engagement the person has with topics, activities, and content related to guns, shooting sports, and gun culture.",
        level="mid",
        candidate_values=['high interest', 'moderate interest', 'low interest']
    )]


 70%|███████   | 14/20 [00:44<00:16,  2.76s/it]

[    PersonaDimension(
        name="Risk Tolerance",
        description="The individual's willingness to engage in potentially risky or dangerous activities, including financial risks, self-defense situations, and risk-taking behavior during childhood.",
        level="mid",
        candidate_values=['high risk tolerance', 'low risk tolerance']
    )]


 75%|███████▌  | 15/20 [00:47<00:14,  2.84s/it]

[    PersonaDimension(
        name="Financial Capability",
        description="The individual's knowledge, priorities, and resilience in managing personal finances, including understanding financial concepts, setting financial goals, coping with financial stress, and withstanding unexpected expenses.",
        level="mid",
        candidate_values=['high capability', 'moderate capability', 'low capability']
    )]


 80%|████████  | 16/20 [00:49<00:09,  2.50s/it]

[    PersonaDimension(
        name="Recreational Firearm Interests and Use",
        description="The person's engagement in recreational activities involving firearms, such as hunting, target shooting, or other firearm-related hobbies.",
        level="mid",
        candidate_values=['active participation', 'no participation']
    )]


 85%|████████▌ | 17/20 [00:51<00:07,  2.47s/it]

[    PersonaDimension(
        name="Risk Perception of Firearms in Homes with Children",
        description="The individual's assessment of the potential risks, dangers, and consequences associated with having firearms, particularly loaded or unsecured guns, in a household where children are present.",
        level="mid",
        candidate_values=['high risk perception', 'low risk perception']
    )]


 90%|█████████ | 18/20 [00:53<00:04,  2.40s/it]

[    PersonaDimension(
        name="Perception of Crime and Safety",
        description="How the person perceives the overall level of crime and safety in the world compared to the past",
        level="mid",
        candidate_values=['perceives the world as safer', 'perceives the world as more dangerous', 'perceives the world as neither safer nor more dangerous']
    )]


 95%|█████████▌| 19/20 [00:56<00:02,  2.51s/it]

[    PersonaDimension(
        name="Beliefs about Causes of Gun Violence",
        description="The individual's perceptions and beliefs regarding the underlying factors contributing to gun violence in society, including the role of economic conditions, media influence, family instability, and illegal gun access.",
        level="mid",
        candidate_values=['attributes gun violence primarily to societal factors', 'attributes gun violence primarily to individual factors']
    )]


100%|██████████| 20/20 [00:59<00:00,  2.98s/it]


[    PersonaDimension(
        name="Perceived Safety and Trust",
        description="The individual's perception of safety, level of trust in institutions responsible for public safety, and assessment of crime rates in their local environment.",
        level="mid",
        candidate_values=['high perceived safety and trust', 'low perceived safety and trust']
    )]


  5%|▌         | 1/20 [00:02<00:47,  2.49s/it]

[    PersonaDimension(
        name="Attitudes Towards Firearms and Gun Ownership",
        description="The individual's overall beliefs, values, and perspectives regarding gun ownership, the role of firearms in society, and related policies and regulations.",
        level="high",
        candidate_values=['pro-gun', 'anti-gun', 'neutral']
    )]


 10%|█         | 2/20 [00:05<00:46,  2.59s/it]

[    PersonaDimension(
        name="General Risk Attitudes",
        description="The individual's overall tendencies towards risk-taking or risk-aversion, resilience, optimism, and willingness to take precautions across various aspects of life.",
        level="high",
        candidate_values=['risk-averse', 'risk-taking']
    )]


 15%|█▌        | 3/20 [00:11<01:15,  4.45s/it]

[    PersonaDimension(
        name="Worldview and Ideological Beliefs",
        description="The participant's broader philosophical, political, social, and cultural beliefs and ideologies that shape their perspectives on gun ownership and the role of firearms in society.",
        level="high",
        candidate_values=['conservative worldview', 'liberal worldview', 'moderate worldview']
    )]


 20%|██        | 4/20 [00:13<00:56,  3.52s/it]

[    PersonaDimension(
        name="Personal Values on Gun Ownership",
        description="The individual's core beliefs and principles regarding gun ownership, encompassing priorities such as child safety, responsible gun ownership, individual rights, and personal freedom.",
        level="high",
        candidate_values=['prioritizes child safety', 'prioritizes responsible gun ownership', 'prioritizes individual rights', 'prioritizes personal freedom', 'balanced approach']
    )]


 25%|██▌       | 5/20 [00:16<00:46,  3.08s/it]

[    PersonaDimension(
        name="Core Personal Values",
        description="The individual's fundamental beliefs, principles, and priorities related to public safety, individual rights, personal freedom, and social responsibility.",
        level="high",
        candidate_values=['prioritizes public safety', 'prioritizes individual rights', 'prioritizes personal freedom', 'prioritizes community well-being']
    )]


 30%|███       | 6/20 [00:20<00:46,  3.35s/it]

[    PersonaDimension(
        name="Personal Values: Safety vs Freedom",
        description="The prioritization of personal safety, security, and protection versus individual rights, freedom, and non-violence.",
        level="high",
        candidate_values=['prioritizes safety and security', 'prioritizes individual freedom and rights']
    ),     PersonaDimension(
        name="Personal Values: Career and Finances",
        description="The prioritization of job security, financial stability, and career advancement.",
        level="high",
        candidate_values=['prioritizes job security and finances', 'prioritizes career growth']
    )]


 35%|███▌      | 7/20 [00:23<00:43,  3.34s/it]

[    PersonaDimension(
        name="Personality Traits",
        description="The individual's inherent personality characteristics, such as risk-taking, adventurousness, cautiousness, responsibility, and openness, that may influence their attitudes, behaviors, and experiences related to firearms.",
        level="high",
        candidate_values=['risk-taking', 'cautious', 'responsible', 'adventurous', 'open-minded', 'security-conscious']
    )]


 40%|████      | 8/20 [00:27<00:41,  3.43s/it]

[    PersonaDimension(
        name="Personality Traits",
        description="An individual's personality characteristics that may influence their financial behaviors and attitudes, such as their level of optimism, risk-aversion, or impulse control.",
        level="high",
        candidate_values=['optimistic', 'risk-averse', 'impulsive']
    )]


 45%|████▌     | 9/20 [00:28<00:32,  2.98s/it]

[    PersonaDimension(
        name="Empathy and Compassion",
        description="The individual's ability to understand and relate to the emotional experiences of others, particularly those affected by gun violence, economic hardship, or considering suicide.",
        level="high",
        candidate_values=['high empathy', 'low empathy']
    )]


 50%|█████     | 10/20 [00:34<00:36,  3.64s/it]

[    PersonaDimension(
        name="Moral Foundations and Values",
        description="The individual's underlying moral principles, values, and ethical beliefs that shape their views on gun ownership, use, safety, and related issues such as self-defense, violence, child safety, and individual rights.",
        level="high",
        candidate_values=['prioritizes harm/care', 'prioritizes fairness/reciprocity', 'prioritizes loyalty/ingroup', 'prioritizes authority/respect', 'prioritizes purity/sanctity', 'prioritizes individual rights', 'prioritizes public safety', 'prioritizes non-violence', 'prioritizes social justice', 'prioritizes law and order']
    )]


 55%|█████▌    | 11/20 [00:37<00:32,  3.60s/it]

[    PersonaDimension(
        name="Cultural and Social Influences on Gun Attitudes",
        description="The impact of an individual's cultural background, social environment, community norms, and regional traditions on their attitudes and behaviors towards gun ownership, usage, and storage.",
        level="high",
        candidate_values=['pro-gun culture', 'anti-gun culture', 'neutral/mixed culture']
    )]


 60%|██████    | 12/20 [00:45<00:39,  4.90s/it]

[    PersonaDimension(
        name="Political and Societal Worldview",
        description="The participant's overarching attitudes, beliefs, and perspectives on various political, social, and cultural issues, including gun control, personal freedom, crime, justice system, role of government, and societal values.",
        level="high",
        candidate_values=['conservative worldview', 'liberal worldview', 'moderate worldview']
    )]


 65%|██████▌   | 13/20 [00:52<00:39,  5.64s/it]

[    PersonaDimension(
        name="Sociopolitical Worldview and Beliefs",
        description="The individual's broader perspectives, beliefs, and attitudes towards society, government, crime, personal freedoms, and the role of firearms in addressing societal issues.",
        level="high",
        candidate_values=['conservative worldview', 'liberal worldview', 'moderate worldview']
    )]


 70%|███████   | 14/20 [00:55<00:27,  4.62s/it]

[    PersonaDimension(
        name="Resilience and Adaptability",
        description="The individual's capacity to cope with, recover from, and adapt to challenging life events, such as job loss, traumatic experiences, or changes in perceived safety and crime levels.",
        level="high",
        candidate_values=['high resilience', 'low resilience']
    )]


 75%|███████▌  | 15/20 [00:57<00:20,  4.06s/it]

[    PersonaDimension(
        name="Political Ideology on Gun Issues",
        description="The individual's political beliefs and affiliations that shape their stance on gun ownership, gun rights, gun control, and gun-related violence.",
        level="high",
        candidate_values=['conservative', 'liberal', 'moderate']
    )]


 80%|████████  | 16/20 [01:00<00:14,  3.69s/it]

[    PersonaDimension(
        name="Parenting Values and Approach",
        description="The individual's values, priorities, and general approach towards parenting, including their beliefs about child safety, the role of parents in educating children, and responsible parenting practices.",
        level="high",
        candidate_values=['prioritizes child safety and authoritative parenting', 'prioritizes child safety but permissive parenting', 'low priority on child safety and uninvolved parenting', 'low priority on child safety and neglectful parenting']
    )]


 85%|████████▌ | 17/20 [01:04<00:11,  3.82s/it]

[    PersonaDimension(
        name="Worldview and Beliefs on Gun Rights and Control",
        description="The individual's broader perspectives, ideologies, and core values regarding the role of firearms in society, gun rights, gun control policies, and the balance between individual rights and public safety.",
        level="high",
        candidate_values=['pro-gun rights', 'pro-gun control', 'moderate on gun issues']
    )]


 90%|█████████ | 18/20 [01:07<00:07,  3.63s/it]

[    PersonaDimension(
        name="Core Personal Values on Gun Ownership",
        description="The individual's fundamental values, beliefs, and principles that shape their stance and attitudes towards gun ownership, use, and related activities.",
        level="high",
        candidate_values=['prioritizes personal safety', 'prioritizes individual freedom', 'prioritizes public safety', 'balanced approach']
    )]


 95%|█████████▌| 19/20 [01:10<00:03,  3.16s/it]

[    PersonaDimension(
        name="Risk Attitudes and Tolerance",
        description="The individual's general tendencies towards risk-taking or risk-aversion, which may influence their views on gun ownership, safety concerns, and resilience in the face of potential threats or dangers.",
        level="high",
        candidate_values=['risk-averse', 'risk-taking']
    )]


100%|██████████| 20/20 [01:13<00:00,  3.68s/it]


[    PersonaDimension(
        name="Sense of Security and Personal Responsibility",
        description="The individual's overall feeling of safety, security, and self-reliance, as well as their beliefs about personal responsibility for protection and well-being.",
        level="high",
        candidate_values=['high sense of security and responsibility', 'low sense of security and responsibility']
    )]


  5%|▌         | 1/20 [00:05<01:37,  5.14s/it]

[    PersonaDimension(
        name="Occupation and Industry",
        description="The participant's current or desired occupation, job responsibilities, and the industry or sector they work in.",
        level="low",
        candidate_values=['technical/IT-related', 'non-technical', 'customer-facing', 'non-customer-facing', 'HR/recruitment', 'managerial', 'entry-level', 'experienced']
    )]


 10%|█         | 2/20 [00:07<01:04,  3.58s/it]

[    PersonaDimension(
        name="Technological Literacy and Familiarity",
        description="The individual's level of understanding, comfort, and experience with various technologies, including emerging advancements across different domains.",
        level="low",
        candidate_values=['high technological literacy', 'moderate technological literacy', 'low technological literacy']
    )]


 15%|█▌        | 3/20 [00:13<01:16,  4.49s/it]

[    PersonaDimension(
        name="Demographic Factors",
        description="Demographic characteristics such as age, gender, education level, socioeconomic status, and location that may influence attitudes and perspectives towards automation, technology adoption, and related topics.",
        level="low",
        candidate_values=['younger', 'older', 'male', 'female', 'higher education', 'lower education', 'high income', 'low income', 'urban', 'rural']
    )]


 20%|██        | 4/20 [00:19<01:25,  5.33s/it]

[    PersonaDimension(
        name="Socioeconomic Status",
        description="The individual's current financial situation, income level, social class, and access to resources and opportunities, which may influence their perspectives on automation, job displacement, healthcare costs, and technology adoption.",
        level="low",
        candidate_values=['high socioeconomic status', 'middle socioeconomic status', 'low socioeconomic status']
    )]


 25%|██▌       | 5/20 [00:23<01:11,  4.75s/it]

[    PersonaDimension(
        name="Experiences with Hiring Processes",
        description="The individual's personal experiences, observations, and familiarity with hiring practices, including the use of automated systems and technology in the job application and hiring process.",
        level="low",
        candidate_values=['positive experiences', 'negative experiences', 'mixed experiences', 'no prior experience']
    )]


 30%|███       | 6/20 [00:26<00:56,  4.01s/it]

[    PersonaDimension(
        name="Occupation and Industry Exposure",
        description="The participant's occupation and industry experience, particularly in fields that may be impacted by emerging technologies such as automation, autonomous vehicles, and drones.",
        level="low",
        candidate_values=['high exposure', 'low exposure']
    )]


 35%|███▌      | 7/20 [00:30<00:51,  3.98s/it]

[    PersonaDimension(
        name="Transportation Needs and Habits",
        description="The individual's reliance on personal transportation, commuting patterns, and mobility requirements that could influence their perception and adoption of autonomous vehicles.",
        level="low",
        candidate_values=['high mobility needs', 'low mobility needs', 'frequent commuter', 'infrequent commuter']
    ),     PersonaDimension(
        name="Personal Safety Concerns",
        description="The level of importance the individual places on their personal safety and security when using autonomous vehicles.",
        level="low",
        candidate_values=['high safety concerns', 'low safety concerns']
    )]


 40%|████      | 8/20 [00:33<00:46,  3.84s/it]

[    PersonaDimension(
        name="Occupation and Industry",
        description="The participant's current or anticipated occupation, profession, job responsibilities, and the industry or sector they work in, which may influence their perception of the likelihood of automation and the impact of technology on their work.",
        level="low",
        candidate_values=['high automation risk', 'low automation risk', 'technology-focused', 'non-technology-focused']
    )]


 45%|████▌     | 9/20 [00:38<00:44,  4.04s/it]

[    PersonaDimension(
        name="Prior Experiences with Autonomous Vehicles",
        description="The individual's previous exposure, interactions, and personal experiences (positive or negative) with autonomous vehicles and related technologies.",
        level="low",
        candidate_values=['positive experiences', 'negative experiences', 'no prior experience']
    ),     PersonaDimension(
        name="Driving Experience and Preferences",
        description="The individual's level of experience, comfort, and personal preferences related to traditional, human-operated vehicles.",
        level="low",
        candidate_values=['extensive driving experience', 'limited driving experience', 'indifferent to driving experience']
    ),     PersonaDimension(
        name="Awareness and Exposure to Autonomous Vehicle Technology",
        description="The individual's level of familiarity, awareness, and exposure to the development and information regarding autonomous vehicle technolog

 50%|█████     | 10/20 [00:40<00:35,  3.58s/it]

[    PersonaDimension(
        name="Personal Experiences with Job Loss and Automation",
        description="The individual's direct experiences with job loss, job changes, or financial hardship due to automation, technological advancements, or the impact of robots on employment opportunities.",
        level="low",
        candidate_values=['experienced job loss/changes due to automation', 'not directly affected by automation']
    )]


 55%|█████▌    | 11/20 [00:45<00:34,  3.83s/it]

[    PersonaDimension(
        name="Household Composition and Family Structure",
        description="The makeup and dynamics of the user's household, including the presence of children, aging parents or family members, and the number of people in the household.",
        level="low",
        candidate_values=['single-person household', 'multi-person household without children', 'household with children', 'household with aging family members']
    )]


 60%|██████    | 12/20 [00:50<00:34,  4.28s/it]

[    PersonaDimension(
        name="Preferences for Human Interaction in Services",
        description="The individual's desire for human interaction and willingness to pay a premium for it in service transactions, retail experiences, and communication.",
        level="low",
        candidate_values=['strong preference for human interaction', 'moderate preference for human interaction', 'preference for automation']
    ),     PersonaDimension(
        name="Consumption and Shopping Habits",
        description="The individual's current behaviors and preferences regarding in-store purchases, online purchases, and usage of delivery services.",
        level="low",
        candidate_values=['frequent in-store shopper', 'frequent online shopper', 'high delivery usage', 'low delivery usage']
    )]


 65%|██████▌   | 13/20 [00:53<00:28,  4.09s/it]

[    PersonaDimension(
        name="Geographic Location and Exposure to Autonomous Systems",
        description="The participant's geographic location and living environment, including factors such as urbanization, population density, infrastructure, crime rates, traffic patterns, and proximity to areas where autonomous vehicles, drones, or related technologies are prevalent.",
        level="low",
        candidate_values=['urban area', 'suburban area', 'rural area', 'high exposure area', 'low exposure area']
    )]


 70%|███████   | 14/20 [00:59<00:26,  4.47s/it]

[    PersonaDimension(
        name="Voice Assistant Usage and Lifestyle Integration",
        description="The individual's habits, routines, and lifestyle factors that influence their need for and usage of voice-controlled digital assistants and smart home devices.",
        level="low",
        candidate_values=['frequent usage and high integration', 'occasional usage and moderate integration', 'rare usage and low integration']
    )]


 75%|███████▌  | 15/20 [01:02<00:20,  4.08s/it]

[    PersonaDimension(
        name="Educational Background",
        description="The individual's level of education, training, and exposure to information about technological advancements, automation, and related topics, which may shape their views on the impact of automation on the workforce and economy.",
        level="low",
        candidate_values=['higher education', 'lower education', 'technical/vocational training']
    ),     PersonaDimension(
        name="Political Engagement",
        description="the individual's level of involvement and interest in political issues related to technology, automation, and the economy",
        level="low",
        candidate_values=['highly engaged', 'disengaged']
    )]


 80%|████████  | 16/20 [01:08<00:19,  4.75s/it]

[    PersonaDimension(
        name="Personal Caregiving Experiences",
        description="The individual's personal experiences with caregiving for elderly or disabled individuals, including family members, friends, or professional caregiving roles.",
        level="low",
        candidate_values=['extensive experience', 'limited experience', 'no experience']
    ),     PersonaDimension(
        name="Access to Healthcare Resources",
        description="The individual's ability to afford and obtain human caregivers or healthcare resources for their loved ones.",
        level="low",
        candidate_values=['ample access', 'limited access']
    ),     PersonaDimension(
        name="Concern for Aging Relatives",
        description="Level of worry or anxiety about the care and well-being of aging family members.",
        level="low",
        candidate_values=['high concern', 'low concern']
    )]


 85%|████████▌ | 17/20 [01:10<00:11,  3.97s/it]

[    PersonaDimension(
        name="Employment Status and Occupation",
        description="The individual's current employment situation, including whether they are employed, unemployed, retired, or seeking employment, as well as their occupation or desired occupation.",
        level="low",
        candidate_values=['employed', 'unemployed', 'retired', 'job-seeking']
    )]


 90%|█████████ | 18/20 [01:13<00:06,  3.46s/it]

[    PersonaDimension(
        name="Age and Generational Cohort",
        description="The individual's age and generational cohort, which may influence their views on technological changes, job automation, and their receptiveness and familiarity with emerging technologies like voice assistants and smart home devices.",
        level="low",
        candidate_values=['younger generation', 'middle-aged', 'older generation']
    )]


 95%|█████████▌| 19/20 [01:18<00:03,  3.90s/it]

[    PersonaDimension(
        name="Drone-Related Personal Experiences",
        description="The individual's personal experiences, encounters, and interactions with drones, including witnessing drone usage in public spaces, at crime scenes, or during events, as well as any positive or negative experiences.",
        level="low",
        candidate_values=['direct positive experience', 'direct negative experience', 'no direct experience']
    ),     PersonaDimension(
        name="Drone-Related Recreational Interests",
        description="The person's hobbies, leisure activities, and recreational interests related to drones, such as owning or operating drones, participating in drone-related events, or engaging in beach activities that may involve drone usage.",
        level="low",
        candidate_values=['active participant', 'occasional participant', 'non-participant']
    )]


100%|██████████| 20/20 [01:27<00:00,  4.40s/it]


[    PersonaDimension(
        name="Technological Awareness and Familiarity",
        description="The individual's level of knowledge, understanding, and exposure to emerging technologies such as automation, robotics, and artificial intelligence, particularly in the context of their potential impact on the job market and caregiving.",
        level="low",
        candidate_values=['high awareness and familiarity', 'moderate awareness and familiarity', 'low awareness and familiarity']
    ),     PersonaDimension(
        name="Loneliness and Social Isolation",
        description="The individual's level of loneliness and social isolation, which may influence their willingness to form connections with robot caregivers.",
        level="low",
        candidate_values=['high loneliness', 'low loneliness']
    )]


  5%|▌         | 1/20 [00:03<01:09,  3.63s/it]

[    PersonaDimension(
        name="Perception of Technological Change",
        description="The individual's beliefs, attitudes, and expectations regarding the pace, impact, and implications of technological advancements, particularly in the context of automation, robotics, and job displacement.",
        level="mid",
        candidate_values=['optimistic', 'pessimistic', 'uncertain']
    )]


 10%|█         | 2/20 [00:06<01:01,  3.41s/it]

[    PersonaDimension(
        name="Technological Awareness and Literacy",
        description="The person's level of understanding, knowledge, and comfort with emerging technologies such as automation, robotics, autonomous vehicles, and their potential impacts on the job market and economy.",
        level="mid",
        candidate_values=['high awareness and literacy', 'low awareness and literacy']
    )]


 15%|█▌        | 3/20 [00:09<00:54,  3.23s/it]

[    PersonaDimension(
        name="Attitudes Towards Automation",
        description="The individual's perceptions, beliefs, and overall sentiment towards the increasing use of robots, automation, and artificial intelligence in various domains, including healthcare, caregiving, employment, and society.",
        level="mid",
        candidate_values=['positive attitude', 'negative attitude', 'neutral attitude']
    )]


 20%|██        | 4/20 [00:16<01:15,  4.70s/it]

[    PersonaDimension(
        name="Attitudes Towards Human Caregiving",
        description="Beliefs and preferences regarding the role of human interaction, emotional support, and personal care in caregiving for the elderly and disabled.",
        level="mid",
        candidate_values=['values human interaction and emotional care', 'values efficiency over human interaction']
    ),     PersonaDimension(
        name="Empathy and Compassion for the Elderly",
        description="The level of empathy, concern, and consideration for the well-being, emotional needs, and independence of older adults.",
        level="mid",
        candidate_values=['high empathy and compassion', 'low empathy and compassion']
    ),     PersonaDimension(
        name="Perceptions of Robot Capabilities in Caregiving",
        description="Beliefs about the capabilities, limitations, and appropriateness of robots in providing care and emotional support for the elderly and disabled.",
        level="mid",
  

 25%|██▌       | 5/20 [00:21<01:11,  4.76s/it]

[    PersonaDimension(
        name="Adaptability to Technological and Workforce Changes",
        description="The individual's willingness and ability to adapt to technological advancements, automation, and changes in the job market and workforce.",
        level="mid",
        candidate_values=['highly adaptable', 'moderately adaptable', 'resistant to change']
    ),     PersonaDimension(
        name="Openness to Technological Innovation",
        description="The individual's overall attitude and willingness to embrace and adopt new technologies, even in sensitive domains like healthcare, transportation, and caregiving.",
        level="mid",
        candidate_values=['open to innovation', 'resistant to innovation']
    )]


 30%|███       | 6/20 [00:24<00:56,  4.06s/it]

[    PersonaDimension(
        name="Trust in Technology and Automation",
        description="The individual's level of trust, confidence, and comfort in relying on technology, automation, and artificial intelligence systems across various domains such as healthcare, transportation, hiring, and caregiving.",
        level="mid",
        candidate_values=['high trust', 'moderate trust', 'low trust']
    )]


 35%|███▌      | 7/20 [00:27<00:46,  3.58s/it]

[    PersonaDimension(
        name="Risk Perception of Autonomous Vehicles",
        description="The individual's assessment of the potential risks, safety concerns, and drawbacks associated with autonomous vehicles, including perceptions of safety, reliability, and the likelihood of accidents.",
        level="mid",
        candidate_values=['high risk perception', 'low risk perception']
    )]


 40%|████      | 8/20 [00:30<00:42,  3.53s/it]

[    PersonaDimension(
        name="Job Security Concerns",
        description="The individual's level of worry, anxiety, or concern about the potential impact of automation, robots, computers, and technological advancements on their job security, employment prospects, and the displacement of human jobs.",
        level="mid",
        candidate_values=['high job security concerns', 'low job security concerns']
    )]


 45%|████▌     | 9/20 [00:32<00:34,  3.16s/it]

[    PersonaDimension(
        name="Work-Life Perspective",
        description="The individual's attitudes, beliefs, and priorities regarding work, employment, and the balance between professional and personal life.",
        level="mid",
        candidate_values=['prioritizes work', 'prioritizes personal life', 'balanced work-life']
    )]


 50%|█████     | 10/20 [00:35<00:31,  3.14s/it]

[    PersonaDimension(
        name="Attitudes Towards Automated Hiring",
        description="The individual's perceptions, beliefs, and attitudes towards the use of computer programs and automation in hiring processes, including concerns about potential biases, fairness, efficiency, and the role of human involvement.",
        level="mid",
        candidate_values=['positive attitude', 'negative attitude', 'neutral attitude']
    )]


 55%|█████▌    | 11/20 [00:39<00:29,  3.32s/it]

[    PersonaDimension(
        name="Technological Proficiency and Adoption",
        description="The individual's level of comfort, familiarity, and willingness to adopt and use new and emerging technologies in various aspects of life, such as work, daily activities, and transportation.",
        level="mid",
        candidate_values=['high proficiency and early adopter', 'moderate proficiency and late adopter', 'low proficiency and non-adopter']
    )]


 60%|██████    | 12/20 [00:42<00:24,  3.06s/it]

[    PersonaDimension(
        name="Civic Engagement",
        description="The individual's level of involvement, interest, and participation in community affairs, public policy issues, decision-making processes, and social issues related to technological advancements and urban planning.",
        level="mid",
        candidate_values=['highly engaged', 'moderately engaged', 'disengaged']
    )]


 65%|██████▌   | 13/20 [00:45<00:22,  3.17s/it]

[    PersonaDimension(
        name="Privacy and Security Concerns",
        description="The individual's level of concern about potential privacy implications, data security risks, and perceived risks associated with the use of emerging technologies such as drones, voice assistants, robot caregivers, and automated systems.",
        level="mid",
        candidate_values=['high concerns', 'low concerns']
    )]


 70%|███████   | 14/20 [00:48<00:18,  3.15s/it]

[    PersonaDimension(
        name="Environmental Consciousness",
        description="The individual's level of concern and awareness regarding the environmental impact of various technologies, products, and activities, including transportation, automation, robotics, drones, and personal consumption patterns.",
        level="mid",
        candidate_values=['high environmental consciousness', 'low environmental consciousness']
    )]


 75%|███████▌  | 15/20 [00:52<00:16,  3.31s/it]

[    PersonaDimension(
        name="Efficiency and Convenience Orientation",
        description="The individual's preference for efficient, streamlined, and convenient processes, products, and services, potentially prioritizing these factors over others.",
        level="mid",
        candidate_values=['high efficiency/convenience orientation', 'low efficiency/convenience orientation']
    )]


 80%|████████  | 16/20 [00:55<00:13,  3.39s/it]

[    PersonaDimension(
        name="Attitudes Towards Government Regulation",
        description="The individual's views on the appropriate level of government regulation and intervention in emerging technologies, automation, and the economy.",
        level="mid",
        candidate_values=['favors more regulation', 'favors less regulation', 'conditional support']
    ),     PersonaDimension(
        name="Trust in Institutions and Authorities",
        description="The degree of trust and confidence in the ability of government, regulatory bodies, corporations, and other institutions to effectively manage and regulate emerging technologies and their impacts.",
        level="mid",
        candidate_values=['high trust', 'low trust']
    )]


 85%|████████▌ | 17/20 [00:59<00:10,  3.44s/it]

[    PersonaDimension(
        name="Productivity and Multitasking",
        description="The person's focus on productivity, efficiency, and multitasking abilities, as well as the impact of technology on their workload, cognitive load, and work-life balance.",
        level="mid",
        candidate_values=['high productivity focus', 'low productivity focus', 'improved work-life balance', 'worsened work-life balance', 'high multitasking', 'low multitasking']
    )]


 90%|█████████ | 18/20 [01:05<00:08,  4.26s/it]

[    PersonaDimension(
        name="Perceived Usefulness and Ease of Use",
        description="The individual's perception of the potential benefits, utility, and ease of use associated with voice assistants and autonomous vehicles.",
        level="mid",
        candidate_values=['high perceived usefulness and ease of use', 'low perceived usefulness and ease of use']
    ),     PersonaDimension(
        name="Accessibility Needs and Perceived Accuracy",
        description="The user's physical or cognitive abilities that may influence their preference for voice assistants, as well as their perception of how accurately these assistants respond to commands.",
        level="mid",
        candidate_values=['high accessibility needs and perceived accuracy', 'low accessibility needs and perceived accuracy']
    )]


 95%|█████████▌| 19/20 [01:08<00:03,  3.81s/it]

[    PersonaDimension(
        name="Economic and Job Outlook",
        description="The individual's overall perspective and beliefs about future economic trends, the impact of automation on the job market, potential job losses or creation of new opportunities, income inequality, and societal disruption.",
        level="mid",
        candidate_values=['optimistic outlook', 'pessimistic outlook']
    )]


100%|██████████| 20/20 [01:12<00:00,  3.63s/it]


[    PersonaDimension(
        name="Financial and Career Stability",
        description="The person's financial situation, career stability, risk tolerance, and attitudes towards self-reliance in managing their finances and career.",
        level="mid",
        candidate_values=['stable finances and career', 'unstable finances or career', 'risk-averse', 'risk-tolerant']
    ),     PersonaDimension(
        name="Emotional and Time Availability",
        description="The person's emotional preparedness, resilience, and availability of time to handle the challenges of caregiving.",
        level="mid",
        candidate_values=['emotionally prepared with time available', 'emotionally unprepared or lacks time available']
    ),     PersonaDimension(
        name="Locus of Control",
        description="The extent to which individuals believe they have control over the events and outcomes in their lives.",
        level="mid",
        candidate_values=['internal locus of control', 'exte

  5%|▌         | 1/20 [00:02<00:44,  2.33s/it]

[    PersonaDimension(
        name="Prioritization of Efficiency",
        description="The individual's emphasis on efficiency, productivity, cost-savings, and convenience over other factors such as fairness, worker welfare, and job security.",
        level="high",
        candidate_values=['high priority on efficiency', 'low priority on efficiency']
    )]


 10%|█         | 2/20 [00:08<01:17,  4.31s/it]

[    PersonaDimension(
        name="Attitudes Towards Automation and Its Societal Impact",
        description="The person's overall perspective, beliefs, and sentiments regarding the role and impact of automation, robotics, and digital technologies on society, the workforce, and the economy.",
        level="high",
        candidate_values=['positive attitude', 'negative attitude', 'neutral attitude']
    ),     PersonaDimension(
        name="Trust in Institutions and Technology",
        description="The level of trust and confidence in the reliability, safety, and responsible management of automated systems and technologies by institutions such as corporations, technology companies, and the government.",
        level="high",
        candidate_values=['high trust', 'low trust']
    ),     PersonaDimension(
        name="Beliefs About the Role of Humans in Automation",
        description="The person's views on the appropriate balance and relationship between human labor and automa

 15%|█▌        | 3/20 [00:14<01:27,  5.15s/it]

[    PersonaDimension(
        name="Core Personal Values",
        description="The individual's fundamental values, beliefs, and priorities that shape their attitudes and preferences across various domains, such as caregiving, hiring, technology adoption, and human interaction.",
        level="high",
        candidate_values=['values independence', 'values human connection', 'values fairness', 'values efficiency', 'values family care']
    )]


 20%|██        | 4/20 [00:16<01:05,  4.10s/it]

[    PersonaDimension(
        name="Attitudes Towards Technological Progress",
        description="The person's overall beliefs, perspectives, and attitudes towards the role, impact, and implications of technological advancements in society, the economy, and various aspects of life.",
        level="high",
        candidate_values=['technology-optimistic', 'technology-skeptical', 'neutral']
    )]


 25%|██▌       | 5/20 [00:19<00:56,  3.79s/it]

[    PersonaDimension(
        name="Openness to Technological Change and Innovation",
        description="The individual's general attitude, willingness, and adaptability towards embracing new technologies, innovations, and changes in various domains such as work, transportation, healthcare, and everyday life.",
        level="high",
        candidate_values=['highly open and adaptable', 'moderately open and adaptable', 'resistant to change and innovation']
    )]


 30%|███       | 6/20 [00:23<00:50,  3.62s/it]

[    PersonaDimension(
        name="Worldview on Technology and Society",
        description="The individual's broader perspectives, beliefs, and worldviews regarding the role of technology in society, the economy, and various aspects of human life, including its potential benefits, drawbacks, and impact on the future.",
        level="high",
        candidate_values=['technology-optimistic', 'technology-skeptical', 'balanced/pragmatic']
    )]


 35%|███▌      | 7/20 [00:30<01:01,  4.70s/it]

[    PersonaDimension(
        name="Societal and Moral Values",
        description="The individual's core values, beliefs, and ethical principles regarding the role of technology, human labor, social welfare, fairness, equity, and responsibility in society.",
        level="high",
        candidate_values=['prioritizes technological progress', 'prioritizes human labor and social welfare', 'prioritizes individual achievement and responsibility', 'balanced views on technology and social welfare']
    ),     PersonaDimension(
        name="Attitudes Towards Aging and Elderly Care",
        description="The person's beliefs, values, and sense of duty regarding the care, independence, and well-being of the elderly population.",
        level="high",
        candidate_values=['prioritizes elderly care and independence', 'indifferent to elderly care', 'prioritizes dependence on caregivers']
    ),     PersonaDimension(
        name="Beliefs on Equity and Fairness in Employment and Healthcar

 40%|████      | 8/20 [00:35<00:57,  4.81s/it]

[    PersonaDimension(
        name="Core Personal Values",
        description="The individual's fundamental values, beliefs, and priorities that shape their attitudes towards work, technology, and life in general.",
        level="high",
        candidate_values=['values job security', 'values career growth', 'values technological progress', 'values personal fulfillment', 'values work-life balance', 'values human interaction', 'values privacy', 'values efficiency and productivity', 'values tradition and stability']
    )]


 45%|████▌     | 9/20 [00:38<00:49,  4.49s/it]

[    PersonaDimension(
        name="Beliefs About the Future of Work and Technology",
        description="The person's overall perspective and outlook on the long-term impact of automation, robotics, and technological advancements on the job market, employment landscape, and the future of work.",
        level="high",
        candidate_values=['optimistic', 'pessimistic', 'neutral/uncertain']
    )]


 50%|█████     | 10/20 [00:42<00:41,  4.14s/it]

[    PersonaDimension(
        name="Ethical Considerations and Moral Values",
        description="The individual's ethical principles, beliefs, and level of concern regarding the moral and ethical implications of using technology such as robots, AI, and automation in sensitive domains like caregiving, healthcare, hiring, and decision-making.",
        level="high",
        candidate_values=['high ethical concerns', 'moderate ethical concerns', 'low ethical concerns']
    )]


 55%|█████▌    | 11/20 [00:44<00:31,  3.54s/it]

[    PersonaDimension(
        name="Worldview and Ideological Beliefs",
        description="The individual's broader philosophical, political, and ethical perspectives that shape their views on the role of technology, government regulation, and the balance between individual freedoms, economic efficiency, and social welfare.",
        level="high",
        candidate_values=['conservative worldview', 'liberal worldview', 'moderate worldview']
    )]


 60%|██████    | 12/20 [00:50<00:33,  4.17s/it]

[    PersonaDimension(
        name="Work-Life Balance Priorities",
        description="The individual's ability to maintain a healthy balance between their work and personal life, as well as their emphasis on prioritizing work-life balance or work demands.",
        level="high",
        candidate_values=['prioritizes work-life balance', 'prioritizes work over personal life']
    ),     PersonaDimension(
        name="Job Satisfaction and Work Personality",
        description="The individual's overall level of satisfaction with their job and work environment, as well as their personality traits related to work orientation, ambition, and need for achievement.",
        level="high",
        candidate_values=['high job satisfaction and work-oriented personality', 'low job satisfaction and balanced work-life personality']
    ),     PersonaDimension(
        name="Adaptability and Resilience",
        description="The individual's ability to adapt to changing circumstances and handle d

 65%|██████▌   | 13/20 [00:52<00:25,  3.70s/it]

[    PersonaDimension(
        name="Personal Values and Priorities",
        description="The individual's core values, principles, and priorities that shape their views on the balance between individual freedoms, personal privacy, public safety, environmental sustainability, and societal well-being.",
        level="high",
        candidate_values=['prioritizes individual freedoms', 'prioritizes personal privacy', 'prioritizes public safety', 'prioritizes environmental sustainability', 'prioritizes societal well-being']
    )]


 70%|███████   | 14/20 [00:55<00:20,  3.33s/it]

[    PersonaDimension(
        name="Personality Traits",
        description="Individual personality characteristics that may influence attitudes towards technology adoption, automation, and hiring practices.",
        level="high",
        candidate_values=['open-minded', 'risk-averse', 'adaptable', 'analytical', 'intuitive']
    )]


 75%|███████▌  | 15/20 [00:57<00:15,  3.17s/it]

[    PersonaDimension(
        name="Core Personal Values",
        description="The individual's fundamental values, beliefs, and priorities that shape their attitudes towards autonomous vehicles, such as safety, convenience, environmental impact, personal control, and technological progressiveness.",
        level="high",
        candidate_values=['prioritizes safety', 'prioritizes convenience', 'prioritizes environmental sustainability', 'prioritizes personal control', 'progressive', 'traditional']
    )]


 80%|████████  | 16/20 [01:01<00:12,  3.23s/it]

[    PersonaDimension(
        name="Personality and Cognitive Traits",
        description="The individual's personality characteristics, cognitive processing styles, and openness to new experiences that may influence their preference for and interaction with voice assistants and other voice-controlled technologies.",
        level="high",
        candidate_values=['extroverted', 'introverted', 'open to new experiences', 'closed to new experiences', 'verbal cognitive style', 'visual cognitive style']
    )]


 85%|████████▌ | 17/20 [01:03<00:08,  2.93s/it]

[    PersonaDimension(
        name="Cultural and Social Values",
        description="The individual's broader cultural, societal, and personal values, beliefs, and norms that influence their perceptions and expectations regarding the appropriate role of technology in caregiving, parenting, and healthcare.",
        level="high",
        candidate_values=['traditional values', 'progressive values']
    )]


 90%|█████████ | 18/20 [01:09<00:07,  3.80s/it]

[    PersonaDimension(
        name="Attitude Towards Technology",
        description="The individual's overall perspective, beliefs, and disposition towards technology, including its role in the workplace, decision-making processes, and society as a whole.",
        level="high",
        candidate_values=['technology-embracing', 'technology-skeptical', 'technology-neutral']
    )]


 95%|█████████▌| 19/20 [01:13<00:03,  3.74s/it]

[    PersonaDimension(
        name="Locus of Control and Autonomy",
        description="The extent to which individuals believe they have control over the outcomes in their lives, and their desire for independence, self-reliance, and personal agency.",
        level="high",
        candidate_values=['internal locus of control and high autonomy', 'external locus of control and low autonomy']
    ),     PersonaDimension(
        name="Sense of Purpose and Meaning",
        description="The importance placed on having a sense of purpose, fulfillment, and meaningful activities in life.",
        level="high",
        candidate_values=['high need for purpose', 'low need for purpose']
    )]


100%|██████████| 20/20 [01:15<00:00,  3.77s/it]


[    PersonaDimension(
        name="Risk Tolerance",
        description="The individual's general attitude and willingness to accept potential risks and trade-offs associated with new technologies or situations in life.",
        level="high",
        candidate_values=['risk-averse', 'risk-tolerant']
    )]


  5%|▌         | 1/20 [00:05<01:37,  5.15s/it]

[    PersonaDimension(
        name="Life Stage",
        description="The individual's current life stage, which may impact their priorities, social connections, community involvement, and availability for neighborhood interactions.",
        level="low",
        candidate_values=['student', 'young adult', 'middle-aged', 'retired']
    ),     PersonaDimension(
        name="Financial Preparedness",
        description="The individual's financial situation and level of preparedness for retirement, including their ability to save and plan for the future.",
        level="low",
        candidate_values=['limited financial resources', 'adequate financial resources', 'unprepared for retirement', 'prepared for retirement']
    )]


 10%|█         | 2/20 [00:08<01:14,  4.13s/it]

[    PersonaDimension(
        name="Experiences with Gender-Based Discrimination and Societal Pressures",
        description="The individual's personal experiences with gender-based discrimination, societal pressures, and expectations related to gender norms, roles, and expression.",
        level="low",
        candidate_values=['experienced discrimination/pressure', 'not experienced discrimination/pressure']
    )]


 15%|█▌        | 3/20 [00:12<01:07,  3.99s/it]

[    PersonaDimension(
        name="Political Orientation",
        description="The individual's political affiliation, voting habits, and level of involvement in political processes and movements.",
        level="low",
        candidate_values=['Strong Republican', 'Moderate Republican', 'Independent/Non-Partisan', 'Moderate Democrat', 'Strong Democrat']
    )]


 20%|██        | 4/20 [00:14<00:49,  3.11s/it]

[    PersonaDimension(
        name="Self-Perception",
        description="The individual's subjective evaluation of their own intelligence, cognitive abilities, and physical attractiveness.",
        level="low",
        candidate_values=['high self-perception', 'moderate self-perception', 'low self-perception']
    )]


 25%|██▌       | 5/20 [00:22<01:15,  5.05s/it]

[    PersonaDimension(
        name="Gender Identity and Expression",
        description="The individual's internal sense and self-perception of being male, female, non-binary, transgender, or another gender identity, as well as how they outwardly express their gender through behavior, appearance, and interests.",
        level="low",
        candidate_values=['cisgender male', 'cisgender female', 'transgender/non-binary', 'masculine expression', 'feminine expression', 'androgynous expression']
    )]


 30%|███       | 6/20 [00:25<00:59,  4.24s/it]

[    PersonaDimension(
        name="Socioeconomic Status",
        description="The individual's social and economic standing, including factors such as income, education, and access to resources and opportunities within their community.",
        level="low",
        candidate_values=['high', 'middle', 'low']
    )]


 35%|███▌      | 7/20 [00:28<00:51,  3.96s/it]

[    PersonaDimension(
        name="Smoking and Lifestyle Habits",
        description="The participant's overall lifestyle, daily routines, and habits that may influence their smoking behavior, including environmental factors, frequency of smoking, and other health-related behaviors.",
        level="low",
        candidate_values=['high-risk habits', 'moderate-risk habits', 'low-risk habits']
    )]


 40%|████      | 8/20 [00:32<00:46,  3.91s/it]

[    PersonaDimension(
        name="Community and Civic Engagement",
        description="The individual's level of involvement, participation, and interest in local community activities, organizations, volunteering, and social or political issues.",
        level="low",
        candidate_values=['highly engaged', 'moderately engaged', 'minimally engaged']
    )]


 45%|████▌     | 9/20 [00:35<00:40,  3.65s/it]

[    PersonaDimension(
        name="Physical and Cognitive Capabilities",
        description="The individual's physical fitness, strength, health conditions, cognitive abilities, and access to healthcare services.",
        level="low",
        candidate_values=['high capabilities', 'moderate capabilities', 'low capabilities']
    )]


 50%|█████     | 10/20 [00:38<00:33,  3.30s/it]

[    PersonaDimension(
        name="Interpersonal and Technological Competence",
        description="The individual's ability to effectively communicate, work with others, and utilize technology to maintain relationships, even when faced with differences or distance.",
        level="low",
        candidate_values=['high competence', 'moderate competence', 'low competence']
    )]


 55%|█████▌    | 11/20 [00:42<00:31,  3.53s/it]

[    PersonaDimension(
        name="Occupation and Industry",
        description="The participant's job or career type, industry sector, and how it may be impacted by factors such as automation, outsourcing, immigration, gender dynamics, foreign trade, and product sales.",
        level="low",
        candidate_values=['blue-collar', 'non-blue-collar', 'export-oriented', 'non-export-oriented', 'male-dominated', 'female-dominated', 'gender-balanced', 'highly automated', 'minimally automated', 'outsourceable', 'non-outsourceable', 'high-skilled', 'low-skilled']
    )]


 60%|██████    | 12/20 [00:45<00:26,  3.34s/it]

[    PersonaDimension(
        name="Educational Background",
        description="The level and type of education, including exposure to research and ideas about gender, feminism, and gender equality, which may influence perspectives on gender roles, parenting, and leadership.",
        level="low",
        candidate_values=['higher education', 'lower education']
    )]


 65%|██████▌   | 13/20 [00:48<00:22,  3.27s/it]

[    PersonaDimension(
        name="Formative Life Experiences",
        description="The individual's personal experiences, major life events, relationships, and circumstances that have shaped their values, beliefs, worldviews, and perspectives.",
        level="low",
        candidate_values=['positive formative experiences', 'negative formative experiences', 'traditional experiences', 'non-traditional experiences']
    )]


 70%|███████   | 14/20 [00:51<00:19,  3.33s/it]

[    PersonaDimension(
        name="Experiences and Perceptions of Masculinity",
        description="Personal experiences with masculinity norms during upbringing, evaluations of one's own masculinity by others, and perceptions of societal attitudes towards traditional masculine traits.",
        level="low",
        candidate_values=['Positive personal and societal view of masculinity', 'Negative personal and societal view of masculinity', 'Mixed personal and societal view of masculinity']
    )]


 75%|███████▌  | 15/20 [00:53<00:15,  3.03s/it]

[    PersonaDimension(
        name="Exposure to Transgender Individuals and LGBTQ+ Community",
        description="The level of personal experience, interaction, and connections the individual has with transgender people and the broader LGBTQ+ community.",
        level="low",
        candidate_values=['high exposure and close connections', 'limited exposure and connections']
    )]


 80%|████████  | 16/20 [00:56<00:11,  2.89s/it]

[    PersonaDimension(
        name="Geographic Location and Proximity",
        description="The participant's geographic location, including the type of area (urban, suburban, rural), proximity to friends and family, and location of their job or career.",
        level="low",
        candidate_values=['urban', 'suburban', 'rural', 'domestic location', 'international location', 'close to friends/family', 'far from friends/family']
    )]


 85%|████████▌ | 17/20 [01:01<00:10,  3.57s/it]

[    PersonaDimension(
        name="Family Structure and Household Composition",
        description="The participant's family situation, including relationship status, presence of children, multi-generational living arrangements, and the number and type of people in the household.",
        level="low",
        candidate_values=['single', 'in a relationship', 'with children', 'multi-generational household', 'single-person household', 'multi-person household']
    ),     PersonaDimension(
        name="Parenting Approaches and Family Dynamics",
        description="The participant's preferences, practices, and experiences related to raising and socializing children, as well as the dynamics within their family structure.",
        level="low",
        candidate_values=['emphasizes emotional expression', 'discourages emotional expression', 'single-parent', 'two-parent', 'extended family']
    )]


 90%|█████████ | 18/20 [01:03<00:06,  3.14s/it]

[    PersonaDimension(
        name="Lifestyle and Social Behavior",
        description="The individual's daily routines, habits, and typical behavior patterns in social situations, which may impact their interactions with others.",
        level="low",
        candidate_values=['structured routine', 'unstructured routine', 'dominant', 'submissive']
    )]


 95%|█████████▌| 19/20 [01:06<00:02,  2.92s/it]

[    PersonaDimension(
        name="Demographic Factors",
        description="Personal characteristics such as age, gender, race, socioeconomic status, and geographic location that may influence an individual's views, experiences, and behaviors.",
        level="low",
        candidate_values=['young', 'old', 'male', 'female', 'non-binary', 'racial minority', 'white', 'low income', 'high income', 'urban', 'rural']
    )]


100%|██████████| 20/20 [01:07<00:00,  3.40s/it]


[    PersonaDimension(
        name="Media Consumption",
        description="The types of media and entertainment that shape perceptions of gender and masculinity.",
        level="low",
        candidate_values=['traditional media consumption', 'progressive media consumption']
    )]


  5%|▌         | 1/20 [00:04<01:18,  4.16s/it]

[    PersonaDimension(
        name="Community Connectedness",
        description="The degree to which an individual feels a sense of belonging, trust, and social connection within their local neighborhood or community.",
        level="mid",
        candidate_values=['high community connectedness', 'moderate community connectedness', 'low community connectedness']
    )]


 10%|█         | 2/20 [00:16<02:38,  8.81s/it]

[    PersonaDimension(
        name="Attitudes Towards Gender Equality and Social Change",
        description="The individual's beliefs, perceptions, and attitudes towards gender equality, feminism, LGBTQ+ rights, racial equality, and the pace and direction of societal changes related to these issues.",
        level="mid",
        candidate_values=['supportive of equality and progress', 'unsupportive of equality and progress', 'ambivalent or uncertain']
    ),     PersonaDimension(
        name="Parenting Beliefs and Approaches",
        description="The individual's beliefs, attitudes, and approaches towards raising and encouraging children, particularly regarding gender roles, leadership, assertiveness, and conformity.",
        level="mid",
        candidate_values=['emphasizes gender equality and empowerment', 'emphasizes traditional gender roles', 'balanced approach']
    ),     PersonaDimension(
        name="Gender Beliefs and Perceptions",
        description="The individual'

 15%|█▌        | 3/20 [00:20<01:57,  6.92s/it]

[    PersonaDimension(
        name="Civic and Community Engagement",
        description="The individual's level of involvement, participation, and interest in local community organizations, political processes, social movements, and civic activities aimed at contributing to and improving their local community.",
        level="mid",
        candidate_values=['highly engaged', 'moderately engaged', 'disengaged']
    )]


 20%|██        | 4/20 [00:24<01:29,  5.61s/it]

[    PersonaDimension(
        name="Attitudes Towards Gender Norms and Roles",
        description="Beliefs, attitudes, and perceptions about societal expectations, norms, and roles related to gender, including traditional and progressive views on masculinity, femininity, and appropriate behaviors for men, women, boys, and girls.",
        level="mid",
        candidate_values=['traditional gender norms', 'progressive gender norms', 'egalitarian gender roles']
    )]


 25%|██▌       | 5/20 [00:27<01:07,  4.52s/it]

[    PersonaDimension(
        name="Adaptability and Tolerance for Ambiguity",
        description="The individual's ability and willingness to adapt to changes, embrace uncertainty, and adjust to ambiguous or unfamiliar situations in various contexts, such as the workforce, job market, and workplace dynamics.",
        level="mid",
        candidate_values=['highly adaptable', 'moderately adaptable', 'resistant to change']
    )]


# Cleaning

In [2]:
def clean_summarized_personas(prompt_name, survey, level, summarizing_dir, output_dir, num_clusters, print_result):
    os.makedirs(output_dir, exist_ok=True)

    # Get data
    summarized_persona_filename = f"{summarizing_dir}/summarized_{level}_level_personas_{survey}.json"
    with open(summarized_persona_filename, 'r') as f:
        data = json.load(f)
    with open(f'prompts/{prompt_name}.txt') as f:
        prompt_template = f.read()

    # summarize
    prompt = prompt_template.format(persona_dimensions='[\n' + ',\n'.join(repr(dim) for dim in data) + '\n]')
    response = get_llm_response(prompt, prefill='[', max_tokens=4096)
    response = '[' + response

    # validate
    try:
        eval(response)
        cleaned_summarized_personas_filename = f"{output_dir}/cleaned_{level}_level_personas_{survey}.json"
        with open(cleaned_summarized_personas_filename, 'w') as f:
            json.dump(response, f, indent=4)
        return response
    except:
        print(f'Cleaned result is not valid. Survey: {survey}, Level: {level}. Response:')
        # print(response)
        return None

In [None]:
for survey in surveys:
    for level in tqdm(['low', 'mid', 'high']):
        cleaned_personas = clean_summarized_personas(prompt_name="clean_summarized_personas",
                                                      survey=survey,
                                                      level=level,
                                                      summarizing_dir='sm_local/outputs/summarizing',
                                                      output_dir='sm_local/outputs/cleaned',
                                                      num_clusters=20,
                                                      print_result=True)
        

In [5]:
for survey in surveys:
    for level in tqdm(['low', 'mid', 'high']):
        with open(f'sm_local/outputs/cleaned/cleaned_{level}_level_personas_{survey}.json') as f:
            json_string = json.load(f)
        response = eval(json_string)

        csv_res = []
        for entry in response:
            csv_res.append(asdict(entry))
        df = pd.DataFrame(csv_res)
        df.to_csv(f'sm_local/outputs/cleaned/cleaned_{level}_level_personas_{survey}.csv')

100%|██████████| 3/3 [00:00<00:00, 518.67it/s]
100%|██████████| 3/3 [00:00<00:00, 561.61it/s]
100%|██████████| 3/3 [00:00<00:00, 590.39it/s]
