<a href="https://colab.research.google.com/github/josh-ashkinaze/plurals/blob/main/Notebook_version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install packages and set up the OpenAI client

In [2]:
!pip install openai
import numpy as np
from openai import OpenAI
import random
import pandas as pd
from datetime import datetime
from google.colab import userdata
# Read the OpenAI API key from Colab's `userdata` store, where it is looked up under the name 'openai_key'.
secret_key = userdata.get('openai_key')
# Module-level OpenAI client; `Agent._get_response` sends its chat-completion requests through this global.
client = OpenAI(api_key=secret_key)



# Functions and classes

In [4]:

def print_values(mapping):
    """Print every variable of a persona mapping together with its coded values.

    NOTE: `print_values` is defined a second time further down in this cell; at run
    time that later definition replaces this one.

    Args:
        mapping (dict): variable name -> {"name": str, "values": dict}. ``values`` is
            either a flat ``code -> label`` dict or a dict of such dicts (as used for
            "race"), in which case each sub-dict is printed under its own heading and
            empty labels are skipped.

    Returns:
        None. Output goes to stdout; a blank line follows each variable.
    """
    def _code_order(item):
        # Numeric codes (including negative ones such as "-7") sort numerically and
        # come first; anything else sorts alphabetically afterwards. Returning tuples
        # of a fixed shape keeps int and str keys from being compared with each
        # other, which would raise TypeError.
        code = item[0]
        try:
            return (0, int(code), "")
        except (TypeError, ValueError):
            return (1, 0, str(code))

    for key, spec in mapping.items():
        vals = spec['values']
        if isinstance(vals, dict) and all(isinstance(v, dict) for v in vals.values()):
            for sub_key, sub_vals in vals.items():
                print(f"{spec['name']} ({sub_key}):")
                for val_key, val in sorted(sub_vals.items(), key=_code_order):
                    if val:  # Only print if there's a value to avoid empty strings
                        print(f"  {val_key}: {val}")
        else:
            print(spec['name'] + ":")
            for val_key, val in sorted(vals.items(), key=_code_order):
                print(f"  {key}=={val_key}: {val}")
        print()


# Mapping from ANES column names to the pieces needed to describe a respondent:
#   "clean_var": short label (used as the dropdown description in the UI cells),
#   "name":      sentence stem that `Agent.row2persona` prepends to the decoded value,
#   "values":    code -> label, keyed by the *string form of the integer code*
#                because lookups are done with `str(row[key])`.
# "race" is special: its "values" holds one sub-dict per indicator column.
# The state codes are therefore written without zero padding ("6", not "06");
# a padded key can never equal `str(6)` and is not a valid literal in a query string.
anes_mapping = {
    "birthyr_dropdown": {
        "clean_var": "Birth Year",
        "name": "Your age is",
        "values": {
            "integer": ""
        }
    },
    "educ": {
        "clean_var": "Education",
        "name": "Your education is",
        "values": {
            "1": "No high school",
            "2": "High school graduate",
            "3": "Some college",
            "4": "College graduate"
        }
    },
    "gender": {
        "clean_var": "Gender",
        "name": "Your gender is",
        "values": {
            "1": "Male",
            "2": "Female"
        }
    },
    "race": {
        "clean_var": "Race",
        "name": "Your race is",
        "values": {
            "eth": {
                "1": "Hispanic",
                "2": ""
            },
            "rwh": {
                "1": "White",
                "2": ""
            },
            "rbl": {
                "1": "Black",
                "2": ""
            },
            "rain": {
                "1": "American Indian or Alaska Native",
                "2": ""
            },
            "ras": {
                "1": "Asian",
                "2": ""
            },
            "rpi": {
                "1": "Native Hawaiian or Pacific Islander",
                "2": ""
            },
            "roth": {
                "1": "Other",
                "2": ""
            }
        }
    },
    "pid3": {
        "clean_var": "Political Party",
        "name": "Politically, you identify as a(n)",
        "values": {
            "-7": "No Answer",
            "1": "Democrat",
            "2": "Republican",
            "3": "Independent",
            "4": "Other",
            "5": "Not sure"
        }
    },
    "ideo5": {
        "clean_var": "Political Ideology",
        "name": "Your ideology is",
        "values": {
            "-7": "No Answer",
            "1": "Very liberal",
            "2": "Liberal",
            "3": "Moderate",
            "4": "Conservative",
            "5": "Very conservative",
            "6": "Not sure"
        }
    },
    "child18": {
        "clean_var": "Children",
        "name": "Regarding children, you",
        "values": {
            "1": "Have children living at home",
            "2": "Do not have children living at home"
        }
    },
    "employ": {
        "clean_var": "Employment",
        "name": "Your employment status is",
        "values": {
            "-7": "No Answer",
            "-1": "Inapplicable, legitimate skip",
            "1": "Full-time",
            "2": "Part-time",
            "3": "Temporarily laid off",
            "4": "Unemployed",
            "5": "Retired",
            "6": "Permanently disabled",
            "7": "Homemaker"
        }
    },
    "urbanicity2": {
        "clean_var": "Living area",
        "name": "You live in a",
        "values": {
            "1": "Big city",
            "2": "Smaller city",
            "3": "Suburban area",
            "4": "Small town",
            "5": "Rural area"
        }
    },
    "inputstate": {
        "clean_var": "State",
        "name": "You live in the state of",
        "values": {
            "10": "Delaware",
            "11": "District of Columbia",
            "12": "Florida",
            "13": "Georgia",
            "15": "Hawaii",
            "16": "Idaho",
            "17": "Illinois",
            "18": "Indiana",
            "19": "Iowa",
            "20": "Kansas",
            "21": "Kentucky",
            "22": "Louisiana",
            "23": "Maine",
            "24": "Maryland",
            "25": "Massachusetts",
            "26": "Michigan",
            "27": "Minnesota",
            "28": "Mississippi",
            "29": "Missouri",
            "30": "Montana",
            "31": "Nebraska",
            "32": "Nevada",
            "33": "New Hampshire",
            "34": "New Jersey",
            "35": "New Mexico",
            "36": "New York",
            "37": "North Carolina",
            "38": "North Dakota",
            "39": "Ohio",
            "40": "Oklahoma",
            "41": "Oregon",
            "42": "Pennsylvania",
            "44": "Rhode Island",
            "45": "South Carolina",
            "46": "South Dakota",
            "47": "Tennessee",
            "48": "Texas",
            "49": "Utah",
            "50": "Vermont",
            "51": "Virginia",
            "53": "Washington",
            "54": "West Virginia",
            "55": "Wisconsin",
            "56": "Wyoming",
            "1": "Alabama",
            "2": "Alaska",
            "4": "Arizona",
            "5": "Arkansas",
            "6": "California",
            "8": "Colorado",
            "9": "Connecticut",
            "72": "Puerto Rico",
            "66": "Guam",
            "78": "Virgin Islands",
            "60": "American Samoa"
        }
    }
}

def print_values(mapping):
    """Pretty-print each variable in a persona mapping with its codes and labels.

    This is the definition that is in effect after the cell runs (a function with the
    same name is also defined earlier in the cell and is overwritten by this one).

    Args:
        mapping (dict): variable name -> {"name": str, "values": dict}. When every
            entry of ``values`` is itself a dict (e.g. "race"), each nested dict is
            printed under a "<name> (<sub_key>):" heading and empty labels are
            skipped. Otherwise the flat ``code -> label`` pairs are printed as
            "<variable>==<code>: <label>".

    Returns:
        None. Everything is written to stdout, with a blank line after each variable.
    """
    def _numeric_first(pair):
        # One sort key for both branches: integer-like codes (negative ones included)
        # are ordered numerically ahead of free-text codes. The fixed tuple shape
        # means an int is never compared against a str, so mixed keys cannot raise.
        raw_code = pair[0]
        try:
            return (0, int(raw_code), "")
        except (TypeError, ValueError):
            return (1, 0, str(raw_code))

    for key, entry in mapping.items():
        vals = entry['values']
        # Check if the values are nested dictionaries (e.g., for "race")
        nested = isinstance(vals, dict) and all(isinstance(v, dict) for v in vals.values())
        if nested:
            for sub_key, sub_vals in vals.items():
                print(f"{entry['name']} ({sub_key}):")
                for val_key, val in sorted(sub_vals.items(), key=_numeric_first):
                    if val:  # Only print if there's a value to avoid empty strings
                        print(f"  {val_key}: {val}")
        else:
            print(entry['name'] + ":")
            for val_key, val in sorted(vals.items(), key=_numeric_first):
                print(f"  {key}=={val_key}: {val}")
        print()  # Print a newline for better separation


class Agent:
    """An agent that answers a task through the chat-completions API while adopting a persona.

    The persona is either supplied directly (``persona``) or generated from one randomly
    sampled row of ``data`` that ``persona_mapping`` turns into a short description. The
    row pool is narrowed by ``ideology`` if given, otherwise by ``query_str`` if given,
    otherwise the whole frame is used.

    Attributes:
        model (str): The model version to use for processing.
        task_description (str): The description of the task to be processed.
        persona_prefix (str): The prefix to add before the persona description.
        persona_suffix (str): The suffix to add after the persona description.
        persona (str): The persona description to adopt for the task.
        system_instructions (str): The complete system message: prefix + persona + suffix,
            followed by any additional instructions passed to the constructor.
        original_task_description (str): The original task description without modifications.
        current_task_description (str): The task text most recently sent to the model
            (includes prior responses when `process_task` received some).

    Args:
        task_description (str): The description of the task to be processed.
        data (pd.DataFrame, optional): The dataset to use for generating persona descriptions.
        persona_mapping (dict, optional): Mapping to convert dataset rows into persona descriptions.
        ideology (str, optional): 'liberal' or 'conservative' restricts rows by the `ideo5`
            column; any other truthy value leaves the data unfiltered. Takes precedence
            over ``query_str``.
        query_str (str, optional): A string that is used for a pandas query clause on the dataframe.
        model (str): The model version to use for processing. Defaults to "gpt-4-turbo-preview".
        system_instructions (str): Additional instructions appended to the generated system
            message. Defaults to an empty string (nothing appended).
        persona (str): Predefined persona description. If empty, one is generated from ``data``.
        persona_prefix (str): The prefix for the system instructions. Defaults to a standard
            instruction about adopting a persona.
        persona_suffix (str): The suffix for the system instructions. Defaults to standard
            constraints to follow.

    Raises:
        AssertionError: from `validate` when the task is None, or when ``ideology`` /
            ``query_str`` is used without both ``data`` and ``persona_mapping``.
        ValueError: when no persona is given and none can be generated (no data/mapping,
            or the query matches no rows).

    Methods:
        process_task(previous_response=""): Process the task, optionally building upon a previous response.
        get_persona_description_ideology(data, ideology): Generates a persona from an ideology-filtered row.
        filter_data_by_ideology(data, ideology): Filters the dataset based on the specified ideology.
        _get_response(task): Internal method to interact with the API and get a response.
        row2persona(row, persona_mapping): Converts a dataset row into a persona description string.
        validate(): Checks that the constructor arguments are consistent.
    """
    def __init__(self,
                 task_description,
                 data=None,
                 persona_mapping = None,
                 ideology=None,
                 query_str = None,
                 model="gpt-4-turbo-preview",
                 system_instructions="",
                 persona="",
                 persona_prefix="INSTRUCTIONS\nWhen answering questions or performing tasks, always adopt the following persona.\n\nPERSONA:\n",
                 persona_suffix="\n\nCONSTRAINTS\n- When answering, do not disclose your partisan or demographic identity in any way.\n- Think, talk, and write like your persona.\n- Use plain language.\n- Adopt the characteristics of your persona.\n-Do not be overly polite or politically correct."):
        """Initialize an agent with specific characteristics and dataset."""
        self.model = model
        self.persona_mapping = persona_mapping
        self.task_description = task_description
        self.persona_prefix = persona_prefix
        self.persona_suffix = persona_suffix
        self.persona = persona
        self.ideology = ideology
        self.data = data
        self.query_str = query_str
        self.original_task_description = task_description # Original task description
        self.current_task_description = task_description # This will be updated when it's reading in from another agent
        self.validate()
        if not self.persona:
            self.persona = self._generate_persona()
        self.system_instructions = self.persona_prefix + self.persona + self.persona_suffix
        if system_instructions:
            # Previously this argument was accepted but silently dropped; it is documented
            # as *additional* instructions, so it is appended after the constraints.
            self.system_instructions += "\n\n" + system_instructions

    def _generate_persona(self):
        """Build a persona from one sampled row of `self.data` (used when none was supplied)."""
        if self.data is None or self.persona_mapping is None:
            raise ValueError(
                "Provide either `persona`, or both `data` and `persona_mapping` so a persona can be generated."
            )
        if self.ideology:
            return self.get_persona_description_ideology(self.data, self.ideology)
        # An empty/None query means "no restriction": sample from the whole frame
        # instead of handing pandas an empty expression.
        pool = self.data.query(self.query_str) if self.query_str else self.data
        if pool.empty:
            raise ValueError(f"No rows match query_str={self.query_str!r}; cannot generate a persona.")
        return self.row2persona(pool.sample(n=1).iloc[0], self.persona_mapping)

    def process_task(self, previous_response=""):
        """Process the task, optionally building upon a previous response.

        Args:
            previous_response (str): Text of earlier answers; when non-empty it is embedded
                in the prompt together with instructions to incorporate it.

        Returns:
            The model's reply as returned by `_get_response` (None if the API call failed).
        """
        task = self.original_task_description
        if previous_response:
            task = f"""
{task}
INCORPORATE PRIOR ANSWERS
- Here is what was previously said: '''{previous_response}'''
- Do not respond directly to what was previously said, but keep the best points from what was previously said. Ensure the perspective from prior responses are represented in your balanced answer.
"""
        self.current_task_description = task
        return self._get_response(task)

    def get_persona_description_ideology(self, data, ideology):
        """Generates a persona description based on the dataset and ideology.

        Returns the literal sentence "No data available for the specified ideology."
        when the filtered frame is empty.
        """
        filtered_data = self.filter_data_by_ideology(data, ideology)
        if not filtered_data.empty:
            selected_row = filtered_data.sample(n=1).iloc[0]
            return self.row2persona(selected_row, self.persona_mapping)
        return "No data available for the specified ideology."

    def filter_data_by_ideology(self, data, ideology):
        """Filters the dataset based on the specified ideology.

        'liberal' keeps rows with `ideo5` in {1, 2}; 'conservative' keeps {4, 5};
        any other value returns `data` unchanged.
        """
        if ideology == 'liberal':
            return data[data['ideo5'].isin([1, 2])]
        elif ideology == 'conservative':
            return data[data['ideo5'].isin([4, 5])]
        return data

    def _get_response(self, task):
        """Internal method to interact with the API and get a response.

        Uses the module-level `client`. Any exception is printed and None is returned,
        so callers must be prepared for a None reply.
        """
        try:
            completion = client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": self.system_instructions},
                    {"role": "user", "content": task}
                ]
            )
            return completion.choices[0].message.content
        except Exception as e:
            print("Error in _get_response:", e)
            return None

    @staticmethod
    def _code_candidates(raw):
        """Return the string spellings under which `raw` may appear as a mapping key.

        The exact `str(raw)` comes first, so existing lookups behave as before. For
        integer-valued input the canonical integer ("1" for 1.0 or "01") and its
        two-digit zero-padded form ("01") are added, so float-typed columns and
        zero-padded mapping keys still resolve.
        """
        text = str(raw)
        candidates = [text]
        try:
            number = float(text)
        except ValueError:
            return candidates
        if number.is_integer():  # False for NaN and infinities
            canonical = str(int(number))
            for alternative in (canonical, canonical.zfill(2)):
                if alternative not in candidates:
                    candidates.append(alternative)
        return candidates

    @staticmethod
    def row2persona(row, persona_mapping):
        """Converts a dataset row into a persona description string.

        Args:
            row: Mapping-like row (dict or pandas Series) of column -> coded value.
            persona_mapping (dict): column -> {"name": sentence stem, "values": code -> label}.

        Returns:
            str: Lower-cased sentences of the form "<name> <label>. ", one per mapped
            column present in `row`. Race is reported from the first indicator column
            equal to 1 (or "unknown"); labels containing "inapplicable", "legitimate
            skip" or "unknown" are omitted, as is an age whose birth year is not a number.
        """
        sentences = []
        for key, value in persona_mapping.items():
            field_name = value['name']
            if key == 'race':
                race_description = 'Unknown'
                for race_key in ['eth', 'rwh', 'rbl', 'rain', 'ras', 'rpi', 'roth']:
                    if race_key in row and '1' in Agent._code_candidates(row[race_key]):
                        candidate = value['values'][race_key].get('1', 'Unknown')
                        if candidate:  # If a description exists for the '1' value
                            race_description = candidate
                            break  # Stop checking once a race is found
                sentences.append(f"{field_name} {race_description}. ")
            elif key in row:
                values = value['values']
                if key == "birthyr_dropdown" and 'integer' in values:
                    try:
                        birth_year = int(float(row[key]))
                    except (TypeError, ValueError, OverflowError):
                        continue  # missing / non-numeric birth year: say nothing about age
                    sentences.append(f"{field_name} {datetime.now().year - birth_year}. ")
                else:
                    mapped_value = next(
                        (values[code] for code in Agent._code_candidates(row[key]) if code in values),
                        'Unknown',
                    )
                    lowered = mapped_value.lower()
                    if not any(marker in lowered for marker in ("inapplicable", "legitimate skip", "unknown")):
                        sentences.append(f"{field_name} {mapped_value}. ")
        return "".join(sentences).lower()

    def validate(self):
        """Check constructor arguments; raises AssertionError with an explanatory message."""
        assert self.original_task_description is not None, "Need to provide some task instructions"
        if self.ideology or self.query_str:
            assert self.data is not None and self.persona_mapping is not None, "If you use either `ideology' or `query_str' you need to provide both a dataframe and a persona mapping to process rows of the dataframe."


class Chain:
    """A class to represent a chain of agents processing tasks in sequence.

    Attributes:
        agents (list): A list of Agent objects that will process the task.
        responses (list): A list of responses from each agent in the chain.
        prompts (list): A list of prompts used by each agent in the chain.
        final_response (str): The final response produced by the last agent in the chain
            (None until `process_chain` has produced at least one response).
        shuffle (bool): Whether to shuffle the order of agents in the chain.
        cycles (int): The number of cycles to process through the chain.
        last_n (int): The number of last responses to consider for the next task processing.

    Args:
        agents (list): A list of Agent objects that will process the task.
        shuffle (bool): If True, the order of agents will be shuffled. Defaults to False.
        cycles (int): The number of times the list of agents will process the task. Defaults to 1.
        last_n (int): The number of last responses to consider for the next task processing.
            Defaults to 1. The sentinel -9 means "all previous responses"; 0 (or any other
            non-positive value) passes no history.

    Methods:
        process_chain(): Processes the task through the chain of agents, each building upon the last.
        format_previous_responses(responses): Formats prior responses for inclusion in the next task description.
    """
    def __init__(self, agents, shuffle=False, cycles=1, last_n=1):
        """Initialize a chain with a list of agents."""
        self.agents = agents
        self.responses = []
        self.prompts = []
        self.final_response = None
        self.shuffle = shuffle
        self.last_n = last_n
        if self.shuffle:
            # random.sample returns a new list, so the caller's list is left untouched.
            self.agents = random.sample(self.agents, len(self.agents))
        self.cycles = cycles  # was hard-coded to 1, which ignored the argument
        if last_n == -9:
            self.last_n = 1000000

    def process_chain(self):
        """Process the task through a chain of agents, each building upon the last.

        Every agent receives the formatted text of the `last_n` most recent successful
        responses. Results are appended to `self.responses` / `self.prompts` and the
        last one is stored in `self.final_response` (None if there are no responses).
        """
        previous_responses = []
        for _cycle in range(self.cycles):
            for agent in self.agents:
                # `[-0:]` would select the whole list, so non-positive values are
                # handled explicitly as "no history".
                recent = previous_responses[-self.last_n:] if self.last_n > 0 else []
                response = agent.process_task(self.format_previous_responses(recent))
                if response is not None:
                    # A failed call returns None; keep it out of the history so the
                    # literal text "None" is never presented to the next agent.
                    previous_responses.append(response)
                self.prompts.append(agent.current_task_description)
                self.responses.append(response)
        self.final_response = self.responses[-1] if self.responses else None

    @staticmethod
    def format_previous_responses(responses):
        """Render responses as "Response <i>: <text>" lines; empty input gives ""."""
        if not responses:
            return ""
        return "".join("Response {}: {}\n".format(i, text) for i, text in enumerate(responses))


# Usage through Python

In [6]:
# Load the ANES pilot survey; each agent's persona is sampled from one row of this frame.
data = pd.read_csv(
    "https://raw.githubusercontent.com/josh-ashkinaze/plurals/main/anes_pilot_2022_csv_20221214.csv"
)

# The same question is put to every agent.
TASK = "How should the United States handle climate change? Answer in 100 words."

# One agent per ideology, built in the order liberal -> conservative.
lib, con = (
    Agent(task_description=TASK, data=data, persona_mapping=anes_mapping, ideology=leaning)
    for leaning in ('liberal', 'conservative')
)

# Conservative -> liberal -> conservative; the conservative agent object is used twice.
mixed = Chain([con, lib, con])
mixed.process_chain()


  data = pd.read_csv("https://raw.githubusercontent.com/josh-ashkinaze/plurals/main/anes_pilot_2022_csv_20221214.csv")


In [9]:
# Show the chain's final answer, then the persona and full system message of the first agent.
first_agent = mixed.agents[0]
print("FINAL RESPONSE", mixed.final_response, sep="\n")
print("Persona of first agent:", first_agent.persona, sep="\n")
print("\n\n")
print("System instructions of first agent:", first_agent.system_instructions, sep="\n")

FINAL RESPONSE
Addressing climate change means finding a middle ground where we can protect our environment without undermining the economy. It's about pushing forward with innovations in clean energy, recognizing such advancements can drive job creation and economic growth if approached correctly. Practical, everyday actions like recycling and saving energy should be encouraged among individuals, as these efforts collectively contribute to significant environmental benefits. It's equally important to support our farmers and industries in transitioning to more sustainable practices, making sure these changes are feasible and don't jeopardize livelihoods. What's needed is a pragmatic strategy that focuses on gradual and sustainable progress.
Persona of first agent:
your age is 66. your gender is female. your race is white. politically, you identify as a(n) republican. your ideology is conservative. regarding children, you do not have children living at home. you live in a rural area. yo

# UI Scrap

In [11]:
# @title UI Helpers (run this)
import ipywidgets as widgets
from IPython.display import display
import pandas as pd


def process_nested_values(nested_values):
    """Flatten a dict of ``code -> label`` dicts into dropdown options.

    Args:
        nested_values (dict): sub_key -> {code: label}.

    Returns:
        dict: "<sub_key> - <label>" -> code, for every entry whose label is non-empty.
    """
    return {
        f"{group} - {label}": code
        for group, codes in nested_values.items()
        for code, label in codes.items()
        if label  # Ensure we don't include empty values
    }

def create_dropdowns(mapping):
    """Create one dropdown widget per mapped variable.

    'birthyr_dropdown' and 'race' are skipped. Every dropdown starts with a '---'
    entry whose value is None ("no preference"), followed by the variable's
    human-readable labels mapped to their codes.

    Args:
        mapping (dict): variable -> {"clean_var", "name", "values"}.

    Returns:
        list: the created `widgets.Dropdown` objects, in mapping order.
    """
    skipped_fields = ['birthyr_dropdown', 'race']
    dropdowns = []
    for key, info in mapping.items():
        if key in skipped_fields:
            continue

        codes = info['values']
        # Nested structure (dict of dicts) vs. flat code -> label dict
        if all(isinstance(entry, dict) for entry in codes.values()):
            labelled = process_nested_values(codes)
        else:
            labelled = {label: code for code, label in codes.items() if label}

        options = {'---': None}  # 'no preference' option comes first
        options.update(labelled)

        dropdowns.append(
            widgets.Dropdown(
                options=options,
                description=info['clean_var'],
                description_tooltip=info['name'],
                style={'description_width': 'initial'},
            )
        )
    return dropdowns


def selections_to_query(dropdown_widgets, anes_mapping):
    """Turn the current dropdown selections into a pandas query string.

    Args:
        dropdown_widgets: iterable of objects exposing `.value` (the selected code, or
            None for "no preference") and `.description` (the variable's `clean_var`).
        anes_mapping (dict): variable -> {"clean_var": ..., ...}; 'race' is ignored.

    Returns:
        str: conditions such as "educ==1&pid3==-7", joined with "&" (inside
        `DataFrame.query`, "&" binds like `and`). Empty string when nothing is selected.
    """
    def _code_literal(code):
        # Codes are stored as strings; write integer-like ones in canonical form so a
        # zero-padded key such as "01" becomes 1 (a leading-zero literal is not valid
        # in a query expression). Anything non-numeric is passed through untouched.
        text = str(code)
        try:
            return str(int(text))
        except ValueError:
            return text

    # clean_var -> column name; the first matching variable wins.
    column_by_label = {}
    for key, info in anes_mapping.items():
        if key != 'race':
            column_by_label.setdefault(info.get('clean_var'), key)

    query_parts = []
    for dropdown in dropdown_widgets:
        # Skip if the dropdown value is None
        if dropdown.value is None:
            continue
        column = column_by_label.get(dropdown.description)
        if column is None:
            continue  # dropdown does not correspond to any mapped variable
        query_parts.append(f"{column}=={_code_literal(dropdown.value)}")

    return "&".join(query_parts)



# Function to create a single chain configuration form
def create_chain_form():
    """Build the chain configuration form: a model dropdown above a task text area.

    Returns:
        widgets.VBox: children[0] is the model dropdown, children[1] the task textbox.
    """
    # Display label -> model identifier handed to the API. The values are the same
    # identifiers the main UI cell uses in its `model_options` mapping; the previous
    # values ('chatgpt-3.5', 'gpt-3') were display names rather than those identifiers.
    model_options = {
        'chatgpt-3.5': 'gpt-3.5-turbo-0125',
        'fast gpt4': 'gpt-4-0125-preview',  # Add more models as needed
    }

    # Create the model selection dropdown
    model_dropdown = widgets.Dropdown(
        options=model_options,
        description='Model:',
        style={'description_width': 'initial'}
    )

    # Create the text box for the task description
    task_textbox = widgets.Textarea(
        description='Task:',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='100%', height='100px')  # Adjust the size as needed
    )

    # Group the widgets into a form
    chain_form = widgets.VBox([model_dropdown, task_textbox], layout=widgets.Layout(margin='10px 0px 10px 0px'))

    return chain_form

# Container for the chain configuration and agent forms
# NOTE: both names are bound again to fresh, empty VBox objects at the top of the next UI cell.
chain_container = widgets.VBox([])
forms_container = widgets.VBox([])






In [12]:
# @title UI (run this)
import ipywidgets as widgets
from IPython.display import display
import random

# Assume necessary functions like anes_mapping, create_dropdowns, selections_to_query, Agent, Chain are defined

# Initialize UI components for chain configuration and agent forms
# chain_container: receives the model/task form in `initialize_ui`.
# forms_container: gains one VBox of dropdowns per call to `add_agent_form`.
# feedback_label: displayed by `initialize_ui`; no visible code writes a value to it.
chain_container = widgets.VBox()
forms_container = widgets.VBox()
feedback_label = widgets.Label()

# Model options mapping
# Display label -> model identifier; the selected value is passed to `Agent(model=...)`.
model_options = {'chatgpt-3.5': 'gpt-3.5-turbo-0125', 'fast gpt4': 'gpt-4-0125-preview'}

def print_wrapped_text(text, max_line_length=50):
    """Print `text` word-wrapped so that lines do not exceed `max_line_length`.

    Runs of whitespace are collapsed to single spaces. Words are never split: a word
    longer than the limit is printed on a line of its own. `None` is treated as empty
    text, and empty text prints one empty line.

    Args:
        text (str | None): The text to print.
        max_line_length (int): Maximum number of characters per printed line.
    """
    words = ("" if text is None else str(text)).split()
    lines = []
    current_line = ''
    for word in words:
        candidate = f"{current_line} {word}" if current_line else word
        # Only break when there is already something on the line; otherwise an
        # over-long first word would produce a spurious blank line.
        if current_line and len(candidate) > max_line_length:
            lines.append(current_line)
            current_line = word
        else:
            current_line = candidate
    lines.append(current_line)
    print("\n".join(lines))

def initialize_ui():
    """Assemble and display the chain-configuration UI.

    Places the model/task form into `chain_container`, displays the containers and
    the feedback label, adds the "Add Another Agent" and "PROCESS" buttons, and
    finally inserts the first agent form. Has no return value.
    """
    # Chain configuration form: model selector on top of the task text area
    chain_container.children = [
        widgets.VBox(
            [
                widgets.Dropdown(options=model_options, description='Model:', style={'description_width': 'initial'}),
                widgets.Textarea(description='Task:', layout=widgets.Layout(width='100%', height='100px')),
            ],
            layout=widgets.Layout(margin='10px 0'),
        )
    ]

    # Display UI components
    display(chain_container, forms_container, feedback_label)

    # Buttons, in display order: add another agent form, then process the chain
    button_specs = [
        ("Add Another Agent", lambda b: add_agent_form()),
        ("PROCESS", lambda b: gather_and_process_chain(b)),
    ]
    for label, handler in button_specs:
        btn = widgets.Button(description=label)
        btn.on_click(handler)
        display(btn)

    # Initialize with one agent form
    add_agent_form()

def add_agent_form():
    """Append a new agent form (one dropdown per mapped variable) to `forms_container`."""
    agent_dropdowns = create_dropdowns(anes_mapping)
    new_form = widgets.VBox(agent_dropdowns, layout=widgets.Layout(margin='10px 0'))
    forms_container.children = forms_container.children + (new_form,)

def gather_and_process_chain(button):
    """Build one agent per form from the UI selections, run them as a chain, print the result.

    Relies on notebook globals: `chain_container`, `forms_container`, `anes_mapping`
    and the survey DataFrame `data` loaded in the earlier "Python" cell.

    Args:
        button: The clicked button; its description shows "Processing..." while running
            and is reset to "PROCESS" afterwards, even on failure.

    Returns:
        Chain: the processed chain.
    """
    # Update button text to indicate processing
    button.description = "Processing..."

    try:
        # Retrieve configurations
        chain_form = chain_container.children[0]
        selected_model = chain_form.children[0].value
        task_description = chain_form.children[1].value

        # One pandas query string per agent form (empty when nothing was selected)
        agent_queries = [selections_to_query(form.children, anes_mapping) for form in forms_container.children]

        def _build_agent(query):
            # The selections describe *which respondents to sample from*, so they are
            # passed as `query_str`; previously the raw query text (e.g. "educ==1")
            # was handed over as the persona itself.
            if query:
                return Agent(task_description=task_description, model=selected_model,
                             data=data, persona_mapping=anes_mapping, query_str=query)
            # No preference on any variable: sample a respondent from the whole frame.
            persona = Agent.row2persona(data.sample(n=1).iloc[0], anes_mapping)
            return Agent(task_description=task_description, model=selected_model, persona=persona)

        # Initialize agents with configurations
        agents = [_build_agent(query) for query in agent_queries]

        # Initialize and process the chain
        chain = Chain(agents=agents, shuffle=False, cycles=1, last_n=1)
        chain.process_chain()

        # Output the final response; a failed API call leaves None in `responses`
        final_response = chain.responses[-1] if chain.responses else None
        if final_response:
            print("Final response from the chain:")
            print_wrapped_text(final_response)
        else:
            print("No responses generated by the chain.")
    finally:
        # Reset button text regardless of process success or failure
        button.description = "PROCESS"
    return chain

# Build and display the UI.
# NOTE(review): `initialize_ui` has no return statement, so `chain` is bound to None here.
# The Chain created in `gather_and_process_chain` is returned from the click handler and
# is not assigned to this name.
chain = initialize_ui()


VBox(children=(VBox(children=(Dropdown(description='Model:', options={'chatgpt-3.5': 'gpt-3.5-turbo-0125', 'fa…

VBox()

Label(value='')

Button(description='Add Another Agent', style=ButtonStyle())

Button(description='PROCESS', style=ButtonStyle())

In [None]:
# Inspect `chain`. NOTE(review): the UI cell assigns `chain = initialize_ui()`, which yields
# None, so this displays nothing unless `chain` has been rebound elsewhere.
chain