In [8]:

import warnings
import pandas as pd
from typing import List
warnings.filterwarnings("ignore")
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import ChatOpenAI
from langchain_core.prompts import (
    PromptTemplate, ChatPromptTemplate
)
from langchain_core.utils.function_calling import convert_to_openai_function
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser

In [190]:
# --- Notebook configuration: everything a reader is expected to tune lives in this cell. ---
# model_name is handed to ChatOpenAI when the chat model is constructed.
model_name = 'gpt-4-1106-preview'  # OpenAI chat model name
# audience is interpolated into the user prompt as the population every surveyed person belongs to.
audience = 'Tech people in the United States'  # overall survey audience

# The questions and the traits/counts below can be edited freely: the function schema,
# the user prompt and the result columns are all derived from these two lists.
# Each question becomes "Question<N>" in the prompt and an "answer<N>" field in the schema (N starts at 1).
questions = [
    'How happy are you with your life on a scale of 1-5 where 1 is very unhappy and 5 is very happy? (#)',
    'What are the few tech tools that you use daily?',
    'What is your profession?',
]

# (trait description, number of surveyed people with that trait) pairs.
# The counts are summed later to get the total number of people to survey.
traits_and_counts = [
    ('Men being currently employed', 2),
    ('Women being currently employed', 3)
]


In [191]:
# This cell is not required by the rest of the notebook; it is kept only to illustrate how function calling works.

# To learn more about function calling, see https://www.datacamp.com/tutorial/open-ai-function-calling-tutorial
# Answer is the class that holds the responses given by one surveyed person; it also holds that person's traits so we know who answered the questions.
# The LLM is intelligent enough to give a unique response each time based on the given system and user prompts, so we need not worry about that.
# Illustrative pydantic schema for one surveyed person's answers.
# NOTE(review): this class is not referenced by any other cell visible in this notebook;
# the schema actually bound to the model is built by create_openai_functions().
# NOTE(review): the fields below are hard-coded to four answers, while the configured
# `questions` list has three entries - this class does not follow the dynamic question list.
class Answer(BaseModel):
    """Human-like set of answers told by each person while being surveyed. 
    Each person should have their own unique set of answers that should be different from other person's set of answers."""

    # One string field per question; the Field descriptions are the per-field schema text.
    answer1: str = Field(description="This is the human like answer to Question1.")
    answer2: str = Field(description="This is the human like answer to Question2.")
    answer3: str = Field(description="This is the human like answer to Question3.")
    answer4: str = Field(description="This is the human like answer to Question4.")
    # Trait of the respondent, so each answer set can be attributed to a trait group.
    traits: str = Field(description="This is the trait of the person.")
    
    
    
# Answers is the formatted list that the LLM generates; each element is a dictionary shaped like the Answer class.
# The LLM will keep each dictionary in the list distinct from the others, because it treats the list as a list of human responses.
# Illustrative wrapper schema: a single `answer` field holding one Answer per surveyed person.
# NOTE(review): like Answer, this class is not referenced by the other cells visible here.
class Answers(BaseModel):
    """Human-like survey answers to tell user."""

    # The key name `answer` matches the key the output parser extracts (key_name="answer").
    answer: List[Answer]

In [192]:
# This is the standard structure of the list of functions used for function calling.
# To see that structure, you can print the openai_functions variable in the previous New.ipynb notebook.
# This function builds the schema for the output we want: a list of answers, one entry per surveyed person, based on the questions provided.
# To learn more about function calling, see https://www.datacamp.com/tutorial/open-ai-function-calling-tutorial
def create_openai_functions(questions: List):
    """Build the OpenAI function-calling schema for a survey with the given questions.

    The schema describes one function, ``Answers``, whose single ``answer``
    argument is an array of per-person objects. Every object carries a
    ``traits`` string plus one ``answer<N>`` string per question (N starts at 1),
    and all of those fields are listed as required.

    Args:
        questions: Iterable of survey questions. Only the number of questions
            matters here; their text is not embedded in the schema.

    Returns:
        A one-element list containing the function definition dictionary.
    """
    # Field names are positional (answer1, answer2, ...), mirroring "Question<N>" in the prompt.
    answer_keys = [f'answer{position}' for position, _ in enumerate(questions, 1)]

    # 'traits' comes first, followed by the answer fields in question order.
    person_properties = {
        'traits': {'description': 'This is the trait of the person.', 'type': 'string'},
    }
    person_properties.update({
        key: {
            'description': f'This is the human like answer to Question{position}.',
            'type': 'string',
        }
        for position, key in enumerate(answer_keys, 1)
    })

    # Schema of a single surveyed person's set of answers.
    person_schema = {
        'description': "Human-like set of answers told by each person while being surveyed. \nEach person should have their own unique set of answers that should be different from other person's set of answers.",
        'type': 'object',
        'properties': person_properties,
        'required': ['traits', *answer_keys],
    }

    return [{
        'name': 'Answers',
        'description': 'Human-like survey answers to tell user.',
        'parameters': {
            'type': 'object',
            'properties': {'answer': {'type': 'array', 'items': person_schema}},
            'required': ['answer'],
        },
    }]
        

In [193]:
# Build the function schema for the configured questions. This is the schema that is
# later bound to the chat model, so it must be defined before the model cell runs.
openai_functions = create_openai_functions(questions)
# Bare last expression: displays the generated schema for inspection.
openai_functions

[{'name': 'Answers',
  'description': 'Human-like survey answers to tell user.',
  'parameters': {'type': 'object',
   'properties': {'answer': {'type': 'array',
     'items': {'description': "Human-like set of answers told by each person while being surveyed. \nEach person should have their own unique set of answers that should be different from other person's set of answers.",
      'type': 'object',
      'properties': {'traits': {'description': 'This is the trait of the person.',
        'type': 'string'},
       'answer1': {'description': 'This is the human like answer to Question1.',
        'type': 'string'},
       'answer2': {'description': 'This is the human like answer to Question2.',
        'type': 'string'},
       'answer3': {'description': 'This is the human like answer to Question3.',
        'type': 'string'}},
      'required': ['traits', 'answer1', 'answer2', 'answer3']}}},
   'required': ['answer']}}]

In [194]:
# Output parser for the function call. key_name="answer" matches the single "answer"
# property declared in the function schema.
parser = JsonKeyOutputFunctionsParser(key_name="answer")  # returns the value under "answer": the list of per-person answer dicts

In [195]:
# Bind the function schema to the chat model AND force the model to call it.
# With `functions` alone the model may choose to reply in plain text; such a reply has
# no function-call payload, so the JsonKeyOutputFunctionsParser downstream would fail.
# Passing `function_call={"name": ...}` makes the model always answer via the function.
llm = ChatOpenAI(
    temperature=1,  # presumably set high to encourage varied answers between respondents
    model_name=model_name
).bind(
    functions=openai_functions,
    function_call={"name": openai_functions[0]["name"]},
)

In [196]:
# Default system message; edit it to suit your needs.
# It is passed as the "system" entry when the chat prompt template is built.
# NOTE(review): because this text goes through ChatPromptTemplate, literal curly braces added
# here would presumably be read as template variables - confirm before adding any.
system_message = """
You are a survey answering bot that generates answers like a survey when questions are asked. The answer should be made as if you are a human. 
Give answers assuming you are a new human with different lifestyles while giving answers.
Give answers with new thoughts, new ideas, new moods, you can also choose to answer very rudely, but the main idea is to be random, do not try to repeat same answers since humans have different answers with different answer style.
While giving answers, you should be as creative as possible and you should deviate your answers as much as possible from previous answers.
In every answer, change styles of answers, change average sentence lengths of answer, change fk_grade_level of sentences of answer. Make it different from previous answers. But also make sure it is the answer given by a human. So, don't make it seem like it is AI generated. Add both simple and fancy words.
In 1 answer, give your answers assuming you are having a worse life, in another answer, give your answers assuming you are having best life. like this, keep on changing the lifestyle of human that you are.
Avoid same repeated answers as much as possible.
Do no repeat same pattern in each answers. Give short answers sometimes and sometimes long answers, be random.
Since, human can give both positive and negative answers, you should follow the same principles.
Your answers should be descriptive just like human answers.
Each set of answers should be different from another set of answers. 
If you are asked about 'top few things' or 'few things', each answers should have random number of comma separated sentences. For example:
sentence1, sentence2, and sentence3. (3 sentences)
sentence1. (1 sentence)
sentence1, sentence2, sentence3, sentence4, and sentence5. (5 sentences)
sentence1, sentence2. (2 sentences)
sentence1, sentence2, sentence3, and sentence4. (4 sentences)


If the output asks for a monetary output give just the dollar amount and no text before or after it unless prompted to do so. So for example if the answer is $40 just give $40.
If the output asks for a decimal output give just the value and no text before or after it unless prompted to do so. So for example if the answer is 40.5 just give 40.5.
If you are asked a question like where do you live? Be sure to just answer the place that you live. No text before or after it is needed unless prompted to do so.
If the answer is asking for a percentage output just give the value. So if the answer is 45% just give 45%. Do not give any text before or after that unless prompted to do so.
"""

In [197]:

# Chat prompt template: the fixed system message followed by one user message.
# The user message is the "{input}" placeholder, filled from the "input" key
# of the dict passed when the chain is invoked.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_message), ("user", "{input}")]
)

In [198]:
# Compose the pipeline: the prompt's output feeds the function-bound model, and the
# model's output feeds the parser, which extracts the "answer" list from the function call.
chain = prompt | llm | parser

In [199]:
# Total number of people to survey: the sum of the per-trait counts
# (second element of every entry in traits_and_counts).
total_surveyed = sum(int(entry[1]) for entry in traits_and_counts)
print("Total surveyed: ", total_surveyed)


Total surveyed:  5


In [200]:
# User message sent to the LLM. It is assembled from the configuration:
# a header with the total head count and audience, one line per trait group,
# and one numbered "Question<N>: ..." line per question.
trait_lines = [
    f'{counts} of the surveyed people have this trait: {traits}\n'
    for traits, counts in traits_and_counts
]
question_lines = [
    f'Question{number}: {text}\n'
    for number, text in enumerate(questions, 1)
]
input_message = "".join([
    f"Generate survey answers from {total_surveyed} people.\nAll of the surveyed people are {audience}\n",
    *trait_lines,
    *question_lines,
])
print(input_message)

Generate survey answers from 5 people.
All of the surveyed people are Tech people in the United States
2 of the surveyed people have this trait: Men being currently employed
3 of the surveyed people have this trait: Women being currently employed
Question1: How happy are you with your life on a scale of 1-5 where 1 is very unhappy and 5 is very happy? (#)
Question2: What are the few tech tools that you use daily?
Question3: What is your profession?



In [201]:

# One call generates every respondent's answers; the parser returns a list of dicts,
# each expected to hold 'traits' plus 'answer1'..'answer<N>' (see the printed output).
responses = chain.invoke({"input": input_message})

# The schema lists these fields as required, but the model's reply is not validated
# against it here, so report a head-count mismatch instead of silently accepting it.
if len(responses) != total_surveyed:
    print(f"Warning: expected {total_surveyed} respondents but the model returned {len(responses)}.")

# Flatten each response into one record keyed by the question text.
final_data = []
for person_number, data in enumerate(responses, 1):
    # .get() keeps the row (with a missing value) when the model omits a field,
    # rather than raising KeyError and discarding the whole generated batch.
    row = {'Traits': data.get('traits'), 'Person Number': person_number}
    for ind, question in enumerate(questions, 1):
        row[str(question)] = data.get(f'answer{ind}')
    final_data.append(row)
print(responses)

[{'traits': 'Men being currently employed', 'answer1': '4', 'answer2': 'Python, Docker, VS Code, and Postman.', 'answer3': 'Software Engineer'}, {'traits': 'Men being currently employed', 'answer1': '5', 'answer2': 'Git, JIRA, Slack, AWS.', 'answer3': 'DevOps Specialist'}, {'traits': 'Women being currently employed', 'answer1': '3', 'answer2': 'Tableau, Excel, R.', 'answer3': 'Data Analyst'}, {'traits': 'Women being currently employed', 'answer1': '5', 'answer2': 'Android Studio, Gradle, Kotlin.', 'answer3': 'Mobile Developer'}, {'traits': 'Women being currently employed', 'answer1': '2', 'answer2': 'Salesforce, Zoom, Google Drive.', 'answer3': 'Technical Support Engineer'}]


In [202]:
# final_data is a list of per-person records (one dict per row), so the plain
# DataFrame constructor builds the table: one row per person, one column per key.
df = pd.DataFrame(final_data)

In [203]:
# Bare last expression: displays the survey results table.
df

Unnamed: 0,Traits,Person Number,How happy are you with your life on a scale of 1-5 where 1 is very unhappy and 5 is very happy? (#),What are the few tech tools that you use daily?,What is your profession?
0,Men being currently employed,1,4,"Python, Docker, VS Code, and Postman.",Software Engineer
1,Men being currently employed,2,5,"Git, JIRA, Slack, AWS.",DevOps Specialist
2,Women being currently employed,3,3,"Tableau, Excel, R.",Data Analyst
3,Women being currently employed,4,5,"Android Studio, Gradle, Kotlin.",Mobile Developer
4,Women being currently employed,5,2,"Salesforce, Zoom, Google Drive.",Technical Support Engineer


In [204]:
# Write the results to a CSV file in the current working directory.
# index=False leaves the DataFrame's row index out of the file.
# NOTE(review): the file name is hard-coded, so re-running this cell overwrites the same file.
df.to_csv('surveyResults16.csv', index=False)