<a href="https://colab.research.google.com/github/popupjquery/Flowise/blob/main/gpt_prompt_engineer_Classification_Version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# gpt-prompt-engineer -- Classification Version
By Matt Shumer (https://twitter.com/mattshumer_)

Github repo: https://github.com/mshumer/gpt-prompt-engineer

Generate an optimal prompt for a given classification task that can be evaluated with 'true'/'false' outputs.

You just need to describe the task clearly, and provide some test cases (for example, if we're classifying statements as 'happy' or not, a 'true' test case could be "I had a great day!", and a 'false' test case could be "I am feeling gloomy.").

To generate a prompt:
1. In the first cell, add in your OpenAI key.
2. If you don't have GPT-4 access, change `model='gpt-4'` in the second cell to `model='gpt-3.5-turbo'`. If you do have access, skip this step.
3. In the last cell, fill in the description of your task, as many test cases as you want (each test case is an example prompt plus its expected output), and the number of prompts to generate.
4. Run all the cells! The AI will generate a number of candidate prompts and test them all to find the best one!

In [None]:
!pip install openai
!pip install prettytable

from prettytable import PrettyTable
import time
import openai

# Placeholder credential: replace the string below with your own key before
# running the cells that call the OpenAI API. Avoid saving or sharing the
# notebook with a real key left in this cell.
openai.api_key = "ADD YOUR KEY HERE" # enter your OpenAI API key here

In [None]:
def generate_candidate_prompts(description, test_cases, number_of_prompts):
    """Request candidate classifier system prompts from GPT-4.

    A single chat-completion request is sent with `n=number_of_prompts`, so
    every candidate comes back as a separate choice of the same response.

    Args:
        description: Plain-English description of the classification task.
            Surrounding whitespace is stripped before it is sent.
        test_cases: The test cases to show the model; they are interpolated
            into the user message using their string representation.
        number_of_prompts: How many candidate prompts to request.

    Returns:
        A list with the message content of each returned choice.
    """
    # Instructions that frame the model as a prompt writer for a true/false classifier.
    system_message = {"role": "system", "content": """Your job is to generate system prompts for GPT-4, given a description of the use-case and some test cases.

The prompts you will be generating will be for classifiers, with 'true' and 'false' being the only possible outputs.

In your generated prompt, you should describe how the AI should behave in plain English. Include what it will see, and what it's allowed to output. Be creative in with prompts to get the best possible results. The AI knows it's an AI -- you don't need to tell it this.

You will be graded based on the performance of your prompt... but don't cheat! You cannot include specifics about the test cases in your prompt. Any prompts with examples will be disqualified.

Most importantly, output NOTHING but the prompt. Do not include anything else in your message."""}

    # The concrete task: the test cases plus the use-case description.
    user_message = {"role": "user", "content": f"Here are some test cases:`{test_cases}`\n\nHere is the description of the use-case: `{description.strip()}`\n\nRespond with your prompt, and nothing else. Be creative."}

    response = openai.ChatCompletion.create(
        model='gpt-4',
        messages=[system_message, user_message],
        temperature=.9,
        n=number_of_prompts,
    )

    return [choice.message.content for choice in response.choices]

In [None]:
def test_candidate_prompts(test_cases, prompts):
    """Score each candidate prompt against the test cases and print the results.

    Every candidate is used as the system message and every test case's
    'prompt' as the user message of a gpt-3.5-turbo chat completion. The
    one-token reply is compared with the test case's 'answer'. A table with
    one row per test case and one column per candidate is printed, followed
    by each candidate's accuracy and the best-scoring candidate.

    Args:
        test_cases: Iterable of dicts with a 'prompt' key (the input text)
            and an 'answer' key (the expected reply, e.g. 'true' or 'false').
        prompts: List of candidate system prompts to evaluate.

    Returns:
        None. All results are printed.
    """
    # Tally by position rather than by prompt text, so that two identical
    # candidates are scored independently instead of sharing one counter.
    correct_counts = [0] * len(prompts)
    total_cases = 0

    # Initialize the table: one result column per candidate, labelled the
    # same way as the per-prompt summary lines printed below.
    table = PrettyTable()
    table.field_names = ["Prompt", "Expected"] + [
        f"Prompt {index + 1}" for index in range(len(prompts))
    ]

    # Wrap the text in the "Prompt" column
    table.max_width["Prompt"] = 100

    for test_case in test_cases:
        row = [test_case['prompt'], test_case['answer']]
        for index, prompt in enumerate(prompts):
            # One API call per (test case, candidate) pair. The strong bias
            # on two token ids together with max_tokens=1 is intended to
            # force the reply to be exactly one of the two labels.
            # NOTE(review): the ids are tokenizer-specific -- re-check them
            # if the model below is changed.
            x = openai.ChatCompletion.create(
                model='gpt-3.5-turbo',
                messages=[
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": f"{test_case['prompt']}"}
                ],
                logit_bias={
                    '1904': 100,  # 'true' token
                    '3934': 100,  # 'false' token
                },
                max_tokens=1,
                temperature=0,
            ).choices[0].message.content

            is_correct = x == test_case['answer']
            row.append("✅" if is_correct else "❌")
            if is_correct:
                correct_counts[index] += 1

        table.add_row(row)
        total_cases += 1

    print(table)

    # Calculate and print the percentage of correct answers for each prompt.
    best_index = None
    best_percentage = 0
    for index in range(len(prompts)):
        # With no test cases there is nothing to divide by; report 0%.
        percentage = (correct_counts[index] / total_cases) * 100 if total_cases else 0.0
        print(f"Prompt {index + 1} got {percentage:.2f}% correct.")
        # The first candidate always becomes the initial best, so a real
        # prompt is reported even when every candidate scores 0%. Ties keep
        # the earliest candidate.
        if best_index is None or percentage > best_percentage:
            best_percentage = percentage
            best_index = index

    best_prompt = prompts[best_index] if best_index is not None else None
    print(f"The best prompt was '{best_prompt}' with a correctness of {best_percentage:.2f}%.")

In [None]:
# Labelled examples for the classifier. Each pair is (input text, expected
# reply); add or remove pairs to change the evaluation set. The pairs are
# expanded into dicts with 'prompt' and 'answer' keys.
test_cases = [
    {'prompt': task_text, 'answer': expected_label}
    for task_text, expected_label in (
        ('Find the best contact email on this site.', 'true'),
        ('who is the current president?', 'true'),
        ('order me a pizza', 'false'),
        ('what are some ways a doctor could use an assistant?', 'true'),
        ('write a speech on the danger of cults', 'false'),
        ('Make a reservation at The Accent for 9pm', 'false'),
        ('organize my google drive', 'false'),
        ('Find the highest-rated Italian restaurant near me.', 'true'),
        ('Explain the theory of relativity.', 'true'),
        ('What are the main differences between Python and Java programming languages?', 'true'),
        ('Translate the following English sentence to Spanish: "The weather today is great."', 'false'),
        ('Create a new event on my calendar for tomorrow at 2 pm.', 'false'),
        ('Write a short story about a lonely cowboy.', 'false'),
        ('Design a logo for a startup.', 'false'),
        ('Compose a catchy jingle for a new soda brand.', 'false'),
        ('Calculate the square root of 1999.', 'false'),
        ('What are the health benefits of yoga?', 'true'),
        ('find me a source of meat that can be shipped to canada', 'true'),
        ('Find the best-selling book of all time.', 'true'),
        ('What are the top 5 tourist attractions in Brazil?', 'true'),
        ('List the main ingredients in a traditional lasagna recipe.', 'true'),
        ('How does photosynthesis work in plants?', 'true'),
        ('Write a Python program to reverse a string.', 'false'),
        ('Create a workout routine for a beginner.', 'false'),
        ('Edit my resume to highlight my project management skills.', 'false'),
        ('Draft an email to a client to discuss a new proposal.', 'false'),
        ('Plan a surprise birthday party for my best friend.', 'false'),
    )
]


# Plain-English statement of the classification task; it is handed to the
# prompt generator together with the test cases.
description = "Decide if a task is research-heavy." # describe the classification task clearly
# Number of candidate prompts requested from the generator. Evaluation makes
# one API call per (test case, candidate) pair, so cost grows with this value.
number_of_prompts = 10 # choose how many prompts you want to generate and test



# Generate the candidates, then score each of them against every test case
# and print the results.
candidate_prompts = generate_candidate_prompts(description, test_cases, number_of_prompts)
test_candidate_prompts(test_cases, candidate_prompts)