In [3]:
# TextGrad optimization step from the paper
#
# This cell only defines prompt text:
#   tg_glossary      - glossary of the XML-like tags, appended to the system prompt
#   tg_system_prompt - system message for the optimizer LLM
#   _prompt          - a fully rendered example prompt, kept for reference
#   prompt           - the user-message template with {placeholders}

# NOTE: every glossary line after the heading starts with "# " and the string
# ends with a bare "# " line; this text is sent to the model verbatim.
tg_glossary = """
### Glossary of tags that will be sent to you:
# - <LM_SYSTEM_PROMPT>: The system prompt for the language model.
# - <LM_INPUT>: The input to the language model.
# - <LM_OUTPUT>: The output of the language model.
# - <OBJECTIVE_FUNCTION>: The objective of the optimization task.
# - <VARIABLE>: Specifies the span of the variable.
# - <ROLE>: The role description of the variable.
# """

# f-string: the glossary is interpolated once, here, at definition time.
tg_system_prompt = f"""You are part of an optimization system that improves text (i.e., variable). You will be asked to creatively and critically improve prompts, solutions to problems, code, or any other text-based variable.
You will receive some feedback, and use the feedback to improve the variable. The feedback may
be noisy, identify what is important and what is correct. Pay attention to the role description of the
variable, and the context in which it is used. This is very important: You MUST give your response
by sending the improved variable between <IMPROVED_VARIABLE> improved variable </IMPROVED_VARIABLE> tags. The text you send between the tags will directly replace the variable.
{tg_glossary}
"""

# Reference only: a concrete, already filled-in prompt. It has no placeholders
# and is not referenced by any other cell shown in this notebook.
_prompt = """
Here is the role of the variable you will improve: <ROLE>system prompt to the language model</ROLE>.
The variable is the text within the following span: <VARIABLE> You will answer a reasoning question. Think step by step.
(...) following format: ’Answer: $VALUE’ where VALUE is a numerical value. </VARIABLE>
Here is the context and feedback we got for the variable:
<CONTEXT>
Here is a conversation:
<CONVERSATION><LM_SYSTEM_PROMPT> You will answer a reasoning question. Think step by step. The last
line of your response should be of the following format: ’Answer: $VALUE’ where VALUE is a numerical value.
</LM_SYSTEM_PROMPT>
<LM_INPUT> I have a carrot, an onion, a stalk of celery, three yams, a garlic, a head of broccoli, and a potato. How many
vegetables do I have? </LM_INPUT>
<LM_OUTPUT> To determine the number of vegetables you have, we need to count each individual vegetable item.
1. Carrot 2. Onion 3. Celery 4. Yams (3 yams) 5. Garlic 6. Broccoli 7. Potato
Counting all the individual vegetable items, we have a total of 7 vegetables.
Therefore, you have 7 vegetables.
Answer: 7 </LM_OUTPUT>
</CONVERSATION>
This conversation is potentially part of a larger system. The output is used as response from the language model
Here is the feedback we got for system prompt to the language model in the conversation:
<FEEDBACK>To improve the structured system prompt for the language model, consider the following feedback:
1. **Clarify the Calculation Process**: - **Current Issue**: The prompt does not explicitly instruct the model to outline each
step of its calculation process. - **Improvement**: Add a directive that requires the model to explicitly state each quantity
and the sum. For example, "Clearly outline each step of your calculation process, stating each quantity and the sum."
2. **Verify Intermediate Steps**: - **Current Issue**: The prompt does not instruct the model to verify each intermediate step.
- **Improvement**: Include a directive for the model to verify each step against known correct values. For example, "Verify
each intermediate step in your calculation to ensure accuracy."
</FEEDBACK>
</CONTEXT>
Improve the variable (system prompt to the language model) using the feedback provided in <FEEDBACK> tags. Send the
improved variable in the following format:
<IMPROVED_VARIABLE>the improved variable</IMPROVED_VARIABLE>
Send ONLY the improved variable between the <IMPROVED_VARIABLE> tags, and nothing else.
"""

# User-message template. Placeholders: role, variable, lm_system_prompt,
# lm_input, lm_output, feedback.
# {role} is used everywhere the variable's role is named (the <ROLE> tag, the
# feedback line and the final instruction) so the prompt stays self-consistent
# for any role. For role == "system prompt to the language model" the rendered
# text is identical to the previously hard-coded wording.
prompt = """
Here is the role of the variable you will improve: <ROLE> {role} </ROLE>.
The variable is the text within the following span: <VARIABLE> {variable} </VARIABLE>
Here is the context and feedback we got for the variable:
<CONTEXT>
Here is a conversation:
<CONVERSATION>
<LM_SYSTEM_PROMPT>
{lm_system_prompt}
</LM_SYSTEM_PROMPT>
<LM_INPUT> 
{lm_input}
</LM_INPUT>
<LM_OUTPUT>
{lm_output}
</LM_OUTPUT>
</CONVERSATION>
This conversation is potentially part of a larger system. The output is used as response from the language model
Here is the feedback we got for {role} in the conversation:
<FEEDBACK>
{feedback}
</FEEDBACK>
</CONTEXT>
Improve the variable ({role}) using the feedback provided in <FEEDBACK> tags. Send the
improved variable in the following format:
<IMPROVED_VARIABLE>the improved variable</IMPROVED_VARIABLE>
Send ONLY the improved variable between the <IMPROVED_VARIABLE> tags, and nothing else.
"""

In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# See also
# https://www.reddit.com/r/ChatGPT/comments/11twe7z/prompt_to_summarize/
# https://www.reddit.com/r/ChatGPT/comments/13na8yp/highly_effective_prompt_for_summarizing_gpt4/

from pprint import pprint

# Build the two-message chat template: the TextGrad system prompt plus the
# user-message template string `prompt`.
# The result gets its own name instead of rebinding `prompt`, so `prompt`
# stays a plain string and this cell can be re-run without passing an
# already-built ChatPromptTemplate back in as the human message.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", tg_system_prompt),
        ("human", prompt),
    ]
)

# temperature=0 is requested for the optimizer model.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)

# Chain: fill the chat template from a dict, then send it to the model.
runnable = chat_prompt | llm

# One value per placeholder of the user-message template. Here the variable
# being improved is the same text as the system prompt of the recorded
# conversation.
example = {
    "role": "system prompt to the language model",
    "variable": "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: ’Answer: $VALUE’ where VALUE is a numerical value.",
    "lm_system_prompt": "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: ’Answer: $VALUE’ where VALUE is a numerical value.",
    "lm_input": "I have a carrot, an onion, a stalk of celery, three yams, a garlic, a head of broccoli, and a potato. How many vegetables do I have?",
    "lm_output": "To determine the number of vegetables you have, we need to count each individual vegetable item. 1. Carrot 2. Onion 3. Celery 4. Yams (3 yams) 5. Garlic 6. Broccoli 7. Potato Counting all the individual vegetable items, we have a total of 7 vegetables. Therefore, you have 7 vegetables. Answer: 7",
    "feedback": "To improve the structured system prompt for the language model, consider the following feedback: 1. **Clarify the Calculation Process**: - **Current Issue**: The prompt does not explicitly instruct the model to outline each step of its calculation process. - **Improvement**: Add a directive that requires the model to explicitly state each quantity and the sum. For example, 'Clearly outline each step of your calculation process, stating each quantity and the sum.' 2. **Verify Intermediate Steps**: - **Current Issue**: The prompt does not instruct the model to verify each intermediate step. - **Improvement**: Include a directive for the model to verify each step against known correct values. For example, 'Verify each intermediate step in your calculation to ensure accuracy.'",
}

# Run one optimization step; `retval` is the model's reply message.
retval = runnable.invoke(example)

# Show the full message object first, then only its text content.
pprint(retval)
pprint(retval.content)

AIMessage(content='<IMPROVED_VARIABLE>Clearly outline each step of your calculation process, stating each quantity and the sum. Verify each intermediate step in your calculation to ensure accuracy.</IMPROVED_VARIABLE>', response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 807, 'total_tokens': 844}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-e9c9549c-4c37-40a9-a745-a27389e11b2a-0', usage_metadata={'input_tokens': 807, 'output_tokens': 37, 'total_tokens': 844})
('<IMPROVED_VARIABLE>Clearly outline each step of your calculation process, '
 'stating each quantity and the sum. Verify each intermediate step in your '
 'calculation to ensure accuracy.</IMPROVED_VARIABLE>')


In [6]:
def extract_improved_variable(content):
    """Return the text enclosed by the <IMPROVED_VARIABLE> tags in ``content``.

    The text between the first opening tag and the first closing tag that
    follows it is returned unchanged (surrounding whitespace is not stripped).

    Missing tags are tolerated instead of silently corrupting the result:
      * no opening tag -> extraction starts at the beginning of ``content``
      * no closing tag -> extraction runs to the end of ``content``

    Args:
        content: Raw text of the model reply (a ``str``).

    Returns:
        The extracted text as a ``str``.
    """
    open_tag = "<IMPROVED_VARIABLE>"
    close_tag = "</IMPROVED_VARIABLE>"

    # str.find returns -1 when the tag is absent; adding len(open_tag) to that
    # would start the slice at an arbitrary offset, so handle it explicitly.
    open_at = content.find(open_tag)
    start = 0 if open_at == -1 else open_at + len(open_tag)

    # Search only after the opening tag. An end index of -1 would otherwise
    # drop the last character of the reply.
    end = content.find(close_tag, start)
    if end == -1:
        end = len(content)

    return content[start:end]

In [7]:
# Print the variable before optimization, then the rewrite taken from the
# model reply, on two labelled lines.
print(f"Variable: {example['variable']}")
print(f"Variable': {extract_improved_variable(retval.content)}")

Variable: You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: ’Answer: $VALUE’ where VALUE is a numerical value.
Variable': Clearly outline each step of your calculation process, stating each quantity and the sum. Verify each intermediate step in your calculation to ensure accuracy.


In [9]:
# Reddit prompt tuning example
#
# Run the same optimization step on a summarization system prompt: the
# variable being improved is the system prompt that produced `lm_output` for
# the thread in `lm_input`, and `feedback` says what was wrong with that
# summary.

example_system_prompt = """
You're an expert at reading and summarizing public online discussions.

Please read and summarize the following Reddit thread.
The summary should cover all the key points and main ideas presented in the original text, while also condensing the information into a concise and easy-to-understand format. 
Please ensure that the summary includes relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. 
The length of the summary should be appropriate for the length and complexity of the original text, providing a clear and accurate overview without omitting any important information.
"""

# NOTE: the URL in the last comment of the thread contains literal
# backslash-underscore pairs. They are written as "\\_" because "\_" is not a
# valid escape sequence in a regular string literal; the runtime text is the
# same.
example = {
    "role": "system prompt to the language model",
    "variable": example_system_prompt,
    "lm_system_prompt": example_system_prompt,
    "lm_input": """
Reddit thread: 
Besides Using 98point6,any other recommend apps for virtual doctors ? by assmasterdeluxe69_0 on 2021-06-02 [+2 votes]:


Comment by Originally_Hendrix on 2021-06-03 [+6 votes]:
You could just download Google voice. Get an alternate phone number. Make another account with 98point6 and get another $30 subscription. I did that and it worked

Comment by deloused025 on 2021-06-02 [+1 votes]:
Oooh I’ll play doctor with you u/assmasterdeluxe69_0 !!!!!!!!!

Comment by None on 2021-06-02 [+1 votes]:
[deleted]

Comment by moneyman24559 on 2021-06-03 [+1 votes]:
Cvs minute clinic

Comment by brewske223 on 2021-06-03 [+1 votes]:
https://www.reddit.com/r/povertyfinance/comments/elryxp/ive\\_compiled\\_all\\_the\\_lowcost\\_medical\\_resources/.compact
""",
    "lm_output": """
The Reddit thread discusses alternative apps for virtual doctors besides 98point6. One user suggests using Google Voice to create multiple accounts with 98point6 for additional subscriptions. Another user mentions CVS Minute Clinic as an option. Additionally, a link to a Reddit post compiling low-cost medical resources is shared. Overall, the thread provides various suggestions for virtual doctor apps and resources for affordable medical care.
""",
    "feedback": """
- The comment about Google Voice is not directly related to the original question and should be omitted.
- The summary should be more concise and focus on the main recommendations provided in the thread
""",
}

# One optimization step on the summarization prompt.
retval = runnable.invoke(example)

# Original system prompt, followed by the rewrite taken from the model reply.
print("Variable :", example["variable"])
print("Variable':", extract_improved_variable(retval.content))

Variable : 
You're an expert at reading and summarizing public online discussions.

Please read and summarize the following Reddit thread.
The summary should cover all the key points and main ideas presented in the original text, while also condensing the information into a concise and easy-to-understand format. 
Please ensure that the summary includes relevant details and examples that support the main ideas, while avoiding any unnecessary information or repetition. 
The length of the summary should be appropriate for the length and complexity of the original text, providing a clear and accurate overview without omitting any important information.

Variable': Summarize the Reddit thread discussing alternative apps for virtual doctors besides 98point6, focusing on main recommendations and suggestions provided.
