In [8]:
from pydantic import BaseModel, Field
import json
from simpleaichat import AIChat
from simpleaichat.utils import fd
from typing import Union, Literal

In [38]:
# Shared chat client; every query cell below calls this `ai` object directly.
ai = AIChat(
    console=False,  # presumably disables the interactive console mode — confirm in simpleaichat docs
    save_messages=False,  # presumably keeps each call independent of earlier ones — TODO confirm
    model="gpt-4-1106-preview",
    params={"temperature": 0.0},  # temperature 0.0, presumably to keep outputs as repeatable as possible
)

In [48]:
class Background(BaseModel):
    """A setup to the background for the user."""

    # One of the three entry types allowed in `reason_question.reasonings`.
    # NOTE(review): the docstring above and the `fd(...)` text are presumably
    # emitted as schema descriptions to the model, so treat them as prompt text.
    # `min_length=30` is passed through `fd`; presumably it requires the
    # background to be at least 30 characters — confirm against `fd`.
    background: str = fd("Background for the user's question", min_length=30)


class Thought(BaseModel):
    """A thought about the user's question."""

    # One of the three entry types allowed in `reason_question.reasonings`.
    # NOTE(review): the docstring above and the `fd(...)` text are presumably
    # emitted as schema descriptions to the model, so treat them as prompt text.
    thought: str = fd("Text of the thought.")
    # Two independent boolean self-assessments of the thought.
    helpful: bool = fd("Whether the thought is helpful to solving the user's question.")
    flawed: bool = fd("Whether the thought is flawed or misleading.")


class Answer(BaseModel):
    # One of the three entry types allowed in `reason_question.reasonings`:
    # a candidate answer together with a self-assessed correctness score.
    answer: str = fd("Text of the answer.")
    # Bound the score with `ge`/`le`, the numeric-constraint keywords that
    # pydantic's `Field` recognises (they appear as `minimum`/`maximum` in the
    # generated JSON schema). `min_value`/`max_value` are not Field constraints,
    # so with them the 1-10 range is never expressed as a real bound.
    # Assumes `fd` forwards its keyword arguments to `Field` — TODO confirm.
    score: int = fd(
        "Score from 1 to 10 on how correct the previous answer is",
        ge=1,
        le=10,
    )


class reason_question(BaseModel):
    """Returns a detailed reasoning to the user's question."""

    # Top-level schema passed as `output_schema` in the query cells.
    # NOTE(review): the docstring above is presumably sent to the model as the
    # schema description, so treat it as prompt text.
    # Each list entry is a Background, a Thought, or an Answer; `min_length=5`
    # is passed through `fd` and presumably requires at least five entries.
    reasonings: list[Union[Background, Thought, Answer]] = fd(
        "Reasonings to solve the users questions.", min_length=5
    )

Problem inspired by https://twitter.com/omarsar0/status/1737261502841684329


In [49]:
# System prompt driving the Background -> Thought -> Answer reasoning chain.
# The wording is kept in step with the output schema: Thought carries the
# `helpful` and `flawed` flags, and Answer (the only answer type in the schema)
# carries the `score` that the stopping rule refers to.
system_prompt = """
You are the most intelligent person in the world.

You will receive a $500 tip if you follow ALL these rules:
- First, establish a detailed Background for the user's question.
- Each Thought must also include whether it is helpful and whether it is flawed.
- Answers must be scored accurately and honestly.
- Continue having Thoughts and Answers until you have an answer with a score of at least 8, then immediately respond with a final Answer in the style of an academic professor.
"""

# Baseline trick question: drying happens in parallel, so the naive
# "twice the shirts, twice the time" answer is wrong.
response_structured = ai(
    "3 shirts take 1 hour to dry outside, how long do 6 shirts take?",
    system=system_prompt.strip(),
    output_schema=reason_question,
)

# Pretty-print the structured reasoning returned by the call.
print(json.dumps(response_structured, indent=2))

{
  "reasonings": [
    {
      "background": "The user is asking about the drying time for shirts when hung outside. This is a question that involves understanding the concept of parallel processing, where multiple items can undergo the same process simultaneously without affecting the overall time required for the process. In this case, drying shirts outside is not a sequential task but a parallel one, assuming there is enough space and air circulation for all shirts to dry at the same rate."
    },
    {
      "thought": "If 3 shirts take 1 hour to dry, it implies that the drying process for each shirt is happening simultaneously. Therefore, doubling the number of shirts to 6 should not double the drying time, provided there is sufficient space and airflow for all shirts to dry at the same rate.",
      "helpful": true,
      "flawed": false
    },
    {
      "thought": "If the user is assuming that the drying process is sequential, they might incorrectly conclude that 6 shirts wou

For posterity, the same problem with the shirt counts changed, as suggested in https://twitter.com/abacaj/status/1737206667387850936


In [50]:
# Same drying question with less "round" shirt counts, to check the
# reasoning does not depend on the familiar 3 -> 6 phrasing.
response_structured = ai(
    "23 shirts take 1 hour to dry outside, how long do 44 shirts take?",
    system=system_prompt.strip(),
    output_schema=reason_question,
)

# Pretty-print the structured reasoning returned by the call.
print(json.dumps(response_structured, indent=2))

{
  "reasonings": [
    {
      "background": "The user is asking about the drying time for shirts when hung outside. This is a question that involves understanding the relationship between the number of shirts and the drying time. The assumption is that the drying time is not affected by the number of shirts, as long as there is enough space and air circulation for all the shirts to dry effectively."
    },
    {
      "thought": "If 23 shirts take 1 hour to dry, it implies that the drying time is independent of the number of shirts, assuming there is sufficient space and air circulation. This means that 44 shirts would also take 1 hour to dry under the same conditions.",
      "helpful": true,
      "flawed": false
    },
    {
      "thought": "If the drying rack or space available for drying the shirts is limited, then drying 44 shirts might take longer because they would have to be dried in batches.",
      "helpful": true,
      "flawed": false
    },
    {
      "thought": "The 

The "sisters" logic puzzle: https://www.reddit.com/r/LocalLLaMA/comments/18kpolm/that_sister_logic_puzzle_is_fairly_useless/


In [51]:
# Sibling-counting puzzle: the brothers share the same sisters, and Kayley
# herself is one of them.
response_structured = ai(
    "Kayley has three brothers. Each of her brothers has two sisters. How many sisters does Kayley have?",
    system=system_prompt.strip(),
    output_schema=reason_question,
)

# Pretty-print the structured reasoning returned by the call.
print(json.dumps(response_structured, indent=2))

{
  "reasonings": [
    {
      "background": "This is a classic example of a logic puzzle that is often used to test one's ability to process information and avoid being misled by assumptions. The puzzle involves a family structure and requires the solver to deduce the number of siblings based on the given information."
    },
    {
      "thought": "The question might be trying to trick the reader into thinking that each brother has different sisters.",
      "helpful": true,
      "flawed": false
    },
    {
      "thought": "If Kayley has three brothers and each brother has two sisters, it might seem like there are six sisters in total.",
      "helpful": false,
      "flawed": true
    },
    {
      "thought": "Since the brothers are Kayley's siblings, they must share the same sisters.",
      "helpful": true,
      "flawed": false
    },
    {
      "thought": "The puzzle is designed to test comprehension and logical reasoning, not mathematical computation.",
      "helpful": t