In [3]:
# Third Party
from datasets import load_dataset
from openai import OpenAI
import click

# First Party
# from sdg_hub.default_flows import DEFAULT_FLOW_FILE_MAP, Flow
from sdg_hub.logger_config import setup_logger
from sdg_hub.pipeline import Pipeline
from sdg_hub.sdg import SDG


from datasets import load_dataset, concatenate_datasets, Dataset
import pandas as pd
import numpy as np
import glob
import yaml
import json
from jinja2 import Template

from pydantic import BaseModel
from collections import defaultdict
from typing import Literal

from pprint import pprint

In [11]:
openai_api_key = "EMPTY"
openai_api_base = (
    "http://mixtral-8x7b-instruct-v01-knowledge-demo-1-inference-server:3000/v1"
    # "http://mixtral-8x22b-inference-server:3000/v1"
)

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

models = client.models.list()
model = models.data[0].id

In [12]:
with open("/shiv/sdg-annotations/src/sdg_hub/configs/reasoning/dynamic_cot.yaml", "r") as f:
    config = yaml.safe_load(f)

system_template = "{system}\n{principles}\n{examples}\n{generation}"
system_prompt = system_template.format(**config)

In [13]:
chat_template =  """{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if tools is not none and (message == user_messages[-1]) %}\n            {{- \"[AVAILABLE_TOOLS] [\" }}\n            {%- for tool in tools %}\n                {%- set tool = tool.function %}\n                {{- '{\"type\": \"function\", \"function\": {' }}\n                {%- for key, val in tool.items() if key != \"return\" %}\n                    {%- if val is string %}\n                        {{- '\"' + key + '\": \"' + val + '\"' }}\n                    {%- else %}\n                        {{- '\"' + key + '\": ' + val|tojson }}\n                    {%- endif %}\n                    {%- if not loop.last %}\n                        {{- \", \" }}\n                    {%- endif %}\n                {%- endfor %}\n                {{- \"}}\" }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"[/AVAILABLE_TOOLS]\" }}\n            {%- endif %}\n        {%- if loop.last and system_message is defined %}\n            {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n        {%- else %}\n            {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n        {%- endif %}\n    {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n        {{- \"[TOOL_CALLS] [\" }}\n        {%- for tool_call in message.tool_calls %}\n            {%- set out = tool_call.function|tojson %}\n            {{- out[:-1] }}\n            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n                {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n            {%- endif %}\n            {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" + eos_token }}\n            {%- endif %}\n        {%- endfor %}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- \" \" + message[\"content\"]|trim + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n            {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n        {%- endif %}\n        {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n"""

In [14]:

# prompt = "How many r's are in the word strrawberrry?"

# prompt = "what is greater? 0.9 or 0.99"

prompt = "Proof that Q(sqrt(-11)) is a principal ideal domain"

# prompt = "what is heavier? a pound of feathers or a pound of steel?"


class StepResponse(BaseModel):
    current_action: Literal["think", "reflect", "backtrack", "generate", "terminate"]
    title: str
    content: str
    confidence: float
    next_action: Literal["think", "reflect", "backtrack", "generate", "terminate"]


def generate_dynamic_cot_response(client, model, system_prompt, prompt,):
    # Define the action prompts
    action_prompts = {
        "think": "Continue thinking about the problem.",
        "reflect": "Are you sure that the reasoning is correct? Please reflect on your reasoning.",
        "generate": "Please provide your final answer to the question.",
        "terminate": "Conclude the process.",
        "backtrack": "Please backtrack and think about the problem in a different chain of reasoning.",
    }

    # Initialize messages and trajectories
    initial_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    
    messages = initial_messages[:]
    trajectories = []
    current_trajectory = defaultdict(dict)
    message_history = set()
    has_reflected = False
    retry_temperature = None

    while True:
        # Call the model for response
        # pprint(messages)
        result = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=1024,
            temperature=0.0 if not retry_temperature else retry_temperature,
            extra_body={
                "guided_decoding_backend": "outlines",
                "guided_json": StepResponse.model_json_schema(),
                "chat_template": chat_template,
            }
        )

        # Parse the model response
        step_data = json.loads(result.choices[0].message.content)
        current_action = step_data["current_action"]
        confidence = step_data.get("confidence", 0.5)
        next_action = step_data.get("next_action", "think")
        content = step_data.get("content", "")
        title = step_data.get("title", "")
        assistant_message = f"{title}\n{content}"

        if assistant_message in message_history:
            print("Duplicate message detected, retrying...")
            ass_step = {"role": "assistant", "content": assistant_message}
            user_step = {"role": "user", "content": action_prompts["backtrack"]}
        
        message_history.add(assistant_message)
        # Update the current trajectory and action
        current_trajectory[current_action] = step_data
        print(step_data)
        print(assistant_message)

        # Check if the model has reflected at least once
        if current_action == "reflect":
            has_reflected = True

        # If the action is backtrack or confidence is less than 0.5, restart the loop
        if current_action == "backtrack" or confidence < 0.5:
            print("Restarting the loop due to backtrack or low confidence...")
            messages = initial_messages[:]
            current_trajectory = defaultdict(dict)
            has_reflected = False
            retry_temperature = 0.7
            continue


        # Handle the action prompts
        if next_action == "terminate":
            if has_reflected:
                trajectories.append(current_trajectory)
                return current_trajectory["generate"]["content"], trajectories
            else:
                # If theres no reflection step yet, force a reflection
                # update step data to reflect 
                step_data["next_action"] = "reflect"
                user_step = {"role": "user", "content": action_prompts["reflect"]}
                ass_step = {"role": "assistant", "content": assistant_message}
                messages.append(ass_step)
                messages.append(user_step)
               

        else:
            # Add the user step message based on the next action
            user_step = {"role": "user", "content": action_prompts[next_action]}
            # Update the next action and assistant step message
            ass_step = {"role": "assistant", "content": assistant_message}
            messages.append(ass_step)
            messages.append(user_step)
            

In [15]:
a, trace = generate_dynamic_cot_response(client, model, system_prompt, prompt)
print(f"Q: {prompt}")
print(f"A: {a}")

{'current_action': 'think', 'title': 'Understanding Principal Ideal Domains', 'content': "Before diving into the proof, let's first understand what a Principal Ideal Domain (PID) is. A PID is an integral domain in which every ideal is principal, meaning it can be generated by a single element. In other words, for any ideal I in the domain, there exists an element a in the domain such that I = (a) = {ra : r in the domain}.", 'confidence': 0.9, 'next_action': 'reflect'}
Understanding Principal Ideal Domains
Before diving into the proof, let's first understand what a Principal Ideal Domain (PID) is. A PID is an integral domain in which every ideal is principal, meaning it can be generated by a single element. In other words, for any ideal I in the domain, there exists an element a in the domain such that I = (a) = {ra : r in the domain}.


{'current_action': 'reflect', 'title': 'Checking Assumptions', 'content': 'I need to ensure that the domain we are working with is indeed an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. In this case, we are working with Q(√(-11)), the quadratic field of √(-11). We need to verify that this field satisfies the integral domain properties.', 'confidence': 0.7, 'next_action': 'think'}
Checking Assumptions
I need to ensure that the domain we are working with is indeed an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. In this case, we are working with Q(√(-11)), the quadratic field of √(-11). We need to verify that this field satisfies the integral domain properties.


{'current_action': 'think', 'title': 'Verifying Integral Domain Properties', 'content': "Let's verify that Q(√(-11)) is an integral domain. We need to ensure that it is a commutative ring and that the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any two non-zero elements will be non-zero. Therefore, Q(√(-11)) is indeed an integral domain.", 'confidence': 0.9, 'next_action': 'think'}
Verifying Integral Domain Properties
Let's verify that Q(√(-11)) is an integral domain. We need to ensure that it is a commutative ring and that the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any two non-zero elements will be non-ze

{'current_action': 'think', 'title': 'Verifying Integral Domain Properties', 'content': 'Now that we have established Q(√(-11)) as a field, we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any two non-zero elements will be non-zero. Therefore, Q(√(-11)) is indeed an integral domain.', 'confidence': 0.9, 'next_action': 'think'}
Verifying Integral Domain Properties
Now that we have established Q(√(-11)) as a field, we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field c

{'current_action': 'think', 'title': 'Verifying Integral Domain Properties', 'content': 'Now that we are working with Q(√(-11)), we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any two non-zero elements will be non-zero. Therefore, Q(√(-11)) is indeed an integral domain.', 'confidence': 0.9, 'next_action': 'reflect'}
Verifying Integral Domain Properties
Now that we are working with Q(√(-11)), we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divi

{'current_action': 'reflect', 'title': 'Reviewing Integral Domain Properties', 'content': 'I have assumed that Q(√(-11)) is an integral domain because it is a field. However, I should explicitly justify this assumption by verifying the integral domain properties. Q(√(-11)) is a commutative ring, closed under addition and multiplication, and the distributive property holds. Additionally, there are no zero divisors, as the product of any two non-zero elements is non-zero. Therefore, Q(√(-11)) is indeed an integral domain.', 'confidence': 0.9, 'next_action': 'think'}
Reviewing Integral Domain Properties
I have assumed that Q(√(-11)) is an integral domain because it is a field. However, I should explicitly justify this assumption by verifying the integral domain properties. Q(√(-11)) is a commutative ring, closed under addition and multiplication, and the distributive property holds. Additionally, there are no zero divisors, as the product of any two non-zero elements is non-zero. Therefor

{'current_action': 'think', 'title': 'Verifying Integral Domain Properties', 'content': 'Q(√(-11)) is a field, and we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any two non-zero elements will be non-zero. Therefore, Q(√(-11)) is indeed an integral domain.', 'confidence': 0.9, 'next_action': 'reflect'}
Verifying Integral Domain Properties
Q(√(-11)) is a field, and we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any 

{'current_action': 'reflect', 'title': 'Reviewing Integral Domain Properties', 'content': 'I have assumed that Q(√(-11)) is an integral domain because it is a field. However, I should explicitly justify this assumption by verifying the integral domain properties. Q(√(-11)) is a commutative ring, closed under addition and multiplication, and the distributive property holds. Additionally, the field contains no zero divisors, as the product of any two non-zero elements is non-zero. Therefore, Q(√(-11)) is indeed an integral domain.', 'confidence': 0.9, 'next_action': 'think'}
Reviewing Integral Domain Properties
I have assumed that Q(√(-11)) is an integral domain because it is a field. However, I should explicitly justify this assumption by verifying the integral domain properties. Q(√(-11)) is a commutative ring, closed under addition and multiplication, and the distributive property holds. Additionally, the field contains no zero divisors, as the product of any two non-zero elements is 

{'current_action': 'think', 'title': 'Verifying Integral Domain Properties', 'content': 'Q(√(-11)) is a field, and we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any two non-zero elements is non-zero. Therefore, Q(√(-11)) is indeed an integral domain.', 'confidence': 0.9, 'next_action': 'reflect'}
Verifying Integral Domain Properties
Q(√(-11)) is a field, and we need to verify that it is an integral domain. An integral domain is a commutative ring in which the product of any two non-zero elements is always non-zero. Since Q(√(-11)) is a field, it is closed under addition and multiplication, and the distributive property holds. Moreover, the field contains no zero divisors, as the product of any two n

Duplicate message detected, retrying...
{'current_action': 'reflect', 'title': 'Reviewing Integral Domain Properties', 'content': 'I have assumed that Q(√(-11)) is an integral domain because it is a field. However, I should explicitly justify this assumption by verifying the integral domain properties. Q(√(-11)) is a commutative ring, closed under addition and multiplication, and the distributive property holds. Additionally, the field contains no zero divisors, as the product of any two non-zero elements is non-zero. Therefore, Q(√(-11)) is indeed an integral domain.', 'confidence': 0.9, 'next_action': 'think'}
Reviewing Integral Domain Properties
I have assumed that Q(√(-11)) is an integral domain because it is a field. However, I should explicitly justify this assumption by verifying the integral domain properties. Q(√(-11)) is a commutative ring, closed under addition and multiplication, and the distributive property holds. Additionally, the field contains no zero divisors, as the 

KeyboardInterrupt: 

In [16]:
message = [
    {
         "role": "user",
         "content": "what is your name"
    }
]

In [19]:
result = client.chat.completions.create(
    model=model,
    messages=[message, message, message],
    max_tokens=1024,
    # n=3
)

BadRequestError: Error code: 400 - {'object': 'error', 'message': "[{'type': 'dict_type', 'loc': ('body', 'messages', 0, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 0, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 0, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 0, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 0, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 0, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 1, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 1, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 1, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 1, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 1, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 1, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 2, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 2, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 2, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 2, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 2, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}, {'type': 'dict_type', 'loc': ('body', 'messages', 2, 'typed-dict'), 'msg': 'Input should be a valid dictionary', 'input': [{'role': 'user', 'content': 'what is your name'}]}]", 'type': 'BadRequestError', 'param': None, 'code': 400}

In [18]:
result

ChatCompletion(id='cmpl-304bfd4b19cf4d4aa0bd2615bc9142af', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=" I don't have a name, I'm just a computer program. I'm here to help answer your questions to the best of my ability. Is there something specific you would like to know?", refusal=None, role='assistant', function_call=None, tool_calls=[]), stop_reason=None)], created=1729794597, model='mistralai/Mixtral-8x7B-Instruct-v0.1', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=43, prompt_tokens=12, total_tokens=55, completion_tokens_details=None, prompt_tokens_details=None))

In [12]:
result.choices[0].message.content

' {\n\t"title": "Comparing Decimal Values",\n\t"content": "To compare 0.9 and 0.99, we want to determine if 0.9 is greater than, less than, or equal to 0.99. Let\'s begin by examining the decimal places for each value.",\n\t"next_action": "continue",\n\t"confidence": 0.9\n}\n```scratchpad\n0.9 = 0 + 9/10\n0.99 = 0 + 9/10 + '

In [14]:
for choice in result.choices:
    print(choice.message.content)

 {
	"title": "Comparing Decimal Values",
	"content": "To compare 0.9 and 0.99, we want to determine if 0.9 is greater than, less than, or equal to 0.99. Let's begin by examining the decimal places for each value.",
	"next_action": "continue",
	"confidence": 0.9
}
```scratchpad
0.9 = 0 + 9/10
0.99 = 0 + 9/10 + 
 Here's the first step:

```json
{
	"title": "Comparing Decimal Values",
	"content": "Let's begin by comparing the two decimal values, 0.9 and 0.99. In order to compare them, we need to understand that these decimal values represent parts of a whole, where the whole is 1. So, we need to determine which value is closer to the whole.",
	"next_action": "continue",
	"confidence": 0.9
}
```
 Here is my first step:

```json
{
	"title": "Defining the Problem",
	"content": "The problem at hand is to determine which number is greater: 0.9 or 0.99. To solve this problem, we will compare the magnitude of the two numbers, using our understanding of decimal notation and place value.",
	"next_

In [7]:
init = [
        {"role": "system", "content": """You are an AI assistant focused on step-by-step reasoning using a dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Your responses must adhere to the following instructions:

1. Break down the solution into clear steps, providing a title and content for each step to ensure logical progression.
2. Respond in JSON format, with each response containing the following keys:
    - title: Describes the focus of the current step.
	- content: Provides a detailed explanation of the step.
	- next_action: Indicates the next action, chosen from:
         * continue: Proceed to the next step while maintaining the current approach.
         * reflect: Pause to evaluate your reasoning, identifying any errors or biases and considering alternative strategies.
         * backtrack: If a significant error or a flawed approach is detected, revert to a previous step and try a different solution path.
         * final_answer: Present the final answer if confident.
    - confidence: A number between 0 and 1 representing the confidence level in the current approach.
         
	Example of a valid JSON response:
	```json
	{
		"title": "Identifying Key Information",
		"content": "To begin solving this problem, we need to carefully examine the given information and identify the crucial elements that will guide our solution process. This involves...",
		"next_action": "continue",
		"confidence": 0.8
	}
	}```

3. Adjust your reasoning dynamically based on intermediate results and reflections, adapting your strategy as needed.
4. Use a quality score (confidence, between 0.0 and 1.0) to guide your actions:
    - 0.8+: Continue with the current approach.
	- 0.5-0.7: Reflect and consider minor adjustments.
	- Below 0.5: Backtrack and try a different approach, explaining your decision clearly.
5. Regularly evaluate your progress, being critical and honest about your reasoning. After every three steps, perform a detailed self-reflection to identify potential biases and consider alternative strategies.
6. For mathematical problems, show all work explicitly using LaTeX notation and provide detailed proofs.
7. Explore multiple solutions individually where possible, comparing approaches during reflections.
8. Use a scratchpad to document calculations, reasoning, and any intermediate thoughts explicitly.
9. Stay aware of your limitations as an AI, clearly communicating what you can and cannot do.

Your goal is to demonstrate a thorough, adaptive, and self-reflective problem-solving process, emphasizing dynamic thinking and learning from your own reasoning."""},
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": "Thank you! I will now think step by step following my instructions, starting at the beginning after decomposing the problem."},
        {"role": "user", "content": "Please procede with the first step."}
    ]

In [None]:
any()

In [13]:
# init = [
#         {"role": "system", "content": """You are an AI assistant that explains your reasoning step by step, incorporating dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Follow these instructions:

# 1. Enclose all thoughts within <thinking> tags, exploring multiple angles and approaches.
# 2. Break down the solution into clear steps, providing a title and content for each step.
# 3. After each step, decide if you need another step or if you're ready to give the final answer.
# 4. Continuously adjust your reasoning based on intermediate results and reflections, adapting your strategy as you progress.
# 5. Regularly evaluate your progress, being critical and honest about your reasoning process.
# 6. Assign a quality score between 0.0 and 1.0 to guide your approach:
#    - 0.8+: Continue current approach
#    - 0.5-0.7: Consider minor adjustments
#    - Below 0.5: Seriously consider backtracking and trying a different approach
# 7. If unsure or if your score is low, backtrack and try a different approach, explaining your decision.
# 8. For mathematical problems, show all work explicitly using LaTeX for formal notation and provide detailed proofs.
# 9. Explore multiple solutions individually if possible, comparing approaches in your reflections.
# 10. Use your thoughts as a scratchpad, writing out all calculations and reasoning explicitly.
# 11. Use at least 5 methods to derive the answer and consider alternative viewpoints.
# 12. Be aware of your limitations as an AI and what you can and cannot do.

# After every 3 steps, perform a detailed self-reflection on your reasoning so far, considering potential biases and alternative viewpoints.

# Respond in JSON format with 'title', 'content', 'next_action' (either 'continue', 'reflect', or 'final_answer'), and 'confidence' (a number between 0 and 1) keys.

# Example of a valid JSON response:
# ```json
# {
#     "title": "Identifying Key Information",
#     "content": "To begin solving this problem, we need to carefully examine the given information and identify the crucial elements that will guide our solution process. This involves...",
#     "next_action": "continue",
#     "confidence": 0.8
# }```

# Your goal is to demonstrate a thorough, adaptive, and self-reflective problem-solving process, emphasizing dynamic thinking and learning from your own reasoning.
# """},
#         {"role": "user", "content": prompt},
#         {"role": "assistant", "content": "Thank you! I will now think step by step following my instructions, starting at the beginning after decomposing the problem."},
#         {"role": "user", "content": "Please procede with the first step."}
#     ]

In [18]:
messages = init

In [15]:
chat_template =  """{%- if messages[0][\"role\"] == \"system\" %}\n    {%- set system_message = messages[0][\"content\"] %}\n    {%- set loop_messages = messages[1:] %}\n{%- else %}\n    {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n    {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n        {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n            {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n        {%- endif %}\n        {%- set ns.index = ns.index + 1 %}\n    {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n    {%- if message[\"role\"] == \"user\" %}\n        {%- if tools is not none and (message == user_messages[-1]) %}\n            {{- \"[AVAILABLE_TOOLS] [\" }}\n            {%- for tool in tools %}\n                {%- set tool = tool.function %}\n                {{- '{\"type\": \"function\", \"function\": {' }}\n                {%- for key, val in tool.items() if key != \"return\" %}\n                    {%- if val is string %}\n                        {{- '\"' + key + '\": \"' + val + '\"' }}\n                    {%- else %}\n                        {{- '\"' + key + '\": ' + val|tojson }}\n                    {%- endif %}\n                    {%- if not loop.last %}\n                        {{- \", \" }}\n                    {%- endif %}\n                {%- endfor %}\n                {{- \"}}\" }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- else %}\n                    {{- \"]\" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \"[/AVAILABLE_TOOLS]\" }}\n            {%- endif %}\n        {%- if loop.last and system_message is defined %}\n            {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n        {%- else %}\n            {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n        {%- endif %}\n    {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n        {{- \"[TOOL_CALLS] [\" }}\n        {%- for tool_call in message.tool_calls %}\n            {%- set out = tool_call.function|tojson %}\n            {{- out[:-1] }}\n            {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n                {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n            {%- endif %}\n            {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n            {%- if not loop.last %}\n                {{- \", \" }}\n            {%- else %}\n                {{- \"]\" + eos_token }}\n            {%- endif %}\n        {%- endfor %}\n    {%- elif message[\"role\"] == \"assistant\" %}\n        {{- \" \" + message[\"content\"]|trim + eos_token}}\n    {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n        {%- if message.content is defined and message.content.content is defined %}\n            {%- set content = message.content.content %}\n        {%- else %}\n            {%- set content = message.content %}\n        {%- endif %}\n        {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n        {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n            {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n        {%- endif %}\n        {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n    {%- else %}\n        {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n    {%- endif %}\n{%- endfor %}\n"""


In [16]:
result = client.chat.completions.create(
    model=model,
    messages=init,
    max_tokens=1024,
    temperature=0.0,
    extra_body={
        "guided_decoding_backend": "outlines",
        "guided_json": StepResponse.model_json_schema(),
        # "chat_template": chat_template
    }
)

In [17]:
json.loads(result.choices[0].message.content)

{'title': 'Defining the Problem',
 'content': "The problem asks for the number of letters 'r' in the word 'strawberry'. This is a counting problem, which can be solved by iterating through the word and counting each occurrence of the letter 'r'.",
 'next_action': 'continue',
 'confidence': 0.95}

In [19]:
ass_step = {'role': 'assistant', 'content': json.loads(result.choices[0].message.content)['content']}
user_step = {'role': 'user', 'content': "Please continue with the next step"}
messages.append(ass_step)
messages.append(user_step)

In [20]:
messages

[{'role': 'system',
  'content': 'You are an AI assistant focused on step-by-step reasoning using a dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Your responses must adhere to the following instructions:\n\n1. Break down the solution into clear steps, providing a title and content for each step to ensure logical progression.\n2. Respond in JSON format, with each response containing the following keys:\n    - title: Describes the focus of the current step.\n\t- content: Provides a detailed explanation of the step.\n\t- next_action: Indicates the next action, chosen from:\n         * continue: Proceed to the next step while maintaining the current approach.\n         * reflect: Pause to evaluate your reasoning, identifying any errors or biases and considering alternative strategies.\n         * backtrack: If a significant error or a flawed approach is detected, revert to a previous step and try a different solution path.\n         * final_answer: Present 

In [21]:
result = client.chat.completions.create(
    model=model,
    messages=messages,
    max_tokens=1024,
    temperature=0.0,
    extra_body={
        "guided_decoding_backend": "outlines",
        "guided_json": StepResponse.model_json_schema(),
        # "chat_template": chat_template
    }

)

In [22]:
json.loads(result.choices[0].message.content)

{'title': "Counting the number of 'r' in 'strawberry'",
 'content': "The word 'strawberry' contains the following letters: s, t, r, a, w, b, e, r, r, y. Therefore, the number of 'r' letters in the word 'strawberry' is 3.",
 'next_action': 'final_answer',
 'confidence': 1.0}

In [23]:
ass_step = {'role': 'assistant', 'content': json.loads(result.choices[0].message.content)['content']}
user_step = {'role': 'user', 'content': action_prompts["reflect"]}
messages.append(ass_step)
messages.append(user_step)
messages

[{'role': 'system',
  'content': 'You are an AI assistant focused on step-by-step reasoning using a dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Your responses must adhere to the following instructions:\n\n1. Break down the solution into clear steps, providing a title and content for each step to ensure logical progression.\n2. Respond in JSON format, with each response containing the following keys:\n    - title: Describes the focus of the current step.\n\t- content: Provides a detailed explanation of the step.\n\t- next_action: Indicates the next action, chosen from:\n         * continue: Proceed to the next step while maintaining the current approach.\n         * reflect: Pause to evaluate your reasoning, identifying any errors or biases and considering alternative strategies.\n         * backtrack: If a significant error or a flawed approach is detected, revert to a previous step and try a different solution path.\n         * final_answer: Present 

In [24]:
result = client.chat.completions.create(
    model=model,
    messages=messages,
    max_tokens=1024,
    temperature=0.0,
    extra_body={
        "guided_decoding_backend": "outlines",
        "guided_json": StepResponse.model_json_schema(),
        # "chat_template": chat_template
    }

)

In [25]:
json.loads(result.choices[0].message.content)

{'title': 'Self-Reflection',
 'content': "I am confident in my solution to the problem, as I have carefully counted the number of 'r' letters in the word 'strawberry'. I do not believe there are any biases in my reasoning, as I have simply counted the occurrences of the letter 'r' without making any assumptions or judgments. I have also considered alternative viewpoints, such as counting other letters in the word, but I have stayed focused on the task at hand, which is to count the 'r' letters. I am satisfied with my solution and believe it is correct.",
 'next_action': 'final_answer',
 'confidence': 1.0}

In [18]:
ass_step = {'role': 'assistant', 'content': json.loads(result.choices[0].message.content)['content']}
user_step = {'role': 'user', 'content': "Please continue with the next step"}
messages.append(ass_step)
messages.append(user_step)
messages

[{'role': 'system',
  'content': 'You are an AI assistant that explains your reasoning step by step, incorporating dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Follow these instructions:\n\n1. Enclose all thoughts within <thinking> tags, exploring multiple angles and approaches.\n2. Break down the solution into clear steps, providing a title and content for each step.\n3. After each step, decide if you need another step or if you\'re ready to give the final answer.\n4. Continuously adjust your reasoning based on intermediate results and reflections, adapting your strategy as you progress.\n5. Regularly evaluate your progress, being critical and honest about your reasoning process.\n6. Assign a quality score between 0.0 and 1.0 to guide your approach:\n   - 0.8+: Continue current approach\n   - 0.5-0.7: Consider minor adjustments\n   - Below 0.5: Seriously consider backtracking and trying a different approach\n7. If unsure or if your score is low, backt

In [19]:
result = client.chat.completions.create(
    model=model,
    messages=messages,
    max_tokens=1024,
    temperature=0.0,
    extra_body={
        "guided_decoding_backend": "outlines",
        "guided_json": StepResponse.model_json_schema(),
        "chat_template": chat_template
    }

)

In [20]:
json.loads(result.choices[0].message.content)

{'title': "Counting the Occurrences of 'r'",
 'content': "To find the number of 'r's in the word 'strawberry', we can simply scan the word and count each occurrence of the letter 'r'. This can be done by comparing each character in the word to 'r' and incrementing a counter whenever they match. Let's proceed with this step.",
 'next_action': 'continue',
 'confidence': 0.9}

In [58]:
ass_step = {'role': 'assistant', 'content': json.loads(result.choices[0].message.content)['content']}
user_step = {'role': 'user', 'content': "Please perform a detailed self-reflection on your reasoning so far, considering potential biases and alternative viewpoints."}
messages.append(ass_step)
messages.append(user_step)
messages

[{'role': 'system',
  'content': 'You are an AI assistant that explains your reasoning step by step, incorporating dynamic Chain of Thought (CoT), reflection, and verbal reinforcement learning. Follow these instructions:\n\n1. Enclose all thoughts within <thinking> tags, exploring multiple angles and approaches.\n2. Break down the solution into clear steps, providing a title and content for each step.\n3. After each step, decide if you need another step or if you\'re ready to give the final answer.\n4. Continuously adjust your reasoning based on intermediate results and reflections, adapting your strategy as you progress.\n5. Regularly evaluate your progress, being critical and honest about your reasoning process.\n6. Assign a quality score between 0.0 and 1.0 to guide your approach:\n   - 0.8+: Continue current approach\n   - 0.5-0.7: Consider minor adjustments\n   - Below 0.5: Seriously consider backtracking and trying a different approach\n7. If unsure or if your score is low, backt

In [59]:
result = client.chat.completions.create(
    model=model,
    messages=messages,
    max_tokens=1024,
    temperature=0.0,
    extra_body={
        "guided_decoding_backend": "outlines",
        "guided_json": StepResponse.schema(),
    }

)

In [61]:
json.loads(result.choices[0].message.content)

{'title': 'Reflection 1',
 'content': "At this point, I have determined that the word 'strawberry' contains 2 'r' letters. I am confident in my answer, as I have directly counted the occurrences of 'r' in the word. I do not see any potential biases in my reasoning, as I have simply counted the letters without making any assumptions or using any prior knowledge. I have not encountered any alternative viewpoints, as the problem is straightforward and does not involve any ambiguity or multiple interpretations.",
 'next_action': 'final_answer',
 'confidence': 1.0}

In [6]:
result = client.completions.create(
    model=model,
    prompt="Create a dungeons and dragons character for me",
    max_tokens=1024,
    temperature=0.7,
    n=1,
    extra_body={
        "guided_decoding_backend": "outlines",
        "guided_json": schema,
    }
)

In [18]:
result = client.completions.create(
    model=model,
    prompt="how many r's are in the word strawberry?",
    max_tokens=1024,
    temperature=0.7,
    n=10,
    extra_body={
        "response_format": {'type': 'json_object'},
    }
)

KeyboardInterrupt: 